luametatex-2.10.08/000077500000000000000000000000001442250314700140355ustar00rootroot00000000000000luametatex-2.10.08/CMakeLists.txt000066400000000000000000000167171442250314700166110ustar00rootroot00000000000000cmake_minimum_required(VERSION 3.9) project(luametatex VERSION 2.10 LANGUAGES C) set(CMAKE_C_STANDARD 11) # set(CMAKE_CXX_STANDARD 17) # https://sourceforge.net/p/predef/wiki/OperatingSystems/ # https://sourceforge.net/p/predef/wiki/Architectures/ include(GNUInstallDirs) # Optionals (maybe have a LMT_*_TOO for each of them). We might start out with only a very few # optionals at some time, but for now we enable them (there is not not much code involved). The # idea behind thes eoptionals is that we have very simple (!) interfaces, delegating as much as # possible to Lua. We will *not* add interfaces with many bindings because that will introduce # dependencies (and looking at e.g. LuaTeX build updates shows that clearly: a no-go). set(LMT_KPSE_TOO 1) # In case we want to manage MKII scripts (etc) with mtxrun. set(LMT_HB_TOO 1) # Maybe handy for Idris' font development (old converted ffi stuff) # When set, because we're sparse we also strip the binary. 
Because we only gain some 1-2% on # runtime, enabling it makes not much sense: # set(LMT_OPTIMIZE 1) if (MSVC) if (CMAKE_C_COMPILER_ID STREQUAL "Clang") add_compile_options( -Wall -O2 -Wcast-align -Wcast-qual -Wno-unknown-pragmas -fno-strict-aliasing -Wno-pedantic -Wno-deprecated-declarations -Wno-missing-noreturn -Wno-shadow ) add_definitions(-D_CRT_SECURE_NO_WARNINGS) add_definitions(-DLMT_COMPILER_USED="clang") else() add_compile_options( /Wall /wd4127 # constant conditional expression /wd4131 # old style declarator /wd4152 # function pointer cast /wd4201 # nonstandard extension used: nameless struct/union /wd4244 # assignment in conditional expression /wd4456 # local vars with same name as outer variable /wd4457 # local vars with same function parameter /wd4464 # relative include path /wd4668 # missing defines /wd4702 # unreachable code /wd4710 # inlining /wd4711 # inlining /wd4774 # sprint argument 2 warning /wd4777 # format argument 2 warning /wd4820 # local vars with same name as outer variable /wd4996 # strdup etc warnings /wd5045 # spectre # /GL # whole program link optimization # /Gw # whole program data optimization (a little smaller bin) # /Ob3 # more agressive inline, much larger bin, no gain /wd4061 # enumerator * in switch * is not explicitly handles (mp) /wd4701 # potentially unitialized local variable (lua) /wd4255 # no function prototype given /wd5105 # macro expansion producing 'defined' has undefined behavior /wd4548 # expression before comma has no effect; expected expression with side-effect # indeed a bit faster but also a much larger binary: # /fp:fast # okay for amd processors too but no difference in size so probably no gain: # /favor:INTEL64 # /fsanitize:address # /std:c17 ) # We always optimize ... symbols are not in the binary anyway so there is no advantage # (like when accessing Lua api functions). We could have an additional luametatex-lua.dll # but that also creates a dependency (possible conflict). 
# if (DEFINED LMT_OPTIMIZE) add_compile_options( /GL # whole program link optimization /Gw # whole program data optimization (a little smaller bin) ) # endif() add_definitions(-DLMT_COMPILER_USED="msvc") endif() else() if (CMAKE_C_COMPILER_ID STREQUAL "Clang") # why not -03 add_compile_options( -O2 ) add_definitions(-DLMT_COMPILER_USED="clang") else() add_compile_options( -O3 # -g0 # -mtune=nocona # fails on arm so more testing needed ) add_definitions(-DLMT_COMPILER_USED="gcc") # add_compile_options(-pg) # add_link_options(-pg) endif() add_compile_options( -Wall -Wcast-align -Wcast-qual -Wno-unknown-pragmas -Wno-unused-result -fno-strict-aliasing ) # for c17 # # add_definitions(-D__STDC_WANT_LIB_EXT2__=1) if (DEFINED LMT_OPTIMIZE) if (NOT (${CMAKE_SYSTEM_NAME} MATCHES "Darwin")) set(CMAKE_EXE_LINKER_FLAGS "-s") endif() endif() endif() if (CMAKE_C_COMPILER_ID STREQUAL "Clang") add_compile_options( -Wno-unknown-warning-option -Wno-nonportable-include-path -Wno-nonportable-system-include-path -Wno-newline-eof -Wno-extra-semi-stmt -Wno-sign-conversion -Wno-unused-macros -Wno-reserved-id-macro -Wno-comma -Wno-switch-enum -Wno-shadow -Wno-missing-noreturn -Wno-implicit-fallthrough # -Wno-format -Wno-reserved-identifier -Wno-date-time -Wno-format-nonliteral -Wno-float-equal ) endif() # Not that tested (converted ffi originals): if ((DEFINED LMT_KPSE_TOO)) add_definitions(-DLMT_KPSE_TOO=1) endif() if ((DEFINED LMT_HB_TOO)) add_definitions(-DLMT_HB_TOO=1) endif() # This needs cmake >= 3.9 and produces a 60K smaller mingw binary but it take quite a bit of # runtime to get there so it should become an option (apart from testing on all builders). 
if (DEFINED LMT_OPTIMIZE) include(CheckIPOSupported) check_ipo_supported(RESULT ipo_supported OUTPUT ipo_message) if (ipo_supported) # # We only have one program so we do it global (can become an -- option) # # set_property(TARGET luametatex PROPERTY INTERPROCEDURAL_OPTIMIZATION TRUE) # # mingw64: 2865664, nocona: 2819584, lto: 2835968 (around 1% gain on manual) # set(CMAKE_INTERPROCEDURAL_OPTIMIZATION TRUE) # else() # No message needed, just accept the fact. endif() endif() # Mimalloc is still under development, so we only support it on a few platforms. By the time it is # stable we can probably remove some of the following tests. A bit of a hack: # # When the old osx version is dropped and armhf is upgraded we can enable unix except solaris which # fails. So, only osx 10.6 and rpi 32 fail. But we will probably drop 32 bit in the future anyway. # CMAKE_HOST_SYSTEM_PROCESSOR arm64 x86_64 if (CMAKE_HOST_SOLARIS) # fails elseif (MSVC) set(luametatex_use_mimalloc 1) elseif (CMAKE_HOST_APPLE AND NOT (${CMAKE_C_COMPILER} MATCHES "arm")) # fails on the osx intel elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv7l") # fails on the rpi 32 bit else() set(luametatex_use_mimalloc 1) endif() include_directories(${CMAKE_ROOT}/source) include_directories(${CMAKE_CURRENT_SOURCE_DIR}/source) if ((DEFINED luametatex_use_mimalloc)) add_definitions(-DLUAMETATEX_USE_MIMALLOC=1) # add_definitions(-DMIMALLOC_RESET_DELAY=250) # set(luametatex_use_mimalloc 1) include(cmake/mimalloc.cmake) endif() include(cmake/tex.cmake) include(cmake/lua.cmake) include(cmake/mp.cmake) include(cmake/luarest.cmake) include(cmake/luasocket.cmake) include(cmake/luaoptional.cmake) include(cmake/pplib.cmake) include(cmake/miniz.cmake) include(cmake/softposit.cmake) include(cmake/luametatex.cmake) luametatex-2.10.08/CMakeSettings.json000066400000000000000000000052721442250314700174370ustar00rootroot00000000000000{ "configurations": [ { "name": "msvc-x64-debug", "generator": "Ninja", "configurationType": "Debug", 
"inheritEnvironments": [ "msvc_x64_x64" ], "buildRoot": "${projectDir}\\build\\${name}", "installRoot": "${projectDir}\\..\\install\\${name}", "cmakeCommandArgs": "", "buildCommandArgs": "-v", "ctestCommandArgs": "" }, { "name": "msvc-x64-release", "generator": "Ninja", "configurationType": "Release", "buildRoot": "${projectDir}\\build\\${name}", "installRoot": "${projectDir}\\..\\install\\${name}", "cmakeCommandArgs": "", "buildCommandArgs": "-v", "ctestCommandArgs": "", "inheritEnvironments": [ "msvc_x64_x64" ] }, { "name": "msvc-x64-clang", "generator": "Ninja", "configurationType": "Release", "buildRoot": "${projectDir}\\build\\${name}", "installRoot": "${projectDir}\\..\\install\\${name}", "cmakeCommandArgs": "", "buildCommandArgs": "-v", "ctestCommandArgs": "", "inheritEnvironments": [ "clang_cl_x64" ] }, { "name": "msvc-arm64-release", "generator": "Ninja", "configurationType": "Debug", "inheritEnvironments": [ "msvc_arm64_x64" ], "buildRoot": "${projectDir}\\build\\${name}", "installRoot": "${projectDir}\\..\\install\\${name}", "cmakeCommandArgs": "", "buildCommandArgs": "-v", "ctestCommandArgs": "" }, { "name": "wsl-gcc-release", "generator": "Ninja", "configurationType": "RelWithDebInfo", "buildRoot": "${projectDir}\\out\\build\\${name}", "installRoot": "${projectDir}\\out\\install\\${name}", "cmakeExecutable": "cmake", "cmakeCommandArgs": "", "buildCommandArgs": "", "ctestCommandArgs": "", "inheritEnvironments": [ "linux_x64" ], "wslPath": "${defaultWSLPath}", "addressSanitizerRuntimeFlags": "detect_leaks=0" }, { "name": "msvc-x86-release", "generator": "Ninja", "configurationType": "Release", "buildRoot": "${projectDir}\\build\\${name}", "installRoot": "${projectDir}\\..\\install\\${name}", "cmakeCommandArgs": "", "buildCommandArgs": "-v", "ctestCommandArgs": "", "inheritEnvironments": [ "msvc_x86_x64" ] } ] }luametatex-2.10.08/README.adoc000066400000000000000000000072731442250314700156330ustar00rootroot00000000000000== LuaMetaTeX This is a follow up on 
the LuaTeX project. The source is considered part of the ConTeXt distribution and managed by the ConTeXt development team and the ConTeXt user group. That way we can guarantee that the engine and this TeX macro package work together as expected. The idea is that users can easily compile the source themselves and that way have a guaranteed long term (minimal) TeX based installation. Because the management scripts are in Lua, only one binary is needed to serve the ConTeXt distribution. In the source code we try to stay close to the ancestors, LuaTeX, pdfTeX, eTeX and TeX, but in the meantime due to additions there is quite some diverge. There are new primitives and submechanisms, there is more control over the inner workings, font handling is mostly delegated to Lua and there is no built-in backend. The code base is all-inclusive and has no (nor will have) dependencies on external libraries. Performance and memory consumption have been optimized and the additions (compared to LuaTeX) don't have a negative impact. In spite of the extensions, we consider this a more lightweight version of its ancestor and want to keep it that way. There are a few optional interfaces to the outside world but ConTeXt will never depend on them for regular usage. Version numbering starts 2.00 so that there is no confusion with LuaTeX where the stable 1.00+ version numbers now can bump with the yearly TeXlive updates. Backporting features to LuaTeX is yet undecided (also because the codebase is now too different). The internal MetaPost library is an enhanced version of the official one that ships with LuaTeX. Here we started with number 3.00 and to what extend there will be backports to the 2.00+ library is yet unknown. We use Lua 5.4+ and try to keep up to date with the latest greatest, also because the LuaMetaTeX-ConTeXt combination makes for nice test enviroment. Starting with LuaMetaTeX version 2.10 the interfaces are considered stable. 
Although we will fix bugs as fast as possible, we might end up with the same slow-paced release cycle as traditional TeX. Bugs can be reported to the ConTeXt mailing lists (support or development) or the team members. Such a report should come with a (MEW) ConTeXt .tex file that illustrates the issue. The code repository is managed by the ConTeXt user group and its compile farm team (aka contextgarden built bot). * reference implementation : https://github.com/contextgarden/context.git * compile farm subset : https://github.com/contextgarden/luametatex.git * unstable work copy : https://github.com/contextgarden/luametatex/tree/work The first repository has what we use in ConTeXt. The second normally has the same code and is used by the compile farm. The third repository is (can) be used by us for testing new features and playing around with the code. That one is not to be used because it can have issues and we don't run the test suite every time we submit; code can come and go. Accumulated patches and additions in work become a combined push into main after which we just wipe work and make a new clone which will then be used by the compile farm. Normally that also comes with incrementing the version number. The reason why work is not kept local is because that way we can check if the compilation works, for which it has to be public so that the compile farm can access it conveniently. All kind of aspects of the LuaMetaTeX engine, its development, experiment, ideas and implementation have been (and are) reported in user group publications aas well as several documents in the ConTeXt distribution. The mailing list archives and contextgarden wiki are also a source of information. Hans Hagen image::luametatex.svg[LuaMetaTeX,100%] luametatex-2.10.08/build.cmd000066400000000000000000000052131442250314700156220ustar00rootroot00000000000000rem When something fails, make sure to remove the cmake cache. 
When compile from rem the Visual Studio environment mixed with compiling from the command line rem some confusion can occur. setlocal @echo . @echo supported flags : --arm64 --x64 --x86 --intel64 --intel86 @echo . set luametatexsources=%~dp0 set luametatexplatform=x64 set msvcplatform=x64 for %%G in (%*) do ( if [%%G] == [--arm64] ( set luametatexplatform=arm64 set msvcplatform=x86_arm64 ) if [%%G] == [--intel64] ( set luametatexplatform=x64 set msvcplatform=amd64 ) if [%%G] == [--intel86] ( set luametatexplatform=x86 set msvcplatform=x86_amd64 ) if [%%G] == [--x64] ( set luametatexplatform=x64 set msvcplatform=amd64 ) if [%%G] == [--x86] ( set luametatexplatform=x86 set msvcplatform=x86_amd64 ) ) set visualstudiopath=c:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Auxiliary\Build set luametatexbuildpath=msvc-cmd-%luametatexplatform%-release @echo . @echo luametatexplatform : %luametatexplatform% @echo msvcplatform : %msvcplatform% @echo visualstudiopath : %visualstudiopath% @echo luametatexbuildpath : %luametatexbuildpath% @echo . mkdir build chdir build rmdir /S /Q %luametatexbuildpath% mkdir %luametatexbuildpath% chdir %luametatexbuildpath% call "%visualstudiopath%\vcvarsall.bat" %msvcplatform% cmake ../.. cmake --build . --config Release --parallel 8 cd .. cd .. dir build\%luametatexbuildpath%\Release\luametatex.exe @echo . @echo tex trees: @echo . @echo resources like public fonts : tex/texmf/.... @echo the context macro package : tex/texmf-context/.... @echo the luametatex binary : tex/texmf-win64/bin/... @echo optional third party modules : tex/texmf-context/.... @echo fonts installed by the user : tex/texmf-fonts/fonts/data/.... @echo styles made by the user : tex/texmf-projects/tex/context/user/.... @echo . @echo binaries: @echo . 
@echo tex/texmf-win64/bin/luametatex.exe : the compiled binary (some 2-3MB) @echo tex/texmf-win64/bin/mtxrun.exe : copy of or link to luametatex.exe @echo tex/texmf-win64/bin/context.exe : copy of or link to luametatex.exe @echo tex/texmf-win64/bin/mtxrun.lua : copy of tex/texmf-context/scripts/context/lua/mtxrun.lua @echo tex/texmf-win64/bin/context.lua : copy of tex/texmf-context/scripts/context/lua/context.lua @echo . @echo commands: @echo . @echo mtxrun --generate : create file database @echo mtxrun --script fonts --reload : create font database @echo mtxrun --autogenerate context ... : run tex file (e.g. from editor) @echo . endlocal luametatex-2.10.08/build.sh000066400000000000000000000062561442250314700155010ustar00rootroot00000000000000# The official designated locations are: # # luametatex[.exe] # mtxrun[.exe] -> luametatex[.exe] # mtxrun.lua (latest version) # context.lua (latest version) # This test is not yet okay but I have no time (or motivation) to look into it now, so for now we don't # use ninja (not that critical). #NINJA=$(which ninja); #if (NINJA) then # NINJA="-G Ninja" #else NINJA="" #fi if [ "$1" = "mingw-64" ] || [ "$1" = "mingw64" ] || [ "$1" = "mingw" ] || [ "$1" == "--mingw64" ] then PLATFORM="win64" SUFFIX=".exe" mkdir -p build/mingw-64 cd build/mingw-64 cmake $NINJA -DCMAKE_TOOLCHAIN_FILE=./cmake/mingw-64.cmake ../.. elif [ "$1" = "mingw-32" ] || [ "$1" = "mingw32" ] || [ "$1" == "--mingw32" ] then PLATFORM="mswin" SUFFIX=".exe" mkdir -p build/mingw-32 cd build/mingw-32 cmake $NINJA -DCMAKE_TOOLCHAIN_FILE=./cmake/mingw-32.cmake ../.. elif [ "$1" = "mingw-64-ucrt" ] || [ "$1" = "mingw64ucrt" ] || [ "$1" = "--mingw64ucrt" ] || [ "$1" = "ucrt" ] || [ "$1" = "--ucrt" ] then PLATFORM="win64" SUFFIX=".exe" mkdir -p build/mingw-64-ucrt cd build/mingw-64-ucrt cmake $NINJA -DCMAKE_TOOLCHAIN_FILE=./cmake/mingw-64-ucrt.cmake ../.. 
elif [ "$1" = "cygwin" ] || [ "$1" = "--cygwin" ] then PLATFORM="cygwin" SUFFIX=".exe" mkdir -p build/cygwin cd build/cygwin cmake $NINJA ../.. else PLATFORM="native" SUFFIX=" " mkdir -p build/native cd build/native cmake $NINJA ../.. fi #~ make -j8 cmake --build . --parallel 8 echo "" echo "tex trees" echo "" echo "resources like public fonts : tex/texmf/...." echo "the context macro package : tex/texmf-context/...." echo "the luametatex binary : tex/texmf-$PLATFORM/bin/..." echo "optional third party modules : tex/texmf-context/...." echo "fonts installed by the user : tex/texmf-fonts/fonts/data/...." echo "styles made by the user : tex/texmf-projects/tex/context/user/...." echo "" echo "binaries:" echo "" echo "tex/texmf-/bin/luametatex$SUFFIX : the compiled binary (some 2-3MB)" echo "tex/texmf-/bin/mtxrun$SUFFIX : copy of or link to luametatex" echo "tex/texmf-/bin/context$SUFFIX : copy of or link to luametatex" echo "tex/texmf-/bin/mtxrun.lua : copy of tex/texmf-context/scripts/context/lua/mtxrun.lua" echo "tex/texmf-/bin/context.lua : copy of tex/texmf-context/scripts/context/lua/context.lua" echo "" echo "commands:" echo "" echo "mtxrun --generate : create file database" echo "mtxrun --script fonts --reload : create font database" echo "mtxrun --autogenerate context ... : run tex file (e.g. from editor)" echo "" luametatex-2.10.08/build.txt000066400000000000000000000040431442250314700156760ustar00rootroot00000000000000Hi, The build script produce efficient static binaries with only a couple of system libraries as dependency. ConTeXt will not depend on anything else than provided here. Lua is the extension language to be used and that has worked well for quite a while now. The build script that is provided will compile under ./build so you might want to make a copy of the source tree to a suitable place that you can wipe after the job is done. The script accepts only a few command line arguments. 
build.sh : --native build/native meant for unix (linux, freebsd, openbsd, osx, arm) --mingw-32 build/mingw-32 meant for 32 bit windows (crosscompiled) --mingw-64 build/mingw-64 meant for 64 bit windows (crosscompiled) I develop LuaMetaTeX on Windows and use WLS (with OpenSuse) for cross compilation as well as native Linux binaries. Editing is done in Visual Studio with the exception of the MetaPost CWeb files for which I use SciTE. Because we use CMake, you can compile using the MSVC compiler as well as CLang. Currently the MingW crosscompiled binaries are slightly faster, next come the native ones, but till now CLang lags behind. The native compiler produces the smallest binaries and compiles fastest. build.cmd : --x64 build/msvc-cmd-x64 meant for 64 bit windows using intel/amd chips --x32 build/msvc-cmd-x86 meant for 32 bit windows using intel/amd chips --arm64 build/msvc-cmd-arm64 meant for 64 bit windows using arm chips Alternatively you can run a build job from Visual Studio. Of course it only works well if you have the right compilers installed which is easy to do from the user interface. All settings happen in CMakeLists.txt so you have to load that one. Support for LuaMetaTeX and ConTeXt is provided at the (dev-)context mailing lists and at the ConTeXt Wiki. Binaries are available at: https://build.contextgarden.net/#/waterfall?tags=c.luametatex https://dl.contextgarden.net/build/luametatex The first link shows the status, the second link is where the binaries can be downloaded. Hans Hagen luametatex-2.10.08/cmake/000077500000000000000000000000001442250314700151155ustar00rootroot00000000000000luametatex-2.10.08/cmake/debug.cmake000066400000000000000000000005531442250314700172100ustar00rootroot00000000000000# When we run valgrind we need verbose binaries: # # valgrind -v --track-origins=yes --leak-check=full context ... 
# add_compile_options(-pg) # set(CMAKE_EXE_LINKER_FLAGS "-pg") # In addition to the microsoft compiler alignment suggestions we can run on linux: # # pahole luametatex # add_compile_options(-p -gdwarf) # set(CMAKE_EXE_LINKER_FLAGS "-p -gdwarf") luametatex-2.10.08/cmake/lua.cmake000066400000000000000000000051561442250314700167070ustar00rootroot00000000000000set(lua_sources source/luacore/lua54/src/lapi.c source/luacore/lua54/src/lauxlib.c source/luacore/lua54/src/lbaselib.c source/luacore/lua54/src/lcode.c source/luacore/lua54/src/lcorolib.c source/luacore/lua54/src/lctype.c source/luacore/lua54/src/ldblib.c source/luacore/lua54/src/ldebug.c source/luacore/lua54/src/ldo.c source/luacore/lua54/src/ldump.c source/luacore/lua54/src/lfunc.c source/luacore/lua54/src/lgc.c source/luacore/lua54/src/linit.c source/luacore/lua54/src/liolib.c source/luacore/lua54/src/llex.c source/luacore/lua54/src/lmathlib.c source/luacore/lua54/src/lmem.c source/luacore/lua54/src/loadlib.c source/luacore/lua54/src/lobject.c source/luacore/lua54/src/lopcodes.c source/luacore/lua54/src/loslib.c source/luacore/lua54/src/lparser.c source/luacore/lua54/src/lstate.c source/luacore/lua54/src/lstring.c source/luacore/lua54/src/lstrlib.c source/luacore/lua54/src/ltable.c source/luacore/lua54/src/ltablib.c source/luacore/lua54/src/ltm.c source/luacore/lua54/src/lua.c source/luacore/lua54/src/lundump.c source/luacore/lua54/src/lutf8lib.c source/luacore/lua54/src/lvm.c source/luacore/lua54/src/lzio.c source/luacore/luapeg/lptree.c source/luacore/luapeg/lpvm.c source/luacore/luapeg/lpprint.c source/luacore/luapeg/lpcap.c source/luacore/luapeg/lpcode.c ) add_library(lua STATIC ${lua_sources}) set_property(TARGET lua PROPERTY C_STANDARD 99) target_include_directories(lua PRIVATE source/luacore/lua54/src source/luacore/luapeg ) # luajit: 8000, lua 5.3: 1000000 or 15000 target_compile_definitions(lua PUBLIC # This one should also be set in the lua namespace! 
# LUAI_HASHLIMIT=6 # obsolete # LUAI_MAXSHORTLEN=48 LUAI_MAXCSTACK=6000 LUA_UCID # LUA_USE_JUMPTABLE=0 LPEG_DEBUG # LUA_NOCVTS2N # LUA_NOBUILTIN # disable likely usage # LUAI_ASSERT # LUA_STRFTIMEOPTIONS="aAbBcCdDeFgGhHIjmMnprRStTuUVwWxXyYzZ%" # MINSTRTABSIZE=65536 ) if (UNIX) target_compile_definitions(lua PUBLIC LUA_USE_POSIX LUA_USE_DLOPEN ) endif (UNIX) if (NOT MSVC) target_compile_options(lua PRIVATE -Wno-cast-align -Wno-cast-qual ) endif (NOT MSVC) # if (CMAKE_HOST_APPLE) # target_compile_definitions(lua PUBLIC # TARGET_OS_IOS=0 # TARGET_OS_WATCH=0 # TARGET_OS_TV=0 # ) # endif (CMAKE_HOST_APPLE) # this seems to be ok for mingw default # # todo: what is the right way to increase the stack (mingw) # target_compile_options(lua PRIVATE -DLUAI_MAXCSTACK=65536 -Wl,--stack,16777216) luametatex-2.10.08/cmake/luametatex.cmake000066400000000000000000000033241442250314700202720ustar00rootroot00000000000000add_compile_options(-DLUA_CORE) set(luametatex_sources source/luametatex.c ) add_executable(luametatex ${luametatex_sources}) target_include_directories(luametatex PRIVATE . source/. 
source/luacore/lua54/src ) target_link_libraries(luametatex tex lua mp luarest luasocket luaoptional pplib miniz softposit ) if (LUAMETATEX_NOLDL) # mingw ucrt else() target_link_libraries(luametatex ${CMAKE_DL_LIBS} ) endif() install(TARGETS luametatex EXPORT luametatex RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT luametatex_runtime ) if (luametatex_use_mimalloc) target_include_directories(luametatex PRIVATE source/libraries/mimalloc/include ) target_link_libraries(luametatex mimalloc ) if (LUAMETATEX_MINGW) target_link_libraries(luametatex --static) target_link_libraries(luametatex pthread psapi bcrypt ) elseif (NOT MSVC) target_link_libraries(luametatex pthread ) endif() endif() if (NOT MSVC) target_link_libraries(luametatex m ) endif() if (${CMAKE_HOST_SOLARIS}) target_link_libraries(luametatex rt socket nsl resolv ) endif() if (DEFINED LMT_OPTIMIZE) # we strip anyway elseif (CMAKE_HOST_SOLARIS) # no strip elseif (CMAKE_C_COMPILER_ID MATCHES "GNU") # -g -S -d : remove all debugging symbols & sections # -x : remove all non-global symbols # -X : remove any compiler-generated symbols add_custom_command(TARGET luametatex POST_BUILD COMMAND ${CMAKE_STRIP} -g -S -d -x luametatex${CMAKE_EXECUTABLE_SUFFIX}) endif() luametatex-2.10.08/cmake/luaoptional.cmake000066400000000000000000000014141442250314700204460ustar00rootroot00000000000000set(luaoptional_sources source/luaoptional/lmtsqlite.c source/luaoptional/lmtmysql.c source/luaoptional/lmtpostgress.c source/luaoptional/lmtcurl.c source/luaoptional/lmtghostscript.c source/luaoptional/lmtimagemagick.c source/luaoptional/lmtgraphicsmagick.c source/luaoptional/lmtzint.c source/luaoptional/lmtmujs.c source/luaoptional/lmtlzo.c source/luaoptional/lmtlz4.c source/luaoptional/lmtkpse.c source/luaoptional/lmthb.c source/luaoptional/lmtzstd.c source/luaoptional/lmtlzma.c source/luaoptional/lmtforeign.c ) add_library(luaoptional STATIC ${luaoptional_sources}) target_include_directories(luaoptional PRIVATE . source/. 
source/luacore/lua54/src source/libraries/mimalloc/include ) luametatex-2.10.08/cmake/luarest.cmake000066400000000000000000000016441442250314700176030ustar00rootroot00000000000000# The cerf library is actually optional but for now we compile it with the # rest because the complex interfaces are different per platform. There is # not that much code involved. But, anyway, at some point it might become # a real optional module in which case the following will change. set(luarest_sources source/luaoptional/lmtcerflib.c source/libraries/libcerf/erfcx.c source/libraries/libcerf/err_fcts.c source/libraries/libcerf/im_w_of_x.c source/libraries/libcerf/w_of_z.c source/libraries/libcerf/width.c ) add_library(luarest STATIC ${luarest_sources}) target_include_directories(luarest PRIVATE source/libraries/libcerf source/luacore/lua54/src ) # only when all ok, to avoid messages include(CheckCCompilerFlag) CHECK_C_COMPILER_FLAG("-Wno-discarded-qualifiers" limited_support) if (limited_support) target_compile_options(luarest PRIVATE -Wno-discarded-qualifiers) endif() luametatex-2.10.08/cmake/luasocket.cmake000066400000000000000000000032531442250314700201140ustar00rootroot00000000000000set(luasocket_sources source/luacore/luasocket/src/auxiliar.c source/luacore/luasocket/src/buffer.c source/luacore/luasocket/src/compat.c source/luacore/luasocket/src/except.c source/luacore/luasocket/src/inet.c source/luacore/luasocket/src/io.c source/luacore/luasocket/src/luasocket.c source/luacore/luasocket/src/mime.c source/luacore/luasocket/src/options.c source/luacore/luasocket/src/select.c source/luacore/luasocket/src/socket.c source/luacore/luasocket/src/tcp.c source/luacore/luasocket/src/timeout.c source/luacore/luasocket/src/udp.c # source/luacore/luasocket/src/serial.c # source/luacore/luasocket/src/usocket.c # source/luacore/luasocket/src/wsocket.c # source/luacore/luasec/src/config.c # source/luacore/luasec/src/options.c # source/luacore/luasec/src/ec.c # source/luacore/luasec/src/x509.c # 
source/luacore/luasec/src/context.c # source/luacore/luasec/src/ssl.c ) add_library(luasocket STATIC ${luasocket_sources}) target_include_directories(luasocket PRIVATE source/luacore/luasocket # source/luacore/luasec # source/luacore/luasec/src source/luacore/lua54/src ) if (NOT MSVC) target_compile_options(luasocket PRIVATE -Wno-cast-qual -Wno-cast-align ) endif() if (WIN32) target_link_libraries(luasocket PRIVATE wsock32 ws2_32 ) endif() # It seems to depend on the mingw installation: if (__MINGW64_TOOLCHAIN_) target_compile_definitions(luasocket PRIVATE LUASOCKET_INET_PTON ) endif() if (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") target_compile_definitions(luasocket PRIVATE LUASOCKET_INET_PTON ) endif() luametatex-2.10.08/cmake/mimalloc.cmake000066400000000000000000000024371442250314700177220ustar00rootroot00000000000000include("source/libraries/mimalloc/cmake/mimalloc-config-version.cmake") set(mimalloc_sources source/libraries/mimalloc/src/alloc.c source/libraries/mimalloc/src/alloc-aligned.c source/libraries/mimalloc/src/alloc-posix.c source/libraries/mimalloc/src/arena.c source/libraries/mimalloc/src/bitmap.c source/libraries/mimalloc/src/heap.c source/libraries/mimalloc/src/init.c source/libraries/mimalloc/src/options.c source/libraries/mimalloc/src/os.c source/libraries/mimalloc/src/page.c source/libraries/mimalloc/src/random.c source/libraries/mimalloc/src/segment.c source/libraries/mimalloc/src/segment-map.c source/libraries/mimalloc/src/stats.c source/libraries/mimalloc/src/prim/prim.c ) set(mi_cflags "") set(mi_libraries "") add_library(mimalloc STATIC ${mimalloc_sources}) # set(CMAKE_C_STANDARD 11) # set(CMAKE_CXX_STANDARD 17) target_include_directories(mimalloc PRIVATE source/libraries/mimalloc source/libraries/mimalloc/src source/libraries/mimalloc/prim source/libraries/mimalloc/include ) target_compile_definitions(mimalloc PRIVATE MIMALLOC_LARGE_OS_PAGES=1 MI_DEBUG=0 MI_SECURE=0 ) if (NOT MSVC) target_compile_options(mimalloc PRIVATE -Wno-cast-align 
-Wno-cast-qual ) endif () luametatex-2.10.08/cmake/mingw-32.cmake000066400000000000000000000005001442250314700174550ustar00rootroot00000000000000# if (NOT __MINGW64_TOOLCHAIN_) # add_compile_options(-DLUASOCKET_INET_PTON) # endif() set(CMAKE_SYSTEM_NAME Windows) set(TOOLCHAIN_PREFIX i686-w64-mingw32) set(CMAKE_C_COMPILER ${TOOLCHAIN_PREFIX}-gcc) add_compile_options(-mtune=nocona) set(LUAMETATEX_MINGW 32) # set(CMAKE_EXE_LINKER_FLAGS "-static-libgcc") luametatex-2.10.08/cmake/mingw-64-ucrt.cmake000066400000000000000000000005361442250314700204460ustar00rootroot00000000000000# if (NOT __MINGW64_TOOLCHAIN_) # add_compile_options(-DLUASOCKET_INET_PTON) # endif() set(CMAKE_SYSTEM_NAME Windows) set(TOOLCHAIN_PREFIX x86_64-w64-mingw32ucrt) set(CMAKE_C_COMPILER ${TOOLCHAIN_PREFIX}-gcc) add_compile_options(-mtune=nocona) set(LUAMETATEX_MINGW 64) set(LUAMETATEX_NOLDL 1) # set(CMAKE_EXE_LINKER_FLAGS "-static-libgcc") luametatex-2.10.08/cmake/mingw-64.cmake000066400000000000000000000005021442250314700174640ustar00rootroot00000000000000# if (NOT __MINGW64_TOOLCHAIN_) # add_compile_options(-DLUASOCKET_INET_PTON) # endif() set(CMAKE_SYSTEM_NAME Windows) set(TOOLCHAIN_PREFIX x86_64-w64-mingw32) set(CMAKE_C_COMPILER ${TOOLCHAIN_PREFIX}-gcc) add_compile_options(-mtune=nocona) set(LUAMETATEX_MINGW 64) # set(CMAKE_EXE_LINKER_FLAGS "-static-libgcc") luametatex-2.10.08/cmake/miniz.cmake000066400000000000000000000005261442250314700172500ustar00rootroot00000000000000set(miniz_sources source/libraries/miniz/miniz.c ) add_library(miniz STATIC ${miniz_sources}) target_compile_definitions(miniz PUBLIC MINIZ_NO_ARCHIVE_APIS=1 MINIZ_NO_STDIO=1 MINIZ_NO_MALLOC=1 ) if (NOT MSVC) target_compile_options(miniz PRIVATE -Wno-cast-align -Wno-cast-qual ) endif (NOT MSVC) luametatex-2.10.08/cmake/mp.cmake000066400000000000000000000022521442250314700165340ustar00rootroot00000000000000set(mp_sources source/mp/mpc/mp.c source/mp/mpc/mpstrings.c source/mp/mpc/mpmath.c source/mp/mpc/mpmathdouble.c 
source/mp/mpc/mpmathbinary.c source/mp/mpc/mpmathdecimal.c source/mp/mpc/mpmathposit.c source/libraries/decnumber/decContext.c source/libraries/decnumber/decNumber.c source/libraries/avl/avl.c source/lua/lmtmplib.c source/luarest/lmtxdecimallib.c ) add_library(mp STATIC ${mp_sources}) target_include_directories(mp PRIVATE . source/. source/mp/mpc source/luacore/lua54/src source/libraries/avl source/libraries/decnumber source/utilities source/libraries/mimalloc/include source/libraries/softposit/source/include ) target_compile_definitions(mp PUBLIC DECUSE64=1 # DECCHECK=1 # DECBUFFER=512 DECNUMDIGITS=1000 ) if (CMAKE_C_COMPILER_ID STREQUAL "Clang") target_compile_options(mp PRIVATE -Wno-unreachable-code-break ) endif() if (NOT MSVC) target_compile_options(mp PRIVATE -Wno-unused-parameter -Wno-sign-compare -Wno-cast-qual -Wno-cast-align # for decnumber with lto -fno-strict-aliasing ) endif() luametatex-2.10.08/cmake/pplib.cmake000066400000000000000000000023631442250314700172310ustar00rootroot00000000000000set(pplib_sources source/libraries/pplib/pparray.c source/libraries/pplib/ppcrypt.c source/libraries/pplib/ppdict.c source/libraries/pplib/ppheap.c source/libraries/pplib/ppload.c source/libraries/pplib/ppstream.c source/libraries/pplib/ppxref.c source/libraries/pplib/util/utilbasexx.c source/libraries/pplib/util/utilcrypt.c source/libraries/pplib/util/utilflate.c source/libraries/pplib/util/utilfpred.c source/libraries/pplib/util/utiliof.c source/libraries/pplib/util/utillog.c source/libraries/pplib/util/utillzw.c source/libraries/pplib/util/utilmd5.c source/libraries/pplib/util/utilmem.c source/libraries/pplib/util/utilmemheap.c source/libraries/pplib/util/utilmemheapiof.c source/libraries/pplib/util/utilmeminfo.c source/libraries/pplib/util/utilnumber.c source/libraries/pplib/util/utilsha.c ) add_library(pplib STATIC ${pplib_sources}) if (NOT MSVC) target_compile_options(pplib PRIVATE -Wno-missing-declarations ) endif (NOT MSVC) target_include_directories(pplib 
PRIVATE source/libraries/pplib source/libraries/pplib/util source/libraries/zlib source/libraries/miniz source/utilities/auxmemory source/utilities/auxzlib ) luametatex-2.10.08/cmake/softposit.cmake000066400000000000000000000131121442250314700201470ustar00rootroot00000000000000set(softposit_sources # source/libraries/softposit/source/s_addMagsP8.c # source/libraries/softposit/source/s_subMagsP8.c # source/libraries/softposit/source/s_mulAddP8.c # source/libraries/softposit/source/p8_add.c # source/libraries/softposit/source/p8_sub.c # source/libraries/softposit/source/p8_mul.c # source/libraries/softposit/source/p8_div.c # source/libraries/softposit/source/p8_sqrt.c # source/libraries/softposit/source/p8_to_p16.c # source/libraries/softposit/source/p8_to_p32.c # source/libraries/softposit/source/p8_to_pX2.c # source/libraries/softposit/source/p8_to_i32.c # source/libraries/softposit/source/p8_to_i64.c # source/libraries/softposit/source/p8_to_ui32.c # source/libraries/softposit/source/p8_to_ui64.c # source/libraries/softposit/source/p8_roundToInt.c # source/libraries/softposit/source/p8_mulAdd.c # source/libraries/softposit/source/p8_eq.c # source/libraries/softposit/source/p8_le.c # source/libraries/softposit/source/p8_lt.c # source/libraries/softposit/source/quire8_fdp_add.c # source/libraries/softposit/source/quire8_fdp_sub.c # source/libraries/softposit/source/ui32_to_p8.c # source/libraries/softposit/source/ui64_to_p8.c # source/libraries/softposit/source/i32_to_p8.c # source/libraries/softposit/source/i64_to_p8.c # source/libraries/softposit/source/s_addMagsP16.c # source/libraries/softposit/source/s_subMagsP16.c # source/libraries/softposit/source/s_mulAddP16.c # source/libraries/softposit/source/p16_to_ui32.c # source/libraries/softposit/source/p16_to_ui64.c # source/libraries/softposit/source/p16_to_i32.c # source/libraries/softposit/source/p16_to_i64.c # source/libraries/softposit/source/p16_to_p8.c # source/libraries/softposit/source/p16_to_p32.c # 
source/libraries/softposit/source/p16_to_pX2.c # source/libraries/softposit/source/p16_roundToInt.c # source/libraries/softposit/source/p16_add.c # source/libraries/softposit/source/p16_sub.c # source/libraries/softposit/source/p16_mul.c # source/libraries/softposit/source/p16_mulAdd.c # source/libraries/softposit/source/p16_div.c # source/libraries/softposit/source/p16_eq.c # source/libraries/softposit/source/p16_le.c # source/libraries/softposit/source/p16_lt.c # source/libraries/softposit/source/p16_sqrt.c # source/libraries/softposit/source/quire16_fdp_add.c # source/libraries/softposit/source/quire16_fdp_sub.c # source/libraries/softposit/source/quire_helper.c # source/libraries/softposit/source/ui32_to_p16.c # source/libraries/softposit/source/ui64_to_p16.c # source/libraries/softposit/source/i32_to_p16.c # source/libraries/softposit/source/i64_to_p16.c source/libraries/softposit/source/s_addMagsP32.c source/libraries/softposit/source/s_subMagsP32.c source/libraries/softposit/source/s_mulAddP32.c source/libraries/softposit/source/p32_to_ui32.c source/libraries/softposit/source/p32_to_ui64.c source/libraries/softposit/source/p32_to_i32.c source/libraries/softposit/source/p32_to_i64.c # source/libraries/softposit/source/p32_to_p8.c # source/libraries/softposit/source/p32_to_p16.c ##source/libraries/softposit/source/p32_to_pX2.c source/libraries/softposit/source/p32_roundToInt.c source/libraries/softposit/source/p32_add.c source/libraries/softposit/source/p32_sub.c source/libraries/softposit/source/p32_mul.c source/libraries/softposit/source/p32_mulAdd.c source/libraries/softposit/source/p32_div.c source/libraries/softposit/source/p32_eq.c source/libraries/softposit/source/p32_le.c source/libraries/softposit/source/p32_lt.c source/libraries/softposit/source/p32_sqrt.c ##source/libraries/softposit/source/quire32_fdp_add.c ##source/libraries/softposit/source/quire32_fdp_sub.c source/libraries/softposit/source/ui32_to_p32.c 
source/libraries/softposit/source/ui64_to_p32.c source/libraries/softposit/source/i32_to_p32.c source/libraries/softposit/source/i64_to_p32.c source/libraries/softposit/source/s_approxRecipSqrt_1Ks.c # source/libraries/softposit/source/c_convertDecToPosit8.c # source/libraries/softposit/source/c_convertPosit8ToDec.c # source/libraries/softposit/source/c_convertDecToPosit16.c # source/libraries/softposit/source/c_convertPosit16ToDec.c # source/libraries/softposit/source/c_convertQuire8ToPosit8.c # source/libraries/softposit/source/c_convertQuire16ToPosit16.c ##source/libraries/softposit/source/c_convertQuire32ToPosit32.c source/libraries/softposit/source/c_convertDecToPosit32.c source/libraries/softposit/source/c_convertPosit32ToDec.c source/libraries/softposit/source/c_int.c ##source/libraries/softposit/source/s_addMagsPX2.c ##source/libraries/softposit/source/s_subMagsPX2.c ##source/libraries/softposit/source/s_mulAddPX2.c ##source/libraries/softposit/source/pX2_add.c ##source/libraries/softposit/source/pX2_sub.c ##source/libraries/softposit/source/pX2_mul.c ##source/libraries/softposit/source/pX2_div.c ##source/libraries/softposit/source/pX2_mulAdd.c ##source/libraries/softposit/source/pX2_roundToInt.c ##source/libraries/softposit/source/pX2_sqrt.c ##source/libraries/softposit/source/pX2_eq.c ##source/libraries/softposit/source/pX2_le.c ##source/libraries/softposit/source/pX2_lt.c ##source/libraries/softposit/source/ui32_to_pX2.c # source/libraries/softposit/source/ui64_to_pX2.c ##source/libraries/softposit/source/i32_to_pX2.c # source/libraries/softposit/source/i64_to_pX2.c ##source/libraries/softposit/source/c_convertQuireX2ToPositX2.c ) add_library(softposit STATIC ${softposit_sources}) target_include_directories(softposit PRIVATE source/libraries/softposit/source source/libraries/softposit/source/include source/libraries/softposit/build/Linux-x86_64-GCC ) target_compile_options(softposit PRIVATE -DSOFTPOSIT_FAST_INT64 
)luametatex-2.10.08/cmake/tex.cmake000066400000000000000000000051471442250314700167260ustar00rootroot00000000000000set(tex_sources source/utilities/auxmemory.c source/utilities/auxzlib.c source/utilities/auxsparsearray.c source/utilities/auxsystem.c source/utilities/auxunistring.c source/utilities/auxfile.c source/utilities/auxposit.c source/libraries/hnj/hnjhyphen.c source/lua/lmtinterface.c source/lua/lmtlibrary.c source/lua/lmtcallbacklib.c source/lua/lmtlanguagelib.c source/lua/lmtlualib.c source/lua/lmtluaclib.c source/lua/lmttexiolib.c source/lua/lmttexlib.c source/lua/lmttokenlib.c source/lua/lmtnodelib.c source/lua/lmtenginelib.c source/lua/lmtfontlib.c source/lua/lmtstatuslib.c source/luaoptional/lmtoptional.c source/luarest/lmtfilelib.c source/luarest/lmtpdfelib.c source/luarest/lmtiolibext.c source/luarest/lmtoslibext.c source/luarest/lmtstrlibext.c source/luarest/lmtdecodelib.c source/luarest/lmtsha2lib.c source/luarest/lmtmd5lib.c source/luarest/lmtaeslib.c source/luarest/lmtbasexxlib.c source/luarest/lmtxmathlib.c source/luarest/lmtxcomplexlib.c source/luarest/lmtziplib.c source/luarest/lmtsparselib.c source/luarest/lmtposit.c source/tex/texalign.c source/tex/texarithmetic.c source/tex/texbuildpage.c source/tex/texcommands.c source/tex/texconditional.c source/tex/texdirections.c source/tex/texdumpdata.c source/tex/texequivalents.c source/tex/texerrors.c source/tex/texexpand.c source/tex/texmarks.c source/tex/texinputstack.c source/tex/texinserts.c source/tex/texadjust.c source/tex/texlinebreak.c source/tex/texlocalboxes.c source/tex/texmainbody.c source/tex/texmaincontrol.c source/tex/texmathcodes.c source/tex/texmlist.c source/tex/texnesting.c source/tex/texpackaging.c source/tex/texprimitive.c source/tex/texprinting.c source/tex/texscanning.c source/tex/texstringpool.c source/tex/textypes.c source/tex/texfont.c source/tex/texlanguage.c source/tex/texfileio.c source/tex/texmath.c source/tex/texnodes.c source/tex/textextcodes.c source/tex/textoken.c 
source/tex/texrules.c ) add_library(tex STATIC ${tex_sources}) target_compile_definitions(tex PUBLIC # LUAI_HASHLIMIT=6 # obsolete ZLIB_CONST=1 MINIZ_NO_ARCHIVE_APIS=1 MINIZ_NO_STDIO=1 MINIZ_NO_MALLOC=1 ) target_include_directories(tex PRIVATE . source/. source/libraries/miniz source/libraries/pplib source/libraries/pplib/util source/luacore/lua54/src source/libraries/mimalloc/include source/libraries/softposit/source/include ) luametatex-2.10.08/luametatex.svg000066400000000000000000175263301442250314700167500ustar00rootroot00000000000000 luametatex-2.10.08/source/000077500000000000000000000000001442250314700153355ustar00rootroot00000000000000luametatex-2.10.08/source/.gitignore000066400000000000000000000000231442250314700173200ustar00rootroot00000000000000.vs .log .tuc .tmp luametatex-2.10.08/source/libraries/000077500000000000000000000000001442250314700173115ustar00rootroot00000000000000luametatex-2.10.08/source/libraries/avl/000077500000000000000000000000001442250314700200735ustar00rootroot00000000000000luametatex-2.10.08/source/libraries/avl/avl.c000066400000000000000000001632061442250314700210310ustar00rootroot00000000000000/* This small C package is made of an independent set of routines dedicated to manipulating AVL trees (files avl.c, avl.h), and of an extension module for Python that builds upon it (file avlmodule.c). Unlike collectionsmodule.c, the latter file contains only Python bindings: it adds nothing to the underlying implementation. license: this package, pyavl, is donated to the public domain author : Richard McGraw Email : dasnar@fastmail.fm */ /* This file is reformatted a little. As there has not been any changed in the original we assume this is okay. No changes means that the code is fine and we never ran into issues. Also, we always check for NULL here. The avl code is used for hashing strings. It is also used in the backend of luatex but in luametatex we don't have that, so its use is now only in mplib. 
Actually there are two modules used in luatex: one for metapost an done for tex, and they are somewhat different. So, I took the most extensive one. Todo: Rename some variables to avoid a compiler warning. Todo: Maybe abstract the error messages and make them a callback. Todo: Maybe some more if/else/local (likely branch prediction). Todo: Maybe turn some common code into functions (just for fun, will make the source smaller). */ # include "avl.h" # ifdef AVL_SHOW_ERROR_ON # define AVL_SHOW_ERROR(fmt,arg) fprintf(stderr, "! avl.c: " fmt, arg) # else # define AVL_SHOW_ERROR(fmt,arg) (void) (fmt), (void) (arg) # endif const void *avl_default_item_copy(const void *item) { return (const void *) item; } void *avl_default_item_dispose(void *item) { (void) item; return (void *) NULL; } /* integral type to encode rank and skew bits */ typedef uint32_t rbal_t; /* avl_node structure */ typedef struct avl_node { /* aligned */ struct avl_node *sub[2]; struct avl_node *up; void *item; rbal_t rbal; int padding; } avl_node; /* avl_tree structure */ struct avl_tree_ { /* aligned */ avl_node *root; avl_size_t count; /* how many nodes in tree rooted at [root] */ int padding; /* alignment */ avl_compare_func compare; /* compare items */ avl_item_copy_func copy; avl_item_dispose_func dispose; avl_alloc_func alloc; /* to allocate memory (same signature as malloc) */ avl_dealloc_func dealloc; /* to deallocate memory (same signature as free) */ void *param; }; # define item_compare(cmp, tree, item1, item2) (*cmp)(tree->param, item1, item2) # define sub_left(a) (a)->sub[0] # define sub_right(a) (a)->sub[1] # define get_item(a) (a)->item /* RANK(a) = size of left subtree + 1 */ # define rbal(a) (a)->rbal # define rzero(a) (rbal(a) & ~3) # define get_bal(a) (rbal(a) & 3) # define is_lskew(a) (rbal(a) & 1) # define is_rskew(a) (rbal(a)>>1 & 1) # define set_lskew(a) (rbal(a) |= 1) # define set_rskew(a) (rbal(a) |= 2) # define set_skew(a,d) (rbal(a) |= (1 << d)) # define unset_lskew(a) 
(rbal(a) &= ~1) # define unset_rskew(a) (rbal(a) &= ~2) # define get_rank(a) (rbal(a) >> 2) # define set_rank(a,r) (rbal(a) = (r<<2 | get_bal(a))) # define incr_rank(a,r) (rbal(a) += r<<2) # define decr_rank(a,r) (rbal(a) -= r<<2) # define AVL_MIN_DEPTH 0 /* Helper structure. */ typedef enum { OP_BACKUP, OP_DETACH, OP_FREE } whichop_t; typedef struct ptr_handler { /* swapped and aligned */ void *ptr; whichop_t whichop; int padding; } ptr_handler; static void clear_node(avl_node *a) { sub_left(a) = NULL; sub_right(a) = NULL; (a)->up = NULL; rbal(a) = 4u; } /* Called by 'avl_ins', 'avl_dup', 'node_slice'. */ static avl_node *new_node(void *item, avl_node *up, avl_tree t) { avl_node *a = (*t->alloc)(sizeof(avl_node)); if (a) { sub_left(a) = NULL; sub_right(a) = NULL; a->up = up; a->rbal = 4u; a->item = (*t->copy)(item); } else { AVL_SHOW_ERROR("%s\n", "couldn't allocate node"); } return a; } static void free_node(avl_node *a, avl_tree t) { a->item = (*t->dispose)(a->item); (*t->dealloc)(a); } /* Function to detach node [a] from tree [t] (compiler will inline if needed). */ static void detach_node(avl_node *a, avl_tree t, struct ptr_handler *h) { clear_node(a); do { if (! h) { /* nothing */ } else if (h->whichop == OP_DETACH) { h->ptr = a; break; } else if (h->whichop == OP_BACKUP) { h->ptr = (*t->copy)(a->item); } free_node(a, t); } while (0); t->count--; } /* Tree methods. */ avl_tree avl_create ( avl_compare_func compare, avl_item_copy_func copy, avl_item_dispose_func dispose, avl_alloc_func alloc, avl_dealloc_func dealloc, void *param ) { avl_tree t = (*alloc)(sizeof(struct avl_tree_)); if (t) { t->root = NULL; t->count = 0; t->param = param; t->compare = compare; t->copy = copy; t->dispose = dispose; t->alloc = alloc; t->dealloc = dealloc; } else { AVL_SHOW_ERROR("%s\n", "couldn't create new handle in avl_create()"); } return t; } /* Empty the tree using rotations. 
*/ static void node_empty(avl_tree t) { avl_node *a, *p; for (a = t->root; a != NULL;) { p = a; if (! sub_right(a)) { a = sub_left(a); } else { while (sub_left(a)) { /* rotR(a) */ a = sub_left(a); sub_left(p) = sub_right(a); sub_right(a) = p; p = a; } a = sub_right(p); } free_node(p, t); t->count--; } t->root = NULL; } /* [t] is an existing tree handle; this function invokes node_empty(). */ void avl_reset ( avl_tree t, avl_compare_func compare, avl_item_copy_func copy, avl_item_dispose_func dispose, avl_alloc_func alloc, avl_dealloc_func dealloc ) { if (t) { node_empty(t); t->compare = compare; t->copy = copy; t->dispose = dispose; t->alloc = alloc; t->dealloc = dealloc; } } void avl_empty(avl_tree t) { if (t) { node_empty(t); } } /* Destroy nodes and free handle. */ void avl_destroy(avl_tree t) { if (t) { node_empty(t); (*t->dealloc)(t); } } avl_tree avl_dup(avl_tree t, void *param) { if (t) { avl_tree tt = avl_create(t->compare, t->copy, t->dispose, t->alloc, t->dealloc, param); if (tt) { tt->count = t->count; if (t->root == NULL) { return tt; } else { avl_node *a, *c, *s; a = t->root; tt->root = c = new_node(get_item(a), NULL, t); if (c) { sub_right(c) = NULL; rbal(c) = rbal(a); while (1) { while (sub_left(a) != NULL) { a = sub_left(a); sub_left(c) = s = new_node(get_item(a), NULL, t); if (s) { s->up = c; sub_right(s) = c; c = s; rbal(c) = rbal(a); } else { goto recover; } } sub_left(c) = NULL; while (sub_right(a) == NULL) { s = sub_right(c); sub_right(c) = NULL; c = s; /* Find successor of [a] in original tree */ do { s = a; a = s->up; if (a == NULL) { return tt; } } while (s != sub_left(a)); } a = sub_right(a); s = new_node(get_item(a), NULL, t); if (s) { sub_right(s) = sub_right(c); sub_right(c) = s; s->up = c; c = s; rbal(c) = rbal(a); } else { goto recover; } } /* recovery code */ recover: while (1) { s = sub_right(c); sub_right(c) = NULL; if (s) { c = s; } else { break; } } node_empty(tt); abort: (*t->dealloc)(tt); AVL_SHOW_ERROR("%s\n", "couldn't 
allocate node in avl_dup()"); return NULL; } else { goto abort; } } } else { AVL_SHOW_ERROR("%s\n", "couldn't create new handle in avl_dup()"); } } return NULL; } avl_bool_t avl_isempty(avl_tree t) { return t == NULL || t->root == NULL; } avl_size_t avl_size(avl_tree t) { return t == NULL ? 0 : t->count; } static int depth(avl_node *a) { int h = AVL_MIN_DEPTH; for (; a != NULL; ++h) { a = a->sub[is_rskew(a)]; } return h; } static avl_node *node_first(avl_node *a) { while (sub_left(a)) { a = sub_left(a); } return a; } static avl_node *node_last(avl_node *a) { while (sub_right(a)) { a = sub_right(a); } return a; } /* [a] : non-null */ static avl_node *node_next(avl_node *a) { if (sub_right(a)) { return node_first (sub_right(a)); } else { avl_node *p; do { p = a; a = p->up; } while (a && sub_right(a) == p); return a; } } /* [a] : non-null */ static avl_node *node_prev(avl_node *a) { if (sub_left(a)) { return node_last (sub_left(a)); } else { avl_node *p; do { p = a; a = p->up; } while (a && sub_left(a) == p); return a; } } static avl_node *node_find(const void *item, avl_tree t) { avl_node *a = t->root; avl_compare_func cmp = t->compare; while (a) { int c = item_compare(cmp, t, item, get_item(a)); if (c < 0) { a = sub_left(a); } else if (c) { a = sub_right(a); } else { break; } } return a; } static avl_size_t get_index(avl_node *a) { avl_size_t n = get_rank(a); avl_node *p; while ((p = a->up)) { if (a != sub_left(p)) { n += get_rank(p); } a = p; } return n; } /* Find item by index. */ static avl_node *node_find_index(avl_size_t idx, avl_tree t) { avl_node *a = t->root; if (idx == 0 || idx > t->count) { return NULL; } else if (idx == 1) { return node_first(a); } else if (idx == t->count) { return node_last(a); } else { int c; while ((c = (int) (idx - get_rank(a))) != 0) { if (c < 0) { a = sub_left(a); } else { idx = (avl_size_t) c; a = sub_right(a); } } return a; } } /* Rebalance starting from node [a] where a->sub[d_] is deeper post-insertion. 
*/ static avl_code_t rebalance_ins(avl_node *a, int dir, avl_tree t) { if (a) { avl_node *p; while (1) { incr_rank(a, (rbal_t) (!dir)); if (get_bal(a)) { break; } else { set_skew(a, dir); p = a->up; if (p) { dir = a != sub_left(p); a = p; } else { return 2; } } } /* Now bal(a) == -1 or +1. Rotate if needed. */ if (dir == 0) { if (is_rskew(a)) unset_rskew(a); else { avl_node *u = a->up; avl_node **r = u ? &u->sub[a != sub_left(u)] : &t->root; p = a; if (is_lskew(sub_left(p))) { /* rotR(p) */ a = sub_left(p); sub_left(p) = sub_right(a); if (sub_right(a)) { sub_right(a)->up = p; } sub_right(a) = p; unset_lskew(p); rbal(p) -= rzero(a); } else { /* rotLR(p) */ a = sub_right(sub_left(p)); sub_right(sub_left(p)) = sub_left(a); if (sub_left(a)) { sub_left(a)->up = sub_left(p); } sub_left(p)->up = a; sub_left(a) = sub_left(p); sub_left(p) = sub_right(a); if (sub_right(a)) { sub_right(a)->up = p; } sub_right(a) = p; switch (get_bal(a)) { case 0: /* not skewed */ unset_lskew(p); unset_rskew(sub_left(a)); break; case 1: /* left skew */ unset_lskew(p); set_rskew(p); unset_rskew(sub_left(a)); break; case 2: /* right skew */ unset_lskew(p); unset_rskew(sub_left(a)); set_lskew(sub_left(a)); break; } rbal(a) += rzero(sub_left(a)); rbal(p) -= rzero(a); } rbal(a) &= ~3; a->up = u; p->up = a; *r = a; } } else if (is_lskew(a)) { unset_lskew(a); } else { avl_node *u = a->up; avl_node **r = u != NULL ? 
&u->sub[a != sub_left(u)] : &t->root; p = a; if (is_rskew(sub_right(p))) { /* rotL(p) */ a = sub_right(p); sub_right(p) = sub_left(a); if (sub_left(a)) { sub_left(a)->up = p; } sub_left(a) = p; unset_rskew(p); rbal(a) += rzero(p); } else { /* rotRL(p) */ a = sub_left(sub_right(p)); sub_left(sub_right(p)) = sub_right(a); if (sub_right(a)) { sub_right(a)->up = sub_right(p); } sub_right(p)->up = a; sub_right(a) = sub_right(p); sub_right(p) = sub_left(a); if (sub_left(a)) { sub_left(a)->up = p; } sub_left(a) = p; switch (get_bal(a)) { case 0: /* not skewed */ unset_rskew(p); unset_lskew(sub_right(a)); break; case 1: /* left skew */ unset_rskew(p); unset_lskew(sub_right(a)); set_rskew(sub_right(a)); break; case 2: /* right skew */ unset_rskew(p); set_lskew(p); unset_lskew(sub_right(a)); break; } rbal(sub_right(a)) -= rzero(a); rbal(a) += rzero(p); } rbal(a) &= ~3; a->up = u; p->up = a; *r = a; } /* The tree rooted at 'a' is now valid. Finish adjusting ranks */ while ((p = a->up)) { incr_rank(p, (rbal_t)(a == sub_left(p))); a = p; } return 1; } return 2; } /* detach [p] : non-null; only the linkage is tweaked */ static avl_code_t rebalance_del(avl_node *p, avl_tree t, void **backup) { rbal_t bal; int dir = 0; avl_node *a = p->up; avl_node **r = a ? &a->sub[dir = p != sub_left(a)] : &t->root; avl_node *c = sub_right(p); if (! c && ! sub_left(p)) { *r = NULL; } else if (! c || ! sub_left(p)) { *r = c ? 
c : sub_left(p); (*r)->up = a; } else { if (sub_left(c)) { do { c = sub_left(c); } while (sub_left(c)); a = c->up; dir = 0; sub_left(a) = sub_right(c); if (sub_right(c)) { sub_right(c)->up = a; } sub_right(c) = sub_right(p); sub_right(c)->up = c; } else { a = c; dir = 1; } sub_left(c) = sub_left(p); sub_left(c)->up = c; c->up = p->up; rbal(c) = rbal(p); *r = c; } if (backup) { *backup = (*t->copy)(p->item); } detach_node(p, t, NULL); /* Start backtracking : subtree of [a] in direction [dir] is less deep */ for (;; a = (*r)->up) { if (a == NULL) { return 2; } else { decr_rank(a, (rbal_t)(!dir)); bal = get_bal(a); if (dir == 0) { if (bal == 0) { set_rskew(a); break; } if (a->up) { dir = a != sub_left(a->up); r = &a->up->sub[dir]; } else { r = &t->root; } if (bal & 1) { unset_lskew(a); } if (get_bal(a)) { p = a; bal = get_bal(sub_right(p)); if (! (bal & 1)) { /* bal = 0 or +1 */ /* rotL(p) */ a = sub_right(p); sub_right(p) = sub_left(a); if (sub_left(a)) { sub_left(a)->up = p; } sub_left(a) = p; if (bal) { unset_rskew(p); unset_rskew(a); } else { set_lskew(a); } rbal(a) += rzero(p); } else { /* rotRL(p) */ a = sub_left(sub_right(p)); sub_left(sub_right(p)) = sub_right(a); if (sub_right(a)) { sub_right(a)->up = sub_right(p); } sub_right(p)->up = a; sub_right(a) = sub_right(p); sub_right(p) = sub_left(a); if (sub_left(a)) { sub_left(a)->up = p; } sub_left(a) = p; switch (get_bal(a)) { case 0: /* not skewed */ unset_rskew(p); unset_lskew(sub_right(a)); break; case 1: /* left skew */ unset_rskew(p); unset_lskew(sub_right(a)); set_rskew(sub_right(a)); break; case 2: /* right skew */ unset_rskew(p); set_lskew(p); unset_lskew(sub_right(a)); break; } rbal(a) &= ~3; rbal(sub_right(a)) -= rzero(a); rbal(a) += rzero(p); } a->up = p->up; p->up = a; /* Done with rotation */ *r = a; if (bal == 0) { break; } } } else { /* dir == 1 */ if (bal == 0) { set_lskew(a); break; } if (a->up == NULL) { r = &t->root; } else { dir = a != sub_left(a->up); r = &a->up->sub[dir]; } if (bal & 2) { 
unset_rskew(a); } if (get_bal(a)) { p = a; bal = get_bal(sub_left(p)); if (! (bal & 2)) { /* bal = 0 or -1 */ /* rotR(p) */ a = sub_left(p); sub_left(p) = sub_right(a); if (sub_right(a)) { sub_right(a)->up = p; } sub_right(a) = p; if (bal) { unset_lskew(p); unset_lskew(a); } else { set_rskew(a); } rbal(p) -= rzero(a); } else { /* rotLR(p) */ a = sub_right(sub_left(p)); sub_right(sub_left(p)) = sub_left(a); if (sub_left(a)) { sub_left(a)->up = sub_left(p); } sub_left(p)->up = a; sub_left(a) = sub_left(p); sub_left(p) = sub_right(a); if (sub_right(a) != NULL) { sub_right(a)->up = p; } sub_right(a) = p; switch (get_bal(a)) { case 0: /* not skewed */ unset_lskew(p); unset_rskew(sub_left(a)); break; case 1: /* left skew */ unset_lskew(p); set_rskew(p); unset_rskew(sub_left(a)); break; case 2: /* right skew */ unset_lskew(p); unset_rskew(sub_left(a)); set_lskew(sub_left(a)); break; } rbal(a) &= ~3; rbal(a) += rzero(sub_left(a)); rbal(p) -= rzero(a); } a->up = p->up; p->up = a; /* Done with rotation */ *r = a; if (bal == 0) { break; } } } } } /* Finish adjusting ranks */ while ((p = a->up)) { decr_rank(p, (rbal_t)(a == sub_left(p))); a = p; } return 1; } void *avl_first(avl_tree t) { if (t && t->root) { return get_item(node_first(t->root)); } else { return NULL; } } void *avl_last(avl_tree t) { if (t && t->root) { return get_item(node_last(t->root)); } else { return NULL; } } void *avl_find(const void *item, avl_tree t) { if (t) { avl_node *a = node_find(item, t); return a ? get_item(a) : NULL; } else { return NULL; } } /* Return smallest index i in [1:len] s.t. tree[i] matches [item], or zero if not found. */ avl_size_t avl_index(const void *item, avl_tree t) { if (item && t && t->root) { avl_compare_func cmp = t->compare; avl_node *a, *p; avl_size_t idx = 0, n = 0; for (a = t->root;;) { int c = item_compare(cmp, t, item, get_item(a)); if (! 
c) { idx = n + get_rank(a); } else if (c > 0) { n += get_rank(a); } p = a->sub[c > 0]; if (p) { a = p; } else { return idx; } } } else { return 0; } } /* (lo,hi) where lo smallest index s.t. t[lo] >= lo_item, or t->count+1 and hi greatest index s.t. t[hi] <= hi_item, or 0. */ avl_code_t avl_span(const void *lo_item, const void *hi_item, avl_tree t, avl_size_t *lo_idx, avl_size_t *hi_idx) { if (t) { *lo_idx = t->count + 1; *hi_idx = 0; if (t->root) { avl_compare_func cmp = t->compare; avl_node *a; avl_size_t n = 0; int c = item_compare(cmp, t, lo_item, hi_item) > 0; if (c > 0) { const void *temp = lo_item; lo_item = hi_item; hi_item = temp; } a = t->root; do { c = item_compare(cmp, t, lo_item, get_item(a)); if (c > 0) { n += get_rank(a); a = sub_right(a); } else { *lo_idx = n + get_rank(a); a = sub_left(a); } } while (a); a = t->root; do { c = item_compare(cmp, t, hi_item, get_item(a)); if (c < 0) { a = sub_left(a); } else { *hi_idx += get_rank(a); a = sub_right(a); } } while (a); return 0; } } return -1; } /* Find the smallest item in tree [t] that is GEQ the passed item. */ void *avl_find_atleast(const void *item, avl_tree t) { if (t && t->root) { avl_compare_func cmp = t->compare; avl_node *a = t->root; void *p = NULL; do { int c = item_compare(cmp, t, item, get_item(a)); if (c > 0) { a = sub_right(a); } else { p = get_item(a); a = sub_left(a); } } while (a); return p; } else { return NULL; } } /* Find the greatest item in tree [t] that is LEQ the passed item. */ void *avl_find_atmost(const void *item, avl_tree t) { if (t && t->root) { avl_compare_func cmp = t->compare; avl_node *a = t->root; void *p = NULL; do { int c = item_compare(cmp, t, item, get_item(a)); if (c < 0) { a = sub_left(a); } else { p = get_item(a); a = sub_right(a); } } while (a); return p; } else { return NULL; } } /* Retrieve item of index [idx] in tree [t]. */ void *avl_find_index(avl_size_t idx, avl_tree t) { if (t) { avl_node *a = node_find_index(idx, t); return a ? 
get_item(a) : NULL; } else { return NULL; } } /* Iterative insertion. */ avl_code_t avl_ins(void *item, avl_tree t, avl_bool_t allow_duplicates) { if (t) { avl_compare_func cmp = t->compare; avl_node **r, *a; int dir = 0; for (r = &t->root, a = NULL; *r != NULL; r = &a->sub[dir = dir > 0]) { a = *r; dir = item_compare(cmp, t, item, get_item(a)); if (!dir && !allow_duplicates) return 0; } *r = new_node(item, a, t); if (*r) { t->count++; return rebalance_ins(a, dir, t); } else { return -1; } } else { return 0; } } avl_code_t avl_del(void *item, avl_tree t, void **backup) { if (t && t->root) { avl_node *a = node_find(item, t); if (a) { return rebalance_del(a, t, backup); } else { return 0; } } else { return 0; } } /* Helper function. */ static avl_code_t node_del_first(avl_tree t, struct ptr_handler *h) { avl_node *c; avl_node *p = node_first (t->root); avl_node *a = p->up; if (sub_right(p)) { sub_right(p)->up = a; } if (a == NULL) { t->root = sub_right(p); } else { sub_left(a) = sub_right(p); } detach_node (p, t, h); /* Start backtracking : subtree of [a] in direction [0] is less deep */ for (;; a = c) { if (a) { rbal_t bal; decr_rank(a, 1); bal = get_bal(a); if (bal == 0) { set_rskew(a); break; } else { if (bal & 1) { unset_lskew(a); } c = a->up; if (get_bal(a)) { p = a; bal = get_bal(sub_right(p)); if (! 
(bal & 1)) { /* bal = 0 or +1 */ /* rotL(p) */ a = sub_right(p); sub_right(p) = sub_left(a); if (sub_left(a)) { sub_left(a)->up = p; } sub_left(a) = p; if (bal) { unset_rskew(p); unset_rskew(a); } else { set_lskew(a); } rbal(a) += rzero(p); } else { /* rotRL(p) */ a = sub_left(sub_right(p)); sub_left(sub_right(p)) = sub_right(a); if (sub_right(a)) { sub_right(a)->up = sub_right(p); } sub_right(p)->up = a; sub_right(a) = sub_right(p); sub_right(p) = sub_left(a); if (sub_left(a)) { sub_left(a)->up = p; } sub_left(a) = p; switch (get_bal(a)) { case 0: /* not skewed */ unset_rskew(p); unset_lskew(sub_right(a)); break; case 1: /* left skew */ unset_rskew(p); unset_lskew(sub_right(a)); set_rskew(sub_right(a)); break; case 2: /* right skew */ unset_rskew(p); set_lskew(p); unset_lskew(sub_right(a)); break; } rbal(a) &= ~3; rbal(sub_right(a)) -= rzero(a); rbal(a) += rzero(p); } a->up = p->up; p->up = a; /* Done with rotation */ if (c) { sub_left(c) = a; } else { t->root = a; } if (bal == 0) { break; } } } } else { return 2; } } /* Finish adjusting ranks */ while ((a = a->up)) { decr_rank(a, 1); } return 1; } /* helper function */ static avl_code_t node_del_last(avl_tree t, struct ptr_handler *h) { avl_node *c; avl_node *p = node_last (t->root); avl_node *a = p->up; if (sub_left(p)) { sub_left(p)->up = a; } if (a) { sub_right(a) = sub_left(p); } else { t->root = sub_left(p); } detach_node(p, t, h); /* Start backtracking : subtree of [a] in direction [1] is less deep */ for (;; a = c) { if (a) { rbal_t bal = get_bal(a); if (bal == 0) { set_lskew(a); break; } else { if (bal & 2) { unset_rskew(a); } c = a->up; if (get_bal(a)) { p = a; bal = get_bal(sub_left(p)); if (! 
(bal & 2)) { /* bal = 0 or -1 */ /* rotR(p) */ a = sub_left(p); sub_left(p) = sub_right(a); if (sub_right(a)) { sub_right(a)->up = p; } sub_right(a) = p; if (bal) { unset_lskew(p); unset_lskew(a); } else { set_rskew(a); } rbal(p) -= rzero(a); } else { /* rotLR(p) */ a = sub_right(sub_left(p)); sub_right(sub_left(p)) = sub_left(a); if (sub_left(a) != NULL) sub_left(a)->up = sub_left(p); sub_left(p)->up = a; sub_left(a) = sub_left(p); sub_left(p) = sub_right(a); if (sub_right(a)) { sub_right(a)->up = p; } sub_right(a) = p; switch (get_bal(a)) { case 0: /* not skewed */ unset_lskew(p); unset_rskew(sub_left(a)); break; case 1: /* left skew */ unset_lskew(p); set_rskew(p); unset_rskew(sub_left(a)); break; case 2: /* right skew */ unset_lskew(p); unset_rskew(sub_left(a)); set_lskew(sub_left(a)); break; } rbal(a) &= ~3; rbal(a) += rzero(sub_left(a)); rbal(p) -= rzero(a); } a->up = p->up; p->up = a; /* Done with rotation */ if (c) { sub_right(c) = a; } else { t->root = a; } if (bal == 0) { break; } } } } else { return 2; } } return 1; } /* [p] is juncture node(zeroed out), [n] is rank of [p] in resulting tree, [delta] = depth_1 - depth_0 */ static avl_code_t join_left(avl_node *p, avl_node **r0, avl_node *r1, int delta, int n) { avl_node *a = NULL; avl_node **r = r0; if (r1) { while (delta < -1) { a = *r; delta += (int) (is_lskew(a) + 1); n -= (int) get_rank(a); r = &sub_right(a); } r1->up = p; if (*r) { (*r)->up = p; } if (delta) { set_lskew(p); } } else { while (*r != NULL) { a = *r; n -= (int) get_rank(a); r = &sub_right(a); } } /* at this point bal(*r) = -1 or 0 */ sub_left(p) = *r; sub_right(p) = r1; p->up = a; set_rank(p, n); *r = p; for (;;) { if (! 
a) { return 2; } else if (get_bal(a)) { break; } else { set_rskew(a); a = a->up; } } /* Rotate if need be */ /* No (+2,0) rotation to do */ if (is_lskew(a)) { unset_lskew(a); } else { p = a; if (is_rskew(sub_right(p))) { /* rotL(p) */ a = sub_right(p); sub_right(p) = sub_left(a); if (sub_left(a)) { sub_left(a)->up = p; } sub_left(a) = p; unset_rskew(p); rbal(a) += rzero(p); } else { /* rotRL(p) */ a = sub_left(sub_right(p)); sub_left(sub_right(p)) = sub_right(a); if (sub_right(a)) { sub_right(a)->up = sub_right(p); } sub_right(p)->up = a; sub_right(a) = sub_right(p); sub_right(p) = sub_left(a); if (sub_left(a)) { sub_left(a)->up = p; } sub_left(a) = p; switch (get_bal(a)) { case 0: /* not skewed */ unset_rskew(p); unset_lskew(sub_right(a)); break; case 1: /* left skew */ unset_rskew(p); unset_lskew(sub_right(a)); set_rskew(sub_right(a)); break; case 2: /* right skew */ unset_rskew(p); set_lskew(p); unset_lskew(sub_right(a)); break; } rbal(sub_right(a)) -= rzero(a); rbal(a) += rzero(p); } rbal(a) &= ~3; a->up = p->up; p->up = a; if (a->up) { sub_right(a->up) = a; } else { *r0 = a; } } return 1; } /* [p] is juncture node and [n] is rank of [p] in resulting tree. */ static avl_code_t join_right(avl_node *p, avl_node *r0, avl_node **r1, int delta, int n) { avl_node *a = NULL; avl_node **r = r1; if (r0) { while (delta > +1) { a = *r; delta -= (int) (is_rskew(a) + 1); incr_rank(a, (rbal_t)n); r = &sub_left(a); } r0->up = p; if (*r != NULL) { (*r)->up = p; } if (delta) { set_rskew(p); } } else { while (*r) { a = *r; incr_rank(a, (rbal_t) n); r = &sub_left(a); } n = 1; } /* at this point bal(*r) = +1 or 0 */ sub_left(p) = r0; sub_right(p) = *r; set_rank(p, n); p->up = a; *r = p; for (;;) { if (! a) { return 2; } else if (get_bal(a)) { break; } else { set_lskew(a); a = a->up; } } /* Rotate if need be. No (-2,0) rotation to do. 
*/ if (is_rskew(a)) { unset_rskew(a); } else { p = a; if (is_lskew(sub_left(p))) { /* rotR(p) */ a = sub_left(p); sub_left(p) = sub_right(a); if (sub_right(a)) { sub_right(a)->up = p; } sub_right(a) = p; unset_lskew(p); rbal(p) -= rzero(a); } else { /* rotLR(p) */ a = sub_right(sub_left(p)); sub_right(sub_left(p)) = sub_left(a); if (sub_left(a)) { sub_left(a)->up = sub_left(p); } sub_left(p)->up = a; sub_left(a) = sub_left(p); sub_left(p) = sub_right(a); if (sub_right(a)) { sub_right(a)->up = p; } sub_right(a) = p; switch (get_bal(a)) { case 0: /* not skewed */ unset_lskew(p); unset_rskew(sub_left(a)); break; case 1: /* left skew */ unset_lskew(p); set_rskew(p); unset_rskew(sub_left(a)); break; case 2: /* right skew */ unset_lskew(p); unset_rskew(sub_left(a)); set_lskew(sub_left(a)); break; } rbal(a) += rzero(sub_left(a)); rbal(p) -= rzero(a); } rbal(a) &= ~3; a->up = p->up; p->up = a; if (a->up != NULL) { sub_left(a->up) = a; } else { *r1 = a; } } return 1; } avl_code_t avl_del_first(avl_tree t, void **backup) { if (t && t->root) { avl_code_t rv; if (backup) { ptr_handler h = { NULL, OP_BACKUP, 0 }; rv = node_del_first(t, &h); *backup = h.ptr; } else { rv = node_del_first(t, NULL); } return rv; } else { return 0; } } avl_code_t avl_del_last(avl_tree t, void **backup) { if (t && t->root) { if (backup == NULL) { return node_del_last(t, NULL); } else { ptr_handler h = { NULL, OP_BACKUP, 0 }; avl_code_t rv = node_del_last(t, &h); *backup = h.ptr; return rv; } } else { return 0; } } avl_code_t avl_ins_index(void *item, avl_size_t idx, avl_tree t) { if (idx == 0 || t == NULL || idx > t->count + 1) { return 0; } else { avl_node *p = new_node(item, NULL, t); if (p) { t->count++; /* Note: 'attach_node' macro increments t->count */ if (idx == 1) { return join_right(p, (avl_node *) NULL, &t->root, /*delta= */ 0, 1); } else if (idx == t->count) { return join_left(p, &t->root, (avl_node *) NULL, /*delta= */ 0, (int) t->count); } else { avl_node *a = node_find_index(idx - 1, 
t); int dir; if (sub_right(a)) { a = node_first(sub_right(a)); sub_left(a) = p; dir = 0; } else { sub_right(a) = p; dir = 1; } p->up = a; return rebalance_ins(a, dir, t); } } else { return -1; } } } avl_code_t avl_del_index(avl_size_t idx, avl_tree t, void **backup) { if (! t) { return 0; } else if (idx == 0 || idx > t->count) { return 0; } else if (idx == 1) { return avl_del_first(t, backup); } else if (idx == t->count) { return avl_del_last(t, backup); } else { avl_node *a = node_find_index(idx, t); return rebalance_del(a, t, backup); } } /* Outcome: [t0] handles the concatenation of [t0] and [t1]. */ void avl_cat(avl_tree t0, avl_tree t1) { if (! t0 || ! t1 ||! t1->root) { return; } else if (t0->root) { int delta = depth(t1->root) - depth(t0->root); ptr_handler h = { NULL, OP_DETACH, 0 }; if (delta <= 0) { if (node_del_first (t1, &h) == 2) { --delta; } (void) join_left((avl_node *) h.ptr, &t0->root, t1->root, delta, (int) (t0->count + 1)); } else { if (node_del_last(t0, &h) == 2) { ++delta; } (void) join_right((avl_node *) h.ptr, t0->root, &t1->root, delta, (int) (t0->count + 1)); t0->root = t1->root; } t1->root = NULL; t0->count += t1->count + 1; t1->count = 0; } else { t0->root = t1->root; t0->count = t1->count; t1->root = NULL; t1->count = 0; } } /* [t0] and [t1] are existing handles; see Donald Knuth, TAOCP Vol.3 "Sorting and searching". 
*/ avl_code_t avl_split(const void *item, avl_tree t, avl_tree t0, avl_tree t1) { if (t && t->root) { avl_compare_func cmp = t->compare; avl_node *a, *p, *sn; /* sn: split node */ int k, na, an[AVL_STACK_CAPACITY]; t0->root = NULL; t1->root = NULL; t0->count = 0; t1->count = 0; /* invariant: [na] = size of tree rooted at [a] plus one */ for (a = t->root, na = (int) (t->count + 1), k = 0;;) { int d_ = item_compare(cmp, t, item, get_item(a)); if (d_) { p = a->sub[d_ = d_ > 0]; if (p) { an[k++] = na; if (d_) { na -= (int) get_rank(a); } else { na = (int) get_rank(a); } a = p; } else { return 0; } } else { break; } } /* record split node */ sn = a; if (k == 0) { t0->root = sub_left(a); t1->root = sub_right(a); if (t0->root) { t0->root->up = NULL; } if (t1->root) { t1->root->up = NULL; } t0->count = get_rank(a) - 1; t1->count = t->count - get_rank(a); } else { avl_node *r[2]; int h[2], ha; avl_size_t n[2]; int d_; r[0] = sub_left(a); r[1] = sub_right(a); if (r[0]) { r[0]->up = NULL; } if (r[1]) { r[1]->up = NULL; } ha = depth(a); h[0] = ha - (is_rskew(a) ? 2 : 1); h[1] = ha - (is_lskew(a) ? 2 : 1); n[0] = get_rank(a); /* size of r[0] plus one */ n[1] = (avl_size_t) na - n[0]; /* size of r[1] plus one */ for (p = a->up, d_ = a != sub_left(p);;) { a = p; /* a: juncture node */ p = a->up; if (d_ == 0) { int hh = h[1]; int nn; ha += (is_rskew(a) ? 2 : 1); h[1] = ha - (is_lskew(a) ? 2 : 1); nn = n[1]; n[1] += (avl_size_t) (an[k - 1] - (int) get_rank(a)); if (p) { d_ = a != sub_left(p); } rbal(a) = 0; if (h[1] >= hh) { avl_node *rr = r[1]; r[1] = sub_right(a); if (r[1]) { r[1]->up = NULL; } h[1] += (2 == join_right(a, rr, r + 1, h[1] - hh, (int) nn)); } else { h[1] = hh + (2 == join_left(a, r + 1, sub_right(a), h[1] - hh, (int) nn)); } } else { int hh = h[0]; int nn; ha += (is_lskew(a) ? 2 : 1); h[0] = ha - (is_rskew(a) ? 
2 : 1); nn = get_rank(a); n[0] += nn; if (p) { d_ = a != sub_left(p); } rbal(a) = 0; if (h[0] >= hh) { avl_node *rr = r[0]; r[0] = sub_left(a); if (r[0]) { r[0]->up = NULL; } h[0] += (2 == join_left(a, r, rr, hh - h[0], (int) nn)); } else { h[0] = hh + (2 == join_right(a, sub_left(a), r, hh - h[0], (int) nn)); } } if (--k == 0) break; } t0->root = r[0]; t1->root = r[1]; t0->count = n[0] - 1; t1->count = n[1] - 1; } /* Detach split node */ detach_node(sn, t, NULL); t->root = NULL; t->count = 0; return 1; } else { return 0; } } /* Inorder traversal. */ void avl_walk(avl_tree t, avl_item_func proc, void *param) { if (t && t->root) { avl_node *a = t->root, *p; while (1) { while (sub_left(a)) { a = sub_left(a); } while (1) { (*proc)(get_item(a), param); if (sub_right(a)) { break; } else { do { p = a; a = p->up; if (! a) { return; } } while (p != sub_left(a)); } } a = sub_right(a); } } } /* Recursive helper for 'avl_slice'. */ static int node_slice(avl_node **root, avl_node **cur, avl_tree tree, avl_size_t len) { avl_size_t mid = len / 2; if (mid == 0) { if ((*root = new_node ((*cur)->item, /* parent */ NULL, tree)) == NULL) { return -1; } else { sub_left(*root) = NULL; sub_right(*root) = NULL; rbal(*root) = 4; *cur = node_next(*cur); return 0; } } else if ((*root = new_node(NULL, /* parent */ NULL, tree))) { avl_node *p = *root; int h0, h1 = -1; rbal(p) = (mid + 1) << 2; if ((h0 = node_slice(&sub_left(p), cur, tree, mid)) < 0) { return -1; } else { p->item = (*tree->copy) ((*cur)->item); sub_left(p)->up = p; *cur = node_next(*cur); if (len -= mid + 1) { if ((h1 = node_slice(&sub_right(p), cur, tree, len)) < 0) { return -1; } else { sub_right(p)->up = p; } } if (h0 > h1) { set_lskew(p); } else if (h0 < h1) { set_rskew(p); return 1 + h1; } return 1 + h0; } } else { return -1; } } /* Return a slice t[lo,hi) as a new tree. */ avl_tree avl_slice(avl_tree t, avl_size_t lo_idx, avl_size_t hi_idx, void *param) { if (! 
t || (lo_idx > hi_idx) || (lo_idx > t->count)) { return NULL; } else { if (lo_idx < 1) { lo_idx = 1; } if (hi_idx > t->count + 1) { hi_idx = t->count + 1; } { avl_tree tt = avl_create(t->compare, t->copy, t->dispose, t->alloc, t->dealloc, param); if (tt) { if (lo_idx < hi_idx) { avl_node *cur = node_find_index(lo_idx, t); if (node_slice(&tt->root, &cur, t, tt->count = hi_idx - lo_idx) < 0) { AVL_SHOW_ERROR("%s\n", "couldn't allocate node in avl_slice()"); node_empty(tt); (*t->dealloc)(tt); return NULL; } else { tt->root->up = NULL; } } return tt; } else { AVL_SHOW_ERROR("%s\n", "couldn't allocate new handle in avl_slice()"); return NULL; } } } } /* Recursive helper for 'avl_xload'. */ static int node_load(avl_node **root, avl_itersource cur, void **pres, avl_tree desc, avl_size_t len) { avl_size_t mid = len / 2; if (mid == 0) { if (0 != (*cur->f) (cur, pres) || (*root = new_node (*pres, /* parent */ NULL, desc)) == NULL) { return -1; } else { sub_left(*root) = NULL; sub_right(*root) = NULL; rbal(*root) = 4; return 0; } } else if ((*root = new_node (NULL, /* parent */ NULL, desc))) { avl_node *p = *root; int h0, h1 = -1; rbal(p) = (mid + 1) << 2; if ((h0 = node_load(&sub_left(p), cur, pres, desc, mid)) < 0) { return -1; } else if (0 != (*cur->f)(cur, pres)) { return -1; } else { p->item = (*desc->copy)(*pres); sub_left(p)->up = p; if (len -= mid + 1) { if ((h1 = node_load(&sub_right(p), cur, pres, desc, len)) < 0) { return -1; } else { sub_right(p)->up = p; } } if (h0 > h1) { set_lskew(p); } else if (h0 < h1) { set_rskew(p); return 1 + h1; } return 1 + h0; } } else { return -1; } } /* Load 'len' items from itersource. */ avl_tree avl_xload(avl_itersource src, void **pres, avl_size_t len, avl_config conf, void *tree_param) { if (src) { avl_tree tt = avl_create(conf->compare, conf->copy, conf->dispose, conf->alloc, conf->dealloc, tree_param); if (! 
tt) { AVL_SHOW_ERROR("%s\n", "couldn't allocate new handle in avl_load()"); return NULL; } if (len) { if (node_load(&tt->root, src, pres, tt, tt->count = len) < 0) { AVL_SHOW_ERROR("%s\n", "couldn't allocate node in avl_load()"); node_empty(tt); (*tt->dealloc)(tt); return NULL; } else { tt->root->up = NULL; } } return tt; } else { return NULL; } } /* ITERATORS */ typedef enum { AVL_ITERATOR_PRE, AVL_ITERATOR_POST, AVL_ITERATOR_INTREE } avl_status_t; struct avl_iterator_ { avl_node *pos; avl_tree tree; avl_status_t status; }; # define get_root(i) i->tree->root # define is_pre(i) i->status == AVL_ITERATOR_PRE # define is_post(i) i->status == AVL_ITERATOR_POST # define set_pre_iterator(i) i->status = AVL_ITERATOR_PRE # define set_post_iterator(i) i->status = AVL_ITERATOR_POST # define set_in_iterator(i) i->status = AVL_ITERATOR_INTREE /* Position existing iterator [iter] at node matching [item] in its own tree, if it exists; otherwise do nothing. */ void avl_iterator_seek(const void *item, avl_iterator iter) { avl_node *p = node_find(item, iter->tree); if (p) { set_in_iterator(iter); iter->pos = p; } } void avl_iterator_seek_index(avl_size_t idx, avl_iterator iter) { avl_node *p = node_find_index(idx, iter->tree); if (p) { set_in_iterator(iter); iter->pos = p; } } /* Return item pointer at current position. */ void *avl_iterator_cur(avl_iterator iter) { return iter->pos ? get_item(iter->pos) : NULL; } avl_size_t avl_iterator_count(avl_iterator iter) { return iter->tree->count; } avl_size_t avl_iterator_index(avl_iterator iter) { if (iter->pos) { return get_index(iter->pos); } else if (is_pre(iter)) { return 0; } else { return iter->tree->count + 1; } } /* Rustic: */ avl_iterator avl_iterator_new(avl_tree t, avl_ini_t ini, ...) { va_list args; avl_iterator iter = NULL; va_start(args, ini); if (! 
t) { /* okay */ } else if ((iter = (*t->alloc) (sizeof(struct avl_iterator_)))) { iter->pos = NULL; iter->tree = t; if (ini != AVL_ITERATOR_INI_INTREE) { iter->status = (ini == AVL_ITERATOR_INI_PRE) ? AVL_ITERATOR_PRE : AVL_ITERATOR_POST; } else { const void *item = NULL; item = va_arg(args, const void *); set_pre_iterator(iter); if (item == NULL) { AVL_SHOW_ERROR("%s\n", "missing argument to avl_iterator_new()"); } else { avl_iterator_seek(item, iter); } } } else { AVL_SHOW_ERROR("%s\n", "couldn't create iterator"); } va_end(args); return iter; } /* The following used to write to memory after it was freed. */ void avl_iterator_kill(avl_iterator iter) { if (iter != NULL) { avl_dealloc_func dealloc = iter->tree->dealloc; iter->pos = NULL; iter->tree = NULL; (*dealloc)(iter); } } void *avl_iterator_next(avl_iterator iter) { if (is_post(iter)) { return NULL; } else { avl_node *a = iter->pos; if (is_pre(iter)) { a = get_root(iter); if (a) { a = node_first(a); set_in_iterator(iter); } } else { a = node_next(a); if (! a) { set_post_iterator(iter); } } iter->pos = a; return a != NULL ? get_item(a) : NULL; } } void *avl_iterator_prev(avl_iterator iter) { if (is_pre(iter)) { return NULL; } else { avl_node *a = iter->pos; if (is_post(iter)) { a = get_root(iter); if (a) { a = node_last(a); set_in_iterator(iter); } } else { a = node_prev(a); if (! a) { set_pre_iterator(iter); } } iter->pos = a; return a ? get_item(a) : NULL; } } /* Remove node at current position and move cursor to next position. */ avl_code_t avl_iterator_del(avl_iterator iter, void **backup) { if (iter == NULL || iter->pos == NULL) { return 0; } else { avl_node *a = iter->pos, *p; p = node_next(a); if (! 
p) { set_post_iterator(iter); } iter->pos = p; return rebalance_del(a, iter->tree, backup); } } luametatex-2.10.08/source/libraries/avl/avl.h000066400000000000000000000275361442250314700210430ustar00rootroot00000000000000/* This small C package is made of an independent set of routines dedicated to manipulating AVL trees (files avl.c, avl.h), and of an extension module for Python that builds upon it (file avlmodule.c). Unlike collectionsmodule.c, the latter file contains only Python bindings: it adds nothing to the underlying implementation. license: this package, pyavl, is donated to the public domain author : Richard McGraw email : dasnar@fastmail.fm */ /* This file is reformatted a little. As there has not been any changed in the original we assume this is okay. No changes means that the code is fine and we never ran into issues. The avl code is used for hashing strings. It is also used in the backend of luatex but in luametatex we don't have that. */ # ifndef LIBRARIES_AVL_H # define LIBRARIES_AVL_H # include # include # include // # define avl_del mp_avl_del // # define avl_ins mp_avl_ins // # define avl_tree mp_avl_tree // # define avl_entry mp_avl_entry // # define avl_find mp_avl_find // # define avl_create mp_avl_create // # define avl_destroy mp_avl_destroy typedef enum avl_bool_t { avl_false, avl_true } avl_bool_t; # include typedef int8_t avl_code_t; typedef int8_t avl_bal_t; typedef uint32_t avl_size_t; typedef int (*avl_compare_func) (void *param, const void *lhs, const void *rhs); typedef void *(*avl_item_copy_func) (const void *item); typedef void *(*avl_item_dispose_func) (void *item); typedef void (*avl_item_func) (const void *item, void *param); typedef void *(*avl_alloc_func) (size_t); typedef void (*avl_dealloc_func) (void *); /* At minimum, shallow copy */ const void *avl_default_item_copy (const void *); void *avl_default_item_dispose (void *); # define AVL_STACK_CAPACITY 32 /* for avl_split() function */ typedef enum avl_ini_t { 
AVL_ITERATOR_INI_PRE, AVL_ITERATOR_INI_POST, AVL_ITERATOR_INI_INTREE } avl_ini_t; typedef struct avl_tree_ *avl_tree; typedef struct avl_iterator_ *avl_iterator; typedef struct avl_itersource_ avl_itersource_struct; typedef struct avl_itersource_ *avl_itersource; struct avl_itersource_ { void *p; /* return nonzero on error */ avl_code_t(*f) (avl_itersource from, void **to); }; typedef struct { avl_compare_func compare; avl_item_copy_func copy; avl_item_dispose_func dispose; avl_alloc_func alloc; avl_dealloc_func dealloc; } avl_config_struct, *avl_config; /* Public Functions */ /* --- CREATE --- Return a new tree and set its config. Return NULL on allocation failure. * 'alloc' defaults to malloc from stdlib * 'dealloc' defaults to free from stdlib * 'param' user param/refcon */ avl_tree avl_create( avl_compare_func compare, avl_item_copy_func copy, avl_item_dispose_func dispose, avl_alloc_func alloc, avl_dealloc_func dealloc, void *param ); /* --- RESET --- Empty tree 't' as in 'avl_empty()' and modify its config. */ void avl_reset( avl_tree t, avl_compare_func compare, avl_item_copy_func copy, avl_item_dispose_func dispose, avl_alloc_func alloc, avl_dealloc_func dealloc ); /* --- EMPTY --- Empty tree 't', calling its dispose_func for each item in 't'. The config is untouched. */ void avl_empty(avl_tree t); /* --- DESTROY --- Empty tree 't' and free the handle. */ void avl_destroy(avl_tree t); /* --- DUPLICATE (COPY) --- Return a copy of tree 't', using its copy_func for each item in 't'. Upon failure to allocate room for some item, return NULL. */ avl_tree avl_dup(avl_tree t, void *param); /* --- EMPTYNESS --- Return 'avl_true' iff tree 't' is empty (i.e. the handle is NULL or 't' contains no item). */ avl_bool_t avl_isempty(avl_tree t); /* --- SIZE --- Return number of items contained in tree 't'. */ avl_size_t avl_size(avl_tree t); /* --- FIRST (MINIMUM) --- Return first item in in-order traversal of 't'. Return NULL if 't' is empty. 
*/ void *avl_first(avl_tree t); /* --- LAST (MAXIMUM) --- Return last item in in-order traversal of 't'. Return NULL if 't' is empty. */ void *avl_last(avl_tree t); /* --- FIND MATCHING ITEM --- Find item matching 'item' parameter in tree 't'. Return NULL if it's not found. If there are multiple matches, the first one that is encountered during the search is returned; it may not be the one with lowest rank. */ void *avl_find(const void *item, avl_tree t); /* --- INDEX (RANK) OF ITEM --- Return smallest index 'i' s.t. 't[i]' matches 'item', or zero if 'item' is not found. */ avl_size_t avl_index(const void *item, avl_tree t); /* --- SPAN ITEMS --- Return integers 'i,j' s.t. 't[i,j]' i smallest index s.t. t[i] >= lo_item, or t->count+1 and j greatest one s.t. t[j] <= hi_item, or 0. If 'hi_item' is less than 'lo_item' those are swapped. Return codes: 0 success -1 error: tree had no root -2 error: compare failed */ avl_code_t avl_span( const void *lo_item, const void *hi_item, avl_tree t, avl_size_t *lo_idx, avl_size_t *hi_idx ); /* --- FIND AT LEAST --- Return smallest item in 't' that is GEQ 'item', or NULL. */ void *avl_find_atleast(const void *item, avl_tree t); /* --- FIND AT MOST --- Return largest item in 't' that is LEQ 'item', or NULL. */ void *avl_find_atmost(const void *item, avl_tree t); /* --- FIND BY INDEX (RANK) --- Find item in 't' by index, that is return 't[idx]'. If 'idx' is not in '[1,avl_size(t)]' then return NULL. If a compare failed then return NULL. */ void *avl_find_index(avl_size_t idx, avl_tree t); /* --- INSERTION --- Insert 'item' in tree 't' with regard to its compare_func. Say 'avl_ins(item,t,avl_true)' to insert 'item' in 't' even if it is there already. If 'item' is a duplicate and 'allow_duplicates' is avl_false, nothing is done. Return codes: -1 error: allocation of new node failed -2 error: compare failed, tree unchanged 0 nothing was done, no error +1 operation successful +2 the same and height(t) increased by one. 
*/ avl_code_t avl_ins(void *item, avl_tree t, avl_bool_t allow_duplicates); /* --- DELETION --- Remove 'item' from tree 't', calling its dispose_func. To make a backup of 'item' involving its copy_func, say 't(item,backup)' where 'backup' is some pointer to pointer to item. Otherwise set it to NULL. Return codes: 0 item not found -2 error: compare failed, tree unchanged +1 operation successful +2 the same and height(t) decreased by one. */ avl_code_t avl_del(void *item, avl_tree t, void **backup); /* --- DELETE FIRST --- Remove first item in in-order traversal from tree 't'. Note that only one item is removed. Return +1 or +2 as above. */ avl_code_t avl_del_first(avl_tree t, void **backup); /* --- DELETE LAST --- Remove last item in in-order traversal from tree 't'. Note that only one item is removed. Return +1 or +2 as above. */ avl_code_t avl_del_last(avl_tree t, void **backup); /* --- INSERT IN FRONT OF INDEX --- Insert 'item' in tree 't' so that afterwards, 't[idx]=item' except if 'idx<=0' or 'idx>size(t)+1'. To append 'item' to 't' regardless of order, say 'avl_ins_index(item,size+1,t)'. */ avl_code_t avl_ins_index(void *item, avl_size_t idx, avl_tree t); /* --- DELETE ITEM BY INDEX --- Remove item of rank 'idx' from tree 't' and return +1 or +2 as above except if 'idx' is not in '[1,avl_size(t)]' in which case return 0. */ avl_code_t avl_del_index(avl_size_t idx, avl_tree t, void **backup); /* --- IN-PLACE CONCATENATION --- Pre-condition: 't0' and 't1' are valid avl_trees Note that the code does not check whether the maximal item in 't0' is LEQ than the minimal item in 't1'. Post-condition: 't0' handles the concatenation of 't0' and 't1' which becomes empty (but its config is untouched). */ void avl_cat(avl_tree t0, avl_tree t1); /* --- SPLITTING --- Pre-condition: 't0' and 't1' are existing handles. Post-condition: items in 't0' all compare LEQ than 'item' and items in 't1' all compare GEQ than 'item'. This implementation removes one item. 
Return codes: 0 item not found, no-op -2 compare failed, tree unchanged +1 success */ avl_code_t avl_split(const void *item, avl_tree t, avl_tree t0, avl_tree t1); /* --- IN-ORDER TRAVERSAL --- Walk tree 't' in in-order, applying 'proc' at each node. The 'param' pointer is passed to 'proc', like this: '(*proc) (item_at_node,param)'. */ void avl_walk(avl_tree t, avl_item_func proc, void *param); /* --- SLICE --- Create a _new tree_ from the slice 't[lo_idx,hi_idx)' provided 'lo_idx <= hi_idx' and these indices are both in range. If a new tree can't be created or if some item can't be allocated, return NULL. Otherwise if the indices are inconsistent return NULL. */ avl_tree avl_slice(avl_tree t, avl_size_t lo_idx, avl_size_t hi_idx, void *param); /* ITERATORS An iterator assigned to a tree 't' is still usable after any item is inserted into 't' and after any item not located at this iterator's current position is deleted. The 'avl_iterator_del()' function may be used to remove the item at the iterator's current position. */ /* --- ITERATOR --- SEEK Find 'item' in this iterator's tree as in 'avl_find()' and make it the current position. */ void avl_iterator_seek(const void *item, avl_iterator iter); /* --- ITERATOR --- COUNT Return size of this iterator's tree */ avl_size_t avl_iterator_count(avl_iterator iter); /* --- ITERATOR --- SEEK BY INDEX Set the current position of 'iter' to 't[idx]' where 't' is the tree that is iterated over. */ void avl_iterator_seek_index(avl_size_t idx, avl_iterator iter); /* --- ITERATOR --- CURRENT POSITION Return item at current position of 'iter'. */ void *avl_iterator_cur(avl_iterator iter); /* --- ITERATOR --- INDEX Return rank of current item of 'iter' (as a result of computation) except it returns 0 or size of tree plus one if 'iter' is a pre- or post- iterator. */ avl_size_t avl_iterator_index(avl_iterator iter); /* --- ITERATOR --- CREATE Return a new cursor for tree 't'. 
If allocation of an iterator struct is impossible, return NULL. Say 'avl_iterator_new(t, ini)' with 'ini==AVL_ITERATOR_INI_PRE' or 'ini==AVL_ITERATOR_INI_POST' or say 'avl_iterator_new(t, AVL_ITERATOR_INI_INTREE, item_pointer)' to set the iterator's current position via 'avl_iterator_seek(item_pointer,the_iterator)'. In the latter case, the iterator is flagged as pre-iterator if the item is not found. */ avl_iterator avl_iterator_new(avl_tree t, avl_ini_t ini, ...); /* --- ITERATOR --- KILL Cleanup: free the iterator struct. */ void avl_iterator_kill(avl_iterator iter); /* --- ITERATOR --- SUCCESSOR Get next item pointer in iterator or NULL. 'iter' is flagged as post-iterator if it's in post-position. */ void *avl_iterator_next(avl_iterator iter); /* --- ITERATOR --- PREDECESSOR Get next item pointer in iterator or NULL. 'iter' is flagged as pre-iterator if it's in pre-position. */ void *avl_iterator_prev(avl_iterator iter); /* --- ITERATOR --- DELETION Remove item at current position of iterator 'iter' from its tree, if there is one. Current position is set to next item or iterator is flagged as post-iterator. */ avl_code_t avl_iterator_del(avl_iterator iter, void **backup); /* --- LOAD --- More general version of avl_slice */ avl_tree avl_xload( avl_itersource src, void **pres, avl_size_t len, avl_config conf, void *param ); # endif luametatex-2.10.08/source/libraries/avl/readme.txt000066400000000000000000000016761442250314700221030ustar00rootroot00000000000000Remark Usage of the avl library (irr) showed up in pdfTeX when Hartmut added some functionality. It therefore also ended up in being used in LuaTeX. 
The two files avl.c and avl.h come from pyavl and are in the public domain: license: this package, pyavl, is donated to the public domain author : Richard McGraw email : dasnar@fastmail.fm In the pdfTeX/LuaTeX the files were just there but I could track them down to https://github.com/pankajp/pyavl where the dates indicate that nothing has changed in the meantime. In the copies used here I added the information mentioned above. The files had some (experimental) code as well as optional testing on NULL values. As I don't expect updates (the code has been okay for quite a while) I made the tests mandate and removed the experimental code. We can strip this library and save some 10K on the binary because we don't need that much of it. That might happen at some point. Hans Hagen luametatex-2.10.08/source/libraries/decnumber/000077500000000000000000000000001442250314700212555ustar00rootroot00000000000000luametatex-2.10.08/source/libraries/decnumber/decContext.c000066400000000000000000000570561442250314700235360ustar00rootroot00000000000000/* ------------------------------------------------------------------ */ /* Decimal Context module */ /* ------------------------------------------------------------------ */ /* Copyright (c) IBM Corporation, 2000, 2009. All rights reserved. */ /* */ /* This software is made available under the terms of the */ /* ICU License -- ICU 1.8.1 and later. */ /* */ /* The description and User's Guide ("The decNumber C Library") for */ /* this software is called decNumber.pdf. This document is */ /* available, together with arithmetic and format specifications, */ /* testcases, and Web links, on the General Decimal Arithmetic page. 
*/ /* */ /* Please send comments, suggestions, and corrections to the author: */ /* mfc@uk.ibm.com */ /* Mike Cowlishaw, IBM Fellow */ /* IBM UK, PO Box 31, Birmingham Road, Warwick CV34 5JL, UK */ /* ------------------------------------------------------------------ */ /* This module comprises the routines for handling arithmetic */ /* context structures. */ /* ------------------------------------------------------------------ */ #include // for strcmp #include // for printf if DECCHECK #include "decContext.h" // context and base types #include "decNumberLocal.h" // decNumber local types, etc. /* compile-time endian tester [assumes sizeof(Int)>1] */ static const Int mfcone=1; // constant 1 static const Flag *mfctop=(const Flag *)&mfcone; // -> top byte #define LITEND *mfctop // named flag; 1=little-endian /* ------------------------------------------------------------------ */ /* round-for-reround digits */ /* ------------------------------------------------------------------ */ const uByte DECSTICKYTAB[10]={1,1,2,3,4,6,6,7,8,9}; /* used if sticky */ /* ------------------------------------------------------------------ */ /* Powers of ten (powers[n]==10**n, 0<=n<=9) */ /* ------------------------------------------------------------------ */ const uInt DECPOWERS[10]={1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000}; /* ------------------------------------------------------------------ */ /* decContextClearStatus -- clear bits in current status */ /* */ /* context is the context structure to be queried */ /* mask indicates the bits to be cleared (the status bit that */ /* corresponds to each 1 bit in the mask is cleared) */ /* returns context */ /* */ /* No error is possible. 
*/ /* ------------------------------------------------------------------ */ decContext *decContextClearStatus(decContext *context, uInt mask) { context->status&=~mask; return context; } // decContextClearStatus /* ------------------------------------------------------------------ */ /* decContextDefault -- initialize a context structure */ /* */ /* context is the structure to be initialized */ /* kind selects the required set of default values, one of: */ /* DEC_INIT_BASE -- select ANSI X3-274 defaults */ /* DEC_INIT_DECIMAL32 -- select IEEE 754 defaults, 32-bit */ /* DEC_INIT_DECIMAL64 -- select IEEE 754 defaults, 64-bit */ /* DEC_INIT_DECIMAL128 -- select IEEE 754 defaults, 128-bit */ /* For any other value a valid context is returned, but with */ /* Invalid_operation set in the status field. */ /* returns a context structure with the appropriate initial values. */ /* ------------------------------------------------------------------ */ decContext * decContextDefault(decContext *context, Int kind) { // set defaults... context->digits=9; // 9 digits context->emax=DEC_MAX_EMAX; // 9-digit exponents context->emin=DEC_MIN_EMIN; // .. 
balanced context->round=DEC_ROUND_HALF_UP; // 0.5 rises context->traps=DEC_Errors; // all but informational context->status=0; // cleared context->clamp=0; // no clamping #if DECSUBSET context->extended=0; // cleared #endif switch (kind) { case DEC_INIT_BASE: // [use defaults] break; case DEC_INIT_DECIMAL32: context->digits=7; // digits context->emax=96; // Emax context->emin=-95; // Emin context->round=DEC_ROUND_HALF_EVEN; // 0.5 to nearest even context->traps=0; // no traps set context->clamp=1; // clamp exponents #if DECSUBSET context->extended=1; // set #endif break; case DEC_INIT_DECIMAL64: context->digits=16; // digits context->emax=384; // Emax context->emin=-383; // Emin context->round=DEC_ROUND_HALF_EVEN; // 0.5 to nearest even context->traps=0; // no traps set context->clamp=1; // clamp exponents #if DECSUBSET context->extended=1; // set #endif break; case DEC_INIT_DECIMAL128: context->digits=34; // digits context->emax=6144; // Emax context->emin=-6143; // Emin context->round=DEC_ROUND_HALF_EVEN; // 0.5 to nearest even context->traps=0; // no traps set context->clamp=1; // clamp exponents #if DECSUBSET context->extended=1; // set #endif break; default: // invalid Kind // use defaults, and .. decContextSetStatus(context, DEC_Invalid_operation); // trap } return context;} // decContextDefault /* ------------------------------------------------------------------ */ /* decContextGetRounding -- return current rounding mode */ /* */ /* context is the context structure to be queried */ /* returns the rounding mode */ /* */ /* No error is possible. */ /* ------------------------------------------------------------------ */ enum rounding decContextGetRounding(decContext *context) { return context->round; } // decContextGetRounding /* ------------------------------------------------------------------ */ /* decContextGetStatus -- return current status */ /* */ /* context is the context structure to be queried */ /* returns status */ /* */ /* No error is possible. 
*/ /* ------------------------------------------------------------------ */ uInt decContextGetStatus(decContext *context) { return context->status; } // decContextGetStatus /* ------------------------------------------------------------------ */ /* decContextRestoreStatus -- restore bits in current status */ /* */ /* context is the context structure to be updated */ /* newstatus is the source for the bits to be restored */ /* mask indicates the bits to be restored (the status bit that */ /* corresponds to each 1 bit in the mask is set to the value of */ /* the correspnding bit in newstatus) */ /* returns context */ /* */ /* No error is possible. */ /* ------------------------------------------------------------------ */ decContext *decContextRestoreStatus(decContext *context, uInt newstatus, uInt mask) { context->status&=~mask; // clear the selected bits context->status|=(mask&newstatus); // or in the new bits return context; } // decContextRestoreStatus /* ------------------------------------------------------------------ */ /* decContextSaveStatus -- save bits in current status */ /* */ /* context is the context structure to be queried */ /* mask indicates the bits to be saved (the status bits that */ /* correspond to each 1 bit in the mask are saved) */ /* returns the AND of the mask and the current status */ /* */ /* No error is possible. */ /* ------------------------------------------------------------------ */ uInt decContextSaveStatus(decContext *context, uInt mask) { return context->status&mask; } // decContextSaveStatus /* ------------------------------------------------------------------ */ /* decContextSetRounding -- set current rounding mode */ /* */ /* context is the context structure to be updated */ /* newround is the value which will replace the current mode */ /* returns context */ /* */ /* No error is possible. 
*/ /* ------------------------------------------------------------------ */ decContext *decContextSetRounding(decContext *context, enum rounding newround) { context->round=newround; return context; } // decContextSetRounding /* ------------------------------------------------------------------ */ /* decContextSetStatus -- set status and raise trap if appropriate */ /* */ /* context is the context structure to be updated */ /* status is the DEC_ exception code */ /* returns the context structure */ /* */ /* Control may never return from this routine, if there is a signal */ /* handler and it takes a long jump. */ /* ------------------------------------------------------------------ */ decContext * decContextSetStatus(decContext *context, uInt status) { context->status|=status; if (status & context->traps) raise(SIGFPE); return context;} // decContextSetStatus /* ------------------------------------------------------------------ */ /* decContextSetStatusFromString -- set status from a string + trap */ /* */ /* context is the context structure to be updated */ /* string is a string exactly equal to one that might be returned */ /* by decContextStatusToString */ /* */ /* The status bit corresponding to the string is set, and a trap */ /* is raised if appropriate. */ /* */ /* returns the context structure, unless the string is equal to */ /* DEC_Condition_MU or is not recognized. In these cases NULL is */ /* returned. 
*/ /* ------------------------------------------------------------------ */ decContext * decContextSetStatusFromString(decContext *context, const char *string) { if (strcmp(string, DEC_Condition_CS)==0) return decContextSetStatus(context, DEC_Conversion_syntax); if (strcmp(string, DEC_Condition_DZ)==0) return decContextSetStatus(context, DEC_Division_by_zero); if (strcmp(string, DEC_Condition_DI)==0) return decContextSetStatus(context, DEC_Division_impossible); if (strcmp(string, DEC_Condition_DU)==0) return decContextSetStatus(context, DEC_Division_undefined); if (strcmp(string, DEC_Condition_IE)==0) return decContextSetStatus(context, DEC_Inexact); if (strcmp(string, DEC_Condition_IS)==0) return decContextSetStatus(context, DEC_Insufficient_storage); if (strcmp(string, DEC_Condition_IC)==0) return decContextSetStatus(context, DEC_Invalid_context); if (strcmp(string, DEC_Condition_IO)==0) return decContextSetStatus(context, DEC_Invalid_operation); #if DECSUBSET if (strcmp(string, DEC_Condition_LD)==0) return decContextSetStatus(context, DEC_Lost_digits); #endif if (strcmp(string, DEC_Condition_OV)==0) return decContextSetStatus(context, DEC_Overflow); if (strcmp(string, DEC_Condition_PA)==0) return decContextSetStatus(context, DEC_Clamped); if (strcmp(string, DEC_Condition_RO)==0) return decContextSetStatus(context, DEC_Rounded); if (strcmp(string, DEC_Condition_SU)==0) return decContextSetStatus(context, DEC_Subnormal); if (strcmp(string, DEC_Condition_UN)==0) return decContextSetStatus(context, DEC_Underflow); if (strcmp(string, DEC_Condition_ZE)==0) return context; return NULL; // Multiple status, or unknown } // decContextSetStatusFromString /* ------------------------------------------------------------------ */ /* decContextSetStatusFromStringQuiet -- set status from a string */ /* */ /* context is the context structure to be updated */ /* string is a string exactly equal to one that might be returned */ /* by decContextStatusToString */ /* */ /* The status 
bit corresponding to the string is set; no trap is */ /* raised. */ /* */ /* returns the context structure, unless the string is equal to */ /* DEC_Condition_MU or is not recognized. In these cases NULL is */ /* returned. */ /* ------------------------------------------------------------------ */ decContext * decContextSetStatusFromStringQuiet(decContext *context, const char *string) { if (strcmp(string, DEC_Condition_CS)==0) return decContextSetStatusQuiet(context, DEC_Conversion_syntax); if (strcmp(string, DEC_Condition_DZ)==0) return decContextSetStatusQuiet(context, DEC_Division_by_zero); if (strcmp(string, DEC_Condition_DI)==0) return decContextSetStatusQuiet(context, DEC_Division_impossible); if (strcmp(string, DEC_Condition_DU)==0) return decContextSetStatusQuiet(context, DEC_Division_undefined); if (strcmp(string, DEC_Condition_IE)==0) return decContextSetStatusQuiet(context, DEC_Inexact); if (strcmp(string, DEC_Condition_IS)==0) return decContextSetStatusQuiet(context, DEC_Insufficient_storage); if (strcmp(string, DEC_Condition_IC)==0) return decContextSetStatusQuiet(context, DEC_Invalid_context); if (strcmp(string, DEC_Condition_IO)==0) return decContextSetStatusQuiet(context, DEC_Invalid_operation); #if DECSUBSET if (strcmp(string, DEC_Condition_LD)==0) return decContextSetStatusQuiet(context, DEC_Lost_digits); #endif if (strcmp(string, DEC_Condition_OV)==0) return decContextSetStatusQuiet(context, DEC_Overflow); if (strcmp(string, DEC_Condition_PA)==0) return decContextSetStatusQuiet(context, DEC_Clamped); if (strcmp(string, DEC_Condition_RO)==0) return decContextSetStatusQuiet(context, DEC_Rounded); if (strcmp(string, DEC_Condition_SU)==0) return decContextSetStatusQuiet(context, DEC_Subnormal); if (strcmp(string, DEC_Condition_UN)==0) return decContextSetStatusQuiet(context, DEC_Underflow); if (strcmp(string, DEC_Condition_ZE)==0) return context; return NULL; // Multiple status, or unknown } // decContextSetStatusFromStringQuiet /* 
------------------------------------------------------------------ */ /* decContextSetStatusQuiet -- set status without trap */ /* */ /* context is the context structure to be updated */ /* status is the DEC_ exception code */ /* returns the context structure */ /* */ /* No error is possible. */ /* ------------------------------------------------------------------ */ decContext * decContextSetStatusQuiet(decContext *context, uInt status) { context->status|=status; return context;} // decContextSetStatusQuiet /* ------------------------------------------------------------------ */ /* decContextStatusToString -- convert status flags to a string */ /* */ /* context is a context with valid status field */ /* */ /* returns a constant string describing the condition. If multiple */ /* (or no) flags are set, a generic constant message is returned. */ /* ------------------------------------------------------------------ */ const char *decContextStatusToString(const decContext *context) { Int status=context->status; // test the five IEEE first, as some of the others are ambiguous when // DECEXTFLAG=0 if (status==DEC_Invalid_operation ) return DEC_Condition_IO; if (status==DEC_Division_by_zero ) return DEC_Condition_DZ; if (status==DEC_Overflow ) return DEC_Condition_OV; if (status==DEC_Underflow ) return DEC_Condition_UN; if (status==DEC_Inexact ) return DEC_Condition_IE; if (status==DEC_Division_impossible ) return DEC_Condition_DI; if (status==DEC_Division_undefined ) return DEC_Condition_DU; if (status==DEC_Rounded ) return DEC_Condition_RO; if (status==DEC_Clamped ) return DEC_Condition_PA; if (status==DEC_Subnormal ) return DEC_Condition_SU; if (status==DEC_Conversion_syntax ) return DEC_Condition_CS; if (status==DEC_Insufficient_storage ) return DEC_Condition_IS; if (status==DEC_Invalid_context ) return DEC_Condition_IC; #if DECSUBSET if (status==DEC_Lost_digits ) return DEC_Condition_LD; #endif if (status==0 ) return DEC_Condition_ZE; return DEC_Condition_MU; // 
Multiple errors } // decContextStatusToString /* ------------------------------------------------------------------ */ /* decContextTestEndian -- test whether DECLITEND is set correctly */ /* */ /* quiet is 1 to suppress message; 0 otherwise */ /* returns 0 if DECLITEND is correct */ /* 1 if DECLITEND is incorrect and should be 1 */ /* -1 if DECLITEND is incorrect and should be 0 */ /* */ /* A message is displayed if the return value is not 0 and quiet==0. */ /* */ /* No error is possible. */ /* ------------------------------------------------------------------ */ Int decContextTestEndian(Flag quiet) { Int res=0; // optimist uInt dle=(uInt)DECLITEND; // unsign if (dle>1) dle=1; // ensure 0 or 1 if (LITEND!=DECLITEND) { if (!quiet) { // always refer to this #if DECPRINT const char *adj; if (LITEND) adj="little"; else adj="big"; printf("Warning: DECLITEND is set to %d, but this computer appears to be %s-endian\n", DECLITEND, adj); #endif } res=(Int)LITEND-dle; } return res; } // decContextTestEndian /* ------------------------------------------------------------------ */ /* decContextTestSavedStatus -- test bits in saved status */ /* */ /* oldstatus is the status word to be tested */ /* mask indicates the bits to be tested (the oldstatus bits that */ /* correspond to each 1 bit in the mask are tested) */ /* returns 1 if any of the tested bits are 1, or 0 otherwise */ /* */ /* No error is possible. 
*/ /* ------------------------------------------------------------------ */ uInt decContextTestSavedStatus(uInt oldstatus, uInt mask) { return (oldstatus&mask)!=0; } // decContextTestSavedStatus /* ------------------------------------------------------------------ */ /* decContextTestStatus -- test bits in current status */ /* */ /* context is the context structure to be updated */ /* mask indicates the bits to be tested (the status bits that */ /* correspond to each 1 bit in the mask are tested) */ /* returns 1 if any of the tested bits are 1, or 0 otherwise */ /* */ /* No error is possible. */ /* ------------------------------------------------------------------ */ uInt decContextTestStatus(decContext *context, uInt mask) { return (context->status&mask)!=0; } // decContextTestStatus /* ------------------------------------------------------------------ */ /* decContextZeroStatus -- clear all status bits */ /* */ /* context is the context structure to be updated */ /* returns context */ /* */ /* No error is possible. */ /* ------------------------------------------------------------------ */ decContext *decContextZeroStatus(decContext *context) { context->status=0; return context; } // decContextZeroStatus luametatex-2.10.08/source/libraries/decnumber/decContext.h000066400000000000000000000304411442250314700235300ustar00rootroot00000000000000/* ------------------------------------------------------------------ */ /* Decimal Context module header */ /* ------------------------------------------------------------------ */ /* Copyright (c) IBM Corporation, 2000, 2010. All rights reserved. */ /* */ /* This software is made available under the terms of the */ /* ICU License -- ICU 1.8.1 and later. */ /* */ /* The description and User's Guide ("The decNumber C Library") for */ /* this software is called decNumber.pdf. 
This document is */ /* available, together with arithmetic and format specifications, */ /* testcases, and Web links, on the General Decimal Arithmetic page. */ /* */ /* Please send comments, suggestions, and corrections to the author: */ /* mfc@uk.ibm.com */ /* Mike Cowlishaw, IBM Fellow */ /* IBM UK, PO Box 31, Birmingham Road, Warwick CV34 5JL, UK */ /* ------------------------------------------------------------------ */ /* */ /* Context variables must always have valid values: */ /* */ /* status -- [any bits may be cleared, but not set, by user] */ /* round -- must be one of the enumerated rounding modes */ /* */ /* The following variables are implied for fixed size formats (i.e., */ /* they are ignored) but should still be set correctly in case used */ /* with decNumber functions: */ /* */ /* clamp -- must be either 0 or 1 */ /* digits -- must be in the range 1 through 999999999 */ /* emax -- must be in the range 0 through 999999999 */ /* emin -- must be in the range 0 through -999999999 */ /* extended -- must be either 0 or 1 [present only if DECSUBSET] */ /* traps -- only defined bits may be set */ /* */ /* ------------------------------------------------------------------ */ #if !defined(DECCONTEXT) #define DECCONTEXT #define DECCNAME "decContext" /* Short name */ #define DECCFULLNAME "Decimal Context Descriptor" /* Verbose name */ #define DECCAUTHOR "Mike Cowlishaw" /* Who to blame */ #if !defined(int32_t) #include /* C99 standard integers */ #endif #include /* for printf, etc. 
*/ #include /* for traps */ /* Extended flags setting -- set this to 0 to use only IEEE flags */ #if !defined(DECEXTFLAG) #define DECEXTFLAG 1 /* 1=enable extended flags */ #endif /* Conditional code flag -- set this to 0 for best performance */ #if !defined(DECSUBSET) #define DECSUBSET 0 /* 1=enable subset arithmetic */ #endif /* Context for operations, with associated constants */ enum rounding { DEC_ROUND_CEILING, /* round towards +infinity */ DEC_ROUND_UP, /* round away from 0 */ DEC_ROUND_HALF_UP, /* 0.5 rounds up */ DEC_ROUND_HALF_EVEN, /* 0.5 rounds to nearest even */ DEC_ROUND_HALF_DOWN, /* 0.5 rounds down */ DEC_ROUND_DOWN, /* round towards 0 (truncate) */ DEC_ROUND_FLOOR, /* round towards -infinity */ DEC_ROUND_05UP, /* round for reround */ DEC_ROUND_MAX /* enum must be less than this */ }; #define DEC_ROUND_DEFAULT DEC_ROUND_HALF_EVEN; typedef struct decContext { int32_t digits; /* working precision */ int32_t emax; /* maximum positive exponent */ int32_t emin; /* minimum negative exponent */ enum rounding round; /* rounding mode */ uint32_t traps; /* trap-enabler flags */ uint32_t status; /* status flags */ uint8_t clamp; /* flag: apply IEEE exponent clamp */ #if DECSUBSET uint8_t extended; /* flag: special-values allowed */ #endif } decContext; /* Maxima and Minima for context settings */ #define DEC_MAX_DIGITS 999999999 #define DEC_MIN_DIGITS 1 #define DEC_MAX_EMAX 999999999 #define DEC_MIN_EMAX 0 #define DEC_MAX_EMIN 0 #define DEC_MIN_EMIN -999999999 #define DEC_MAX_MATH 999999 /* max emax, etc., for math funcs. */ /* Classifications for decimal numbers, aligned with 754 (note that */ /* 'normal' and 'subnormal' are meaningful only with a decContext */ /* or a fixed size format). 
*/ enum decClass { DEC_CLASS_SNAN, DEC_CLASS_QNAN, DEC_CLASS_NEG_INF, DEC_CLASS_NEG_NORMAL, DEC_CLASS_NEG_SUBNORMAL, DEC_CLASS_NEG_ZERO, DEC_CLASS_POS_ZERO, DEC_CLASS_POS_SUBNORMAL, DEC_CLASS_POS_NORMAL, DEC_CLASS_POS_INF }; /* Strings for the decClasses */ #define DEC_ClassString_SN "sNaN" #define DEC_ClassString_QN "NaN" #define DEC_ClassString_NI "-Infinity" #define DEC_ClassString_NN "-Normal" #define DEC_ClassString_NS "-Subnormal" #define DEC_ClassString_NZ "-Zero" #define DEC_ClassString_PZ "+Zero" #define DEC_ClassString_PS "+Subnormal" #define DEC_ClassString_PN "+Normal" #define DEC_ClassString_PI "+Infinity" #define DEC_ClassString_UN "Invalid" /* Trap-enabler and Status flags (exceptional conditions), and */ /* their names. The top byte is reserved for internal use */ #if DECEXTFLAG /* Extended flags */ #define DEC_Conversion_syntax 0x00000001 #define DEC_Division_by_zero 0x00000002 #define DEC_Division_impossible 0x00000004 #define DEC_Division_undefined 0x00000008 #define DEC_Insufficient_storage 0x00000010 /* [when malloc fails] */ #define DEC_Inexact 0x00000020 #define DEC_Invalid_context 0x00000040 #define DEC_Invalid_operation 0x00000080 #if DECSUBSET #define DEC_Lost_digits 0x00000100 #endif #define DEC_Overflow 0x00000200 #define DEC_Clamped 0x00000400 #define DEC_Rounded 0x00000800 #define DEC_Subnormal 0x00001000 #define DEC_Underflow 0x00002000 #else /* IEEE flags only */ #define DEC_Conversion_syntax 0x00000010 #define DEC_Division_by_zero 0x00000002 #define DEC_Division_impossible 0x00000010 #define DEC_Division_undefined 0x00000010 #define DEC_Insufficient_storage 0x00000010 /* [when malloc fails] */ #define DEC_Inexact 0x00000001 #define DEC_Invalid_context 0x00000010 #define DEC_Invalid_operation 0x00000010 #if DECSUBSET #define DEC_Lost_digits 0x00000000 #endif #define DEC_Overflow 0x00000008 #define DEC_Clamped 0x00000000 #define DEC_Rounded 0x00000000 #define DEC_Subnormal 0x00000000 #define DEC_Underflow 0x00000004 #endif /* IEEE 754 
groupings for the flags */ /* [DEC_Clamped, DEC_Lost_digits, DEC_Rounded, and DEC_Subnormal */ /* are not in IEEE 754] */ #define DEC_IEEE_754_Division_by_zero (DEC_Division_by_zero) #if DECSUBSET #define DEC_IEEE_754_Inexact (DEC_Inexact | DEC_Lost_digits) #else #define DEC_IEEE_754_Inexact (DEC_Inexact) #endif #define DEC_IEEE_754_Invalid_operation (DEC_Conversion_syntax | \ DEC_Division_impossible | \ DEC_Division_undefined | \ DEC_Insufficient_storage | \ DEC_Invalid_context | \ DEC_Invalid_operation) #define DEC_IEEE_754_Overflow (DEC_Overflow) #define DEC_IEEE_754_Underflow (DEC_Underflow) /* flags which are normally errors (result is qNaN, infinite, or 0) */ #define DEC_Errors (DEC_IEEE_754_Division_by_zero | \ DEC_IEEE_754_Invalid_operation | \ DEC_IEEE_754_Overflow | DEC_IEEE_754_Underflow) /* flags which cause a result to become qNaN */ #define DEC_NaNs DEC_IEEE_754_Invalid_operation /* flags which are normally for information only (finite results) */ #if DECSUBSET #define DEC_Information (DEC_Clamped | DEC_Rounded | DEC_Inexact \ | DEC_Lost_digits) #else #define DEC_Information (DEC_Clamped | DEC_Rounded | DEC_Inexact) #endif /* IEEE 854 names (for compatibility with older decNumber versions) */ #define DEC_IEEE_854_Division_by_zero DEC_IEEE_754_Division_by_zero #define DEC_IEEE_854_Inexact DEC_IEEE_754_Inexact #define DEC_IEEE_854_Invalid_operation DEC_IEEE_754_Invalid_operation #define DEC_IEEE_854_Overflow DEC_IEEE_754_Overflow #define DEC_IEEE_854_Underflow DEC_IEEE_754_Underflow /* Name strings for the exceptional conditions */ #define DEC_Condition_CS "Conversion syntax" #define DEC_Condition_DZ "Division by zero" #define DEC_Condition_DI "Division impossible" #define DEC_Condition_DU "Division undefined" #define DEC_Condition_IE "Inexact" #define DEC_Condition_IS "Insufficient storage" #define DEC_Condition_IC "Invalid context" #define DEC_Condition_IO "Invalid operation" #if DECSUBSET #define DEC_Condition_LD "Lost digits" #endif #define 
DEC_Condition_OV "Overflow" #define DEC_Condition_PA "Clamped" #define DEC_Condition_RO "Rounded" #define DEC_Condition_SU "Subnormal" #define DEC_Condition_UN "Underflow" #define DEC_Condition_ZE "No status" #define DEC_Condition_MU "Multiple status" #define DEC_Condition_Length 21 /* length of the longest string, */ /* including terminator */ /* Initialization descriptors, used by decContextDefault */ #define DEC_INIT_BASE 0 #define DEC_INIT_DECIMAL32 32 #define DEC_INIT_DECIMAL64 64 #define DEC_INIT_DECIMAL128 128 /* Synonyms */ #define DEC_INIT_DECSINGLE DEC_INIT_DECIMAL32 #define DEC_INIT_DECDOUBLE DEC_INIT_DECIMAL64 #define DEC_INIT_DECQUAD DEC_INIT_DECIMAL128 /* decContext routines */ extern decContext * decContextClearStatus(decContext *, uint32_t); extern decContext * decContextDefault(decContext *, int32_t); extern enum rounding decContextGetRounding(decContext *); extern uint32_t decContextGetStatus(decContext *); extern decContext * decContextRestoreStatus(decContext *, uint32_t, uint32_t); extern uint32_t decContextSaveStatus(decContext *, uint32_t); extern decContext * decContextSetRounding(decContext *, enum rounding); extern decContext * decContextSetStatus(decContext *, uint32_t); extern decContext * decContextSetStatusFromString(decContext *, const char *); extern decContext * decContextSetStatusFromStringQuiet(decContext *, const char *); extern decContext * decContextSetStatusQuiet(decContext *, uint32_t); extern const char * decContextStatusToString(const decContext *); extern int32_t decContextTestEndian(uint8_t); extern uint32_t decContextTestSavedStatus(uint32_t, uint32_t); extern uint32_t decContextTestStatus(decContext *, uint32_t); extern decContext * decContextZeroStatus(decContext *); #endif luametatex-2.10.08/source/libraries/decnumber/decNumber.c000066400000000000000000013700161442250314700233350ustar00rootroot00000000000000/* ------------------------------------------------------------------ */ /* Decimal Number arithmetic module */ 
/* ------------------------------------------------------------------ */ /* Copyright (c) IBM Corporation, 2000, 2009. All rights reserved. */ /* */ /* This software is made available under the terms of the */ /* ICU License -- ICU 1.8.1 and later. */ /* */ /* The description and User's Guide ("The decNumber C Library") for */ /* this software is called decNumber.pdf. This document is */ /* available, together with arithmetic and format specifications, */ /* testcases, and Web links, on the General Decimal Arithmetic page. */ /* */ /* Please send comments, suggestions, and corrections to the author: */ /* mfc@uk.ibm.com */ /* Mike Cowlishaw, IBM Fellow */ /* IBM UK, PO Box 31, Birmingham Road, Warwick CV34 5JL, UK */ /* ------------------------------------------------------------------ */ /* This module comprises the routines for arbitrary-precision General */ /* Decimal Arithmetic as defined in the specification which may be */ /* found on the General Decimal Arithmetic pages. It implements both */ /* the full ('extended') arithmetic and the simpler ('subset') */ /* arithmetic. */ /* */ /* Usage notes: */ /* */ /* 1. This code is ANSI C89 except: */ /* */ /* a) C99 line comments (double forward slash) are used. (Most C */ /* compilers accept these. If yours does not, a simple script */ /* can be used to convert them to ANSI C comments.) */ /* */ /* b) Types from C99 stdint.h are used. If you do not have this */ /* header file, see the User's Guide section of the decNumber */ /* documentation; this lists the necessary definitions. */ /* */ /* c) If DECDPUN>4 or DECUSE64=1, the C99 64-bit int64_t and */ /* uint64_t types may be used. To avoid these, set DECUSE64=0 */ /* and DECDPUN<=4 (see documentation). */ /* */ /* The code also conforms to C99 restrictions; in particular, */ /* strict aliasing rules are observed. */ /* */ /* 2. 
The decNumber format which this library uses is optimized for */ /* efficient processing of relatively short numbers; in particular */ /* it allows the use of fixed sized structures and minimizes copy */ /* and move operations. It does, however, support arbitrary */ /* precision (up to 999,999,999 digits) and arbitrary exponent */ /* range (Emax in the range 0 through 999,999,999 and Emin in the */ /* range -999,999,999 through 0). Mathematical functions (for */ /* example decNumberExp) as identified below are restricted more */ /* tightly: digits, emax, and -emin in the context must be <= */ /* DEC_MAX_MATH (999999), and their operand(s) must be within */ /* these bounds. */ /* */ /* 3. Logical functions are further restricted; their operands must */ /* be finite, positive, have an exponent of zero, and all digits */ /* must be either 0 or 1. The result will only contain digits */ /* which are 0 or 1 (and will have exponent=0 and a sign of 0). */ /* */ /* 4. Operands to operator functions are never modified unless they */ /* are also specified to be the result number (which is always */ /* permitted). Other than that case, operands must not overlap. */ /* */ /* 5. Error handling: the type of the error is ORed into the status */ /* flags in the current context (decContext structure). The */ /* SIGFPE signal is then raised if the corresponding trap-enabler */ /* flag in the decContext is set (is 1). */ /* */ /* It is the responsibility of the caller to clear the status */ /* flags as required. */ /* */ /* The result of any routine which returns a number will always */ /* be a valid number (which may be a special value, such as an */ /* Infinity or NaN). */ /* */ /* 6. The decNumber format is not an exchangeable concrete */ /* representation as it comprises fields which may be machine- */ /* dependent (packed or unpacked, or special length, for example). 
*/ /* Canonical conversions to and from strings are provided; other */ /* conversions are available in separate modules. */ /* */ /* 7. Normally, input operands are assumed to be valid. Set DECCHECK */ /* to 1 for extended operand checking (including NULL operands). */ /* Results are undefined if a badly-formed structure (or a NULL */ /* pointer to a structure) is provided, though with DECCHECK */ /* enabled the operator routines are protected against exceptions. */ /* (Except if the result pointer is NULL, which is unrecoverable.) */ /* */ /* However, the routines will never cause exceptions if they are */ /* given well-formed operands, even if the value of the operands */ /* is inappropriate for the operation and DECCHECK is not set. */ /* (Except for SIGFPE, as and where documented.) */ /* */ /* 8. Subset arithmetic is available only if DECSUBSET is set to 1. */ /* ------------------------------------------------------------------ */ /* Implementation notes for maintenance of this module: */ /* */ /* 1. Storage leak protection: Routines which use malloc are not */ /* permitted to use return for fastpath or error exits (i.e., */ /* they follow strict structured programming conventions). */ /* Instead they have a do{}while(0); construct surrounding the */ /* code which is protected -- break may be used to exit this. */ /* Other routines can safely use the return statement inline. */ /* */ /* Storage leak accounting can be enabled using DECALLOC. */ /* */ /* 2. All loops use the for(;;) construct. Any do construct does */ /* not loop; it is for allocation protection as just described. */ /* */ /* 3. Setting status in the context must always be the very last */ /* action in a routine, as non-0 status may raise a trap and hence */ /* the call to set status may not return (if the handler uses long */ /* jump). Therefore all cleanup must be done first. 
In general, */ /* to achieve this status is accumulated and is only applied just */ /* before return by calling decContextSetStatus (via decStatus). */ /* */ /* Routines which allocate storage cannot, in general, use the */ /* 'top level' routines which could cause a non-returning */ /* transfer of control. The decXxxxOp routines are safe (do not */ /* call decStatus even if traps are set in the context) and should */ /* be used instead (they are also a little faster). */ /* */ /* 4. Exponent checking is minimized by allowing the exponent to */ /* grow outside its limits during calculations, provided that */ /* the decFinalize function is called later. Multiplication and */ /* division, and intermediate calculations in exponentiation, */ /* require more careful checks because of the risk of 31-bit */ /* overflow (the most negative valid exponent is -1999999997, for */ /* a 999999999-digit number with adjusted exponent of -999999999). */ /* */ /* 5. Rounding is deferred until finalization of results, with any */ /* 'off to the right' data being represented as a single digit */ /* residue (in the range -1 through 9). This avoids any double- */ /* rounding when more than one shortening takes place (for */ /* example, when a result is subnormal). */ /* */ /* 6. The digits count is allowed to rise to a multiple of DECDPUN */ /* during many operations, so whole Units are handled and exact */ /* accounting of digits is not needed. The correct digits value */ /* is found by decGetDigits, which accounts for leading zeros. */ /* This must be called before any rounding if the number of digits */ /* is not known exactly. */ /* */ /* 7. The multiply-by-reciprocal 'trick' is used for partitioning */ /* numbers up to four digits, using appropriate constants. 
This */ /* is not useful for longer numbers because overflow of 32 bits */ /* would lead to 4 multiplies, which is almost as expensive as */ /* a divide (unless a floating-point or 64-bit multiply is */ /* assumed to be available). */ /* */ /* 8. Unusual abbreviations that may be used in the commentary: */ /* lhs -- left hand side (operand, of an operation) */ /* lsd -- least significant digit (of coefficient) */ /* lsu -- least significant Unit (of coefficient) */ /* msd -- most significant digit (of coefficient) */ /* msi -- most significant item (in an array) */ /* msu -- most significant Unit (of coefficient) */ /* rhs -- right hand side (operand, of an operation) */ /* +ve -- positive */ /* -ve -- negative */ /* ** -- raise to the power */ /* ------------------------------------------------------------------ */ #include // for malloc, free, etc. #include // for printf [if needed] #include // for strcpy #include // for lower #include "decNumber.h" // base number library #include "decNumberLocal.h" // decNumber local types, etc. /* Constants */ // Public lookup table used by the D2U macro const uByte d2utable[DECMAXD2U+1]=D2UTABLE; #define DECVERB 1 // set to 1 for verbose DECCHECK #define powers DECPOWERS // old internal name // Local constants #define DIVIDE 0x80 // Divide operators #define REMAINDER 0x40 // .. #define DIVIDEINT 0x20 // .. #define REMNEAR 0x10 // .. #define COMPARE 0x01 // Compare operators #define COMPMAX 0x02 // .. #define COMPMIN 0x03 // .. #define COMPTOTAL 0x04 // .. #define COMPNAN 0x05 // .. [NaN processing] #define COMPSIG 0x06 // .. [signaling COMPARE] #define COMPMAXMAG 0x07 // .. #define COMPMINMAG 0x08 // .. 
/* Local constants and error/status indicators */
#define DEC_sNaN 0x40000000             // local status: sNaN signal
#define BADINT (Int)0x80000000          // most-negative Int; error indicator
// Next two indicate an integer >= 10**6, and its parity (bottom bit)
#define BIGEVEN (Int)0x80000002
#define BIGODD  (Int)0x80000003

static Unit uarrone[1]={1};             // Unit array of 1, used for incrementing

/* Granularity-dependent code */
// DECDPUN is the number of decimal digits held per Unit; small units
// can use 32-bit arithmetic, larger ones need 64-bit extended types.
#if DECDPUN<=4
#define eInt Int                        // extended integer
#define ueInt uInt                      // unsigned extended integer
// Constant multipliers for divide-by-power-of five using reciprocal
// multiply, after removing powers of 2 by shifting, and final shift
// of 17 [we only need up to **4]
static const uInt multies[]={131073, 26215, 5243, 1049, 210};
// QUOT10 -- macro to return the quotient of unit u divided by 10**n
#define QUOT10(u, n) ((((uInt)(u)>>(n))*multies[n])>>17)
#else
// For DECDPUN>4 non-ANSI-89 64-bit types are needed.
#if !DECUSE64
#error decNumber.c: DECUSE64 must be 1 when DECDPUN>4
#endif
#define eInt Long                       // extended integer
#define ueInt uLong                     // unsigned extended integer
#endif

/* Local routines (forward declarations; definitions appear later in this file) */
static decNumber * decAddOp(decNumber *, const decNumber *, const decNumber *, decContext *, uByte, uInt *);
static Flag decBiStr(const char *, const char *, const char *);
static uInt decCheckMath(const decNumber *, decContext *, uInt *);
static void decApplyRound(decNumber *, decContext *, Int, uInt *);
static Int decCompare(const decNumber *lhs, const decNumber *rhs, Flag);
static decNumber * decCompareOp(decNumber *, const decNumber *, const decNumber *, decContext *, Flag, uInt *);
static void decCopyFit(decNumber *, const decNumber *, decContext *, Int *, uInt *);
static decNumber * decDecap(decNumber *, Int);
static decNumber * decDivideOp(decNumber *, const decNumber *, const decNumber *, decContext *, Flag, uInt *);
static decNumber * decExpOp(decNumber *, const decNumber *, decContext *, uInt *);
static void decFinalize(decNumber *, decContext *, Int *, uInt *);
static Int decGetDigits(Unit *, Int);
static Int decGetInt(const decNumber *);
static decNumber * decLnOp(decNumber *, const decNumber *, decContext *, uInt *);
static decNumber * decMultiplyOp(decNumber *, const decNumber *, const decNumber *, decContext *, uInt *);
static decNumber * decNaNs(decNumber *, const decNumber *, const decNumber *, decContext *, uInt *);
static decNumber * decQuantizeOp(decNumber *, const decNumber *, const decNumber *, decContext *, Flag, uInt *);
static void decReverse(Unit *, Unit *);
static void decSetCoeff(decNumber *, decContext *, const Unit *, Int, Int *, uInt *);
static void decSetMaxValue(decNumber *, decContext *);
static void decSetOverflow(decNumber *, decContext *, uInt *);
static void decSetSubnormal(decNumber *, decContext *, Int *, uInt *);
static Int decShiftToLeast(Unit *, Int, Int);
static Int decShiftToMost(Unit *, Int, Int);
static void decStatus(decNumber *, uInt, decContext *);
static void decToString(const decNumber *, char[], Flag);
static decNumber * decTrim(decNumber *, decContext *, Flag, Flag, Int *);
static Int decUnitAddSub(const Unit *, Int, const Unit *, Int, Int, Unit *, Int);
static Int decUnitCompare(const Unit *, Int, const Unit *, Int, Int);

#if !DECSUBSET
/* decFinish == decFinalize when no subset arithmetic needed */
#define decFinish(a,b,c,d) decFinalize(a,b,c,d)
#else
static void decFinish(decNumber *, decContext *, Int *, uInt *);
static decNumber * decRoundOperand(const decNumber *, decContext *, uInt *);
#endif

/* Local macros */
// masked special-values bits
#define SPECIALARG (rhs->bits & DECSPECIAL)
#define SPECIALARGS ((lhs->bits | rhs->bits) & DECSPECIAL)

/* Diagnostic macros, etc. */
#if DECALLOC
// Handle malloc/free accounting. If enabled, our accountable routines
// are used; otherwise the code just goes straight to the system malloc
// and free routines.
#define malloc(a) decMalloc(a)
#define free(a) decFree(a)
#define DECFENCE 0x5a                   // corruption detector
// 'Our' malloc and free:
static void *decMalloc(size_t);
static void decFree(void *);
uInt decAllocBytes=0;                   // count of bytes allocated
// Note that DECALLOC code only checks for storage buffer overflow.
// To check for memory leaks, the decAllocBytes variable must be
// checked to be 0 at appropriate times (e.g., after the test
// harness completes a set of tests). This checking may be unreliable
// if the testing is done in a multi-thread environment.
#endif

// luametatex adaptation: route all allocation through the engine's
// memory manager (note: this redefines malloc/free unconditionally,
// after any DECALLOC definitions above)
# include "../../utilities/auxmemory.h"
# define malloc lmt_memory_malloc
# define free lmt_memory_free

#if DECCHECK
// Optional checking routines. Enabling these means that decNumber
// and decContext operands to operator routines are checked for
// correctness. This roughly doubles the execution time of the
// fastest routines (and adds 600+ bytes), so should not normally be
// used in 'production'.
// decCheckInexact is used to check that inexact results have a full
// complement of digits (where appropriate -- this is not the case
// for Quantize, for example)
// Sentinel values used to mark unused operand slots in the checkers:
#define DECUNRESU ((decNumber *)(void *)0xffffffff)
#define DECUNUSED ((const decNumber *)(void *)0xffffffff)
#define DECUNCONT ((decContext *)(void *)(0xffffffff))
static Flag decCheckOperands(decNumber *, const decNumber *, const decNumber *, decContext *);
static Flag decCheckNumber(const decNumber *);
static void decCheckInexact(const decNumber *, decContext *);
#endif

#if DECTRACE || DECCHECK
// Optional trace/debugging routines (may or may not be used)
void decNumberShow(const decNumber *);  // displays the components of a number
static void decDumpAr(char, const Unit *, Int);
#endif

/* ================================================================== */
/* Conversions                                                        */
/* ================================================================== */

/* ------------------------------------------------------------------ */
/* from-int32 --
conversion from Int or uInt */ /* */ /* dn is the decNumber to receive the integer */ /* in or uin is the integer to be converted */ /* returns dn */ /* */ /* No error is possible. */ /* ------------------------------------------------------------------ */ decNumber * decNumberFromInt32(decNumber *dn, Int in) { uInt unsig; if (in>=0) unsig=in; else { // negative (possibly BADINT) if (in==BADINT) unsig=(uInt)1073741824*2; // special case else unsig=-in; // invert } // in is now positive decNumberFromUInt32(dn, unsig); if (in<0) dn->bits=DECNEG; // sign needed return dn; } // decNumberFromInt32 decNumber * decNumberFromUInt32(decNumber *dn, uInt uin) { Unit *up; // work pointer decNumberZero(dn); // clean if (uin==0) return dn; // [or decGetDigits bad call] for (up=dn->lsu; uin>0; up++) { *up=(Unit)(uin%(DECDPUNMAX+1)); uin=uin/(DECDPUNMAX+1); } dn->digits=decGetDigits(dn->lsu, up-dn->lsu); return dn; } // decNumberFromUInt32 /* ------------------------------------------------------------------ */ /* to-int32 -- conversion to Int or uInt */ /* */ /* dn is the decNumber to convert */ /* set is the context for reporting errors */ /* returns the converted decNumber, or 0 if Invalid is set */ /* */ /* Invalid is set if the decNumber does not have exponent==0 or if */ /* it is a NaN, Infinite, or out-of-range. */ /* ------------------------------------------------------------------ */ Int decNumberToInt32(const decNumber *dn, decContext *set) { #if DECCHECK if (decCheckOperands(DECUNRESU, DECUNUSED, dn, set)) return 0; #endif // special or too many digits, or bad exponent if (dn->bits&DECSPECIAL || dn->digits>10 || dn->exponent!=0) ; // bad else { // is a finite integer with 10 or fewer digits Int d; // work const Unit *up; // .. uInt hi=0, lo; // .. 
up=dn->lsu; // -> lsu lo=*up; // get 1 to 9 digits #if DECDPUN>1 // split to higher hi=lo/10; lo=lo%10; #endif up++; // collect remaining Units, if any, into hi for (d=DECDPUN; ddigits; up++, d+=DECDPUN) hi+=*up*powers[d-1]; // now low has the lsd, hi the remainder if (hi>214748364 || (hi==214748364 && lo>7)) { // out of range? // most-negative is a reprieve if (dn->bits&DECNEG && hi==214748364 && lo==8) return 0x80000000; // bad -- drop through } else { // in-range always Int i=X10(hi)+lo; if (dn->bits&DECNEG) return -i; return i; } } // integer decContextSetStatus(set, DEC_Invalid_operation); // [may not return] return 0; } // decNumberToInt32 uInt decNumberToUInt32(const decNumber *dn, decContext *set) { #if DECCHECK if (decCheckOperands(DECUNRESU, DECUNUSED, dn, set)) return 0; #endif // special or too many digits, or bad exponent, or negative (<0) if (dn->bits&DECSPECIAL || dn->digits>10 || dn->exponent!=0 || (dn->bits&DECNEG && !ISZERO(dn))); // bad else { // is a finite integer with 10 or fewer digits Int d; // work const Unit *up; // .. uInt hi=0, lo; // .. 
up=dn->lsu; // -> lsu lo=*up; // get 1 to 9 digits #if DECDPUN>1 // split to higher hi=lo/10; lo=lo%10; #endif up++; // collect remaining Units, if any, into hi for (d=DECDPUN; ddigits; up++, d+=DECDPUN) hi+=*up*powers[d-1]; // now low has the lsd, hi the remainder if (hi>429496729 || (hi==429496729 && lo>5)) ; // no reprieve possible else return X10(hi)+lo; } // integer decContextSetStatus(set, DEC_Invalid_operation); // [may not return] return 0; } // decNumberToUInt32 /* ------------------------------------------------------------------ */ /* to-scientific-string -- conversion to numeric string */ /* to-engineering-string -- conversion to numeric string */ /* */ /* decNumberToString(dn, string); */ /* decNumberToEngString(dn, string); */ /* */ /* dn is the decNumber to convert */ /* string is the string where the result will be laid out */ /* */ /* string must be at least dn->digits+14 characters long */ /* */ /* No error is possible, and no status can be set. */ /* ------------------------------------------------------------------ */ char * decNumberToString(const decNumber *dn, char *string){ decToString(dn, string, 0); return string; } // DecNumberToString char * decNumberToEngString(const decNumber *dn, char *string){ decToString(dn, string, 1); return string; } // DecNumberToEngString /* ------------------------------------------------------------------ */ /* to-number -- conversion from numeric string */ /* */ /* decNumberFromString -- convert string to decNumber */ /* dn -- the number structure to fill */ /* chars[] -- the string to convert ('\0' terminated) */ /* set -- the context used for processing any error, */ /* determining the maximum precision available */ /* (set.digits), determining the maximum and minimum */ /* exponent (set.emax and set.emin), determining if */ /* extended values are allowed, and checking the */ /* rounding mode if overflow occurs or rounding is */ /* needed. 
*/ /* */ /* The length of the coefficient and the size of the exponent are */ /* checked by this routine, so the correct error (Underflow or */ /* Overflow) can be reported or rounding applied, as necessary. */ /* */ /* If bad syntax is detected, the result will be a quiet NaN. */ /* ------------------------------------------------------------------ */ decNumber * decNumberFromString(decNumber *dn, const char chars[], decContext *set) { Int exponent=0; // working exponent [assume 0] uByte bits=0; // working flags [assume +ve] Unit *res; // where result will be built Unit resbuff[SD2U(DECBUFFER+9)];// local buffer in case need temporary // [+9 allows for ln() constants] Unit *allocres=NULL; // -> allocated result, iff allocated Int d=0; // count of digits found in decimal part const char *dotchar=NULL; // where dot was found const char *cfirst=chars; // -> first character of decimal part const char *last=NULL; // -> last digit of decimal part const char *c; // work Unit *up; // .. #if DECDPUN>1 Int cut, out; // .. #endif Int residue; // rounding residue uInt status=0; // error code #if DECCHECK if (decCheckOperands(DECUNRESU, DECUNUSED, DECUNUSED, set)) return decNumberZero(dn); #endif do { // status & malloc protection for (c=chars;; c++) { // -> input character if (*c>='0' && *c<='9') { // test for Arabic digit last=c; d++; // count of real digits continue; // still in decimal part } if (*c=='.' && dotchar==NULL) { // first '.' dotchar=c; // record offset into decimal part if (c==cfirst) cfirst++; // first digit must follow continue;} if (c==chars) { // first in string... if (*c=='-') { // valid - sign cfirst++; bits=DECNEG; continue;} if (*c=='+') { // valid + sign cfirst++; continue;} } // *c is not a digit, or a valid +, -, or '.' break; } // c if (last==NULL) { // no digits yet status=DEC_Conversion_syntax;// assume the worst if (*c=='\0') break; // and no more to come... 
#if DECSUBSET // if subset then infinities and NaNs are not allowed if (!set->extended) break; // hopeless #endif // Infinities and NaNs are possible, here if (dotchar!=NULL) break; // .. unless had a dot decNumberZero(dn); // be optimistic if (decBiStr(c, "infinity", "INFINITY") || decBiStr(c, "inf", "INF")) { dn->bits=bits | DECINF; status=0; // is OK break; // all done } // a NaN expected // 2003.09.10 NaNs are now permitted to have a sign dn->bits=bits | DECNAN; // assume simple NaN if (*c=='s' || *c=='S') { // looks like an sNaN c++; dn->bits=bits | DECSNAN; } if (*c!='n' && *c!='N') break; // check caseless "NaN" c++; if (*c!='a' && *c!='A') break; // .. c++; if (*c!='n' && *c!='N') break; // .. c++; // now either nothing, or nnnn payload, expected // -> start of integer and skip leading 0s [including plain 0] for (cfirst=c; *cfirst=='0';) cfirst++; if (*cfirst=='\0') { // "NaN" or "sNaN", maybe with all 0s status=0; // it's good break; // .. } // something other than 0s; setup last and d as usual [no dots] for (c=cfirst;; c++, d++) { if (*c<'0' || *c>'9') break; // test for Arabic digit last=c; } if (*c!='\0') break; // not all digits if (d>set->digits-1) { // [NB: payload in a decNumber can be full length unless // clamped, in which case can only be digits-1] if (set->clamp) break; if (d>set->digits) break; } // too many digits? // good; drop through to convert the integer to coefficient status=0; // syntax is OK bits=dn->bits; // for copy-back } // last==NULL else if (*c!='\0') { // more to process... 
// had some digits; exponent is only valid sequence now Flag nege; // 1=negative exponent const char *firstexp; // -> first significant exponent digit status=DEC_Conversion_syntax;// assume the worst if (*c!='e' && *c!='E') break; /* Found 'e' or 'E' -- now process explicit exponent */ // 1998.07.11: sign no longer required nege=0; c++; // to (possible) sign if (*c=='-') {nege=1; c++;} else if (*c=='+') c++; if (*c=='\0') break; for (; *c=='0' && *(c+1)!='\0';) c++; // strip insignificant zeros firstexp=c; // save exponent digit place for (; ;c++) { if (*c<'0' || *c>'9') break; // not a digit exponent=X10(exponent)+(Int)*c-(Int)'0'; } // c // if not now on a '\0', *c must not be a digit if (*c!='\0') break; // (this next test must be after the syntax checks) // if it was too long the exponent may have wrapped, so check // carefully and set it to a certain overflow if wrap possible if (c>=firstexp+9+1) { if (c>firstexp+9+1 || *firstexp>'1') exponent=DECNUMMAXE*2; // [up to 1999999999 is OK, for example 1E-1000000998] } if (nege) exponent=-exponent; // was negative status=0; // is OK } // stuff after digits // Here when whole string has been inspected; syntax is good // cfirst->first digit (never dot), last->last digit (ditto) // strip leading zeros/dot [leave final 0 if all 0's] if (*cfirst=='0') { // [cfirst has stepped over .] for (c=cfirst; cextended) { decNumberZero(dn); // clean result break; // [could be return] } #endif } // at least one leading 0 // Handle decimal point... 
if (dotchar!=NULL && dotchardigits) res=dn->lsu; // fits into supplied decNumber else { // rounding needed Int needbytes=D2U(d)*sizeof(Unit);// bytes needed res=resbuff; // assume use local buffer if (needbytes>(Int)sizeof(resbuff)) { // too big for local allocres=(Unit *)malloc(needbytes); if (allocres==NULL) {status|=DEC_Insufficient_storage; break;} res=allocres; } } // res now -> number lsu, buffer, or allocated storage for Unit array // Place the coefficient into the selected Unit array // [this is often 70% of the cost of this function when DECDPUN>1] #if DECDPUN>1 out=0; // accumulator up=res+D2U(d)-1; // -> msu cut=d-(up-res)*DECDPUN; // digits in top unit for (c=cfirst;; c++) { // along the digits if (*c=='.') continue; // ignore '.' [don't decrement cut] out=X10(out)+(Int)*c-(Int)'0'; if (c==last) break; // done [never get to trailing '.'] cut--; if (cut>0) continue; // more for this unit *up=(Unit)out; // write unit up--; // prepare for unit below.. cut=DECDPUN; // .. out=0; // .. } // c *up=(Unit)out; // write lsu #else // DECDPUN==1 up=res; // -> lsu for (c=last; c>=cfirst; c--) { // over each character, from least if (*c=='.') continue; // ignore . 
[don't step up] *up=(Unit)((Int)*c-(Int)'0'); up++; } // c #endif dn->bits=bits; dn->exponent=exponent; dn->digits=d; // if not in number (too long) shorten into the number if (d>set->digits) { residue=0; decSetCoeff(dn, set, res, d, &residue, &status); // always check for overflow or subnormal and round as needed decFinalize(dn, set, &residue, &status); } else { // no rounding, but may still have overflow or subnormal // [these tests are just for performance; finalize repeats them] if ((dn->exponent-1emin-dn->digits) || (dn->exponent-1>set->emax-set->digits)) { residue=0; decFinalize(dn, set, &residue, &status); } } // decNumberShow(dn); } while(0); // [for break] if (allocres!=NULL) free(allocres); // drop any storage used if (status!=0) decStatus(dn, status, set); return dn; } /* decNumberFromString */ /* ================================================================== */ /* Operators */ /* ================================================================== */ /* ------------------------------------------------------------------ */ /* decNumberAbs -- absolute value operator */ /* */ /* This computes C = abs(A) */ /* */ /* res is C, the result. C may be A */ /* rhs is A */ /* set is the context */ /* */ /* See also decNumberCopyAbs for a quiet bitwise version of this. */ /* C must have space for set->digits digits. */ /* ------------------------------------------------------------------ */ /* This has the same effect as decNumberPlus unless A is negative, */ /* in which case it has the same effect as decNumberMinus. 
*/ /* ------------------------------------------------------------------ */ decNumber * decNumberAbs(decNumber *res, const decNumber *rhs, decContext *set) { decNumber dzero; // for 0 uInt status=0; // accumulator #if DECCHECK if (decCheckOperands(res, DECUNUSED, rhs, set)) return res; #endif decNumberZero(&dzero); // set 0 dzero.exponent=rhs->exponent; // [no coefficient expansion] decAddOp(res, &dzero, rhs, set, (uByte)(rhs->bits & DECNEG), &status); if (status!=0) decStatus(res, status, set); #if DECCHECK decCheckInexact(res, set); #endif return res; } // decNumberAbs /* ------------------------------------------------------------------ */ /* decNumberAdd -- add two Numbers */ /* */ /* This computes C = A + B */ /* */ /* res is C, the result. C may be A and/or B (e.g., X=X+X) */ /* lhs is A */ /* rhs is B */ /* set is the context */ /* */ /* C must have space for set->digits digits. */ /* ------------------------------------------------------------------ */ /* This just calls the routine shared with Subtract */ decNumber * decNumberAdd(decNumber *res, const decNumber *lhs, const decNumber *rhs, decContext *set) { uInt status=0; // accumulator decAddOp(res, lhs, rhs, set, 0, &status); if (status!=0) decStatus(res, status, set); #if DECCHECK decCheckInexact(res, set); #endif return res; } // decNumberAdd /* ------------------------------------------------------------------ */ /* decNumberAnd -- AND two Numbers, digitwise */ /* */ /* This computes C = A & B */ /* */ /* res is C, the result. C may be A and/or B (e.g., X=X&X) */ /* lhs is A */ /* rhs is B */ /* set is the context (used for result length and error report) */ /* */ /* C must have space for set->digits digits. */ /* */ /* Logical function restrictions apply (see above); a NaN is */ /* returned with Invalid_operation if a restriction is violated. 
*/ /* ------------------------------------------------------------------ */ decNumber * decNumberAnd(decNumber *res, const decNumber *lhs, const decNumber *rhs, decContext *set) { const Unit *ua, *ub; // -> operands const Unit *msua, *msub; // -> operand msus Unit *uc, *msuc; // -> result and its msu Int msudigs; // digits in res msu #if DECCHECK if (decCheckOperands(res, lhs, rhs, set)) return res; #endif if (lhs->exponent!=0 || decNumberIsSpecial(lhs) || decNumberIsNegative(lhs) || rhs->exponent!=0 || decNumberIsSpecial(rhs) || decNumberIsNegative(rhs)) { decStatus(res, DEC_Invalid_operation, set); return res; } // operands are valid ua=lhs->lsu; // bottom-up ub=rhs->lsu; // .. uc=res->lsu; // .. msua=ua+D2U(lhs->digits)-1; // -> msu of lhs msub=ub+D2U(rhs->digits)-1; // -> msu of rhs msuc=uc+D2U(set->digits)-1; // -> msu of result msudigs=MSUDIGITS(set->digits); // [faster than remainder] for (; uc<=msuc; ua++, ub++, uc++) { // Unit loop Unit a, b; // extract units if (ua>msua) a=0; else a=*ua; if (ub>msub) b=0; else b=*ub; *uc=0; // can now write back if (a|b) { // maybe 1 bits to examine Int i, j; *uc=0; // can now write back // This loop could be unrolled and/or use BIN2BCD tables for (i=0; i1) { decStatus(res, DEC_Invalid_operation, set); return res; } if (uc==msuc && i==msudigs-1) break; // just did final digit } // each digit } // both OK } // each unit // [here uc-1 is the msu of the result] res->digits=decGetDigits(res->lsu, uc-res->lsu); res->exponent=0; // integer res->bits=0; // sign=0 return res; // [no status to set] } // decNumberAnd /* ------------------------------------------------------------------ */ /* decNumberCompare -- compare two Numbers */ /* */ /* This computes C = A ? B */ /* */ /* res is C, the result. C may be A and/or B (e.g., X=X?X) */ /* lhs is A */ /* rhs is B */ /* set is the context */ /* */ /* C must have space for one digit (or NaN). 
*/ /* ------------------------------------------------------------------ */ decNumber * decNumberCompare(decNumber *res, const decNumber *lhs, const decNumber *rhs, decContext *set) { uInt status=0; // accumulator decCompareOp(res, lhs, rhs, set, COMPARE, &status); if (status!=0) decStatus(res, status, set); return res; } // decNumberCompare /* ------------------------------------------------------------------ */ /* decNumberCompareSignal -- compare, signalling on all NaNs */ /* */ /* This computes C = A ? B */ /* */ /* res is C, the result. C may be A and/or B (e.g., X=X?X) */ /* lhs is A */ /* rhs is B */ /* set is the context */ /* */ /* C must have space for one digit (or NaN). */ /* ------------------------------------------------------------------ */ decNumber * decNumberCompareSignal(decNumber *res, const decNumber *lhs, const decNumber *rhs, decContext *set) { uInt status=0; // accumulator decCompareOp(res, lhs, rhs, set, COMPSIG, &status); if (status!=0) decStatus(res, status, set); return res; } // decNumberCompareSignal /* ------------------------------------------------------------------ */ /* decNumberCompareTotal -- compare two Numbers, using total ordering */ /* */ /* This computes C = A ? B, under total ordering */ /* */ /* res is C, the result. C may be A and/or B (e.g., X=X?X) */ /* lhs is A */ /* rhs is B */ /* set is the context */ /* */ /* C must have space for one digit; the result will always be one of */ /* -1, 0, or 1. 
*/
/* ------------------------------------------------------------------ */
decNumber * decNumberCompareTotal(decNumber *res, const decNumber *lhs, const decNumber *rhs, decContext *set) {
    uInt status=0; // accumulator
    // total ordering: orders NaNs and equal-value/different-exponent cases too
    decCompareOp(res, lhs, rhs, set, COMPTOTAL, &status);
    if (status!=0) decStatus(res, status, set);
    return res;
} // decNumberCompareTotal

/* ------------------------------------------------------------------ */
/* decNumberCompareTotalMag -- compare, total ordering of magnitudes */
/* */
/* This computes C = |A| ? |B|, under total ordering */
/* */
/* res is C, the result. C may be A and/or B (e.g., X=X?X) */
/* lhs is A */
/* rhs is B */
/* set is the context */
/* */
/* C must have space for one digit; the result will always be one of */
/* -1, 0, or 1. */
/* ------------------------------------------------------------------ */
decNumber * decNumberCompareTotalMag(decNumber *res, const decNumber *lhs, const decNumber *rhs, decContext *set) {
    uInt status=0; // accumulator
    uInt needbytes; // for space calculations
    decNumber bufa[D2N(DECBUFFER+1)];// +1 in case DECBUFFER=0
    decNumber *allocbufa=NULL; // -> allocated bufa, iff allocated
    decNumber bufb[D2N(DECBUFFER+1)];
    decNumber *allocbufb=NULL; // -> allocated bufb, iff allocated
    decNumber *a, *b; // temporary pointers
#if DECCHECK
    if (decCheckOperands(res, lhs, rhs, set)) return res;
#endif
    do { // protect allocated storage
        // if either is negative, take a copy and absolute
        // (the copy is needed because lhs/rhs are const; a local buffer
        // is used when it is big enough, else heap storage is taken)
        if (decNumberIsNegative(lhs)) { // lhs<0
            a=bufa;
            needbytes=sizeof(decNumber)+(D2U(lhs->digits)-1)*sizeof(Unit);
            if (needbytes>sizeof(bufa)) { // need malloc space
                allocbufa=(decNumber *)malloc(needbytes);
                if (allocbufa==NULL) { // hopeless -- abandon
                    status|=DEC_Insufficient_storage;
                    break;}
                a=allocbufa; // use the allocated space
            }
            decNumberCopy(a, lhs); // copy content
            a->bits&=~DECNEG; // .. and clear the sign
            lhs=a; // use copy from here on
        }
        if (decNumberIsNegative(rhs)) { // rhs<0
            b=bufb;
            needbytes=sizeof(decNumber)+(D2U(rhs->digits)-1)*sizeof(Unit);
            if (needbytes>sizeof(bufb)) { // need malloc space
                allocbufb=(decNumber *)malloc(needbytes);
                if (allocbufb==NULL) { // hopeless -- abandon
                    status|=DEC_Insufficient_storage;
                    break;}
                b=allocbufb; // use the allocated space
            }
            decNumberCopy(b, rhs); // copy content
            b->bits&=~DECNEG; // .. and clear the sign
            rhs=b; // use copy from here on
        }
        decCompareOp(res, lhs, rhs, set, COMPTOTAL, &status);
    } while(0); // end protected
    if (allocbufa!=NULL) free(allocbufa); // drop any storage used
    if (allocbufb!=NULL) free(allocbufb); // ..
    if (status!=0) decStatus(res, status, set);
    return res;
} // decNumberCompareTotalMag

/* ------------------------------------------------------------------ */
/* decNumberDivide -- divide one number by another */
/* */
/* This computes C = A / B */
/* */
/* res is C, the result. C may be A and/or B (e.g., X=X/X) */
/* lhs is A */
/* rhs is B */
/* set is the context */
/* */
/* C must have space for set->digits digits. */
/* ------------------------------------------------------------------ */
decNumber * decNumberDivide(decNumber *res, const decNumber *lhs, const decNumber *rhs, decContext *set) {
    uInt status=0; // accumulator
    decDivideOp(res, lhs, rhs, set, DIVIDE, &status);
    if (status!=0) decStatus(res, status, set);
#if DECCHECK
    decCheckInexact(res, set);
#endif
    return res;
} // decNumberDivide

/* ------------------------------------------------------------------ */
/* decNumberDivideInteger -- divide and return integer quotient */
/* */
/* This computes C = A # B, where # is the integer divide operator */
/* */
/* res is C, the result. C may be A and/or B (e.g., X=X#X) */
/* lhs is A */
/* rhs is B */
/* set is the context */
/* */
/* C must have space for set->digits digits.
*/
/* ------------------------------------------------------------------ */
decNumber * decNumberDivideInteger(decNumber *res, const decNumber *lhs, const decNumber *rhs, decContext *set) {
    uInt status=0; // accumulator
    // same engine as Divide, selected by the DIVIDEINT operation code
    decDivideOp(res, lhs, rhs, set, DIVIDEINT, &status);
    if (status!=0) decStatus(res, status, set);
    return res;
} // decNumberDivideInteger

/* ------------------------------------------------------------------ */
/* decNumberExp -- exponentiation */
/* */
/* This computes C = exp(A) */
/* */
/* res is C, the result. C may be A */
/* rhs is A */
/* set is the context; note that rounding mode has no effect */
/* */
/* C must have space for set->digits digits. */
/* */
/* Mathematical function restrictions apply (see above); a NaN is */
/* returned with Invalid_operation if a restriction is violated. */
/* */
/* Finite results will always be full precision and Inexact, except */
/* when A is a zero or -Infinity (giving 1 or 0 respectively). */
/* */
/* An Inexact result is rounded using DEC_ROUND_HALF_EVEN; it will */
/* almost always be correctly rounded, but may be up to 1 ulp in */
/* error in rare cases. */
/* ------------------------------------------------------------------ */
/* This is a wrapper for decExpOp which can handle the slightly wider */
/* (double) range needed by Ln (which has to be able to calculate */
/* exp(-a) where a can be the tiniest number (Ntiny). */
/* ------------------------------------------------------------------ */
decNumber * decNumberExp(decNumber *res, const decNumber *rhs, decContext *set) {
    uInt status=0; // accumulator
#if DECSUBSET
    decNumber *allocrhs=NULL; // non-NULL if rounded rhs allocated
#endif
#if DECCHECK
    if (decCheckOperands(res, DECUNUSED, rhs, set)) return res;
#endif
    // Check restrictions; these restrictions ensure that if h=8 (see
    // decExpOp) then the result will either overflow or underflow to 0.
    // Other math functions restrict the input range, too, for inverses.
    // If not violated then carry out the operation.
    if (!decCheckMath(rhs, set, &status)) do { // protect allocation
#if DECSUBSET
        if (!set->extended) {
            // reduce operand and set lostDigits status, as needed
            if (rhs->digits>set->digits) {
                allocrhs=decRoundOperand(rhs, set, &status);
                if (allocrhs==NULL) break;
                rhs=allocrhs;
            }
        }
#endif
        decExpOp(res, rhs, set, &status);
    } while(0); // end protected
#if DECSUBSET
    if (allocrhs !=NULL) free(allocrhs); // drop any storage used
#endif
    // apply significant status
    if (status!=0) decStatus(res, status, set);
#if DECCHECK
    decCheckInexact(res, set);
#endif
    return res;
} // decNumberExp

/* ------------------------------------------------------------------ */
/* decNumberFMA -- fused multiply add */
/* */
/* This computes D = (A * B) + C with only one rounding */
/* */
/* res is D, the result. D may be A or B or C (e.g., X=FMA(X,X,X)) */
/* lhs is A */
/* rhs is B */
/* fhs is C [far hand side] */
/* set is the context */
/* */
/* Mathematical function restrictions apply (see above); a NaN is */
/* returned with Invalid_operation if a restriction is violated. */
/* */
/* C must have space for set->digits digits.
*/
/* ------------------------------------------------------------------ */
decNumber * decNumberFMA(decNumber *res, const decNumber *lhs, const decNumber *rhs, const decNumber *fhs, decContext *set) {
    uInt status=0; // accumulator
    decContext dcmul; // context for the multiplication
    uInt needbytes; // for space calculations
    decNumber bufa[D2N(DECBUFFER*2+1)];
    decNumber *allocbufa=NULL; // -> allocated bufa, iff allocated
    decNumber *acc; // accumulator pointer
    decNumber dzero; // work
#if DECCHECK
    if (decCheckOperands(res, lhs, rhs, set)) return res;
    if (decCheckOperands(res, fhs, DECUNUSED, set)) return res;
#endif
    do { // protect allocated storage
#if DECSUBSET
        if (!set->extended) { // [undefined if subset]
            status|=DEC_Invalid_operation;
            break;}
#endif
        // Check math restrictions [these ensure no overflow or underflow]
        if ((!decNumberIsSpecial(lhs) && decCheckMath(lhs, set, &status))
         || (!decNumberIsSpecial(rhs) && decCheckMath(rhs, set, &status))
         || (!decNumberIsSpecial(fhs) && decCheckMath(fhs, set, &status))) break;
        // set up context for multiply: wide enough that the A*B product
        // is exact, so the only rounding happens in the final add
        dcmul=*set;
        dcmul.digits=lhs->digits+rhs->digits; // just enough
        // [The above may be an over-estimate for subset arithmetic, but that's OK]
        dcmul.emax=DEC_MAX_EMAX; // effectively unbounded ..
        dcmul.emin=DEC_MIN_EMIN; // [thanks to Math restrictions]
        // set up decNumber space to receive the result of the multiply
        acc=bufa; // may fit
        needbytes=sizeof(decNumber)+(D2U(dcmul.digits)-1)*sizeof(Unit);
        if (needbytes>sizeof(bufa)) { // need malloc space
            allocbufa=(decNumber *)malloc(needbytes);
            if (allocbufa==NULL) { // hopeless -- abandon
                status|=DEC_Insufficient_storage;
                break;}
            acc=allocbufa; // use the allocated space
        }
        // multiply with extended range and necessary precision
        //printf("emin=%ld\n", dcmul.emin);
        decMultiplyOp(acc, lhs, rhs, &dcmul, &status);
        // Only Invalid operation (from sNaN or Inf * 0) is possible in
        // status; if either is seen than ignore fhs (in case it is
        // another sNaN) and set acc to NaN unless we had an sNaN
        // [decMultiplyOp leaves that to caller]
        // Note sNaN has to go through addOp to shorten payload if
        // necessary
        if ((status&DEC_Invalid_operation)!=0) {
            if (!(status&DEC_sNaN)) { // but be true invalid
                decNumberZero(res); // acc not yet set
                res->bits=DECNAN;
                break;
            }
            decNumberZero(&dzero); // make 0 (any non-NaN would do)
            fhs=&dzero; // use that
        }
#if DECCHECK
        else { // multiply was OK
            if (status!=0) printf("Status=%08lx after FMA multiply\n", (LI)status);
        }
#endif
        // add the third operand and result -> res, and all is done
        decAddOp(res, acc, fhs, set, 0, &status);
    } while(0); // end protected
    if (allocbufa!=NULL) free(allocbufa); // drop any storage used
    if (status!=0) decStatus(res, status, set);
#if DECCHECK
    decCheckInexact(res, set);
#endif
    return res;
} // decNumberFMA

/* ------------------------------------------------------------------ */
/* decNumberInvert -- invert a Number, digitwise */
/* */
/* This computes C = ~A */
/* */
/* res is C, the result. C may be A (e.g., X=~X) */
/* rhs is A */
/* set is the context (used for result length and error report) */
/* */
/* C must have space for set->digits digits.
*/ /* */ /* Logical function restrictions apply (see above); a NaN is */ /* returned with Invalid_operation if a restriction is violated. */ /* ------------------------------------------------------------------ */ decNumber * decNumberInvert(decNumber *res, const decNumber *rhs, decContext *set) { const Unit *ua, *msua; // -> operand and its msu Unit *uc, *msuc; // -> result and its msu Int msudigs; // digits in res msu #if DECCHECK if (decCheckOperands(res, DECUNUSED, rhs, set)) return res; #endif if (rhs->exponent!=0 || decNumberIsSpecial(rhs) || decNumberIsNegative(rhs)) { decStatus(res, DEC_Invalid_operation, set); return res; } // operand is valid ua=rhs->lsu; // bottom-up uc=res->lsu; // .. msua=ua+D2U(rhs->digits)-1; // -> msu of rhs msuc=uc+D2U(set->digits)-1; // -> msu of result msudigs=MSUDIGITS(set->digits); // [faster than remainder] for (; uc<=msuc; ua++, uc++) { // Unit loop Unit a; // extract unit Int i, j; // work if (ua>msua) a=0; else a=*ua; *uc=0; // can now write back // always need to examine all bits in rhs // This loop could be unrolled and/or use BIN2BCD tables for (i=0; i1) { decStatus(res, DEC_Invalid_operation, set); return res; } if (uc==msuc && i==msudigs-1) break; // just did final digit } // each digit } // each unit // [here uc-1 is the msu of the result] res->digits=decGetDigits(res->lsu, uc-res->lsu); res->exponent=0; // integer res->bits=0; // sign=0 return res; // [no status to set] } // decNumberInvert /* ------------------------------------------------------------------ */ /* decNumberLn -- natural logarithm */ /* */ /* This computes C = ln(A) */ /* */ /* res is C, the result. C may be A */ /* rhs is A */ /* set is the context; note that rounding mode has no effect */ /* */ /* C must have space for set->digits digits. 
*/
/* */
/* Notable cases: */
/* A<0 -> Invalid */
/* A=0 -> -Infinity (Exact) */
/* A=+Infinity -> +Infinity (Exact) */
/* A=1 exactly -> 0 (Exact) */
/* */
/* Mathematical function restrictions apply (see above); a NaN is */
/* returned with Invalid_operation if a restriction is violated. */
/* */
/* An Inexact result is rounded using DEC_ROUND_HALF_EVEN; it will */
/* almost always be correctly rounded, but may be up to 1 ulp in */
/* error in rare cases. */
/* ------------------------------------------------------------------ */
/* This is a wrapper for decLnOp which can handle the slightly wider */
/* (+11) range needed by Ln, Log10, etc. (which may have to be able */
/* to calculate at p+e+2). */
/* ------------------------------------------------------------------ */
decNumber * decNumberLn(decNumber *res, const decNumber *rhs, decContext *set) {
    uInt status=0; // accumulator
#if DECSUBSET
    decNumber *allocrhs=NULL; // non-NULL if rounded rhs allocated
#endif
#if DECCHECK
    if (decCheckOperands(res, DECUNUSED, rhs, set)) return res;
#endif
    // Check restrictions; this is a math function; if not violated
    // then carry out the operation.
    if (!decCheckMath(rhs, set, &status)) do { // protect allocation
#if DECSUBSET
        if (!set->extended) {
            // reduce operand and set lostDigits status, as needed
            if (rhs->digits>set->digits) {
                allocrhs=decRoundOperand(rhs, set, &status);
                if (allocrhs==NULL) break;
                rhs=allocrhs;
            }
            // special check in subset for rhs=0
            if (ISZERO(rhs)) { // +/- zeros -> error
                status|=DEC_Invalid_operation;
                break;}
        } // extended=0
#endif
        decLnOp(res, rhs, set, &status);
    } while(0); // end protected
#if DECSUBSET
    if (allocrhs !=NULL) free(allocrhs); // drop any storage used
#endif
    // apply significant status
    if (status!=0) decStatus(res, status, set);
#if DECCHECK
    decCheckInexact(res, set);
#endif
    return res;
} // decNumberLn

/* ------------------------------------------------------------------ */
/* decNumberLogB - get adjusted exponent, by 754 rules */
/* */
/* This computes C = adjustedexponent(A) */
/* */
/* res is C, the result. C may be A */
/* rhs is A */
/* set is the context, used only for digits and status */
/* */
/* For an unrounded result, digits may need to be 10 (A might have */
/* 10**9 digits and an exponent of +999999999, or one digit and an */
/* exponent of -1999999999). */
/* */
/* This returns the adjusted exponent of A after (in theory) padding */
/* with zeros on the right to set->digits digits while keeping the */
/* same value. The exponent is not limited by emin/emax.
*/ /* */ /* Notable cases: */ /* A<0 -> Use |A| */ /* A=0 -> -Infinity (Division by zero) */ /* A=Infinite -> +Infinity (Exact) */ /* A=1 exactly -> 0 (Exact) */ /* NaNs are propagated as usual */ /* ------------------------------------------------------------------ */ decNumber * decNumberLogB(decNumber *res, const decNumber *rhs, decContext *set) { uInt status=0; // accumulator #if DECCHECK if (decCheckOperands(res, DECUNUSED, rhs, set)) return res; #endif // NaNs as usual; Infinities return +Infinity; 0->oops if (decNumberIsNaN(rhs)) decNaNs(res, rhs, NULL, set, &status); else if (decNumberIsInfinite(rhs)) decNumberCopyAbs(res, rhs); else if (decNumberIsZero(rhs)) { decNumberZero(res); // prepare for Infinity res->bits=DECNEG|DECINF; // -Infinity status|=DEC_Division_by_zero; // as per 754 } else { // finite non-zero Int ae=rhs->exponent+rhs->digits-1; // adjusted exponent if (set->digits>=10) decNumberFromInt32(res, ae); // lay it out else { decNumber buft[D2N(10)]; // temporary number decNumber *t=buft; // .. decNumberFromInt32(t, ae); // lay it out decNumberPlus(res, t, set); // round as necessary } } if (status!=0) decStatus(res, status, set); return res; } // decNumberLogB /* ------------------------------------------------------------------ */ /* decNumberLog10 -- logarithm in base 10 */ /* */ /* This computes C = log10(A) */ /* */ /* res is C, the result. C may be A */ /* rhs is A */ /* set is the context; note that rounding mode has no effect */ /* */ /* C must have space for set->digits digits. */ /* */ /* Notable cases: */ /* A<0 -> Invalid */ /* A=0 -> -Infinity (Exact) */ /* A=+Infinity -> +Infinity (Exact) */ /* A=10**n (if n is an integer) -> n (Exact) */ /* */ /* Mathematical function restrictions apply (see above); a NaN is */ /* returned with Invalid_operation if a restriction is violated. 
*/ /* */ /* An Inexact result is rounded using DEC_ROUND_HALF_EVEN; it will */ /* almost always be correctly rounded, but may be up to 1 ulp in */ /* error in rare cases. */ /* ------------------------------------------------------------------ */ /* This calculates ln(A)/ln(10) using appropriate precision. For */ /* ln(A) this is the max(p, rhs->digits + t) + 3, where p is the */ /* requested digits and t is the number of digits in the exponent */ /* (maximum 6). For ln(10) it is p + 3; this is often handled by the */ /* fastpath in decLnOp. The final division is done to the requested */ /* precision. */ /* ------------------------------------------------------------------ */ decNumber * decNumberLog10(decNumber *res, const decNumber *rhs, decContext *set) { uInt status=0, ignore=0; // status accumulators uInt needbytes; // for space calculations Int p; // working precision Int t; // digits in exponent of A // buffers for a and b working decimals // (adjustment calculator, same size) decNumber bufa[D2N(DECBUFFER+2)]; decNumber *allocbufa=NULL; // -> allocated bufa, iff allocated decNumber *a=bufa; // temporary a decNumber bufb[D2N(DECBUFFER+2)]; decNumber *allocbufb=NULL; // -> allocated bufb, iff allocated decNumber *b=bufb; // temporary b decNumber bufw[D2N(10)]; // working 2-10 digit number decNumber *w=bufw; // .. #if DECSUBSET decNumber *allocrhs=NULL; // non-NULL if rounded rhs allocated #endif decContext aset; // working context #if DECCHECK if (decCheckOperands(res, DECUNUSED, rhs, set)) return res; #endif // Check restrictions; this is a math function; if not violated // then carry out the operation. 
if (!decCheckMath(rhs, set, &status)) do { // protect malloc #if DECSUBSET if (!set->extended) { // reduce operand and set lostDigits status, as needed if (rhs->digits>set->digits) { allocrhs=decRoundOperand(rhs, set, &status); if (allocrhs==NULL) break; rhs=allocrhs; } // special check in subset for rhs=0 if (ISZERO(rhs)) { // +/- zeros -> error status|=DEC_Invalid_operation; break;} } // extended=0 #endif decContextDefault(&aset, DEC_INIT_DECIMAL64); // clean context // handle exact powers of 10; only check if +ve finite if (!(rhs->bits&(DECNEG|DECSPECIAL)) && !ISZERO(rhs)) { Int residue=0; // (no residue) uInt copystat=0; // clean status // round to a single digit... aset.digits=1; decCopyFit(w, rhs, &aset, &residue, ©stat); // copy & shorten // if exact and the digit is 1, rhs is a power of 10 if (!(copystat&DEC_Inexact) && w->lsu[0]==1) { // the exponent, conveniently, is the power of 10; making // this the result needs a little care as it might not fit, // so first convert it into the working number, and then move // to res decNumberFromInt32(w, w->exponent); residue=0; decCopyFit(res, w, set, &residue, &status); // copy & round decFinish(res, set, &residue, &status); // cleanup/set flags break; } // not a power of 10 } // not a candidate for exact // simplify the information-content calculation to use 'total // number of digits in a, including exponent' as compared to the // requested digits, as increasing this will only rarely cost an // iteration in ln(a) anyway t=6; // it can never be >6 // allocate space when needed... p=(rhs->digits+t>set->digits?rhs->digits+t:set->digits)+3; needbytes=sizeof(decNumber)+(D2U(p)-1)*sizeof(Unit); if (needbytes>sizeof(bufa)) { // need malloc space allocbufa=(decNumber *)malloc(needbytes); if (allocbufa==NULL) { // hopeless -- abandon status|=DEC_Insufficient_storage; break;} a=allocbufa; // use the allocated space } aset.digits=p; // as calculated aset.emax=DEC_MAX_MATH; // usual bounds aset.emin=-DEC_MAX_MATH; // .. 
aset.clamp=0; // and no concrete format decLnOp(a, rhs, &aset, &status); // a=ln(rhs) // skip the division if the result so far is infinite, NaN, or // zero, or there was an error; note NaN from sNaN needs copy if (status&DEC_NaNs && !(status&DEC_sNaN)) break; if (a->bits&DECSPECIAL || ISZERO(a)) { decNumberCopy(res, a); // [will fit] break;} // for ln(10) an extra 3 digits of precision are needed p=set->digits+3; needbytes=sizeof(decNumber)+(D2U(p)-1)*sizeof(Unit); if (needbytes>sizeof(bufb)) { // need malloc space allocbufb=(decNumber *)malloc(needbytes); if (allocbufb==NULL) { // hopeless -- abandon status|=DEC_Insufficient_storage; break;} b=allocbufb; // use the allocated space } decNumberZero(w); // set up 10... #if DECDPUN==1 w->lsu[1]=1; w->lsu[0]=0; // .. #else w->lsu[0]=10; // .. #endif w->digits=2; // .. aset.digits=p; decLnOp(b, w, &aset, &ignore); // b=ln(10) aset.digits=set->digits; // for final divide decDivideOp(res, a, b, &aset, DIVIDE, &status); // into result } while(0); // [for break] if (allocbufa!=NULL) free(allocbufa); // drop any storage used if (allocbufb!=NULL) free(allocbufb); // .. #if DECSUBSET if (allocrhs !=NULL) free(allocrhs); // .. #endif // apply significant status if (status!=0) decStatus(res, status, set); #if DECCHECK decCheckInexact(res, set); #endif return res; } // decNumberLog10 /* ------------------------------------------------------------------ */ /* decNumberMax -- compare two Numbers and return the maximum */ /* */ /* This computes C = A ? B, returning the maximum by 754 rules */ /* */ /* res is C, the result. C may be A and/or B (e.g., X=X?X) */ /* lhs is A */ /* rhs is B */ /* set is the context */ /* */ /* C must have space for set->digits digits. 
*/ /* ------------------------------------------------------------------ */ decNumber * decNumberMax(decNumber *res, const decNumber *lhs, const decNumber *rhs, decContext *set) { uInt status=0; // accumulator decCompareOp(res, lhs, rhs, set, COMPMAX, &status); if (status!=0) decStatus(res, status, set); #if DECCHECK decCheckInexact(res, set); #endif return res; } // decNumberMax /* ------------------------------------------------------------------ */ /* decNumberMaxMag -- compare and return the maximum by magnitude */ /* */ /* This computes C = A ? B, returning the maximum by 754 rules */ /* */ /* res is C, the result. C may be A and/or B (e.g., X=X?X) */ /* lhs is A */ /* rhs is B */ /* set is the context */ /* */ /* C must have space for set->digits digits. */ /* ------------------------------------------------------------------ */ decNumber * decNumberMaxMag(decNumber *res, const decNumber *lhs, const decNumber *rhs, decContext *set) { uInt status=0; // accumulator decCompareOp(res, lhs, rhs, set, COMPMAXMAG, &status); if (status!=0) decStatus(res, status, set); #if DECCHECK decCheckInexact(res, set); #endif return res; } // decNumberMaxMag /* ------------------------------------------------------------------ */ /* decNumberMin -- compare two Numbers and return the minimum */ /* */ /* This computes C = A ? B, returning the minimum by 754 rules */ /* */ /* res is C, the result. C may be A and/or B (e.g., X=X?X) */ /* lhs is A */ /* rhs is B */ /* set is the context */ /* */ /* C must have space for set->digits digits. 
*/ /* ------------------------------------------------------------------ */ decNumber * decNumberMin(decNumber *res, const decNumber *lhs, const decNumber *rhs, decContext *set) { uInt status=0; // accumulator decCompareOp(res, lhs, rhs, set, COMPMIN, &status); if (status!=0) decStatus(res, status, set); #if DECCHECK decCheckInexact(res, set); #endif return res; } // decNumberMin /* ------------------------------------------------------------------ */ /* decNumberMinMag -- compare and return the minimum by magnitude */ /* */ /* This computes C = A ? B, returning the minimum by 754 rules */ /* */ /* res is C, the result. C may be A and/or B (e.g., X=X?X) */ /* lhs is A */ /* rhs is B */ /* set is the context */ /* */ /* C must have space for set->digits digits. */ /* ------------------------------------------------------------------ */ decNumber * decNumberMinMag(decNumber *res, const decNumber *lhs, const decNumber *rhs, decContext *set) { uInt status=0; // accumulator decCompareOp(res, lhs, rhs, set, COMPMINMAG, &status); if (status!=0) decStatus(res, status, set); #if DECCHECK decCheckInexact(res, set); #endif return res; } // decNumberMinMag /* ------------------------------------------------------------------ */ /* decNumberMinus -- prefix minus operator */ /* */ /* This computes C = 0 - A */ /* */ /* res is C, the result. C may be A */ /* rhs is A */ /* set is the context */ /* */ /* See also decNumberCopyNegate for a quiet bitwise version of this. */ /* C must have space for set->digits digits. */ /* ------------------------------------------------------------------ */ /* Simply use AddOp for the subtract, which will do the necessary. 
*/ /* ------------------------------------------------------------------ */ decNumber * decNumberMinus(decNumber *res, const decNumber *rhs, decContext *set) { decNumber dzero; uInt status=0; // accumulator #if DECCHECK if (decCheckOperands(res, DECUNUSED, rhs, set)) return res; #endif decNumberZero(&dzero); // make 0 dzero.exponent=rhs->exponent; // [no coefficient expansion] decAddOp(res, &dzero, rhs, set, DECNEG, &status); if (status!=0) decStatus(res, status, set); #if DECCHECK decCheckInexact(res, set); #endif return res; } // decNumberMinus /* ------------------------------------------------------------------ */ /* decNumberNextMinus -- next towards -Infinity */ /* */ /* This computes C = A - infinitesimal, rounded towards -Infinity */ /* */ /* res is C, the result. C may be A */ /* rhs is A */ /* set is the context */ /* */ /* This is a generalization of 754 NextDown. */ /* ------------------------------------------------------------------ */ decNumber * decNumberNextMinus(decNumber *res, const decNumber *rhs, decContext *set) { decNumber dtiny; // constant decContext workset=*set; // work uInt status=0; // accumulator #if DECCHECK if (decCheckOperands(res, DECUNUSED, rhs, set)) return res; #endif // +Infinity is the special case if ((rhs->bits&(DECINF|DECNEG))==DECINF) { decSetMaxValue(res, set); // is +ve // there is no status to set return res; } decNumberZero(&dtiny); // start with 0 dtiny.lsu[0]=1; // make number that is .. dtiny.exponent=DEC_MIN_EMIN-1; // .. smaller than tiniest workset.round=DEC_ROUND_FLOOR; decAddOp(res, rhs, &dtiny, &workset, DECNEG, &status); status&=DEC_Invalid_operation|DEC_sNaN; // only sNaN Invalid please if (status!=0) decStatus(res, status, set); return res; } // decNumberNextMinus /* ------------------------------------------------------------------ */ /* decNumberNextPlus -- next towards +Infinity */ /* */ /* This computes C = A + infinitesimal, rounded towards +Infinity */ /* */ /* res is C, the result. 
C may be A */ /* rhs is A */ /* set is the context */ /* */ /* This is a generalization of 754 NextUp. */ /* ------------------------------------------------------------------ */ decNumber * decNumberNextPlus(decNumber *res, const decNumber *rhs, decContext *set) { decNumber dtiny; // constant decContext workset=*set; // work uInt status=0; // accumulator #if DECCHECK if (decCheckOperands(res, DECUNUSED, rhs, set)) return res; #endif // -Infinity is the special case if ((rhs->bits&(DECINF|DECNEG))==(DECINF|DECNEG)) { decSetMaxValue(res, set); res->bits=DECNEG; // negative // there is no status to set return res; } decNumberZero(&dtiny); // start with 0 dtiny.lsu[0]=1; // make number that is .. dtiny.exponent=DEC_MIN_EMIN-1; // .. smaller than tiniest workset.round=DEC_ROUND_CEILING; decAddOp(res, rhs, &dtiny, &workset, 0, &status); status&=DEC_Invalid_operation|DEC_sNaN; // only sNaN Invalid please if (status!=0) decStatus(res, status, set); return res; } // decNumberNextPlus /* ------------------------------------------------------------------ */ /* decNumberNextToward -- next towards rhs */ /* */ /* This computes C = A +/- infinitesimal, rounded towards */ /* +/-Infinity in the direction of B, as per 754-1985 nextafter */ /* modified during revision but dropped from 754-2008. */ /* */ /* res is C, the result. C may be A or B. */ /* lhs is A */ /* rhs is B */ /* set is the context */ /* */ /* This is a generalization of 754-1985 NextAfter. */ /* ------------------------------------------------------------------ */ decNumber * decNumberNextToward(decNumber *res, const decNumber *lhs, const decNumber *rhs, decContext *set) { decNumber dtiny; // constant decContext workset=*set; // work Int result; // .. uInt status=0; // accumulator #if DECCHECK if (decCheckOperands(res, lhs, rhs, set)) return res; #endif if (decNumberIsNaN(lhs) || decNumberIsNaN(rhs)) { decNaNs(res, lhs, rhs, set, &status); } else { // Is numeric, so no chance of sNaN Invalid, etc. 
result=decCompare(lhs, rhs, 0); // sign matters if (result==BADINT) status|=DEC_Insufficient_storage; // rare else { // valid compare if (result==0) decNumberCopySign(res, lhs, rhs); // easy else { // differ: need NextPlus or NextMinus uByte sub; // add or subtract if (result<0) { // lhsbits&(DECINF|DECNEG))==(DECINF|DECNEG)) { decSetMaxValue(res, set); res->bits=DECNEG; // negative return res; // there is no status to set } workset.round=DEC_ROUND_CEILING; sub=0; // add, please } // plus else { // lhs>rhs, do nextminus // +Infinity is the special case if ((lhs->bits&(DECINF|DECNEG))==DECINF) { decSetMaxValue(res, set); return res; // there is no status to set } workset.round=DEC_ROUND_FLOOR; sub=DECNEG; // subtract, please } // minus decNumberZero(&dtiny); // start with 0 dtiny.lsu[0]=1; // make number that is .. dtiny.exponent=DEC_MIN_EMIN-1; // .. smaller than tiniest decAddOp(res, lhs, &dtiny, &workset, sub, &status); // + or - // turn off exceptions if the result is a normal number // (including Nmin), otherwise let all status through if (decNumberIsNormal(res, set)) status=0; } // unequal } // compare OK } // numeric if (status!=0) decStatus(res, status, set); return res; } // decNumberNextToward /* ------------------------------------------------------------------ */ /* decNumberOr -- OR two Numbers, digitwise */ /* */ /* This computes C = A | B */ /* */ /* res is C, the result. C may be A and/or B (e.g., X=X|X) */ /* lhs is A */ /* rhs is B */ /* set is the context (used for result length and error report) */ /* */ /* C must have space for set->digits digits. */ /* */ /* Logical function restrictions apply (see above); a NaN is */ /* returned with Invalid_operation if a restriction is violated. 
*/ /* ------------------------------------------------------------------ */ decNumber * decNumberOr(decNumber *res, const decNumber *lhs, const decNumber *rhs, decContext *set) { const Unit *ua, *ub; // -> operands const Unit *msua, *msub; // -> operand msus Unit *uc, *msuc; // -> result and its msu Int msudigs; // digits in res msu #if DECCHECK if (decCheckOperands(res, lhs, rhs, set)) return res; #endif if (lhs->exponent!=0 || decNumberIsSpecial(lhs) || decNumberIsNegative(lhs) || rhs->exponent!=0 || decNumberIsSpecial(rhs) || decNumberIsNegative(rhs)) { decStatus(res, DEC_Invalid_operation, set); return res; } // operands are valid ua=lhs->lsu; // bottom-up ub=rhs->lsu; // .. uc=res->lsu; // .. msua=ua+D2U(lhs->digits)-1; // -> msu of lhs msub=ub+D2U(rhs->digits)-1; // -> msu of rhs msuc=uc+D2U(set->digits)-1; // -> msu of result msudigs=MSUDIGITS(set->digits); // [faster than remainder] for (; uc<=msuc; ua++, ub++, uc++) { // Unit loop Unit a, b; // extract units if (ua>msua) a=0; else a=*ua; if (ub>msub) b=0; else b=*ub; *uc=0; // can now write back if (a|b) { // maybe 1 bits to examine Int i, j; // This loop could be unrolled and/or use BIN2BCD tables for (i=0; i1) { decStatus(res, DEC_Invalid_operation, set); return res; } if (uc==msuc && i==msudigs-1) break; // just did final digit } // each digit } // non-zero } // each unit // [here uc-1 is the msu of the result] res->digits=decGetDigits(res->lsu, uc-res->lsu); res->exponent=0; // integer res->bits=0; // sign=0 return res; // [no status to set] } // decNumberOr /* ------------------------------------------------------------------ */ /* decNumberPlus -- prefix plus operator */ /* */ /* This computes C = 0 + A */ /* */ /* res is C, the result. C may be A */ /* rhs is A */ /* set is the context */ /* */ /* See also decNumberCopy for a quiet bitwise version of this. */ /* C must have space for set->digits digits. 
*/ /* ------------------------------------------------------------------ */ /* This simply uses AddOp; Add will take fast path after preparing A. */ /* Performance is a concern here, as this routine is often used to */ /* check operands and apply rounding and overflow/underflow testing. */ /* ------------------------------------------------------------------ */ decNumber * decNumberPlus(decNumber *res, const decNumber *rhs, decContext *set) { decNumber dzero; uInt status=0; // accumulator #if DECCHECK if (decCheckOperands(res, DECUNUSED, rhs, set)) return res; #endif decNumberZero(&dzero); // make 0 dzero.exponent=rhs->exponent; // [no coefficient expansion] decAddOp(res, &dzero, rhs, set, 0, &status); if (status!=0) decStatus(res, status, set); #if DECCHECK decCheckInexact(res, set); #endif return res; } // decNumberPlus /* ------------------------------------------------------------------ */ /* decNumberMultiply -- multiply two Numbers */ /* */ /* This computes C = A x B */ /* */ /* res is C, the result. C may be A and/or B (e.g., X=X+X) */ /* lhs is A */ /* rhs is B */ /* set is the context */ /* */ /* C must have space for set->digits digits. */ /* ------------------------------------------------------------------ */ decNumber * decNumberMultiply(decNumber *res, const decNumber *lhs, const decNumber *rhs, decContext *set) { uInt status=0; // accumulator decMultiplyOp(res, lhs, rhs, set, &status); if (status!=0) decStatus(res, status, set); #if DECCHECK decCheckInexact(res, set); #endif return res; } // decNumberMultiply /* ------------------------------------------------------------------ */ /* decNumberPower -- raise a number to a power */ /* */ /* This computes C = A ** B */ /* */ /* res is C, the result. C may be A and/or B (e.g., X=X**X) */ /* lhs is A */ /* rhs is B */ /* set is the context */ /* */ /* C must have space for set->digits digits. 
*/ /* */ /* Mathematical function restrictions apply (see above); a NaN is */ /* returned with Invalid_operation if a restriction is violated. */ /* */ /* However, if 1999999997<=B<=999999999 and B is an integer then the */ /* restrictions on A and the context are relaxed to the usual bounds, */ /* for compatibility with the earlier (integer power only) version */ /* of this function. */ /* */ /* When B is an integer, the result may be exact, even if rounded. */ /* */ /* The final result is rounded according to the context; it will */ /* almost always be correctly rounded, but may be up to 1 ulp in */ /* error in rare cases. */ /* ------------------------------------------------------------------ */ decNumber * decNumberPower(decNumber *res, const decNumber *lhs, const decNumber *rhs, decContext *set) { #if DECSUBSET decNumber *alloclhs=NULL; // non-NULL if rounded lhs allocated decNumber *allocrhs=NULL; // .., rhs #endif decNumber *allocdac=NULL; // -> allocated acc buffer, iff used decNumber *allocinv=NULL; // -> allocated 1/x buffer, iff used Int reqdigits=set->digits; // requested DIGITS Int n; // rhs in binary Flag rhsint=0; // 1 if rhs is an integer Flag useint=0; // 1 if can use integer calculation Flag isoddint=0; // 1 if rhs is an integer and odd Int i; // work #if DECSUBSET Int dropped; // .. #endif uInt needbytes; // buffer size needed Flag seenbit; // seen a bit while powering Int residue=0; // rounding residue uInt status=0; // accumulators uByte bits=0; // result sign if errors decContext aset; // working context decNumber dnOne; // work value 1... 
// local accumulator buffer [a decNumber, with digits+elength+1 digits] decNumber dacbuff[D2N(DECBUFFER+9)]; decNumber *dac=dacbuff; // -> result accumulator // same again for possible 1/lhs calculation decNumber invbuff[D2N(DECBUFFER+9)]; #if DECCHECK if (decCheckOperands(res, lhs, rhs, set)) return res; #endif do { // protect allocated storage #if DECSUBSET if (!set->extended) { // reduce operands and set status, as needed if (lhs->digits>reqdigits) { alloclhs=decRoundOperand(lhs, set, &status); if (alloclhs==NULL) break; lhs=alloclhs; } if (rhs->digits>reqdigits) { allocrhs=decRoundOperand(rhs, set, &status); if (allocrhs==NULL) break; rhs=allocrhs; } } #endif // [following code does not require input rounding] // handle NaNs and rhs Infinity (lhs infinity is harder) if (SPECIALARGS) { if (decNumberIsNaN(lhs) || decNumberIsNaN(rhs)) { // NaNs decNaNs(res, lhs, rhs, set, &status); break;} if (decNumberIsInfinite(rhs)) { // rhs Infinity Flag rhsneg=rhs->bits&DECNEG; // save rhs sign if (decNumberIsNegative(lhs) // lhs<0 && !decNumberIsZero(lhs)) // .. status|=DEC_Invalid_operation; else { // lhs >=0 decNumberZero(&dnOne); // set up 1 dnOne.lsu[0]=1; decNumberCompare(dac, lhs, &dnOne, set); // lhs ? 1 decNumberZero(res); // prepare for 0/1/Infinity if (decNumberIsNegative(dac)) { // lhs<1 if (rhsneg) res->bits|=DECINF; // +Infinity [else is +0] } else if (dac->lsu[0]==0) { // lhs=1 // 1**Infinity is inexact, so return fully-padded 1.0000 Int shift=set->digits-1; *res->lsu=1; // was 0, make int 1 res->digits=decShiftToMost(res->lsu, 1, shift); res->exponent=-shift; // make 1.0000... 
status|=DEC_Inexact|DEC_Rounded; // deemed inexact } else { // lhs>1 if (!rhsneg) res->bits|=DECINF; // +Infinity [else is +0] } } // lhs>=0 break;} // [lhs infinity drops through] } // specials // Original rhs may be an integer that fits and is in range n=decGetInt(rhs); if (n!=BADINT) { // it is an integer rhsint=1; // record the fact for 1**n isoddint=(Flag)n&1; // [works even if big] if (n!=BIGEVEN && n!=BIGODD) // can use integer path? useint=1; // looks good } if (decNumberIsNegative(lhs) // -x .. && isoddint) bits=DECNEG; // .. to an odd power // handle LHS infinity if (decNumberIsInfinite(lhs)) { // [NaNs already handled] uByte rbits=rhs->bits; // save decNumberZero(res); // prepare if (n==0) *res->lsu=1; // [-]Inf**0 => 1 else { // -Inf**nonint -> error if (!rhsint && decNumberIsNegative(lhs)) { status|=DEC_Invalid_operation; // -Inf**nonint is error break;} if (!(rbits & DECNEG)) bits|=DECINF; // was not a **-n // [otherwise will be 0 or -0] res->bits=bits; } break;} // similarly handle LHS zero if (decNumberIsZero(lhs)) { if (n==0) { // 0**0 => Error #if DECSUBSET if (!set->extended) { // [unless subset] decNumberZero(res); *res->lsu=1; // return 1 break;} #endif status|=DEC_Invalid_operation; } else { // 0**x uByte rbits=rhs->bits; // save if (rbits & DECNEG) { // was a 0**(-n) #if DECSUBSET if (!set->extended) { // [bad if subset] status|=DEC_Invalid_operation; break;} #endif bits|=DECINF; } decNumberZero(res); // prepare // [otherwise will be 0 or -0] res->bits=bits; } break;} // here both lhs and rhs are finite; rhs==0 is handled in the // integer path. 
Next handle the non-integer cases if (!useint) { // non-integral rhs // any -ve lhs is bad, as is either operand or context out of // bounds if (decNumberIsNegative(lhs)) { status|=DEC_Invalid_operation; break;} if (decCheckMath(lhs, set, &status) || decCheckMath(rhs, set, &status)) break; // variable status decContextDefault(&aset, DEC_INIT_DECIMAL64); // clean context aset.emax=DEC_MAX_MATH; // usual bounds aset.emin=-DEC_MAX_MATH; // .. aset.clamp=0; // and no concrete format // calculate the result using exp(ln(lhs)*rhs), which can // all be done into the accumulator, dac. The precision needed // is enough to contain the full information in the lhs (which // is the total digits, including exponent), or the requested // precision, if larger, + 4; 6 is used for the exponent // maximum length, and this is also used when it is shorter // than the requested digits as it greatly reduces the >0.5 ulp // cases at little cost (because Ln doubles digits each // iteration so a few extra digits rarely causes an extra // iteration) aset.digits=MAXI(lhs->digits, set->digits)+6+4; } // non-integer rhs else { // rhs is in-range integer if (n==0) { // x**0 = 1 // (0**0 was handled above) decNumberZero(res); // result=1 *res->lsu=1; // .. 
break;} // rhs is a non-zero integer if (n<0) n=-n; // use abs(n) aset=*set; // clone the context aset.round=DEC_ROUND_HALF_EVEN; // internally use balanced // calculate the working DIGITS aset.digits=reqdigits+(rhs->digits+rhs->exponent)+2; #if DECSUBSET if (!set->extended) aset.digits--; // use classic precision #endif // it's an error if this is more than can be handled if (aset.digits>DECNUMMAXP) {status|=DEC_Invalid_operation; break;} } // integer path // aset.digits is the count of digits for the accumulator needed // if accumulator is too long for local storage, then allocate needbytes=sizeof(decNumber)+(D2U(aset.digits)-1)*sizeof(Unit); // [needbytes also used below if 1/lhs needed] if (needbytes>sizeof(dacbuff)) { allocdac=(decNumber *)malloc(needbytes); if (allocdac==NULL) { // hopeless -- abandon status|=DEC_Insufficient_storage; break;} dac=allocdac; // use the allocated space } // here, aset is set up and accumulator is ready for use if (!useint) { // non-integral rhs // x ** y; special-case x=1 here as it will otherwise always // reduce to integer 1; decLnOp has a fastpath which detects // the case of x=1 decLnOp(dac, lhs, &aset, &status); // dac=ln(lhs) // [no error possible, as lhs 0 already handled] if (ISZERO(dac)) { // x==1, 1.0, etc. // need to return fully-padded 1.0000 etc., but rhsint->1 *dac->lsu=1; // was 0, make int 1 if (!rhsint) { // add padding Int shift=set->digits-1; dac->digits=decShiftToMost(dac->lsu, 1, shift); dac->exponent=-shift; // make 1.0000... status|=DEC_Inexact|DEC_Rounded; // deemed inexact } } else { decMultiplyOp(dac, dac, rhs, &aset, &status); // dac=dac*rhs decExpOp(dac, dac, &aset, &status); // dac=exp(dac) } // and drop through for final rounding } // non-integer rhs else { // carry on with integer decNumberZero(dac); // acc=1 *dac->lsu=1; // .. 
// if a negative power the constant 1 is needed, and if not subset // invert the lhs now rather than inverting the result later if (decNumberIsNegative(rhs)) { // was a **-n [hence digits>0] decNumber *inv=invbuff; // asssume use fixed buffer decNumberCopy(&dnOne, dac); // dnOne=1; [needed now or later] #if DECSUBSET if (set->extended) { // need to calculate 1/lhs #endif // divide lhs into 1, putting result in dac [dac=1/dac] decDivideOp(dac, &dnOne, lhs, &aset, DIVIDE, &status); // now locate or allocate space for the inverted lhs if (needbytes>sizeof(invbuff)) { allocinv=(decNumber *)malloc(needbytes); if (allocinv==NULL) { // hopeless -- abandon status|=DEC_Insufficient_storage; break;} inv=allocinv; // use the allocated space } // [inv now points to big-enough buffer or allocated storage] decNumberCopy(inv, dac); // copy the 1/lhs decNumberCopy(dac, &dnOne); // restore acc=1 lhs=inv; // .. and go forward with new lhs #if DECSUBSET } #endif } // Raise-to-the-power loop... seenbit=0; // set once a 1-bit is encountered for (i=1;;i++){ // for each bit [top bit ignored] // abandon if had overflow or terminal underflow if (status & (DEC_Overflow|DEC_Underflow)) { // interesting? 
if (status&DEC_Overflow || ISZERO(dac)) break; } // [the following two lines revealed an optimizer bug in a C++ // compiler, with symptom: 5**3 -> 25, when n=n+n was used] n=n<<1; // move next bit to testable position if (n<0) { // top bit is set seenbit=1; // OK, significant bit seen decMultiplyOp(dac, dac, lhs, &aset, &status); // dac=dac*x } if (i==31) break; // that was the last bit if (!seenbit) continue; // no need to square 1 decMultiplyOp(dac, dac, dac, &aset, &status); // dac=dac*dac [square] } /*i*/ // 32 bits // complete internal overflow or underflow processing if (status & (DEC_Overflow|DEC_Underflow)) { #if DECSUBSET // If subset, and power was negative, reverse the kind of -erflow // [1/x not yet done] if (!set->extended && decNumberIsNegative(rhs)) { if (status & DEC_Overflow) status^=DEC_Overflow | DEC_Underflow | DEC_Subnormal; else { // trickier -- Underflow may or may not be set status&=~(DEC_Underflow | DEC_Subnormal); // [one or both] status|=DEC_Overflow; } } #endif dac->bits=(dac->bits & ~DECNEG) | bits; // force correct sign // round subnormals [to set.digits rather than aset.digits] // or set overflow result similarly as required decFinalize(dac, set, &residue, &status); decNumberCopy(res, dac); // copy to result (is now OK length) break; } #if DECSUBSET if (!set->extended && // subset math decNumberIsNegative(rhs)) { // was a **-n [hence digits>0] // so divide result into 1 [dac=1/dac] decDivideOp(dac, &dnOne, dac, &aset, DIVIDE, &status); } #endif } // rhs integer path // reduce result to the requested length and copy to result decCopyFit(res, dac, set, &residue, &status); decFinish(res, set, &residue, &status); // final cleanup #if DECSUBSET if (!set->extended) decTrim(res, set, 0, 1, &dropped); // trailing zeros #endif } while(0); // end protected if (allocdac!=NULL) free(allocdac); // drop any storage used if (allocinv!=NULL) free(allocinv); // .. #if DECSUBSET if (alloclhs!=NULL) free(alloclhs); // .. 
if (allocrhs!=NULL) free(allocrhs);  // ..
#endif
if (status!=0) decStatus(res, status, set);
#if DECCHECK
decCheckInexact(res, set);
#endif
return res;
} // decNumberPower

/* ------------------------------------------------------------------ */
/* decNumberQuantize -- force exponent to requested value             */
/*                                                                    */
/*   This computes C = op(A, B), where op adjusts the coefficient     */
/*   of C (by rounding or shifting) such that the exponent (-scale)   */
/*   of C has exponent of B.  The numerical value of C will equal A,  */
/*   except for the effects of any rounding that occurred.            */
/*                                                                    */
/*   res is C, the result.  C may be A or B                           */
/*   lhs is A, the number to adjust                                   */
/*   rhs is B, the number with exponent to match                      */
/*   set is the context                                               */
/*                                                                    */
/*   C must have space for set->digits digits.                        */
/*                                                                    */
/*   Unless there is an error or the result is infinite, the exponent */
/*   after the operation is guaranteed to be equal to that of B.      */
/* ------------------------------------------------------------------ */
decNumber * decNumberQuantize(decNumber *res, const decNumber *lhs,
                              const decNumber *rhs, decContext *set) {
  uInt status=0;                        // accumulator
  // flag 1 selects 'quantize' semantics: the exponent OF rhs (not its
  // value) is the target exponent
  decQuantizeOp(res, lhs, rhs, set, 1, &status);
  if (status!=0) decStatus(res, status, set);
  return res;
  } // decNumberQuantize

/* ------------------------------------------------------------------ */
/* decNumberReduce -- remove trailing zeros                           */
/*                                                                    */
/*   This computes C = 0 + A, and normalizes the result               */
/*                                                                    */
/*   res is C, the result.  C may be A                                */
/*   rhs is A                                                         */
/*   set is the context                                               */
/*                                                                    */
/*   C must have space for set->digits digits.
*/
/* ------------------------------------------------------------------ */
// Previously known as Normalize
decNumber * decNumberNormalize(decNumber *res, const decNumber *rhs,
                               decContext *set) {
  // kept as a compatibility synonym for decNumberReduce
  return decNumberReduce(res, rhs, set);
  } // decNumberNormalize

decNumber * decNumberReduce(decNumber *res, const decNumber *rhs,
                            decContext *set) {
  #if DECSUBSET
  decNumber *allocrhs=NULL;        // non-NULL if rounded rhs allocated
  #endif
  uInt status=0;                   // as usual
  Int  residue=0;                  // as usual
  Int  dropped;                    // work

  #if DECCHECK
  if (decCheckOperands(res, DECUNUSED, rhs, set)) return res;
  #endif

  do {                             // protect allocated storage
    #if DECSUBSET
    if (!set->extended) {
      // reduce operand and set lostDigits status, as needed
      if (rhs->digits>set->digits) {
        allocrhs=decRoundOperand(rhs, set, &status);
        if (allocrhs==NULL) break;
        rhs=allocrhs;
        }
      }
    #endif
    // [following code does not require input rounding]

    // Infinities copy through; NaNs need usual treatment
    if (decNumberIsNaN(rhs)) {
      decNaNs(res, rhs, NULL, set, &status);
      break;
      }

    // reduce result to the requested length and copy to result
    decCopyFit(res, rhs, set, &residue, &status); // copy & round
    decFinish(res, set, &residue, &status);       // cleanup/set flags
    decTrim(res, set, 1, 0, &dropped);            // normalize in place
                                                  // [may clamp]
    } while(0);                    // end protected

  #if DECSUBSET
  if (allocrhs !=NULL) free(allocrhs);        // ..
  #endif
  if (status!=0) decStatus(res, status, set);// then report status
  return res;
  } // decNumberReduce

/* ------------------------------------------------------------------ */
/* decNumberRescale -- force exponent to requested value              */
/*                                                                    */
/*   This computes C = op(A, B), where op adjusts the coefficient     */
/*   of C (by rounding or shifting) such that the exponent (-scale)   */
/*   of C has the value B.  The numerical value of C will equal A,    */
/*   except for the effects of any rounding that occurred.            */
/*                                                                    */
/*   res is C, the result.
C may be A or B */ /* lhs is A, the number to adjust */ /* rhs is B, the requested exponent */ /* set is the context */ /* */ /* C must have space for set->digits digits. */ /* */ /* Unless there is an error or the result is infinite, the exponent */ /* after the operation is guaranteed to be equal to B. */ /* ------------------------------------------------------------------ */ decNumber * decNumberRescale(decNumber *res, const decNumber *lhs, const decNumber *rhs, decContext *set) { uInt status=0; // accumulator decQuantizeOp(res, lhs, rhs, set, 0, &status); if (status!=0) decStatus(res, status, set); return res; } // decNumberRescale /* ------------------------------------------------------------------ */ /* decNumberRemainder -- divide and return remainder */ /* */ /* This computes C = A % B */ /* */ /* res is C, the result. C may be A and/or B (e.g., X=X%X) */ /* lhs is A */ /* rhs is B */ /* set is the context */ /* */ /* C must have space for set->digits digits. */ /* ------------------------------------------------------------------ */ decNumber * decNumberRemainder(decNumber *res, const decNumber *lhs, const decNumber *rhs, decContext *set) { uInt status=0; // accumulator decDivideOp(res, lhs, rhs, set, REMAINDER, &status); if (status!=0) decStatus(res, status, set); #if DECCHECK decCheckInexact(res, set); #endif return res; } // decNumberRemainder /* ------------------------------------------------------------------ */ /* decNumberRemainderNear -- divide and return remainder from nearest */ /* */ /* This computes C = A % B, where % is the IEEE remainder operator */ /* */ /* res is C, the result. C may be A and/or B (e.g., X=X%X) */ /* lhs is A */ /* rhs is B */ /* set is the context */ /* */ /* C must have space for set->digits digits. 
*/ /* ------------------------------------------------------------------ */ decNumber * decNumberRemainderNear(decNumber *res, const decNumber *lhs, const decNumber *rhs, decContext *set) { uInt status=0; // accumulator decDivideOp(res, lhs, rhs, set, REMNEAR, &status); if (status!=0) decStatus(res, status, set); #if DECCHECK decCheckInexact(res, set); #endif return res; } // decNumberRemainderNear /* ------------------------------------------------------------------ */ /* decNumberRotate -- rotate the coefficient of a Number left/right */ /* */ /* This computes C = A rot B (in base ten and rotating set->digits */ /* digits). */ /* */ /* res is C, the result. C may be A and/or B (e.g., X=XrotX) */ /* lhs is A */ /* rhs is B, the number of digits to rotate (-ve to right) */ /* set is the context */ /* */ /* The digits of the coefficient of A are rotated to the left (if B */ /* is positive) or to the right (if B is negative) without adjusting */ /* the exponent or the sign of A. If lhs->digits is less than */ /* set->digits the coefficient is padded with zeros on the left */ /* before the rotate. Any leading zeros in the result are removed */ /* as usual. */ /* */ /* B must be an integer (q=0) and in the range -set->digits through */ /* +set->digits. */ /* C must have space for set->digits digits. */ /* NaNs are propagated as usual. Infinities are unaffected (but */ /* B must be valid). No status is set unless B is invalid or an */ /* operand is an sNaN. 
*/
/* ------------------------------------------------------------------ */
decNumber * decNumberRotate(decNumber *res, const decNumber *lhs,
                            const decNumber *rhs, decContext *set) {
  uInt status=0;                   // accumulator
  Int  rotate;                     // rhs as an Int

  #if DECCHECK
  if (decCheckOperands(res, lhs, rhs, set)) return res;
  #endif

  // NaNs propagate as normal
  if (decNumberIsNaN(lhs) || decNumberIsNaN(rhs))
    decNaNs(res, lhs, rhs, set, &status);
  // rhs must be an integer
   else if (decNumberIsInfinite(rhs) || rhs->exponent!=0)
    status=DEC_Invalid_operation;
   else {                          // both numeric, rhs is an integer
    rotate=decGetInt(rhs);         // [cannot fail]
    if (rotate==BADINT             // something bad ..
     || rotate==BIGODD || rotate==BIGEVEN // .. very big ..
     || abs(rotate)>set->digits)   // .. or out of range
      status=DEC_Invalid_operation;
     else {                        // rhs is OK
      decNumberCopy(res, lhs);
      // convert -ve rotate to equivalent positive rotation
      if (rotate<0) rotate=set->digits+rotate;
      if (rotate!=0 && rotate!=set->digits  // zero or full rotation
       && !decNumberIsInfinite(res)) {      // lhs was infinite
        // left-rotate to do; 0 < rotate < set->digits
        uInt units, shift;         // work
        uInt msudigits;            // digits in result msu
        Unit *msu=res->lsu+D2U(res->digits)-1;    // current msu
        Unit *msumax=res->lsu+D2U(set->digits)-1; // rotation msu
        // pad the coefficient with zero units up to full length so the
        // rotation operates over exactly set->digits digits
        for (msu++; msu<=msumax; msu++) *msu=0;   // ensure high units=0
        res->digits=set->digits;                  // now full-length
        msudigits=MSUDIGITS(res->digits);         // actual digits in msu

        // rotation here is done in-place, in three steps
        //   1. shift all to least up to one unit to unit-align final
        //      lsd [any digits shifted out are rotated to the left,
        //      abutted to the original msd (which may require split)]
        //
        //      [if there are no whole units left to rotate, the
        //      rotation is now complete]
        //
        //   2. shift to least, from below the split point only, so that
        //      the final msd is in the right place in its Unit [any
        //      digits shifted out will fit exactly in the current msu,
        //      left aligned, no split required]
        //
        //   3. rotate all the units by reversing left part, right
        //      part, and then whole
        //
        // example: rotate right 8 digits (2 units + 2), DECDPUN=3.
        //
        //   start: 00a bcd efg hij klm npq
        //
        //      1a  000 0ab cde fgh|ijk lmn      [pq saved]
        //      1b  00p qab cde fgh|ijk lmn
        //
        //      2a  00p qab cde fgh|00i jkl      [mn saved]
        //      2b  mnp qab cde fgh|00i jkl
        //
        //      3a  fgh cde qab mnp|00i jkl
        //      3b  fgh cde qab mnp|jkl 00i
        //      3c  00i jkl mnp qab cde fgh

        // Step 1: amount to shift is the partial right-rotate count
        rotate=set->digits-rotate; // make it right-rotate
        units=rotate/DECDPUN;      // whole units to rotate
        shift=rotate%DECDPUN;      // left-over digits count
        if (shift>0) {             // not an exact number of units
          uInt save=res->lsu[0]%powers[shift];    // save low digit(s)
          decShiftToLeast(res->lsu, D2U(res->digits), shift);
          if (shift>msudigits) {   // msumax-1 needs >0 digits
            uInt rem=save%powers[shift-msudigits];// split save
            *msumax=(Unit)(save/powers[shift-msudigits]); // and insert
            *(msumax-1)=*(msumax-1)
             +(Unit)(rem*powers[DECDPUN-(shift-msudigits)]); // ..
            }
           else { // all fits in msumax
            *msumax=*msumax+(Unit)(save*powers[msudigits-shift]); // [maybe *1]
            }
          } // digits shift needed

        // If whole units to rotate...
        if (units>0) {             // some to do
          // Step 2: the units to touch are the whole ones in rotate,
          // if any, and the shift is DECDPUN-msudigits (which may be
          // 0, again)
          shift=DECDPUN-msudigits;
          if (shift>0) {           // not an exact number of units
            uInt save=res->lsu[0]%powers[shift];  // save low digit(s)
            decShiftToLeast(res->lsu, units, shift);
            *msumax=*msumax+(Unit)(save*powers[msudigits]);
            } // partial shift needed

          // Step 3: rotate the units array using triple reverse
          // (reversing is easy and fast)
          decReverse(res->lsu+units, msumax);     // left part
          decReverse(res->lsu, res->lsu+units-1); // right part
          decReverse(res->lsu, msumax);           // whole
          } // whole units to rotate

        // the rotation may have left an undetermined number of zeros
        // on the left, so true length needs to be calculated
        res->digits=decGetDigits(res->lsu, msumax-res->lsu+1);
        } // rotate needed
      } // rhs OK
    } // numerics
  if (status!=0) decStatus(res, status, set);
  return res;
  } // decNumberRotate

/* ------------------------------------------------------------------ */
/* decNumberSameQuantum -- test for equal exponents                   */
/*                                                                    */
/*   res is the result number, which will contain either 0 or 1       */
/*   lhs is a number to test                                          */
/*   rhs is the second (usually a pattern)                            */
/*                                                                    */
/*   No errors are possible and no context is needed.
*/
/* ------------------------------------------------------------------ */
decNumber * decNumberSameQuantum(decNumber *res, const decNumber *lhs,
                                 const decNumber *rhs) {
  Unit ret=0;                      // return value

  #if DECCHECK
  if (decCheckOperands(res, lhs, rhs, DECUNCONT)) return res;
  #endif

  if (SPECIALARGS) {
    // only specials of the same kind share a quantum
    if (decNumberIsNaN(lhs) && decNumberIsNaN(rhs)) ret=1;
     else if (decNumberIsInfinite(lhs) && decNumberIsInfinite(rhs)) ret=1;
    // [anything else with a special gives 0]
    }
   else if (lhs->exponent==rhs->exponent) ret=1;

  decNumberZero(res);              // OK to overwrite an operand now
  *res->lsu=ret;
  return res;
  } // decNumberSameQuantum

/* ------------------------------------------------------------------ */
/* decNumberScaleB -- multiply by a power of 10                       */
/*                                                                    */
/*   This computes C = A x 10**B where B is an integer (q=0) with     */
/*   maximum magnitude 2*(emax+digits)                                */
/*                                                                    */
/*   res is C, the result.  C may be A or B                           */
/*   lhs is A, the number to adjust                                   */
/*   rhs is B, the requested power of ten to use                      */
/*   set is the context                                               */
/*                                                                    */
/*   C must have space for set->digits digits.                        */
/*                                                                    */
/*   The result may underflow or overflow.                            */
/* ------------------------------------------------------------------ */
decNumber * decNumberScaleB(decNumber *res, const decNumber *lhs,
                            const decNumber *rhs, decContext *set) {
  Int  reqexp;                     // requested exponent change [B]
  uInt status=0;                   // accumulator
  Int  residue;                    // work

  #if DECCHECK
  if (decCheckOperands(res, lhs, rhs, set)) return res;
  #endif

  // Handle special values except lhs infinite
  if (decNumberIsNaN(lhs) || decNumberIsNaN(rhs))
    decNaNs(res, lhs, rhs, set, &status);
  // rhs must be an integer
   else if (decNumberIsInfinite(rhs) || rhs->exponent!=0)
    status=DEC_Invalid_operation;
   else {
    // lhs is a number; rhs is a finite with q==0
    reqexp=decGetInt(rhs);         // [cannot fail]
    // maximum range is larger than getInt can handle, so this is
    // more restrictive than the specification
    if (reqexp==BADINT             // something bad ..
     || reqexp==BIGODD || reqexp==BIGEVEN // it was huge
     || (abs(reqexp)+1)/2>(set->digits+set->emax)) // .. or out of range
      status=DEC_Invalid_operation;
     else {                        // rhs is OK
      decNumberCopy(res, lhs);     // all done if infinite lhs
      if (!decNumberIsInfinite(res)) { // prepare to scale
        Int exp=res->exponent;     // save for overflow test
        res->exponent+=reqexp;     // adjust the exponent
        // detect Int overflow of the exponent addition: same-signed
        // operands must not produce an opposite-signed sum
        if (((exp^reqexp)>=0)      // same sign ...
         && ((exp^res->exponent)<0)) { // .. but result had different
          // the calculation overflowed, so force right treatment
          if (exp<0) res->exponent=DEC_MIN_EMIN-DEC_MAX_DIGITS;
           else res->exponent=DEC_MAX_EMAX+1;
          }
        residue=0;
        decFinalize(res, set, &residue, &status); // final check
        } // finite LHS
      } // rhs OK
    } // rhs finite
  if (status!=0) decStatus(res, status, set);
  return res;
  } // decNumberScaleB

/* ------------------------------------------------------------------ */
/* decNumberShift -- shift the coefficient of a Number left or right  */
/*                                                                    */
/*   This computes C = A << B or C = A >> -B  (in base ten).          */
/*                                                                    */
/* [NOTE(review): part of this header was destroyed by markup         */
/* stripping in this copy; text reconstructed to match the            */
/* decNumberRotate header above -- verify against upstream.]          */
/*                                                                    */
/*   res is C, the result.  C may be A and/or B (e.g., X=X<<X)        */
/*   lhs is A                                                         */
/*   rhs is B, the number of digits to shift (-ve to right)           */
/*   set is the context                                               */
/*                                                                    */
/*   The digits of the coefficient of A are shifted to the left (if   */
/*   B is positive) or to the right (if B is negative) without        */
/*   adjusting the exponent or the sign of A.                         */
/*                                                                    */
/*   B must be an integer (q=0) and in the range -set->digits through */
/*   +set->digits.                                                    */
/*   C must have space for set->digits digits.                        */
/*   NaNs are propagated as usual.  Infinities are unaffected (but    */
/*   B must be valid).  No status is set unless B is invalid or an    */
/*   operand is an sNaN.                                              */
/* ------------------------------------------------------------------ */
decNumber * decNumberShift(decNumber *res, const decNumber *lhs,
                           const decNumber *rhs, decContext *set) {
  uInt status=0;                   // accumulator
  Int  shift;                      // rhs as an Int

  #if DECCHECK
  if (decCheckOperands(res, lhs, rhs, set)) return res;
  #endif

  // NaNs propagate as normal
  if (decNumberIsNaN(lhs) || decNumberIsNaN(rhs))
    decNaNs(res, lhs, rhs, set, &status);
  // rhs must be an integer
   else if (decNumberIsInfinite(rhs) || rhs->exponent!=0)
    status=DEC_Invalid_operation;
   else {                          // both numeric, rhs is an integer
    shift=decGetInt(rhs);          // [cannot fail]
    if (shift==BADINT              // something bad ..
     || shift==BIGODD || shift==BIGEVEN // .. very big ..
     || abs(shift)>set->digits)    // .. or out of range
      status=DEC_Invalid_operation;
     else {                        // rhs is OK
      decNumberCopy(res, lhs);
      if (shift!=0 && !decNumberIsInfinite(res)) { // something to do
        if (shift>0) {             // to left
          if (shift==set->digits) { // removing all
            *res->lsu=0;           // so place 0
            res->digits=1;         // ..
            }
           else { //
            // first remove leading digits if necessary
            if (res->digits+shift>set->digits) {
              decDecap(res, res->digits+shift-set->digits);
              // that updated res->digits; may have gone to 1 (for a
              // single digit or for zero)
              }
            if (res->digits>1 || *res->lsu) // if non-zero..
              res->digits=decShiftToMost(res->lsu, res->digits, shift);
            } // partial left
          } // left
         else {                    // to right
          if (-shift>=res->digits) { // discarding all
            *res->lsu=0;           // so place 0
            res->digits=1;         // ..
            }
           else {
            decShiftToLeast(res->lsu, D2U(res->digits), -shift);
            res->digits-=(-shift);
            }
          } // to right
        } // non-0 non-Inf shift
      } // rhs OK
    } // numerics
  if (status!=0) decStatus(res, status, set);
  return res;
  } // decNumberShift

/* ------------------------------------------------------------------ */
/* decNumberSquareRoot -- square root operator                        */
/*                                                                    */
/*   This computes C = squareroot(A)                                  */
/*                                                                    */
/*   res is C, the result.  C may be A                                */
/*   rhs is A                                                         */
/*   set is the context; note that rounding mode has no effect        */
/*                                                                    */
/*   C must have space for set->digits digits.                        */
/* ------------------------------------------------------------------ */
/* This uses the following varying-precision algorithm in:            */
/*                                                                    */
/*   Properly Rounded Variable Precision Square Root, T. E. Hull and  */
/*   A. Abrham, ACM Transactions on Mathematical Software, Vol 11 #3, */
/*   pp229-237, ACM, September 1985.                                  */
/*                                                                    */
/* The square-root is calculated using Newton's method, after which   */
/* a check is made to ensure the result is correctly rounded.         */
/*                                                                    */
/* % [Reformatted original Numerical Turing source code follows.]
*/ /* function sqrt(x : real) : real */ /* % sqrt(x) returns the properly rounded approximation to the square */ /* % root of x, in the precision of the calling environment, or it */ /* % fails if x < 0. */ /* % t e hull and a abrham, august, 1984 */ /* if x <= 0 then */ /* if x < 0 then */ /* assert false */ /* else */ /* result 0 */ /* end if */ /* end if */ /* var f := setexp(x, 0) % fraction part of x [0.1 <= x < 1] */ /* var e := getexp(x) % exponent part of x */ /* var approx : real */ /* if e mod 2 = 0 then */ /* approx := .259 + .819 * f % approx to root of f */ /* else */ /* f := f/l0 % adjustments */ /* e := e + 1 % for odd */ /* approx := .0819 + 2.59 * f % exponent */ /* end if */ /* */ /* var p:= 3 */ /* const maxp := currentprecision + 2 */ /* loop */ /* p := min(2*p - 2, maxp) % p = 4,6,10, . . . , maxp */ /* precision p */ /* approx := .5 * (approx + f/approx) */ /* exit when p = maxp */ /* end loop */ /* */ /* % approx is now within 1 ulp of the properly rounded square root */ /* % of f; to ensure proper rounding, compare squares of (approx - */ /* % l/2 ulp) and (approx + l/2 ulp) with f. 
*/ /* p := currentprecision */ /* begin */ /* precision p + 2 */ /* const approxsubhalf := approx - setexp(.5, -p) */ /* if mulru(approxsubhalf, approxsubhalf) > f then */ /* approx := approx - setexp(.l, -p + 1) */ /* else */ /* const approxaddhalf := approx + setexp(.5, -p) */ /* if mulrd(approxaddhalf, approxaddhalf) < f then */ /* approx := approx + setexp(.l, -p + 1) */ /* end if */ /* end if */ /* end */ /* result setexp(approx, e div 2) % fix exponent */ /* end sqrt */ /* ------------------------------------------------------------------ */ decNumber * decNumberSquareRoot(decNumber *res, const decNumber *rhs, decContext *set) { decContext workset, approxset; // work contexts decNumber dzero; // used for constant zero Int maxp; // largest working precision Int workp; // working precision Int residue=0; // rounding residue uInt status=0, ignore=0; // status accumulators uInt rstatus; // .. Int exp; // working exponent Int ideal; // ideal (preferred) exponent Int needbytes; // work Int dropped; // .. 
#if DECSUBSET decNumber *allocrhs=NULL; // non-NULL if rounded rhs allocated #endif // buffer for f [needs +1 in case DECBUFFER 0] decNumber buff[D2N(DECBUFFER+1)]; // buffer for a [needs +2 to match likely maxp] decNumber bufa[D2N(DECBUFFER+2)]; // buffer for temporary, b [must be same size as a] decNumber bufb[D2N(DECBUFFER+2)]; decNumber *allocbuff=NULL; // -> allocated buff, iff allocated decNumber *allocbufa=NULL; // -> allocated bufa, iff allocated decNumber *allocbufb=NULL; // -> allocated bufb, iff allocated decNumber *f=buff; // reduced fraction decNumber *a=bufa; // approximation to result decNumber *b=bufb; // intermediate result // buffer for temporary variable, up to 3 digits decNumber buft[D2N(3)]; decNumber *t=buft; // up-to-3-digit constant or work #if DECCHECK if (decCheckOperands(res, DECUNUSED, rhs, set)) return res; #endif do { // protect allocated storage #if DECSUBSET if (!set->extended) { // reduce operand and set lostDigits status, as needed if (rhs->digits>set->digits) { allocrhs=decRoundOperand(rhs, set, &status); if (allocrhs==NULL) break; // [Note: 'f' allocation below could reuse this buffer if // used, but as this is rare they are kept separate for clarity.] rhs=allocrhs; } } #endif // [following code does not require input rounding] // handle infinities and NaNs if (SPECIALARG) { if (decNumberIsInfinite(rhs)) { // an infinity if (decNumberIsNegative(rhs)) status|=DEC_Invalid_operation; else decNumberCopy(res, rhs); // +Infinity } else decNaNs(res, rhs, NULL, set, &status); // a NaN break; } // calculate the ideal (preferred) exponent [floor(exp/2)] // [It would be nicer to write: ideal=rhs->exponent>>1, but this // generates a compiler warning. Generated code is the same.] ideal=(rhs->exponent&~1)/2; // target // handle zeros if (ISZERO(rhs)) { decNumberCopy(res, rhs); // could be 0 or -0 res->exponent=ideal; // use the ideal [safe] // use decFinish to clamp any out-of-range exponent, etc. 
decFinish(res, set, &residue, &status); break; } // any other -x is an oops if (decNumberIsNegative(rhs)) { status|=DEC_Invalid_operation; break; } // space is needed for three working variables // f -- the same precision as the RHS, reduced to 0.01->0.99... // a -- Hull's approximation -- precision, when assigned, is // currentprecision+1 or the input argument precision, // whichever is larger (+2 for use as temporary) // b -- intermediate temporary result (same size as a) // if any is too long for local storage, then allocate workp=MAXI(set->digits+1, rhs->digits); // actual rounding precision workp=MAXI(workp, 7); // at least 7 for low cases maxp=workp+2; // largest working precision needbytes=sizeof(decNumber)+(D2U(rhs->digits)-1)*sizeof(Unit); if (needbytes>(Int)sizeof(buff)) { allocbuff=(decNumber *)malloc(needbytes); if (allocbuff==NULL) { // hopeless -- abandon status|=DEC_Insufficient_storage; break;} f=allocbuff; // use the allocated space } // a and b both need to be able to hold a maxp-length number needbytes=sizeof(decNumber)+(D2U(maxp)-1)*sizeof(Unit); if (needbytes>(Int)sizeof(bufa)) { // [same applies to b] allocbufa=(decNumber *)malloc(needbytes); allocbufb=(decNumber *)malloc(needbytes); if (allocbufa==NULL || allocbufb==NULL) { // hopeless status|=DEC_Insufficient_storage; break;} a=allocbufa; // use the allocated spaces b=allocbufb; // .. } // copy rhs -> f, save exponent, and reduce so 0.1 <= f < 1 decNumberCopy(f, rhs); exp=f->exponent+f->digits; // adjusted to Hull rules f->exponent=-(f->digits); // to range // set up working context decContextDefault(&workset, DEC_INIT_DECIMAL64); workset.emax=DEC_MAX_EMAX; workset.emin=DEC_MIN_EMIN; // [Until further notice, no error is possible and status bits // (Rounded, etc.) should be ignored, not accumulated.] 
// Calculate initial approximation, and allow for odd exponent workset.digits=workp; // p for initial calculation t->bits=0; t->digits=3; a->bits=0; a->digits=3; if ((exp & 1)==0) { // even exponent // Set t=0.259, a=0.819 t->exponent=-3; a->exponent=-3; #if DECDPUN>=3 t->lsu[0]=259; a->lsu[0]=819; #elif DECDPUN==2 t->lsu[0]=59; t->lsu[1]=2; a->lsu[0]=19; a->lsu[1]=8; #else t->lsu[0]=9; t->lsu[1]=5; t->lsu[2]=2; a->lsu[0]=9; a->lsu[1]=1; a->lsu[2]=8; #endif } else { // odd exponent // Set t=0.0819, a=2.59 f->exponent--; // f=f/10 exp++; // e=e+1 t->exponent=-4; a->exponent=-2; #if DECDPUN>=3 t->lsu[0]=819; a->lsu[0]=259; #elif DECDPUN==2 t->lsu[0]=19; t->lsu[1]=8; a->lsu[0]=59; a->lsu[1]=2; #else t->lsu[0]=9; t->lsu[1]=1; t->lsu[2]=8; a->lsu[0]=9; a->lsu[1]=5; a->lsu[2]=2; #endif } decMultiplyOp(a, a, f, &workset, &ignore); // a=a*f decAddOp(a, a, t, &workset, 0, &ignore); // ..+t // [a is now the initial approximation for sqrt(f), calculated with // currentprecision, which is also a's precision.] // the main calculation loop decNumberZero(&dzero); // make 0 decNumberZero(t); // set t = 0.5 t->lsu[0]=5; // .. t->exponent=-1; // .. workset.digits=3; // initial p for (; workset.digitsexponent+=exp/2; // set correct exponent rstatus=0; // clear status residue=0; // .. and accumulator decCopyFit(a, a, &approxset, &residue, &rstatus); // reduce (if needed) decFinish(a, &approxset, &residue, &rstatus); // clean and finalize // Overflow was possible if the input exponent was out-of-range, // in which case quit if (rstatus&DEC_Overflow) { status=rstatus; // use the status as-is decNumberCopy(res, a); // copy to result break; } // Preserve status except Inexact/Rounded status|=(rstatus & ~(DEC_Rounded|DEC_Inexact)); // Carry out the Hull correction a->exponent-=exp/2; // back to 0.1->1 // a is now at final precision and within 1 ulp of the properly // rounded square root of f; to ensure proper rounding, compare // squares of (a - l/2 ulp) and (a + l/2 ulp) with f. 
// Here workset.digits=maxp and t=0.5, and a->digits determines // the ulp workset.digits--; // maxp-1 is OK now t->exponent=-a->digits-1; // make 0.5 ulp decAddOp(b, a, t, &workset, DECNEG, &ignore); // b = a - 0.5 ulp workset.round=DEC_ROUND_UP; decMultiplyOp(b, b, b, &workset, &ignore); // b = mulru(b, b) decCompareOp(b, f, b, &workset, COMPARE, &ignore); // b ? f, reversed if (decNumberIsNegative(b)) { // f < b [i.e., b > f] // this is the more common adjustment, though both are rare t->exponent++; // make 1.0 ulp t->lsu[0]=1; // .. decAddOp(a, a, t, &workset, DECNEG, &ignore); // a = a - 1 ulp // assign to approx [round to length] approxset.emin-=exp/2; // adjust to match a approxset.emax-=exp/2; decAddOp(a, &dzero, a, &approxset, 0, &ignore); } else { decAddOp(b, a, t, &workset, 0, &ignore); // b = a + 0.5 ulp workset.round=DEC_ROUND_DOWN; decMultiplyOp(b, b, b, &workset, &ignore); // b = mulrd(b, b) decCompareOp(b, b, f, &workset, COMPARE, &ignore); // b ? f if (decNumberIsNegative(b)) { // b < f t->exponent++; // make 1.0 ulp t->lsu[0]=1; // .. decAddOp(a, a, t, &workset, 0, &ignore); // a = a + 1 ulp // assign to approx [round to length] approxset.emin-=exp/2; // adjust to match a approxset.emax-=exp/2; decAddOp(a, &dzero, a, &approxset, 0, &ignore); } } // [no errors are possible in the above, and rounding/inexact during // estimation are irrelevant, so status was not accumulated] // Here, 0.1 <= a < 1 (still), so adjust back a->exponent+=exp/2; // set correct exponent // count droppable zeros [after any subnormal rounding] by // trimming a copy decNumberCopy(b, a); decTrim(b, set, 1, 1, &dropped); // [drops trailing zeros] // Set Inexact and Rounded. 
The answer can only be exact if // it is short enough so that squaring it could fit in workp // digits, so this is the only (relatively rare) condition that // a careful check is needed if (b->digits*2-1 > workp) { // cannot fit status|=DEC_Inexact|DEC_Rounded; } else { // could be exact/unrounded uInt mstatus=0; // local status decMultiplyOp(b, b, b, &workset, &mstatus); // try the multiply if (mstatus&DEC_Overflow) { // result just won't fit status|=DEC_Inexact|DEC_Rounded; } else { // plausible decCompareOp(t, b, rhs, &workset, COMPARE, &mstatus); // b ? rhs if (!ISZERO(t)) status|=DEC_Inexact|DEC_Rounded; // not equal else { // is Exact // here, dropped is the count of trailing zeros in 'a' // use closest exponent to ideal... Int todrop=ideal-a->exponent; // most that can be dropped if (todrop<0) status|=DEC_Rounded; // ideally would add 0s else { // unrounded // there are some to drop, but emax may not allow all Int maxexp=set->emax-set->digits+1; Int maxdrop=maxexp-a->exponent; if (todrop>maxdrop && set->clamp) { // apply clamping todrop=maxdrop; status|=DEC_Clamped; } if (dropped0) { // have some to drop decShiftToLeast(a->lsu, D2U(a->digits), todrop); a->exponent+=todrop; // maintain numerical value a->digits-=todrop; // new length } } } } } // double-check Underflow, as perhaps the result could not have // been subnormal (initial argument too big), or it is now Exact if (status&DEC_Underflow) { Int ae=rhs->exponent+rhs->digits-1; // adjusted exponent // check if truly subnormal #if DECEXTFLAG // DEC_Subnormal too if (ae>=set->emin*2) status&=~(DEC_Subnormal|DEC_Underflow); #else if (ae>=set->emin*2) status&=~DEC_Underflow; #endif // check if truly inexact if (!(status&DEC_Inexact)) status&=~DEC_Underflow; } decNumberCopy(res, a); // a is now the result } while(0); // end protected if (allocbuff!=NULL) free(allocbuff); // drop any storage used if (allocbufa!=NULL) free(allocbufa); // .. if (allocbufb!=NULL) free(allocbufb); // .. 
#if DECSUBSET
if (allocrhs !=NULL) free(allocrhs); // .. (rounded-operand copy, subset mode only)
#endif
if (status!=0) decStatus(res, status, set);// then report status
#if DECCHECK
decCheckInexact(res, set);
#endif
return res;
} // decNumberSquareRoot

/* ------------------------------------------------------------------ */
/* decNumberSubtract -- subtract two Numbers                          */
/*                                                                    */
/* This computes C = A - B                                            */
/*                                                                    */
/* res is C, the result. C may be A and/or B (e.g., X=X-X)            */
/* lhs is A                                                           */
/* rhs is B                                                           */
/* set is the context                                                 */
/*                                                                    */
/* C must have space for set->digits digits.                          */
/* ------------------------------------------------------------------ */
decNumber * decNumberSubtract(decNumber *res, const decNumber *lhs, const decNumber *rhs, decContext *set) {
    uInt status=0; // accumulator for exceptional conditions
    // subtraction is simply addition with the rhs sign logically inverted
    // (the DECNEG flag tells decAddOp to negate rhs)
    decAddOp(res, lhs, rhs, set, DECNEG, &status);
    if (status!=0) decStatus(res, status, set); // report accumulated status
    #if DECCHECK
    decCheckInexact(res, set);
    #endif
    return res;
} // decNumberSubtract

/* ------------------------------------------------------------------ */
/* decNumberToIntegralExact -- round-to-integral-value with InExact   */
/* decNumberToIntegralValue -- round-to-integral-value                */
/*                                                                    */
/* res is the result                                                  */
/* rhs is input number                                                */
/* set is the context                                                 */
/*                                                                    */
/* res must have space for any value of rhs.                          */
/*                                                                    */
/* This implements the IEEE special operators and therefore treats    */
/* special values as valid. For finite numbers it returns             */
/* rescale(rhs, 0) if rhs->exponent is <0.                            */
/* Otherwise the result is rhs (so no error is possible, except for   */
/* sNaN).                                                             */
/*                                                                    */
/* The context is used for rounding mode and status after sNaN, but   */
/* the digits setting is ignored. The Exact version will signal       */
/* Inexact if the result differs numerically from rhs; the other      */
/* never signals Inexact.
*/
/* ------------------------------------------------------------------ */
decNumber * decNumberToIntegralExact(decNumber *res, const decNumber *rhs, decContext *set) {
    decNumber dn;       // scratch zero used as the quantize target
    decContext workset; // working context
    uInt status=0;      // accumulator
    #if DECCHECK
    if (decCheckOperands(res, DECUNUSED, rhs, set)) return res;
    #endif
    // handle infinities and NaNs
    if (SPECIALARG) {
        if (decNumberIsInfinite(rhs)) decNumberCopy(res, rhs); // an Infinity
        else decNaNs(res, rhs, NULL, set, &status); // a NaN
    }
    else { // finite
        // have a finite number; no error possible (res must be big enough)
        if (rhs->exponent>=0) return decNumberCopy(res, rhs); // already integral
        // that was easy, but if negative exponent there is work to do...
        workset=*set;               // clone rounding, etc.
        workset.digits=rhs->digits; // no length rounding
        workset.traps=0;            // no traps
        decNumberZero(&dn);         // make a number with exponent 0
        decNumberQuantize(res, rhs, &dn, &workset); // rescale to exponent 0
        status|=workset.status;     // may include Inexact/Rounded
    }
    if (status!=0) decStatus(res, status, set);
    return res;
} // decNumberToIntegralExact

decNumber * decNumberToIntegralValue(decNumber *res, const decNumber *rhs, decContext *set) {
    decContext workset=*set; // working context (so Inexact is discarded)
    workset.traps=0;         // no traps
    decNumberToIntegralExact(res, rhs, &workset);
    // this never affects set, except for sNaNs; NaN will have been set
    // or propagated already, so no need to call decStatus
    set->status|=workset.status&DEC_Invalid_operation;
    return res;
} // decNumberToIntegralValue

/* ------------------------------------------------------------------ */
/* decNumberXor -- XOR two Numbers, digitwise                         */
/*                                                                    */
/* This computes C = A ^ B                                            */
/*                                                                    */
/* res is C, the result. C may be A and/or B (e.g., X=X^X)            */
/* lhs is A                                                           */
/* rhs is B                                                           */
/* set is the context (used for result length and error report)       */
/*                                                                    */
/* C must have space for set->digits digits.                          */
/*                                                                    */
/* Logical function restrictions apply (see above); a NaN is          */
/* returned with Invalid_operation if a restriction is violated.
*/ /* ------------------------------------------------------------------ */ decNumber * decNumberXor(decNumber *res, const decNumber *lhs, const decNumber *rhs, decContext *set) { const Unit *ua, *ub; // -> operands const Unit *msua, *msub; // -> operand msus Unit *uc, *msuc; // -> result and its msu Int msudigs; // digits in res msu #if DECCHECK if (decCheckOperands(res, lhs, rhs, set)) return res; #endif if (lhs->exponent!=0 || decNumberIsSpecial(lhs) || decNumberIsNegative(lhs) || rhs->exponent!=0 || decNumberIsSpecial(rhs) || decNumberIsNegative(rhs)) { decStatus(res, DEC_Invalid_operation, set); return res; } // operands are valid ua=lhs->lsu; // bottom-up ub=rhs->lsu; // .. uc=res->lsu; // .. msua=ua+D2U(lhs->digits)-1; // -> msu of lhs msub=ub+D2U(rhs->digits)-1; // -> msu of rhs msuc=uc+D2U(set->digits)-1; // -> msu of result msudigs=MSUDIGITS(set->digits); // [faster than remainder] for (; uc<=msuc; ua++, ub++, uc++) { // Unit loop Unit a, b; // extract units if (ua>msua) a=0; else a=*ua; if (ub>msub) b=0; else b=*ub; *uc=0; // can now write back if (a|b) { // maybe 1 bits to examine Int i, j; // This loop could be unrolled and/or use BIN2BCD tables for (i=0; i1) { decStatus(res, DEC_Invalid_operation, set); return res; } if (uc==msuc && i==msudigs-1) break; // just did final digit } // each digit } // non-zero } // each unit // [here uc-1 is the msu of the result] res->digits=decGetDigits(res->lsu, uc-res->lsu); res->exponent=0; // integer res->bits=0; // sign=0 return res; // [no status to set] } // decNumberXor /* ================================================================== */ /* Utility routines */ /* ================================================================== */ /* ------------------------------------------------------------------ */ /* decNumberClass -- return the decClass of a decNumber */ /* dn -- the decNumber to test */ /* set -- the context to use for Emin */ /* returns the decClass enum */ /* 
------------------------------------------------------------------ */ enum decClass decNumberClass(const decNumber *dn, decContext *set) { if (decNumberIsSpecial(dn)) { if (decNumberIsQNaN(dn)) return DEC_CLASS_QNAN; if (decNumberIsSNaN(dn)) return DEC_CLASS_SNAN; // must be an infinity if (decNumberIsNegative(dn)) return DEC_CLASS_NEG_INF; return DEC_CLASS_POS_INF; } // is finite if (decNumberIsNormal(dn, set)) { // most common if (decNumberIsNegative(dn)) return DEC_CLASS_NEG_NORMAL; return DEC_CLASS_POS_NORMAL; } // is subnormal or zero if (decNumberIsZero(dn)) { // most common if (decNumberIsNegative(dn)) return DEC_CLASS_NEG_ZERO; return DEC_CLASS_POS_ZERO; } if (decNumberIsNegative(dn)) return DEC_CLASS_NEG_SUBNORMAL; return DEC_CLASS_POS_SUBNORMAL; } // decNumberClass /* ------------------------------------------------------------------ */ /* decNumberClassToString -- convert decClass to a string */ /* */ /* eclass is a valid decClass */ /* returns a constant string describing the class (max 13+1 chars) */ /* ------------------------------------------------------------------ */ const char *decNumberClassToString(enum decClass eclass) { if (eclass==DEC_CLASS_POS_NORMAL) return DEC_ClassString_PN; if (eclass==DEC_CLASS_NEG_NORMAL) return DEC_ClassString_NN; if (eclass==DEC_CLASS_POS_ZERO) return DEC_ClassString_PZ; if (eclass==DEC_CLASS_NEG_ZERO) return DEC_ClassString_NZ; if (eclass==DEC_CLASS_POS_SUBNORMAL) return DEC_ClassString_PS; if (eclass==DEC_CLASS_NEG_SUBNORMAL) return DEC_ClassString_NS; if (eclass==DEC_CLASS_POS_INF) return DEC_ClassString_PI; if (eclass==DEC_CLASS_NEG_INF) return DEC_ClassString_NI; if (eclass==DEC_CLASS_QNAN) return DEC_ClassString_QN; if (eclass==DEC_CLASS_SNAN) return DEC_ClassString_SN; return DEC_ClassString_UN; // Unknown } // decNumberClassToString /* ------------------------------------------------------------------ */ /* decNumberCopy -- copy a number */ /* */ /* dest is the target decNumber */ /* src is the source 
decNumber */ /* returns dest */ /* */ /* (dest==src is allowed and is a no-op) */ /* All fields are updated as required. This is a utility operation, */ /* so special values are unchanged and no error is possible. */ /* ------------------------------------------------------------------ */ decNumber * decNumberCopy(decNumber *dest, const decNumber *src) { #if DECCHECK if (src==NULL) return decNumberZero(dest); #endif if (dest==src) return dest; // no copy required // Use explicit assignments here as structure assignment could copy // more than just the lsu (for small DECDPUN). This would not affect // the value of the results, but could disturb test harness spill // checking. dest->bits=src->bits; dest->exponent=src->exponent; dest->digits=src->digits; dest->lsu[0]=src->lsu[0]; if (src->digits>DECDPUN) { // more Units to come const Unit *smsup, *s; // work Unit *d; // .. // memcpy for the remaining Units would be safe as they cannot // overlap. However, this explicit loop is faster in short cases. d=dest->lsu+1; // -> first destination smsup=src->lsu+D2U(src->digits); // -> source msu+1 for (s=src->lsu+1; sdigits digits. */ /* No exception or error can occur; this is a quiet bitwise operation.*/ /* See also decNumberAbs for a checking version of this. */ /* ------------------------------------------------------------------ */ decNumber * decNumberCopyAbs(decNumber *res, const decNumber *rhs) { #if DECCHECK if (decCheckOperands(res, DECUNUSED, rhs, DECUNCONT)) return res; #endif decNumberCopy(res, rhs); res->bits&=~DECNEG; // turn off sign return res; } // decNumberCopyAbs /* ------------------------------------------------------------------ */ /* decNumberCopyNegate -- quiet negate value operator */ /* */ /* This sets C = negate(A) */ /* */ /* res is C, the result. C may be A */ /* rhs is A */ /* */ /* C must have space for set->digits digits. 
*/ /* No exception or error can occur; this is a quiet bitwise operation.*/ /* See also decNumberMinus for a checking version of this. */ /* ------------------------------------------------------------------ */ decNumber * decNumberCopyNegate(decNumber *res, const decNumber *rhs) { #if DECCHECK if (decCheckOperands(res, DECUNUSED, rhs, DECUNCONT)) return res; #endif decNumberCopy(res, rhs); res->bits^=DECNEG; // invert the sign return res; } // decNumberCopyNegate /* ------------------------------------------------------------------ */ /* decNumberCopySign -- quiet copy and set sign operator */ /* */ /* This sets C = A with the sign of B */ /* */ /* res is C, the result. C may be A */ /* lhs is A */ /* rhs is B */ /* */ /* C must have space for set->digits digits. */ /* No exception or error can occur; this is a quiet bitwise operation.*/ /* ------------------------------------------------------------------ */ decNumber * decNumberCopySign(decNumber *res, const decNumber *lhs, const decNumber *rhs) { uByte sign; // rhs sign #if DECCHECK if (decCheckOperands(res, DECUNUSED, rhs, DECUNCONT)) return res; #endif sign=rhs->bits & DECNEG; // save sign bit decNumberCopy(res, lhs); res->bits&=~DECNEG; // clear the sign res->bits|=sign; // set from rhs return res; } // decNumberCopySign /* ------------------------------------------------------------------ */ /* decNumberGetBCD -- get the coefficient in BCD8 */ /* dn is the source decNumber */ /* bcd is the uInt array that will receive dn->digits BCD bytes, */ /* most-significant at offset 0 */ /* returns bcd */ /* */ /* bcd must have at least dn->digits bytes. No error is possible; if */ /* dn is a NaN or Infinite, digits must be 1 and the coefficient 0. 
*/
/* ------------------------------------------------------------------ */
uByte * decNumberGetBCD(const decNumber *dn, uByte *bcd) {
    uByte *ub=bcd+dn->digits-1; // -> lsd (the array is filled backwards)
    const Unit *up=dn->lsu;     // Unit pointer, -> lsu
    #if DECDPUN==1 // trivial simple copy (one digit per Unit)
    for (; ub>=bcd; ub--, up++) *ub=*up;
    #else // chopping needed: peel decimal digits off each Unit
    uInt u=*up;        // work
    uInt cut=DECDPUN;  // downcounter through unit
    for (; ub>=bcd; ub--) {
        *ub=(uByte)(u%10); // [*6554 trick inhibits, here]
        u=u/10;
        cut--;
        if (cut>0) continue; // more in this unit
        up++;                // unit exhausted; move to the next
        u=*up;
        cut=DECDPUN;
    }
    #endif
    return bcd;
} // decNumberGetBCD

/* ------------------------------------------------------------------ */
/* decNumberSetBCD -- set (replace) the coefficient from BCD8         */
/* dn is the target decNumber                                         */
/* bcd is the uInt array that will source n BCD bytes, most-          */
/*   significant at offset 0                                          */
/* n is the number of digits in the source BCD array (bcd)            */
/* returns dn                                                         */
/*                                                                    */
/* dn must have space for at least n digits. No error is possible;    */
/* if dn is a NaN, or Infinite, or is to become a zero, n must be 1   */
/* and bcd[0] zero.
*/ /* ------------------------------------------------------------------ */ decNumber * decNumberSetBCD(decNumber *dn, const uByte *bcd, uInt n) { Unit *up=dn->lsu+D2U(dn->digits)-1; // -> msu [target pointer] const uByte *ub=bcd; // -> source msd #if DECDPUN==1 // trivial simple copy for (; ub=dn->lsu; up--) { // each Unit from msu *up=0; // will take <=DECDPUN digits for (; cut>0; ub++, cut--) *up=X10(*up)+*ub; cut=DECDPUN; // next Unit has all digits } #endif dn->digits=n; // set digit count return dn; } // decNumberSetBCD /* ------------------------------------------------------------------ */ /* decNumberIsNormal -- test normality of a decNumber */ /* dn is the decNumber to test */ /* set is the context to use for Emin */ /* returns 1 if |dn| is finite and >=Nmin, 0 otherwise */ /* ------------------------------------------------------------------ */ Int decNumberIsNormal(const decNumber *dn, decContext *set) { Int ae; // adjusted exponent #if DECCHECK if (decCheckOperands(DECUNRESU, DECUNUSED, dn, set)) return 0; #endif if (decNumberIsSpecial(dn)) return 0; // not finite if (decNumberIsZero(dn)) return 0; // not non-zero ae=dn->exponent+dn->digits-1; // adjusted exponent if (aeemin) return 0; // is subnormal return 1; } // decNumberIsNormal /* ------------------------------------------------------------------ */ /* decNumberIsSubnormal -- test subnormality of a decNumber */ /* dn is the decNumber to test */ /* set is the context to use for Emin */ /* returns 1 if |dn| is finite, non-zero, and exponent+dn->digits-1; // adjusted exponent if (aeemin) return 1; // is subnormal return 0; } // decNumberIsSubnormal /* ------------------------------------------------------------------ */ /* decNumberTrim -- remove insignificant zeros */ /* */ /* dn is the number to trim */ /* returns dn */ /* */ /* All fields are updated as required. This is a utility operation, */ /* so special values are unchanged and no error is possible. 
The */
/* zeros are removed unconditionally.                                 */
/* ------------------------------------------------------------------ */
decNumber * decNumberTrim(decNumber *dn) {
    Int dropped;    // work: receives the count of zeros dropped (unused here)
    decContext set; // ..
    #if DECCHECK
    if (decCheckOperands(DECUNRESU, DECUNUSED, dn, DECUNCONT)) return dn;
    #endif
    // a default BASE context has clamp=0, so trimming is unconditional
    decContextDefault(&set, DEC_INIT_BASE); // clamp=0
    return decTrim(dn, &set, 0, 1, &dropped);
} // decNumberTrim

/* ------------------------------------------------------------------ */
/* decNumberVersion -- return the name and version of this module     */
/*                                                                    */
/* No error is possible.                                              */
/* ------------------------------------------------------------------ */
const char * decNumberVersion(void) {
    return DECVERSION;
} // decNumberVersion

/* ------------------------------------------------------------------ */
/* decNumberZero -- set a number to 0                                 */
/*                                                                    */
/* dn is the number to set, with space for one digit                  */
/* returns dn                                                         */
/*                                                                    */
/* No error is possible.                                              */
/* ------------------------------------------------------------------ */
// Memset is not used as it is much slower in some environments.
decNumber * decNumberZero(decNumber *dn) {
    #if DECCHECK
    if (decCheckOperands(dn, DECUNUSED, DECUNUSED, DECUNCONT)) return dn;
    #endif
    dn->bits=0;
    dn->exponent=0;
    dn->digits=1;
    dn->lsu[0]=0;
    return dn;
} // decNumberZero

/* ================================================================== */
/* Local routines                                                     */
/* ================================================================== */

/* ------------------------------------------------------------------ */
/* decToString -- lay out a number into a string                      */
/*                                                                    */
/* dn is the number to lay out                                        */
/* string is where to lay out the number                              */
/* eng is 1 if Engineering, 0 if Scientific                           */
/*                                                                    */
/* string must be at least dn->digits+14 characters long              */
/* No error is possible.                                              */
/*                                                                    */
/* Note that this routine can generate a -0 or 0.000.
These are */ /* never generated in subset to-number or arithmetic, but can occur */ /* in non-subset arithmetic (e.g., -1*0 or 1.234-1.234). */ /* ------------------------------------------------------------------ */ // If DECCHECK is enabled the string "?" is returned if a number is // invalid. static void decToString(const decNumber *dn, char *string, Flag eng) { Int exp=dn->exponent; // local copy Int e; // E-part value Int pre; // digits before the '.' Int cut; // for counting digits in a Unit char *c=string; // work [output pointer] const Unit *up=dn->lsu+D2U(dn->digits)-1; // -> msu [input pointer] uInt u, pow; // work #if DECCHECK if (decCheckOperands(DECUNRESU, dn, DECUNUSED, DECUNCONT)) { strcpy(string, "?"); return;} #endif if (decNumberIsNegative(dn)) { // Negatives get a minus *c='-'; c++; } if (dn->bits&DECSPECIAL) { // Is a special value if (decNumberIsInfinite(dn)) { strcpy(c, "Inf"); strcpy(c+3, "inity"); return;} // a NaN if (dn->bits&DECSNAN) { // signalling NaN *c='s'; c++; } strcpy(c, "NaN"); c+=3; // step past // if not a clean non-zero coefficient, that's all there is in a // NaN string if (exp!=0 || (*dn->lsu==0 && dn->digits==1)) return; // [drop through to add integer] } // calculate how many digits in msu, and hence first cut cut=MSUDIGITS(dn->digits); // [faster than remainder] cut--; // power of ten for digit if (exp==0) { // simple integer [common fastpath] for (;up>=dn->lsu; up--) { // each Unit from msu u=*up; // contains DECDPUN digits to lay out for (; cut>=0; c++, cut--) TODIGIT(u, cut, c, pow); cut=DECDPUN-1; // next Unit has all digits } *c='\0'; // terminate the string return;} /* non-0 exponent -- assume plain form */ pre=dn->digits+exp; // digits before '.' e=0; // no E if ((exp>0) || (pre<-5)) { // need exponential form e=exp+dn->digits-1; // calculate E value pre=1; // assume one digit before '.' 
if (eng && (e!=0)) { // engineering: may need to adjust Int adj; // adjustment // The C remainder operator is undefined for negative numbers, so // a positive remainder calculation must be used here if (e<0) { adj=(-e)%3; if (adj!=0) adj=3-adj; } else { // e>0 adj=e%3; } e=e-adj; // if dealing with zero still produce an exponent which is a // multiple of three, as expected, but there will only be the // one zero before the E, still. Otherwise note the padding. if (!ISZERO(dn)) pre+=adj; else { // is zero if (adj!=0) { // 0.00Esnn needed e=e+3; pre=-(2-adj); } } // zero } // eng } // need exponent /* lay out the digits of the coefficient, adding 0s and . as needed */ u=*up; if (pre>0) { // xxx.xxx or xx00 (engineering) form Int n=pre; for (; pre>0; pre--, c++, cut--) { if (cut<0) { // need new Unit if (up==dn->lsu) break; // out of input digits (pre>digits) up--; cut=DECDPUN-1; u=*up; } TODIGIT(u, cut, c, pow); } if (ndigits) { // more to come, after '.' *c='.'; c++; for (;; c++, cut--) { if (cut<0) { // need new Unit if (up==dn->lsu) break; // out of input digits up--; cut=DECDPUN-1; u=*up; } TODIGIT(u, cut, c, pow); } } else for (; pre>0; pre--, c++) *c='0'; // 0 padding (for engineering) needed } else { // 0.xxx or 0.000xxx form *c='0'; c++; *c='.'; c++; for (; pre<0; pre++, c++) *c='0'; // add any 0's after '.' for (; ; c++, cut--) { if (cut<0) { // need new Unit if (up==dn->lsu) break; // out of input digits up--; cut=DECDPUN-1; u=*up; } TODIGIT(u, cut, c, pow); } } /* Finally add the E-part, if needed. It will never be 0, has a base maximum and minimum of +999999999 through -999999999, but could range down to -1999999998 for anormal numbers */ if (e!=0) { Flag had=0; // 1=had non-zero *c='E'; c++; *c='+'; c++; // assume positive u=e; // .. 
if (e<0) { *(c-1)='-'; // oops, need - u=-e; // uInt, please } // lay out the exponent [_itoa or equivalent is not ANSI C] for (cut=9; cut>=0; cut--) { TODIGIT(u, cut, c, pow); if (*c=='0' && !had) continue; // skip leading zeros had=1; // had non-0 c++; // step for next } // cut } *c='\0'; // terminate the string (all paths) return; } // decToString /* ------------------------------------------------------------------ */ /* decAddOp -- add/subtract operation */ /* */ /* This computes C = A + B */ /* */ /* res is C, the result. C may be A and/or B (e.g., X=X+X) */ /* lhs is A */ /* rhs is B */ /* set is the context */ /* negate is DECNEG if rhs should be negated, or 0 otherwise */ /* status accumulates status for the caller */ /* */ /* C must have space for set->digits digits. */ /* Inexact in status must be 0 for correct Exact zero sign in result */ /* ------------------------------------------------------------------ */ /* If possible, the coefficient is calculated directly into C. */ /* However, if: */ /* -- a digits+1 calculation is needed because the numbers are */ /* unaligned and span more than set->digits digits */ /* -- a carry to digits+1 digits looks possible */ /* -- C is the same as A or B, and the result would destructively */ /* overlap the A or B coefficient */ /* then the result must be calculated into a temporary buffer. In */ /* this case a local (stack) buffer is used if possible, and only if */ /* too long for that does malloc become the final resort. */ /* */ /* Misalignment is handled as follows: */ /* Apad: (AExp>BExp) Swap operands and proceed as for BExp>AExp. */ /* BPad: Apply the padding by a combination of shifting (whole */ /* units) and multiplication (part units). */ /* */ /* Addition, especially x=x+1, is speed-critical. */ /* The static buffer is larger than might be expected to allow for */ /* calls from higher-level funtions (notable exp). 
*/ /* ------------------------------------------------------------------ */ static decNumber * decAddOp(decNumber *res, const decNumber *lhs, const decNumber *rhs, decContext *set, uByte negate, uInt *status) { #if DECSUBSET decNumber *alloclhs=NULL; // non-NULL if rounded lhs allocated decNumber *allocrhs=NULL; // .., rhs #endif Int rhsshift; // working shift (in Units) Int maxdigits; // longest logical length Int mult; // multiplier Int residue; // rounding accumulator uByte bits; // result bits Flag diffsign; // non-0 if arguments have different sign Unit *acc; // accumulator for result Unit accbuff[SD2U(DECBUFFER*2+20)]; // local buffer [*2+20 reduces many // allocations when called from // other operations, notable exp] Unit *allocacc=NULL; // -> allocated acc buffer, iff allocated Int reqdigits=set->digits; // local copy; requested DIGITS Int padding; // work #if DECCHECK if (decCheckOperands(res, lhs, rhs, set)) return res; #endif do { // protect allocated storage #if DECSUBSET if (!set->extended) { // reduce operands and set lostDigits status, as needed if (lhs->digits>reqdigits) { alloclhs=decRoundOperand(lhs, set, status); if (alloclhs==NULL) break; lhs=alloclhs; } if (rhs->digits>reqdigits) { allocrhs=decRoundOperand(rhs, set, status); if (allocrhs==NULL) break; rhs=allocrhs; } } #endif // [following code does not require input rounding] // note whether signs differ [used all paths] diffsign=(Flag)((lhs->bits^rhs->bits^negate)&DECNEG); // handle infinities and NaNs if (SPECIALARGS) { // a special bit set if (SPECIALARGS & (DECSNAN | DECNAN)) // a NaN decNaNs(res, lhs, rhs, set, status); else { // one or two infinities if (decNumberIsInfinite(lhs)) { // LHS is infinity // two infinities with different signs is invalid if (decNumberIsInfinite(rhs) && diffsign) { *status|=DEC_Invalid_operation; break; } bits=lhs->bits & DECNEG; // get sign from LHS } else bits=(rhs->bits^negate) & DECNEG;// RHS must be Infinity bits|=DECINF; decNumberZero(res); 
res->bits=bits; // set +/- infinity } // an infinity break; } // Quick exit for add 0s; return the non-0, modified as need be if (ISZERO(lhs)) { Int adjust; // work Int lexp=lhs->exponent; // save in case LHS==RES bits=lhs->bits; // .. residue=0; // clear accumulator decCopyFit(res, rhs, set, &residue, status); // copy (as needed) res->bits^=negate; // flip if rhs was negated #if DECSUBSET if (set->extended) { // exponents on zeros count #endif // exponent will be the lower of the two adjust=lexp-res->exponent; // adjustment needed [if -ve] if (ISZERO(res)) { // both 0: special IEEE 754 rules if (adjust<0) res->exponent=lexp; // set exponent // 0-0 gives +0 unless rounding to -infinity, and -0-0 gives -0 if (diffsign) { if (set->round!=DEC_ROUND_FLOOR) res->bits=0; else res->bits=DECNEG; // preserve 0 sign } } else { // non-0 res if (adjust<0) { // 0-padding needed if ((res->digits-adjust)>set->digits) { adjust=res->digits-set->digits; // to fit exactly *status|=DEC_Rounded; // [but exact] } res->digits=decShiftToMost(res->lsu, res->digits, -adjust); res->exponent+=adjust; // set the exponent. } } // non-0 res #if DECSUBSET } // extended #endif decFinish(res, set, &residue, status); // clean and finalize break;} if (ISZERO(rhs)) { // [lhs is non-zero] Int adjust; // work Int rexp=rhs->exponent; // save in case RHS==RES bits=rhs->bits; // be clean residue=0; // clear accumulator decCopyFit(res, lhs, set, &residue, status); // copy (as needed) #if DECSUBSET if (set->extended) { // exponents on zeros count #endif // exponent will be the lower of the two // [0-0 case handled above] adjust=rexp-res->exponent; // adjustment needed [if -ve] if (adjust<0) { // 0-padding needed if ((res->digits-adjust)>set->digits) { adjust=res->digits-set->digits; // to fit exactly *status|=DEC_Rounded; // [but exact] } res->digits=decShiftToMost(res->lsu, res->digits, -adjust); res->exponent+=adjust; // set the exponent. 
} #if DECSUBSET } // extended #endif decFinish(res, set, &residue, status); // clean and finalize break;} // [NB: both fastpath and mainpath code below assume these cases // (notably 0-0) have already been handled] // calculate the padding needed to align the operands padding=rhs->exponent-lhs->exponent; // Fastpath cases where the numbers are aligned and normal, the RHS // is all in one unit, no operand rounding is needed, and no carry, // lengthening, or borrow is needed if (padding==0 && rhs->digits<=DECDPUN && rhs->exponent>=set->emin // [some normals drop through] && rhs->exponent<=set->emax-set->digits+1 // [could clamp] && rhs->digits<=reqdigits && lhs->digits<=reqdigits) { Int partial=*lhs->lsu; if (!diffsign) { // adding partial+=*rhs->lsu; if ((partial<=DECDPUNMAX) // result fits in unit && (lhs->digits>=DECDPUN || // .. and no digits-count change partial<(Int)powers[lhs->digits])) { // .. if (res!=lhs) decNumberCopy(res, lhs); // not in place *res->lsu=(Unit)partial; // [copy could have overwritten RHS] break; } // else drop out for careful add } else { // signs differ partial-=*rhs->lsu; if (partial>0) { // no borrow needed, and non-0 result if (res!=lhs) decNumberCopy(res, lhs); // not in place *res->lsu=(Unit)partial; // this could have reduced digits [but result>0] res->digits=decGetDigits(res->lsu, D2U(res->digits)); break; } // else drop out for careful subtract } } // Now align (pad) the lhs or rhs so they can be added or // subtracted, as necessary. If one number is much larger than // the other (that is, if in plain form there is a least one // digit between the lowest digit of one and the highest of the // other) padding with up to DIGITS-1 trailing zeros may be // needed; then apply rounding (as exotic rounding modes may be // affected by the residue). 
rhsshift=0; // rhs shift to left (padding) in Units bits=lhs->bits; // assume sign is that of LHS mult=1; // likely multiplier // [if padding==0 the operands are aligned; no padding is needed] if (padding!=0) { // some padding needed; always pad the RHS, as any required // padding can then be effected by a simple combination of // shifts and a multiply Flag swapped=0; if (padding<0) { // LHS needs the padding const decNumber *t; padding=-padding; // will be +ve bits=(uByte)(rhs->bits^negate); // assumed sign is now that of RHS t=lhs; lhs=rhs; rhs=t; swapped=1; } // If, after pad, rhs would be longer than lhs by digits+1 or // more then lhs cannot affect the answer, except as a residue, // so only need to pad up to a length of DIGITS+1. if (rhs->digits+padding > lhs->digits+reqdigits+1) { // The RHS is sufficient // for residue use the relative sign indication... Int shift=reqdigits-rhs->digits; // left shift needed residue=1; // residue for rounding if (diffsign) residue=-residue; // signs differ // copy, shortening if necessary decCopyFit(res, rhs, set, &residue, status); // if it was already shorter, then need to pad with zeros if (shift>0) { res->digits=decShiftToMost(res->lsu, res->digits, shift); res->exponent-=shift; // adjust the exponent. } // flip the result sign if unswapped and rhs was negated if (!swapped) res->bits^=negate; decFinish(res, set, &residue, status); // done break;} // LHS digits may affect result rhsshift=D2U(padding+1)-1; // this much by Unit shift .. mult=powers[padding-(rhsshift*DECDPUN)]; // .. this by multiplication } // padding needed if (diffsign) mult=-mult; // signs differ // determine the longer operand maxdigits=rhs->digits+padding; // virtual length of RHS if (lhs->digits>maxdigits) maxdigits=lhs->digits; // Decide on the result buffer to use; if possible place directly // into result. 
acc=res->lsu; // assume add direct to result // If destructive overlap, or the number is too long, or a carry or // borrow to DIGITS+1 might be possible, a buffer must be used. // [Might be worth more sophisticated tests when maxdigits==reqdigits] if ((maxdigits>=reqdigits) // is, or could be, too large || (res==rhs && rhsshift>0)) { // destructive overlap // buffer needed, choose it; units for maxdigits digits will be // needed, +1 Unit for carry or borrow Int need=D2U(maxdigits)+1; acc=accbuff; // assume use local buffer if (need*sizeof(Unit)>sizeof(accbuff)) { // printf("malloc add %ld %ld\n", need, sizeof(accbuff)); allocacc=(Unit *)malloc(need*sizeof(Unit)); if (allocacc==NULL) { // hopeless -- abandon *status|=DEC_Insufficient_storage; break;} acc=allocacc; } } res->bits=(uByte)(bits&DECNEG); // it's now safe to overwrite.. res->exponent=lhs->exponent; // .. operands (even if aliased) #if DECTRACE decDumpAr('A', lhs->lsu, D2U(lhs->digits)); decDumpAr('B', rhs->lsu, D2U(rhs->digits)); printf(" :h: %ld %ld\n", rhsshift, mult); #endif // add [A+B*m] or subtract [A+B*(-m)] res->digits=decUnitAddSub(lhs->lsu, D2U(lhs->digits), rhs->lsu, D2U(rhs->digits), rhsshift, acc, mult) *DECDPUN; // [units -> digits] if (res->digits<0) { // borrowed... res->digits=-res->digits; res->bits^=DECNEG; // flip the sign } #if DECTRACE decDumpAr('+', acc, D2U(res->digits)); #endif // If a buffer was used the result must be copied back, possibly // shortening. (If no buffer was used then the result must have // fit, so can't need rounding and residue must be 0.) residue=0; // clear accumulator if (acc!=res->lsu) { #if DECSUBSET if (set->extended) { // round from first significant digit #endif // remove leading zeros that were added due to rounding up to // integral Units -- before the test for rounding. 
if (res->digits>reqdigits) res->digits=decGetDigits(acc, D2U(res->digits)); decSetCoeff(res, set, acc, res->digits, &residue, status); #if DECSUBSET } else { // subset arithmetic rounds from original significant digit // May have an underestimate. This only occurs when both // numbers fit in DECDPUN digits and are padding with a // negative multiple (-10, -100...) and the top digit(s) become // 0. (This only matters when using X3.274 rules where the // leading zero could be included in the rounding.) if (res->digitsdigits))=0; // ensure leading 0 is there res->digits=maxdigits; } else { // remove leading zeros that added due to rounding up to // integral Units (but only those in excess of the original // maxdigits length, unless extended) before test for rounding. if (res->digits>reqdigits) { res->digits=decGetDigits(acc, D2U(res->digits)); if (res->digitsdigits=maxdigits; } } decSetCoeff(res, set, acc, res->digits, &residue, status); // Now apply rounding if needed before removing leading zeros. // This is safe because subnormals are not a possibility if (residue!=0) { decApplyRound(res, set, residue, status); residue=0; // did what needed to be done } } // subset #endif } // used buffer // strip leading zeros [these were left on in case of subset subtract] res->digits=decGetDigits(res->lsu, D2U(res->digits)); // apply checks and rounding decFinish(res, set, &residue, status); // "When the sum of two operands with opposite signs is exactly // zero, the sign of that sum shall be '+' in all rounding modes // except round toward -Infinity, in which mode that sign shall be // '-'." [Subset zeros also never have '-', set by decFinish.] 
if (ISZERO(res) && diffsign #if DECSUBSET && set->extended #endif && (*status&DEC_Inexact)==0) { if (set->round==DEC_ROUND_FLOOR) res->bits|=DECNEG; // sign - else res->bits&=~DECNEG; // sign + } } while(0); // end protected if (allocacc!=NULL) free(allocacc); // drop any storage used #if DECSUBSET if (allocrhs!=NULL) free(allocrhs); // .. if (alloclhs!=NULL) free(alloclhs); // .. #endif return res; } // decAddOp /* ------------------------------------------------------------------ */ /* decDivideOp -- division operation */ /* */ /* This routine performs the calculations for all four division */ /* operators (divide, divideInteger, remainder, remainderNear). */ /* */ /* C=A op B */ /* */ /* res is C, the result. C may be A and/or B (e.g., X=X/X) */ /* lhs is A */ /* rhs is B */ /* set is the context */ /* op is DIVIDE, DIVIDEINT, REMAINDER, or REMNEAR respectively. */ /* status is the usual accumulator */ /* */ /* C must have space for set->digits digits. */ /* */ /* ------------------------------------------------------------------ */ /* The underlying algorithm of this routine is the same as in the */ /* 1981 S/370 implementation, that is, non-restoring long division */ /* with bi-unit (rather than bi-digit) estimation for each unit */ /* multiplier. In this pseudocode overview, complications for the */ /* Remainder operators and division residues for exact rounding are */ /* omitted for clarity. 
*/
/*                                                                    */
/*  Prepare operands and handle special values                        */
/*  Test for x/0 and then 0/x                                         */
/*  Exp =Exp1 - Exp2                                                  */
/*  Exp =Exp +len(var1) -len(var2)                                    */
/*  Sign=Sign1 * Sign2                                                */
/*  Pad accumulator (Var1) to double-length with 0's (pad1)           */
/*  Pad Var2 to same length as Var1                                   */
/*  msu2pair/plus=1st 2 or 1 units of var2, +1 to allow for round     */
/*  have=0                                                            */
/*  Do until (have=digits+1 OR residue=0)                             */
/*    if exp<0 then if integer divide/residue then leave              */
/*    this_unit=0                                                     */
/*    Do forever                                                      */
/*       compare numbers                                              */
/*       if <0 then leave inner_loop                                  */
/*       if =0 then (* quick exit without subtract *) do              */
/*          this_unit=this_unit+1; output this_unit                   */
/*          leave outer_loop; end                                     */
/*       Compare lengths of numbers (mantissae):                      */
/*       If same then tops2=msu2pair -- {units 1&2 of var2}           */
/*               else tops2=msu2plus -- {0, unit 1 of var2}           */
/*       tops1=first_unit_of_Var1*10**DECDPUN +second_unit_of_var1    */
/*       mult=tops1/tops2  -- Good and safe guess at divisor          */
/*       if mult=0 then mult=1                                        */
/*       this_unit=this_unit+mult                                     */
/*       subtract                                                     */
/*       end inner_loop                                               */
/*    if have\=0 | this_unit\=0 then do                               */
/*       output this_unit                                             */
/*       have=have+1; end                                             */
/*    var2=var2/10                                                    */
/*    exp=exp-1                                                       */
/*    end outer_loop                                                  */
/*  exp=exp+1   -- set the proper exponent                            */
/*  if have=0 then generate answer=0                                  */
/*  Return (Result is defined by Var1)                                */
/*                                                                    */
/* ------------------------------------------------------------------ */
/* Two working buffers are needed during the division; one (digits+   */
/* 1) to accumulate the result, and the other (up to 2*digits+1) for  */
/* long subtractions.  These are acc and var1 respectively.           */
/* var1 is a copy of the lhs coefficient, var2 is the rhs coefficient.*/
/* The static buffers may be larger than might be expected to allow   */
/* for calls from higher-level functions (notably exp).
*/ /* ------------------------------------------------------------------ */ static decNumber * decDivideOp(decNumber *res, const decNumber *lhs, const decNumber *rhs, decContext *set, Flag op, uInt *status) { #if DECSUBSET decNumber *alloclhs=NULL; // non-NULL if rounded lhs allocated decNumber *allocrhs=NULL; // .., rhs #endif Unit accbuff[SD2U(DECBUFFER+DECDPUN+10)]; // local buffer Unit *acc=accbuff; // -> accumulator array for result Unit *allocacc=NULL; // -> allocated buffer, iff allocated Unit *accnext; // -> where next digit will go Int acclength; // length of acc needed [Units] Int accunits; // count of units accumulated Int accdigits; // count of digits accumulated Unit varbuff[SD2U(DECBUFFER*2+DECDPUN)]; // buffer for var1 Unit *var1=varbuff; // -> var1 array for long subtraction Unit *varalloc=NULL; // -> allocated buffer, iff used Unit *msu1; // -> msu of var1 const Unit *var2; // -> var2 array const Unit *msu2; // -> msu of var2 Int msu2plus; // msu2 plus one [does not vary] eInt msu2pair; // msu2 pair plus one [does not vary] Int var1units, var2units; // actual lengths Int var2ulen; // logical length (units) Int var1initpad=0; // var1 initial padding (digits) Int maxdigits; // longest LHS or required acc length Int mult; // multiplier for subtraction Unit thisunit; // current unit being accumulated Int residue; // for rounding Int reqdigits=set->digits; // requested DIGITS Int exponent; // working exponent Int maxexponent=0; // DIVIDE maximum exponent if unrounded uByte bits; // working sign Unit *target; // work const Unit *source; // .. uInt const *pow; // .. Int shift, cut; // .. 
#if DECSUBSET Int dropped; // work #endif #if DECCHECK if (decCheckOperands(res, lhs, rhs, set)) return res; #endif do { // protect allocated storage #if DECSUBSET if (!set->extended) { // reduce operands and set lostDigits status, as needed if (lhs->digits>reqdigits) { alloclhs=decRoundOperand(lhs, set, status); if (alloclhs==NULL) break; lhs=alloclhs; } if (rhs->digits>reqdigits) { allocrhs=decRoundOperand(rhs, set, status); if (allocrhs==NULL) break; rhs=allocrhs; } } #endif // [following code does not require input rounding] bits=(lhs->bits^rhs->bits)&DECNEG; // assumed sign for divisions // handle infinities and NaNs if (SPECIALARGS) { // a special bit set if (SPECIALARGS & (DECSNAN | DECNAN)) { // one or two NaNs decNaNs(res, lhs, rhs, set, status); break; } // one or two infinities if (decNumberIsInfinite(lhs)) { // LHS (dividend) is infinite if (decNumberIsInfinite(rhs) || // two infinities are invalid .. op & (REMAINDER | REMNEAR)) { // as is remainder of infinity *status|=DEC_Invalid_operation; break; } // [Note that infinity/0 raises no exceptions] decNumberZero(res); res->bits=bits|DECINF; // set +/- infinity break; } else { // RHS (divisor) is infinite residue=0; if (op&(REMAINDER|REMNEAR)) { // result is [finished clone of] lhs decCopyFit(res, lhs, set, &residue, status); } else { // a division decNumberZero(res); res->bits=bits; // set +/- zero // for DIVIDEINT the exponent is always 0. For DIVIDE, result // is a 0 with infinitely negative exponent, clamped to minimum if (op&DIVIDE) { res->exponent=set->emin-set->digits+1; *status|=DEC_Clamped; } } decFinish(res, set, &residue, status); break; } } // handle 0 rhs (x/0) if (ISZERO(rhs)) { // x/0 is always exceptional if (ISZERO(lhs)) { decNumberZero(res); // [after lhs test] *status|=DEC_Division_undefined;// 0/0 will become NaN } else { decNumberZero(res); if (op&(REMAINDER|REMNEAR)) *status|=DEC_Invalid_operation; else { *status|=DEC_Division_by_zero; // x/0 res->bits=bits|DECINF; // .. 
is +/- Infinity } } break;} // handle 0 lhs (0/x) if (ISZERO(lhs)) { // 0/x [x!=0] #if DECSUBSET if (!set->extended) decNumberZero(res); else { #endif if (op&DIVIDE) { residue=0; exponent=lhs->exponent-rhs->exponent; // ideal exponent decNumberCopy(res, lhs); // [zeros always fit] res->bits=bits; // sign as computed res->exponent=exponent; // exponent, too decFinalize(res, set, &residue, status); // check exponent } else if (op&DIVIDEINT) { decNumberZero(res); // integer 0 res->bits=bits; // sign as computed } else { // a remainder exponent=rhs->exponent; // [save in case overwrite] decNumberCopy(res, lhs); // [zeros always fit] if (exponentexponent) res->exponent=exponent; // use lower } #if DECSUBSET } #endif break;} // Precalculate exponent. This starts off adjusted (and hence fits // in 31 bits) and becomes the usual unadjusted exponent as the // division proceeds. The order of evaluation is important, here, // to avoid wrap. exponent=(lhs->exponent+lhs->digits)-(rhs->exponent+rhs->digits); // If the working exponent is -ve, then some quick exits are // possible because the quotient is known to be <1 // [for REMNEAR, it needs to be < -1, as -0.5 could need work] if (exponent<0 && !(op==DIVIDE)) { if (op&DIVIDEINT) { decNumberZero(res); // integer part is 0 #if DECSUBSET if (set->extended) #endif res->bits=bits; // set +/- zero break;} // fastpath remainders so long as the lhs has the smaller // (or equal) exponent if (lhs->exponent<=rhs->exponent) { if (op&REMAINDER || exponent<-1) { // It is REMAINDER or safe REMNEAR; result is [finished // clone of] lhs (r = x - 0*y) residue=0; decCopyFit(res, lhs, set, &residue, status); decFinish(res, set, &residue, status); break; } // [unsafe REMNEAR drops through] } } // fastpaths /* Long (slow) division is needed; roll up the sleeves... */ // The accumulator will hold the quotient of the division. // If it needs to be too long for stack storage, then allocate. 
acclength=D2U(reqdigits+DECDPUN); // in Units if (acclength*sizeof(Unit)>sizeof(accbuff)) { // printf("malloc dvacc %ld units\n", acclength); allocacc=(Unit *)malloc(acclength*sizeof(Unit)); if (allocacc==NULL) { // hopeless -- abandon *status|=DEC_Insufficient_storage; break;} acc=allocacc; // use the allocated space } // var1 is the padded LHS ready for subtractions. // If it needs to be too long for stack storage, then allocate. // The maximum units needed for var1 (long subtraction) is: // Enough for // (rhs->digits+reqdigits-1) -- to allow full slide to right // or (lhs->digits) -- to allow for long lhs // whichever is larger // +1 -- for rounding of slide to right // +1 -- for leading 0s // +1 -- for pre-adjust if a remainder or DIVIDEINT // [Note: unused units do not participate in decUnitAddSub data] maxdigits=rhs->digits+reqdigits-1; if (lhs->digits>maxdigits) maxdigits=lhs->digits; var1units=D2U(maxdigits)+2; // allocate a guard unit above msu1 for REMAINDERNEAR if (!(op&DIVIDE)) var1units++; if ((var1units+1)*sizeof(Unit)>sizeof(varbuff)) { // printf("malloc dvvar %ld units\n", var1units+1); varalloc=(Unit *)malloc((var1units+1)*sizeof(Unit)); if (varalloc==NULL) { // hopeless -- abandon *status|=DEC_Insufficient_storage; break;} var1=varalloc; // use the allocated space } // Extend the lhs and rhs to full long subtraction length. The lhs // is truly extended into the var1 buffer, with 0 padding, so a // subtract in place is always possible. The rhs (var2) has // virtual padding (implemented by decUnitAddSub). // One guard unit was allocated above msu1 for rem=rem+rem in // REMAINDERNEAR. 
msu1=var1+var1units-1; // msu of var1 source=lhs->lsu+D2U(lhs->digits)-1; // msu of input array for (target=msu1; source>=lhs->lsu; source--, target--) *target=*source; for (; target>=var1; target--) *target=0; // rhs (var2) is left-aligned with var1 at the start var2ulen=var1units; // rhs logical length (units) var2units=D2U(rhs->digits); // rhs actual length (units) var2=rhs->lsu; // -> rhs array msu2=var2+var2units-1; // -> msu of var2 [never changes] // now set up the variables which will be used for estimating the // multiplication factor. If these variables are not exact, add // 1 to make sure that the multiplier is never overestimated. msu2plus=*msu2; // it's value .. if (var2units>1) msu2plus++; // .. +1 if any more msu2pair=(eInt)*msu2*(DECDPUNMAX+1);// top two pair .. if (var2units>1) { // .. [else treat 2nd as 0] msu2pair+=*(msu2-1); // .. if (var2units>2) msu2pair++; // .. +1 if any more } // The calculation is working in units, which may have leading zeros, // but the exponent was calculated on the assumption that they are // both left-aligned. Adjust the exponent to compensate: add the // number of leading zeros in var1 msu and subtract those in var2 msu. // [This is actually done by counting the digits and negating, as // lead1=DECDPUN-digits1, and similarly for lead2.] for (pow=&powers[1]; *msu1>=*pow; pow++) exponent--; for (pow=&powers[1]; *msu2>=*pow; pow++) exponent++; // Now, if doing an integer divide or remainder, ensure that // the result will be Unit-aligned. To do this, shift the var1 // accumulator towards least if need be. (It's much easier to // do this now than to reassemble the residue afterwards, if // doing a remainder.) Also ensure the exponent is not negative. if (!(op&DIVIDE)) { Unit *u; // work // save the initial 'false' padding of var1, in digits var1initpad=(var1units-D2U(lhs->digits))*DECDPUN; // Determine the shift to do. 
if (exponent<0) cut=-exponent; else cut=DECDPUN-exponent%DECDPUN; decShiftToLeast(var1, var1units, cut); exponent+=cut; // maintain numerical value var1initpad-=cut; // .. and reduce padding // clean any most-significant units which were just emptied for (u=msu1; cut>=DECDPUN; cut-=DECDPUN, u--) *u=0; } // align else { // is DIVIDE maxexponent=lhs->exponent-rhs->exponent; // save // optimization: if the first iteration will just produce 0, // preadjust to skip it [valid for DIVIDE only] if (*msu1<*msu2) { var2ulen--; // shift down exponent-=DECDPUN; // update the exponent } } // ---- start the long-division loops ------------------------------ accunits=0; // no units accumulated yet accdigits=0; // .. or digits accnext=acc+acclength-1; // -> msu of acc [NB: allows digits+1] for (;;) { // outer forever loop thisunit=0; // current unit assumed 0 // find the next unit for (;;) { // inner forever loop // strip leading zero units [from either pre-adjust or from // subtract last time around]. Leave at least one unit. for (; *msu1==0 && msu1>var1; msu1--) var1units--; if (var1units msu for (pv1=msu1; ; pv1--, pv2--) { // v1=*pv1 -- always OK v2=0; // assume in padding if (pv2>=var2) v2=*pv2; // in range if (*pv1!=v2) break; // no longer the same if (pv1==var1) break; // done; leave pv1 as is } // here when all inspected or a difference seen if (*pv1v2. Prepare for real subtraction; the lengths are equal // Estimate the multiplier (there's always a msu1-1)... // Bring in two units of var2 to provide a good estimate. mult=(Int)(((eInt)*msu1*(DECDPUNMAX+1)+*(msu1-1))/msu2pair); } // lengths the same else { // var1units > var2ulen, so subtraction is safe // The var2 msu is one unit towards the lsu of the var1 msu, // so only one unit for var2 can be used. 
mult=(Int)(((eInt)*msu1*(DECDPUNMAX+1)+*(msu1-1))/msu2plus); } if (mult==0) mult=1; // must always be at least 1 // subtraction needed; var1 is > var2 thisunit=(Unit)(thisunit+mult); // accumulate // subtract var1-var2, into var1; only the overlap needs // processing, as this is an in-place calculation shift=var2ulen-var2units; #if DECTRACE decDumpAr('1', &var1[shift], var1units-shift); decDumpAr('2', var2, var2units); printf("m=%ld\n", -mult); #endif decUnitAddSub(&var1[shift], var1units-shift, var2, var2units, 0, &var1[shift], -mult); #if DECTRACE decDumpAr('#', &var1[shift], var1units-shift); #endif // var1 now probably has leading zeros; these are removed at the // top of the inner loop. } // inner loop // The next unit has been calculated in full; unless it's a // leading zero, add to acc if (accunits!=0 || thisunit!=0) { // is first or non-zero *accnext=thisunit; // store in accumulator // account exactly for the new digits if (accunits==0) { accdigits++; // at least one for (pow=&powers[1]; thisunit>=*pow; pow++) accdigits++; } else accdigits+=DECDPUN; accunits++; // update count accnext--; // ready for next if (accdigits>reqdigits) break; // have enough digits } // if the residue is zero, the operation is done (unless divide // or divideInteger and still not enough digits yet) if (*var1==0 && var1units==1) { // residue is 0 if (op&(REMAINDER|REMNEAR)) break; if ((op&DIVIDE) && (exponent<=maxexponent)) break; // [drop through if divideInteger] } // also done enough if calculating remainder or integer // divide and just did the last ('units') unit if (exponent==0 && !(op&DIVIDE)) break; // to get here, var1 is less than var2, so divide var2 by the per- // Unit power of ten and go for the next digit var2ulen--; // shift down exponent-=DECDPUN; // update the exponent } // outer loop // ---- division is complete --------------------------------------- // here: acc has at least reqdigits+1 of good results (or fewer // if early stop), starting at accnext+1 (its 
lsu) // var1 has any residue at the stopping point // accunits is the number of digits collected in acc if (accunits==0) { // acc is 0 accunits=1; // show have a unit .. accdigits=1; // .. *accnext=0; // .. whose value is 0 } else accnext++; // back to last placed // accnext now -> lowest unit of result residue=0; // assume no residue if (op&DIVIDE) { // record the presence of any residue, for rounding if (*var1!=0 || var1units>1) residue=1; else { // no residue // Had an exact division; clean up spurious trailing 0s. // There will be at most DECDPUN-1, from the final multiply, // and then only if the result is non-0 (and even) and the // exponent is 'loose'. #if DECDPUN>1 Unit lsu=*accnext; if (!(lsu&0x01) && (lsu!=0)) { // count the trailing zeros Int drop=0; for (;; drop++) { // [will terminate because lsu!=0] if (exponent>=maxexponent) break; // don't chop real 0s #if DECDPUN<=4 if ((lsu-QUOT10(lsu, drop+1) *powers[drop+1])!=0) break; // found non-0 digit #else if (lsu%powers[drop+1]!=0) break; // found non-0 digit #endif exponent++; } if (drop>0) { accunits=decShiftToLeast(accnext, accunits, drop); accdigits=decGetDigits(accnext, accunits); accunits=D2U(accdigits); // [exponent was adjusted in the loop] } } // neither odd nor 0 #endif } // exact divide } // divide else /* op!=DIVIDE */ { // check for coefficient overflow if (accdigits+exponent>reqdigits) { *status|=DEC_Division_impossible; break; } if (op & (REMAINDER|REMNEAR)) { // [Here, the exponent will be 0, because var1 was adjusted // appropriately.] Int postshift; // work Flag wasodd=0; // integer was odd Unit *quotlsu; // for save Int quotdigits; // .. 
bits=lhs->bits; // remainder sign is always as lhs // Fastpath when residue is truly 0 is worthwhile [and // simplifies the code below] if (*var1==0 && var1units==1) { // residue is 0 Int exp=lhs->exponent; // save min(exponents) if (rhs->exponentexponent; decNumberZero(res); // 0 coefficient #if DECSUBSET if (set->extended) #endif res->exponent=exp; // .. with proper exponent res->bits=(uByte)(bits&DECNEG); // [cleaned] decFinish(res, set, &residue, status); // might clamp break; } // note if the quotient was odd if (*accnext & 0x01) wasodd=1; // acc is odd quotlsu=accnext; // save in case need to reinspect quotdigits=accdigits; // .. // treat the residue, in var1, as the value to return, via acc // calculate the unused zero digits. This is the smaller of: // var1 initial padding (saved above) // var2 residual padding, which happens to be given by: postshift=var1initpad+exponent-lhs->exponent+rhs->exponent; // [the 'exponent' term accounts for the shifts during divide] if (var1initpadexponent; // exponent is smaller of lhs & rhs if (rhs->exponentexponent; // Now correct the result if doing remainderNear; if it // (looking just at coefficients) is > rhs/2, or == rhs/2 and // the integer was odd then the result should be rem-rhs. if (op&REMNEAR) { Int compare, tarunits; // work Unit *up; // .. // calculate remainder*2 into the var1 buffer (which has // 'headroom' of an extra unit and hence enough space) // [a dedicated 'double' loop would be faster, here] tarunits=decUnitAddSub(accnext, accunits, accnext, accunits, 0, accnext, 1); // decDumpAr('r', accnext, tarunits); // Here, accnext (var1) holds tarunits Units with twice the // remainder's coefficient, which must now be compared to the // RHS. The remainder's exponent may be smaller than the RHS's. 
compare=decUnitCompare(accnext, tarunits, rhs->lsu, D2U(rhs->digits), rhs->exponent-exponent); if (compare==BADINT) { // deep trouble *status|=DEC_Insufficient_storage; break;} // now restore the remainder by dividing by two; the lsu // is known to be even. for (up=accnext; up0 || (compare==0 && wasodd)) { // adjustment needed Int exp, expunits, exprem; // work // This is effectively causing round-up of the quotient, // so if it was the rare case where it was full and all // nines, it would overflow and hence division-impossible // should be raised Flag allnines=0; // 1 if quotient all nines if (quotdigits==reqdigits) { // could be borderline for (up=quotlsu; ; up++) { if (quotdigits>DECDPUN) { if (*up!=DECDPUNMAX) break;// non-nines } else { // this is the last Unit if (*up==powers[quotdigits]-1) allnines=1; break; } quotdigits-=DECDPUN; // checked those digits } // up } // borderline check if (allnines) { *status|=DEC_Division_impossible; break;} // rem-rhs is needed; the sign will invert. Again, var1 // can safely be used for the working Units array. exp=rhs->exponent-exponent; // RHS padding needed // Calculate units and remainder from exponent. expunits=exp/DECDPUN; exprem=exp%DECDPUN; // subtract [A+B*(-m)]; the result will always be negative accunits=-decUnitAddSub(accnext, accunits, rhs->lsu, D2U(rhs->digits), expunits, accnext, -(Int)powers[exprem]); accdigits=decGetDigits(accnext, accunits); // count digits exactly accunits=D2U(accdigits); // and recalculate the units for copy // [exponent is as for original remainder] bits^=DECNEG; // flip the sign } } // REMNEAR } // REMAINDER or REMNEAR } // not DIVIDE // Set exponent and bits res->exponent=exponent; res->bits=(uByte)(bits&DECNEG); // [cleaned] // Now the coefficient. 
decSetCoeff(res, set, accnext, accdigits, &residue, status); decFinish(res, set, &residue, status); // final cleanup #if DECSUBSET // If a divide then strip trailing zeros if subset [after round] if (!set->extended && (op==DIVIDE)) decTrim(res, set, 0, 1, &dropped); #endif } while(0); // end protected if (varalloc!=NULL) free(varalloc); // drop any storage used if (allocacc!=NULL) free(allocacc); // .. #if DECSUBSET if (allocrhs!=NULL) free(allocrhs); // .. if (alloclhs!=NULL) free(alloclhs); // .. #endif return res; } // decDivideOp /* ------------------------------------------------------------------ */ /* decMultiplyOp -- multiplication operation */ /* */ /* This routine performs the multiplication C=A x B. */ /* */ /* res is C, the result. C may be A and/or B (e.g., X=X*X) */ /* lhs is A */ /* rhs is B */ /* set is the context */ /* status is the usual accumulator */ /* */ /* C must have space for set->digits digits. */ /* */ /* ------------------------------------------------------------------ */ /* 'Classic' multiplication is used rather than Karatsuba, as the */ /* latter would give only a minor improvement for the short numbers */ /* expected to be handled most (and uses much more memory). */ /* */ /* There are two major paths here: the general-purpose ('old code') */ /* path which handles all DECDPUN values, and a fastpath version */ /* which is used if 64-bit ints are available, DECDPUN<=4, and more */ /* than two calls to decUnitAddSub would be made. */ /* */ /* The fastpath version lumps units together into 8-digit or 9-digit */ /* chunks, and also uses a lazy carry strategy to minimise expensive */ /* 64-bit divisions. The chunks are then broken apart again into */ /* units for continuing processing. Despite this overhead, the */ /* fastpath can speed up some 16-digit operations by 10x (and much */ /* more for higher-precision calculations). 
*/ /* */ /* A buffer always has to be used for the accumulator; in the */ /* fastpath, buffers are also always needed for the chunked copies of */ /* of the operand coefficients. */ /* Static buffers are larger than needed just for multiply, to allow */ /* for calls from other operations (notably exp). */ /* ------------------------------------------------------------------ */ #define FASTMUL (DECUSE64 && DECDPUN<5) static decNumber * decMultiplyOp(decNumber *res, const decNumber *lhs, const decNumber *rhs, decContext *set, uInt *status) { Int accunits; // Units of accumulator in use Int exponent; // work Int residue=0; // rounding residue uByte bits; // result sign Unit *acc; // -> accumulator Unit array Int needbytes; // size calculator void *allocacc=NULL; // -> allocated accumulator, iff allocated Unit accbuff[SD2U(DECBUFFER*4+1)]; // buffer (+1 for DECBUFFER==0, // *4 for calls from other operations) const Unit *mer, *mermsup; // work Int madlength; // Units in multiplicand Int shift; // Units to shift multiplicand by #if FASTMUL // if DECDPUN is 1 or 3 work in base 10**9, otherwise // (DECDPUN is 2 or 4) then work in base 10**8 #if DECDPUN & 1 // odd #define FASTBASE 1000000000 // base #define FASTDIGS 9 // digits in base #define FASTLAZY 18 // carry resolution point [1->18] #else #define FASTBASE 100000000 #define FASTDIGS 8 #define FASTLAZY 1844 // carry resolution point [1->1844] #endif // three buffers are used, two for chunked copies of the operands // (base 10**8 or base 10**9) and one base 2**64 accumulator with // lazy carry evaluation uInt zlhibuff[(DECBUFFER*2+1)/8+1]; // buffer (+1 for DECBUFFER==0) uInt *zlhi=zlhibuff; // -> lhs array uInt *alloclhi=NULL; // -> allocated buffer, iff allocated uInt zrhibuff[(DECBUFFER*2+1)/8+1]; // buffer (+1 for DECBUFFER==0) uInt *zrhi=zrhibuff; // -> rhs array uInt *allocrhi=NULL; // -> allocated buffer, iff allocated uLong zaccbuff[(DECBUFFER*2+1)/4+2]; // buffer (+1 for DECBUFFER==0) // [allocacc is shared for 
both paths, as only one will run] uLong *zacc=zaccbuff; // -> accumulator array for exact result #if DECDPUN==1 Int zoff; // accumulator offset #endif uInt *lip, *rip; // item pointers uInt *lmsi, *rmsi; // most significant items Int ilhs, irhs, iacc; // item counts in the arrays Int lazy; // lazy carry counter uLong lcarry; // uLong carry uInt carry; // carry (NB not uLong) Int count; // work const Unit *cup; // .. Unit *up; // .. uLong *lp; // .. Int p; // .. #endif #if DECSUBSET decNumber *alloclhs=NULL; // -> allocated buffer, iff allocated decNumber *allocrhs=NULL; // -> allocated buffer, iff allocated #endif #if DECCHECK if (decCheckOperands(res, lhs, rhs, set)) return res; #endif // precalculate result sign bits=(uByte)((lhs->bits^rhs->bits)&DECNEG); // handle infinities and NaNs if (SPECIALARGS) { // a special bit set if (SPECIALARGS & (DECSNAN | DECNAN)) { // one or two NaNs decNaNs(res, lhs, rhs, set, status); return res;} // one or two infinities; Infinity * 0 is invalid if (((lhs->bits & DECINF)==0 && ISZERO(lhs)) ||((rhs->bits & DECINF)==0 && ISZERO(rhs))) { *status|=DEC_Invalid_operation; return res;} decNumberZero(res); res->bits=bits|DECINF; // infinity return res;} // For best speed, as in DMSRCN [the original Rexx numerics // module], use the shorter number as the multiplier (rhs) and // the longer as the multiplicand (lhs) to minimise the number of // adds (partial products) if (lhs->digitsdigits) { // swap... 
const decNumber *hold=lhs; lhs=rhs; rhs=hold; } do { // protect allocated storage #if DECSUBSET if (!set->extended) { // reduce operands and set lostDigits status, as needed if (lhs->digits>set->digits) { alloclhs=decRoundOperand(lhs, set, status); if (alloclhs==NULL) break; lhs=alloclhs; } if (rhs->digits>set->digits) { allocrhs=decRoundOperand(rhs, set, status); if (allocrhs==NULL) break; rhs=allocrhs; } } #endif // [following code does not require input rounding] #if FASTMUL // fastpath can be used // use the fast path if there are enough digits in the shorter // operand to make the setup and takedown worthwhile #define NEEDTWO (DECDPUN*2) // within two decUnitAddSub calls if (rhs->digits>NEEDTWO) { // use fastpath... // calculate the number of elements in each array ilhs=(lhs->digits+FASTDIGS-1)/FASTDIGS; // [ceiling] irhs=(rhs->digits+FASTDIGS-1)/FASTDIGS; // .. iacc=ilhs+irhs; // allocate buffers if required, as usual needbytes=ilhs*sizeof(uInt); if (needbytes>(Int)sizeof(zlhibuff)) { alloclhi=(uInt *)malloc(needbytes); zlhi=alloclhi;} needbytes=irhs*sizeof(uInt); if (needbytes>(Int)sizeof(zrhibuff)) { allocrhi=(uInt *)malloc(needbytes); zrhi=allocrhi;} // Allocating the accumulator space needs a special case when // DECDPUN=1 because when converting the accumulator to Units // after the multiplication each 8-byte item becomes 9 1-byte // units. Therefore iacc extra bytes are needed at the front // (rounded up to a multiple of 8 bytes), and the uLong // accumulator starts offset the appropriate number of units // to the right to avoid overwrite during the unchunking. 
needbytes=iacc*sizeof(uLong); #if DECDPUN==1 zoff=(iacc+7)/8; // items to offset by needbytes+=zoff*8; #endif if (needbytes>(Int)sizeof(zaccbuff)) { allocacc=(uLong *)malloc(needbytes); zacc=(uLong *)allocacc;} if (zlhi==NULL||zrhi==NULL||zacc==NULL) { *status|=DEC_Insufficient_storage; break;} acc=(Unit *)zacc; // -> target Unit array #if DECDPUN==1 zacc+=zoff; // start uLong accumulator to right #endif // assemble the chunked copies of the left and right sides for (count=lhs->digits, cup=lhs->lsu, lip=zlhi; count>0; lip++) for (p=0, *lip=0; p0; p+=DECDPUN, cup++, count-=DECDPUN) *lip+=*cup*powers[p]; lmsi=lip-1; // save -> msi for (count=rhs->digits, cup=rhs->lsu, rip=zrhi; count>0; rip++) for (p=0, *rip=0; p0; p+=DECDPUN, cup++, count-=DECDPUN) *rip+=*cup*powers[p]; rmsi=rip-1; // save -> msi // zero the accumulator for (lp=zacc; lp0 && rip!=rmsi) continue; lazy=FASTLAZY; // reset delay count // spin up the accumulator resolving overflows for (lp=zacc; lp assume buffer for accumulator needbytes=(D2U(lhs->digits)+D2U(rhs->digits))*sizeof(Unit); if (needbytes>(Int)sizeof(accbuff)) { allocacc=(Unit *)malloc(needbytes); if (allocacc==NULL) {*status|=DEC_Insufficient_storage; break;} acc=(Unit *)allocacc; // use the allocated space } /* Now the main long multiplication loop */ // Unlike the equivalent in the IBM Java implementation, there // is no advantage in calculating from msu to lsu. So, do it // by the book, as it were. // Each iteration calculates ACC=ACC+MULTAND*MULT accunits=1; // accumulator starts at '0' *acc=0; // .. 
(lsu=0) shift=0; // no multiplicand shift at first madlength=D2U(lhs->digits); // this won't change mermsup=rhs->lsu+D2U(rhs->digits); // -> msu+1 of multiplier for (mer=rhs->lsu; merlsu, madlength, 0, &acc[shift], *mer) + shift; else { // extend acc with a 0; it will be used shortly *(acc+accunits)=0; // [this avoids length of <=0 later] accunits++; } // multiply multiplicand by 10**DECDPUN for next Unit to left shift++; // add this for 'logical length' } // n #if FASTMUL } // unchunked units #endif // common end-path #if DECTRACE decDumpAr('*', acc, accunits); // Show exact result #endif // acc now contains the exact result of the multiplication, // possibly with a leading zero unit; build the decNumber from // it, noting if any residue res->bits=bits; // set sign res->digits=decGetDigits(acc, accunits); // count digits exactly // There can be a 31-bit wrap in calculating the exponent. // This can only happen if both input exponents are negative and // both their magnitudes are large. If there was a wrap, set a // safe very negative exponent, from which decFinalize() will // raise a hard underflow shortly. exponent=lhs->exponent+rhs->exponent; // calculate exponent if (lhs->exponent<0 && rhs->exponent<0 && exponent>0) exponent=-2*DECNUMMAXE; // force underflow res->exponent=exponent; // OK to overwrite now // Set the coefficient. If any rounding, residue records decSetCoeff(res, set, acc, res->digits, &residue, status); decFinish(res, set, &residue, status); // final cleanup } while(0); // end protected if (allocacc!=NULL) free(allocacc); // drop any storage used #if DECSUBSET if (allocrhs!=NULL) free(allocrhs); // .. if (alloclhs!=NULL) free(alloclhs); // .. #endif #if FASTMUL if (allocrhi!=NULL) free(allocrhi); // .. if (alloclhi!=NULL) free(alloclhi); // .. 
#endif return res; } // decMultiplyOp /* ------------------------------------------------------------------ */ /* decExpOp -- effect exponentiation */ /* */ /* This computes C = exp(A) */ /* */ /* res is C, the result. C may be A */ /* rhs is A */ /* set is the context; note that rounding mode has no effect */ /* */ /* C must have space for set->digits digits. status is updated but */ /* not set. */ /* */ /* Restrictions: */ /* */ /* digits, emax, and -emin in the context must be less than */ /* 2*DEC_MAX_MATH (1999998), and the rhs must be within these */ /* bounds or a zero. This is an internal routine, so these */ /* restrictions are contractual and not enforced. */ /* */ /* A finite result is rounded using DEC_ROUND_HALF_EVEN; it will */ /* almost always be correctly rounded, but may be up to 1 ulp in */ /* error in rare cases. */ /* */ /* Finite results will always be full precision and Inexact, except */ /* when A is a zero or -Infinity (giving 1 or 0 respectively). */ /* ------------------------------------------------------------------ */ /* This approach used here is similar to the algorithm described in */ /* */ /* Variable Precision Exponential Function, T. E. Hull and */ /* A. Abrham, ACM Transactions on Mathematical Software, Vol 12 #2, */ /* pp79-91, ACM, June 1986. */ /* */ /* with the main difference being that the iterations in the series */ /* evaluation are terminated dynamically (which does not require the */ /* extra variable-precision variables which are expensive in this */ /* context). */ /* */ /* The error analysis in Hull & Abrham's paper applies except for the */ /* round-off error accumulation during the series evaluation. This */ /* code does not precalculate the number of iterations and so cannot */ /* use Horner's scheme. 
Instead, the accumulation is done at double-                          */
/* precision, which ensures that the additions of the terms are       */
/* exact and do not accumulate round-off (and any round-off errors    */
/* in the terms themselves move 'to the right' faster than they can   */
/* accumulate).  This code also extends the calculation by allowing,  */
/* in the spirit of other decNumber operators, the input to be more   */
/* precise than the result (the precision used is based on the more   */
/* precise of the input or requested result).                         */
/*                                                                    */
/* Implementation notes:                                              */
/*                                                                    */
/* 1. This is separated out as decExpOp so it can be called from      */
/*    other Mathematical functions (notably Ln) with a wider range    */
/*    than normal.  In particular, it can handle the slightly wider   */
/*    (double) range needed by Ln (which has to be able to calculate  */
/*    exp(-x) where x can be the tiniest number (Ntiny).              */
/*                                                                    */
/* 2. Normalizing x to be <=0.1 (instead of <=1) reduces loop         */
/*    iterations by approximately a third with additional (although   */
/*    diminishing) returns as the range is reduced to even smaller    */
/*    fractions.  However, h (the power of 10 used to correct the     */
/*    result at the end, see below) must be kept <=8 as otherwise     */
/*    the final result cannot be computed.  Hence the leverage is a   */
/*    sliding value (8-h), where potentially the range is reduced     */
/*    more for smaller values.                                        */
/*                                                                    */
/*    The leverage that can be applied in this way is severely        */
/*    limited by the cost of the raise-to-the power at the end,       */
/*    which dominates when the number of iterations is small (less    */
/*    than ten) or when rhs is short.  As an example, the adjustment  */
/*    x**10,000,000 needs 31 multiplications, all but one full-width. */
/*                                                                    */
/* 3. The restrictions (especially precision) could be raised with    */
/*    care, but the full decNumber range seems very hard within the   */
/*    32-bit limits.                                                  */
/*                                                                    */
/* 4. The working precisions for the static buffers are twice the     */
/*    obvious size to allow for calls from decNumberPower.            */
/* ------------------------------------------------------------------ */
decNumber * decExpOp(decNumber *res, const decNumber *rhs,
                     decContext *set, uInt *status) {
  uInt ignore=0;                   // working status
  Int h;                           // adjusted exponent for 0.xxxx
  Int p;                           // working precision
  Int residue;                     // rounding residue
  uInt needbytes;                  // for space calculations
  const decNumber *x=rhs;          // (may point to safe copy later)
  decContext aset, tset, dset;     // working contexts
  Int comp;                        // work
  // the argument is often copied to normalize it, so (unusually) it
  // is treated like other buffers, using DECBUFFER, +1 in case
  // DECBUFFER is 0
  decNumber bufr[D2N(DECBUFFER*2+1)];
  decNumber *allocrhs=NULL;        // non-NULL if rhs buffer allocated
  // the working precision will be no more than set->digits+8+1
  // so for on-stack buffers DECBUFFER+9 is used, +1 in case DECBUFFER
  // is 0 (and twice that for the accumulator)
  // buffer for t, term (working precision plus)
  decNumber buft[D2N(DECBUFFER*2+9+1)];
  decNumber *allocbuft=NULL;       // -> allocated buft, iff allocated
  decNumber *t=buft;               // term
  // buffer for a, accumulator (working precision * 2), at least 9
  decNumber bufa[D2N(DECBUFFER*4+18+1)];
  decNumber *allocbufa=NULL;       // -> allocated bufa, iff allocated
  decNumber *a=bufa;               // accumulator
  // decNumber for the divisor term; this needs at most 9 digits
  // and so can be fixed size [16 so can use standard context]
  decNumber bufd[D2N(16)];
  decNumber *d=bufd;               // divisor
  decNumber numone;                // constant 1

  #if DECCHECK
  Int iterations=0;                // for later sanity check
  if (decCheckOperands(res, DECUNUSED, rhs, set)) return res;
  #endif

  do {                                  // protect allocated storage
    if (SPECIALARG) {                   // handle infinities and NaNs
      if (decNumberIsInfinite(rhs)) {   // an infinity
        if (decNumberIsNegative(rhs))   // -Infinity -> +0
          decNumberZero(res);
         else decNumberCopy(res, rhs);  // +Infinity -> self
        }
       else decNaNs(res, rhs, NULL, set, status); // a NaN
      break;}

    if (ISZERO(rhs)) {                  // zeros -> exact 1
      decNumberZero(res);               // make clean 1
      *res->lsu=1;                      // ..
      break;}                           // [no status to set]

    // e**x when 0 < x < 0.66 is < 1+3x/2, hence can fast-path
    // positive and negative tiny cases which will result in inexact
    // 1.  This also allows the later add-accumulate to always be
    // exact (because its length will never be more than twice the
    // working precision).
    // The comparator (tiny) needs just one digit, so use the
    // decNumber d for it (reused as the divisor, etc., below); its
    // exponent is such that if x is positive it will have
    // set->digits-1 zeros between the decimal point and the digit,
    // which is 4, and if x is negative one more zero there as the
    // more precise result will be of the form 0.9999999 rather than
    // 1.0000001.  Hence, tiny will be 0.0000004 if digits=7 and x>0
    // or 0.00000004 if digits=7 and x<0.  If RHS not larger than
    // this then the result will be 1.000000
    decNumberZero(d);                   // clean
    *d->lsu=4;                          // set 4 ..
    d->exponent=-set->digits;           // * 10**(-d)
    if (decNumberIsNegative(rhs)) d->exponent--; // negative case
    comp=decCompare(d, rhs, 1);         // signless compare
    if (comp==BADINT) {
      *status|=DEC_Insufficient_storage;
      break;}
    if (comp>=0) {                      // rhs < d
      Int shift=set->digits-1;
      decNumberZero(res);               // set 1
      *res->lsu=1;                      // ..
      res->digits=decShiftToMost(res->lsu, 1, shift);
      res->exponent=-shift;             // make 1.0000...
      *status|=DEC_Inexact | DEC_Rounded; // .. inexactly
      break;}                           // tiny

    // set up the context to be used for calculating a, as this is
    // used on both paths below
    decContextDefault(&aset, DEC_INIT_DECIMAL64);
    // accumulator bounds are as requested (could underflow)
    aset.emax=set->emax;                // usual bounds
    aset.emin=set->emin;                // ..
    aset.clamp=0;                       // and no concrete format

    // calculate the adjusted (Hull & Abrham) exponent (where the
    // decimal point is just to the left of the coefficient msd)
    h=rhs->exponent+rhs->digits;
    // if h>8 then 10**h cannot be calculated safely; however, when
    // h=8 then exp(|rhs|) will be at least exp(1E+7) which is at
    // least 6.59E+4342944, so (due to the restriction on Emax/Emin)
    // overflow (or underflow to 0) is guaranteed -- so this case can
    // be handled by simply forcing the appropriate excess
    if (h>8) {                          // overflow/underflow
      // set up here so Power call below will over or underflow to
      // zero; set accumulator to either 2 or 0.02
      // [stack buffer for a is always big enough for this]
      decNumberZero(a);
      *a->lsu=2;                        // not 1 but < exp(1)
      if (decNumberIsNegative(rhs)) a->exponent=-2; // make 0.02
      h=8;                              // clamp so 10**h computable
      p=9;                              // set a working precision
      }
     else {                             // h<=8
      Int maxlever=(rhs->digits>8?1:0);
      // [could/should increase this for precisions >40 or so, too]

      // if h is 8, cannot normalize to a lower upper limit because
      // the final result will not be computable (see notes above),
      // but leverage can be applied whenever h is less than 8.
      // Apply as much as possible, up to a MAXLEVER digits, which
      // sets the tradeoff against the cost of the later a**(10**h).
      // As h is increased, the working precision below also
      // increases to compensate for the "constant digits at the
      // front" effect.
      Int lever=MINI(8-h, maxlever);    // leverage attainable
      Int use=-rhs->digits-lever;       // exponent to use for RHS
      h+=lever;                         // apply leverage selected
      if (h<0) {                        // clamp
        use+=h;                         // [may end up subnormal]
        h=0;
        }
      // Take a copy of RHS if it needs normalization (true whenever x>=1)
      if (rhs->exponent!=use) {
        decNumber *newrhs=bufr;         // assume will fit on stack
        needbytes=sizeof(decNumber)+(D2U(rhs->digits)-1)*sizeof(Unit);
        if (needbytes>sizeof(bufr)) {   // need malloc space
          allocrhs=(decNumber *)malloc(needbytes);
          if (allocrhs==NULL) {         // hopeless -- abandon
            *status|=DEC_Insufficient_storage;
            break;}
          newrhs=allocrhs;              // use the allocated space
          }
        decNumberCopy(newrhs, rhs);     // copy to safe space
        newrhs->exponent=use;           // normalize; now <1
        x=newrhs;                       // ready for use
        // decNumberShow(x);
        }

      // Now use the usual power series to evaluate exp(x).  The
      // series starts as 1 + x + x^2/2 ... so prime ready for the
      // third term by setting the term variable t=x, the accumulator
      // a=1, and the divisor d=2.

      // First determine the working precision.  From Hull & Abrham
      // this is set->digits+h+2.  However, if x is 'over-precise' we
      // need to allow for all its digits to potentially participate
      // (consider an x where all the excess digits are 9s) so in
      // this case use x->digits+h+2
      p=MAXI(x->digits, set->digits)+h+2; // [h<=8]

      // a and t are variable precision, and depend on p, so space
      // must be allocated for them if necessary

      // the accumulator needs to be able to hold 2p digits so that
      // the additions on the second and subsequent iterations are
      // sufficiently exact.
      needbytes=sizeof(decNumber)+(D2U(p*2)-1)*sizeof(Unit);
      if (needbytes>sizeof(bufa)) {     // need malloc space
        allocbufa=(decNumber *)malloc(needbytes);
        if (allocbufa==NULL) {          // hopeless -- abandon
          *status|=DEC_Insufficient_storage;
          break;}
        a=allocbufa;                    // use the allocated space
        }
      // the term needs to be able to hold p digits (which is
      // guaranteed to be larger than x->digits, so the initial copy
      // is safe); it may also be used for the raise-to-power
      // calculation below, which needs an extra two digits
      needbytes=sizeof(decNumber)+(D2U(p+2)-1)*sizeof(Unit);
      if (needbytes>sizeof(buft)) {     // need malloc space
        allocbuft=(decNumber *)malloc(needbytes);
        if (allocbuft==NULL) {          // hopeless -- abandon
          *status|=DEC_Insufficient_storage;
          break;}
        t=allocbuft;                    // use the allocated space
        }

      decNumberCopy(t, x);              // term=x
      decNumberZero(a); *a->lsu=1;      // accumulator=1
      decNumberZero(d); *d->lsu=2;      // divisor=2
      decNumberZero(&numone); *numone.lsu=1; // constant 1 for increment

      // set up the contexts for calculating a, t, and d
      decContextDefault(&tset, DEC_INIT_DECIMAL64);
      dset=tset;
      // accumulator bounds are set above, set precision now
      aset.digits=p*2;                  // double
      // term bounds avoid any underflow or overflow
      tset.digits=p;
      tset.emin=DEC_MIN_EMIN;           // [emax is plenty]
      // [dset.digits=16, etc., are sufficient]

      // finally ready to roll
      for (;;) {
        #if DECCHECK
        iterations++;
        #endif
        // only the status from the accumulation is interesting
        // [but it should remain unchanged after first add]
        decAddOp(a, a, t, &aset, 0, status);          // a=a+t
        decMultiplyOp(t, t, x, &tset, &ignore);       // t=t*x
        decDivideOp(t, t, d, &tset, DIVIDE, &ignore); // t=t/d
        // the iteration ends when the term cannot affect the result,
        // if rounded to p digits, which is when its value is smaller
        // than the accumulator by p+1 digits.  There must also be
        // full precision in a.
        if (((a->digits+a->exponent)>=(t->digits+t->exponent+p+1))
         && (a->digits>=p)) break;
        decAddOp(d, d, &numone, &dset, 0, &ignore);   // d=d+1
        } // iterate

      #if DECCHECK
      // just a sanity check; comment out test to show always
      if (iterations>p+3)
        printf("Exp iterations=%ld, status=%08lx, p=%ld, d=%ld\n",
               (LI)iterations, (LI)*status, (LI)p, (LI)x->digits);
      #endif
      } // h<=8

    // apply postconditioning: a=a**(10**h) -- this is calculated
    // at a slightly higher precision than Hull & Abrham suggest
    if (h>0) {
      Int seenbit=0;                    // set once a 1-bit is seen
      Int i;                            // counter
      Int n=powers[h];                  // always positive
      aset.digits=p+2;                  // sufficient precision
      // avoid the overhead and many extra digits of decNumberPower
      // as all that is needed is the short 'multipliers' loop; here
      // accumulate the answer into t
      decNumberZero(t); *t->lsu=1;      // acc=1
      for (i=1;;i++){                   // for each bit [top bit ignored]
        // abandon if have had overflow or terminal underflow
        if (*status & (DEC_Overflow|DEC_Underflow)) { // interesting?
          if (*status&DEC_Overflow || ISZERO(t)) break;}
        n=n<<1;                         // move next bit to testable position
        if (n<0) {                      // top bit is set
          seenbit=1;                    // OK, have a significant bit
          decMultiplyOp(t, t, a, &aset, status); // acc=acc*x
          }
        if (i==31) break;               // that was the last bit
        if (!seenbit) continue;         // no need to square 1
        decMultiplyOp(t, t, t, &aset, status);   // acc=acc*acc [square]
        } /*i*/ // 32 bits
      // decNumberShow(t);
      a=t;                              // and carry on using t instead of a
      }

    // Copy and round the result to res
    residue=1;                          // indicate dirt to right ..
    if (ISZERO(a)) residue=0;           // .. unless underflowed to 0
    aset.digits=set->digits;            // [use default rounding]
    decCopyFit(res, a, &aset, &residue, status); // copy & shorten
    decFinish(res, set, &residue, status);       // cleanup/set flags
    } while(0);                         // end protected

  if (allocrhs !=NULL) free(allocrhs);  // drop any storage used
  if (allocbufa!=NULL) free(allocbufa); // ..
  if (allocbuft!=NULL) free(allocbuft); // ..
  // [status is handled by caller]
  return res;
  } // decExpOp

/* ------------------------------------------------------------------ */
/* Initial-estimate natural logarithm table                           */
/*                                                                    */
/*   LNnn -- 90-entry 16-bit table for values from .10 through .99.   */
/*           The result is a 4-digit encode of the coefficient (c=the */
/*           top 14 bits encoding 0-9999) and a 2-digit encode of the */
/*           exponent (e=the bottom 2 bits encoding 0-3)              */
/*                                                                    */
/*   The resulting value is given by:                                 */
/*                                                                    */
/*     v = -c * 10**(-e-3)                                            */
/*                                                                    */
/*   where e and c are extracted from entry k = LNnn[x-10]            */
/*   where x is truncated (NB) into the range 10 through 99,          */
/*   and then c = k>>2 and e = k&3.                                   */
/* ------------------------------------------------------------------ */
const uShort LNnn[90]={
   9016,  8652,  8316,  8008,  7724,  7456,  7208,  6972,  6748,  6540,
   6340,  6148,  5968,  5792,  5628,  5464,  5312,  5164,  5020,  4884,
   4748,  4620,  4496,  4376,  4256,  4144,  4032, 39233, 38181, 37157,
  36157, 35181, 34229, 33297, 32389, 31501, 30629, 29777, 28945, 28129,
  27329, 26545, 25777, 25021, 24281, 23553, 22837, 22137, 21445, 20769,
  20101, 19445, 18801, 18165, 17541, 16925, 16321, 15721, 15133, 14553,
  13985, 13421, 12865, 12317, 11777, 11241, 10717, 10197,  9685,  9177,
   8677,  8185,  7697,  7213,  6737,  6269,  5801,  5341,  4889,  4437,
  39930, 35534, 31186, 26886, 22630, 18418, 14254, 10130,  6046, 20055};

/* ------------------------------------------------------------------ */
/* decLnOp -- effect natural logarithm                                */
/*                                                                    */
/* This computes C = ln(A)                                            */
/*                                                                    */
/* res is C, the result.  C may be A                                  */
/* rhs is A                                                           */
/* set is the context; note that rounding mode has no effect          */
/*                                                                    */
/* C must have space for set->digits digits.
                                                                      */
/*                                                                    */
/* Notable cases:                                                     */
/*   A<0  -> Invalid                                                  */
/*   A=0  -> -Infinity (Exact)                                        */
/*   A=+Infinity -> +Infinity (Exact)                                 */
/*   A=1 exactly -> 0 (Exact)                                         */
/*                                                                    */
/* Restrictions (as for Exp):                                         */
/*                                                                    */
/*   digits, emax, and -emin in the context must be less than         */
/*   DEC_MAX_MATH+11 (1000010), and the rhs must be within these      */
/*   bounds or a zero.  This is an internal routine, so these         */
/*   restrictions are contractual and not enforced.                   */
/*                                                                    */
/* A finite result is rounded using DEC_ROUND_HALF_EVEN; it will      */
/* almost always be correctly rounded, but may be up to 1 ulp in      */
/* error in rare cases.                                               */
/* ------------------------------------------------------------------ */
/* The result is calculated using Newton's method, with each          */
/* iteration calculating a' = a + x * exp(-a) - 1.  See, for example, */
/* Epperson 1989.                                                     */
/*                                                                    */
/* The iteration ends when the adjustment x*exp(-a)-1 is tiny enough. */
/* This has to be calculated at the sum of the precision of x and the */
/* working precision.                                                 */
/*                                                                    */
/* Implementation notes:                                              */
/*                                                                    */
/* 1. This is separated out as decLnOp so it can be called from       */
/*    other Mathematical functions (e.g., Log 10) with a wider range  */
/*    than normal.  In particular, it can handle the slightly wider   */
/*    (+9+2) range needed by a power function.                        */
/*                                                                    */
/* 2. The speed of this function is about 10x slower than exp, as     */
/*    it typically needs 4-6 iterations for short numbers, and the    */
/*    extra precision needed adds a squaring effect, twice.           */
/*                                                                    */
/* 3. Fastpaths are included for ln(10) and ln(2), up to length 40,   */
/*    as these are common requests.  ln(10) is used by log10(x).      */
/*                                                                    */
/* 4. An iteration might be saved by widening the LNnn table, and     */
/*    would certainly save at least one if it were made ten times     */
/*    bigger, too (for truncated fractions 0.100 through 0.999).      */
/*    However, for most practical evaluations, at least four or five  */
/*    iterations will be needed -- so this would only speed up by     */
/*    20-25% and that probably does not justify increasing the table  */
/*    size.                                                           */
/*                                                                    */
/* 5. The static buffers are larger than might be expected to allow   */
/*    for calls from decNumberPower.                                  */
/* ------------------------------------------------------------------ */
decNumber * decLnOp(decNumber *res, const decNumber *rhs,
                    decContext *set, uInt *status) {
  uInt ignore=0;                   // working status accumulator
  uInt needbytes;                  // for space calculations
  Int residue;                     // rounding residue
  Int r;                           // rhs=f*10**r [see below]
  Int p;                           // working precision
  Int pp;                          // precision for iteration
  Int t;                           // work
  // buffers for a (accumulator, typically precision+2) and b
  // (adjustment calculator, same size)
  decNumber bufa[D2N(DECBUFFER+12)];
  decNumber *allocbufa=NULL;       // -> allocated bufa, iff allocated
  decNumber *a=bufa;               // accumulator/work
  decNumber bufb[D2N(DECBUFFER*2+2)];
  decNumber *allocbufb=NULL;       // -> allocated bufb, iff allocated
  decNumber *b=bufb;               // adjustment/work
  decNumber numone;                // constant 1
  decNumber cmp;                   // work
  decContext aset, bset;           // working contexts

  #if DECCHECK
  Int iterations=0;                // for later sanity check
  if (decCheckOperands(res, DECUNUSED, rhs, set)) return res;
  #endif

  do {                                  // protect allocated storage
    if (SPECIALARG) {                   // handle infinities and NaNs
      if (decNumberIsInfinite(rhs)) {   // an infinity
        if (decNumberIsNegative(rhs))   // -Infinity -> error
          *status|=DEC_Invalid_operation;
         else decNumberCopy(res, rhs);  // +Infinity -> self
        }
       else decNaNs(res, rhs, NULL, set, status); // a NaN
      break;}

    if (ISZERO(rhs)) {                  // +/- zeros -> -Infinity
      decNumberZero(res);               // make clean
      res->bits=DECINF|DECNEG;          // set - infinity
      break;}                           // [no status to set]

    // Non-zero negatives are bad...
    if (decNumberIsNegative(rhs)) {     // -x -> error
      *status|=DEC_Invalid_operation;
      break;}

    // Here, rhs is positive, finite, and in range

    // lookaside fastpath code for ln(2) and ln(10) at common lengths
    if (rhs->exponent==0 && set->digits<=40) {
      #if DECDPUN==1
      if (rhs->lsu[0]==0 && rhs->lsu[1]==1 && rhs->digits==2) { // ln(10)
      #else
      if (rhs->lsu[0]==10 && rhs->digits==2) {                  // ln(10)
      #endif
        aset=*set; aset.round=DEC_ROUND_HALF_EVEN;
        #define LN10 "2.302585092994045684017991454684364207601"
        decNumberFromString(res, LN10, &aset);
        *status|=(DEC_Inexact | DEC_Rounded); // is inexact
        break;}
      if (rhs->lsu[0]==2 && rhs->digits==1) { // ln(2)
        aset=*set; aset.round=DEC_ROUND_HALF_EVEN;
        #define LN2 "0.6931471805599453094172321214581765680755"
        decNumberFromString(res, LN2, &aset);
        *status|=(DEC_Inexact | DEC_Rounded);
        break;}
      } // integer and short

    // Determine the working precision.  This is normally the
    // requested precision + 2, with a minimum of 9.  However, if
    // the rhs is 'over-precise' then allow for all its digits to
    // potentially participate (consider an rhs where all the excess
    // digits are 9s) so in this case use rhs->digits+2.
    p=MAXI(rhs->digits, MAXI(set->digits, 7))+2;

    // Allocate space for the accumulator and the high-precision
    // adjustment calculator, if necessary.  The accumulator must
    // be able to hold p digits, and the adjustment up to
    // rhs->digits+p digits.  They are also made big enough for 16
    // digits so that they can be used for calculating the initial
    // estimate.
    needbytes=sizeof(decNumber)+(D2U(MAXI(p,16))-1)*sizeof(Unit);
    if (needbytes>sizeof(bufa)) {       // need malloc space
      allocbufa=(decNumber *)malloc(needbytes);
      if (allocbufa==NULL) {            // hopeless -- abandon
        *status|=DEC_Insufficient_storage;
        break;}
      a=allocbufa;                      // use the allocated space
      }
    pp=p+rhs->digits;
    needbytes=sizeof(decNumber)+(D2U(MAXI(pp,16))-1)*sizeof(Unit);
    if (needbytes>sizeof(bufb)) {       // need malloc space
      allocbufb=(decNumber *)malloc(needbytes);
      if (allocbufb==NULL) {            // hopeless -- abandon
        *status|=DEC_Insufficient_storage;
        break;}
      b=allocbufb;                      // use the allocated space
      }

    // Prepare an initial estimate in acc.  Calculate this by
    // considering the coefficient of x to be a normalized fraction,
    // f, with the decimal point at far left and multiplied by
    // 10**r.  Then, rhs=f*10**r and 0.1<=f<1, and
    //   ln(x) = ln(f) + ln(10)*r
    // Get the initial estimate for ln(f) from a small lookup
    // table (see above) indexed by the first two digits of f,
    // truncated.
    decContextDefault(&aset, DEC_INIT_DECIMAL64); // 16-digit extended
    r=rhs->exponent+rhs->digits;        // 'normalised' exponent
    decNumberFromInt32(a, r);           // a=r
    decNumberFromInt32(b, 2302585);     // b=ln(10) (2.302585)
    b->exponent=-6;                     // ..
    decMultiplyOp(a, a, b, &aset, &ignore); // a=a*b
    // now get top two digits of rhs into b by simple truncate and
    // force to integer
    residue=0;                          // (no residue)
    aset.digits=2; aset.round=DEC_ROUND_DOWN;
    decCopyFit(b, rhs, &aset, &residue, &ignore); // copy & shorten
    b->exponent=0;                      // make integer
    t=decGetInt(b);                     // [cannot fail]
    if (t<10) t=X10(t);                 // adjust single-digit b
    t=LNnn[t-10];                       // look up ln(b)
    decNumberFromInt32(b, t>>2);        // b=ln(b) coefficient
    b->exponent=-(t&3)-3;               // set exponent
    b->bits=DECNEG;                     // ln(0.10)->ln(0.99) always -ve
    aset.digits=16; aset.round=DEC_ROUND_HALF_EVEN; // restore
    decAddOp(a, a, b, &aset, 0, &ignore); // acc=a+b
    // the initial estimate is now in a, with up to 4 digits correct.
    // When rhs is at or near Nmax the estimate will be low, so we
    // will approach it from below, avoiding overflow when calling exp.

    decNumberZero(&numone); *numone.lsu=1; // constant 1 for adjustment

    // accumulator bounds are as requested (could underflow, but
    // cannot overflow)
    aset.emax=set->emax;
    aset.emin=set->emin;
    aset.clamp=0;                       // no concrete format
    // set up a context to be used for the multiply and subtract
    bset=aset;
    bset.emax=DEC_MAX_MATH*2;           // use double bounds for the
    bset.emin=-DEC_MAX_MATH*2;          // adjustment calculation
                                        // [see decExpOp call below]
    // for each iteration double the number of digits to calculate,
    // up to a maximum of p
    pp=9;                               // initial precision
    // [initially 9 as then the sequence starts 7+2, 16+2, and
    // 34+2, which is ideal for standard-sized numbers]
    aset.digits=pp;                     // working context
    bset.digits=pp+rhs->digits;         // wider context
    for (;;) {                          // iterate
      #if DECCHECK
      iterations++;
      if (iterations>24) break;         // consider 9 * 2**24
      #endif
      // calculate the adjustment (exp(-a)*x-1) into b.  This is a
      // catastrophic subtraction but it really is the difference
      // from 1 that is of interest.
      // Use the internal entry point to Exp as it allows the double
      // range for calculating exp(-a) when a is the tiniest subnormal.
      a->bits^=DECNEG;                  // make -a
      decExpOp(b, a, &bset, &ignore);   // b=exp(-a)
      a->bits^=DECNEG;                  // restore sign of a
      // now multiply by rhs and subtract 1, at the wider precision
      decMultiplyOp(b, b, rhs, &bset, &ignore);        // b=b*rhs
      decAddOp(b, b, &numone, &bset, DECNEG, &ignore); // b=b-1

      // the iteration ends when the adjustment cannot affect the
      // result by >=0.5 ulp (at the requested digits), which
      // is when its value is smaller than the accumulator by
      // set->digits+1 digits (or it is zero) -- this is a looser
      // requirement than for Exp because all that happens to the
      // accumulator after this is the final rounding (but note that
      // there must also be full precision in a, or a=0).
      if (decNumberIsZero(b)
       || (a->digits+a->exponent)>=(b->digits+b->exponent+set->digits+1)) {
        if (a->digits==p) break;
        if (decNumberIsZero(a)) {
          decCompareOp(&cmp, rhs, &numone, &aset, COMPARE, &ignore); // rhs=1 ?
          if (cmp.lsu[0]==0) a->exponent=0; // yes, exact 0
           else *status|=(DEC_Inexact | DEC_Rounded); // no, inexact
          break;
          }
        // force padding if adjustment has gone to 0 before full length
        if (decNumberIsZero(b)) b->exponent=a->exponent-p;
        }

      // not done yet ...
      decAddOp(a, a, b, &aset, 0, &ignore); // a=a+b for next estimate
      if (pp==p) continue;              // precision is at maximum
      // lengthen the next calculation
      pp=pp*2;                          // double precision
      if (pp>p) pp=p;                   // clamp to maximum
      aset.digits=pp;                   // working context
      bset.digits=pp+rhs->digits;       // wider context
      } // Newton's iteration

    #if DECCHECK
    // just a sanity check; remove the test to show always
    if (iterations>24)
      printf("Ln iterations=%ld, status=%08lx, p=%ld, d=%ld\n",
             (LI)iterations, (LI)*status, (LI)p, (LI)rhs->digits);
    #endif

    // Copy and round the result to res
    residue=1;                          // indicate dirt to right
    if (ISZERO(a)) residue=0;           // .. unless underflowed to 0
    aset.digits=set->digits;            // [use default rounding]
    decCopyFit(res, a, &aset, &residue, status); // copy & shorten
    decFinish(res, set, &residue, status);       // cleanup/set flags
    } while(0);                         // end protected

  if (allocbufa!=NULL) free(allocbufa); // drop any storage used
  if (allocbufb!=NULL) free(allocbufb); // ..
  // [status is handled by caller]
  return res;
  } // decLnOp

/* ------------------------------------------------------------------ */
/* decQuantizeOp -- force exponent to requested value                 */
/*                                                                    */
/* This computes C = op(A, B), where op adjusts the coefficient       */
/* of C (by rounding or shifting) such that the exponent (-scale)     */
/* of C has the value B or matches the exponent of B.                 */
/* The numerical value of C will equal A, except for the effects of   */
/* any rounding that occurred.                                        */
/*                                                                    */
/* res is C, the result.
C may be A or B */ /* lhs is A, the number to adjust */ /* rhs is B, the requested exponent */ /* set is the context */ /* quant is 1 for quantize or 0 for rescale */ /* status is the status accumulator (this can be called without */ /* risk of control loss) */ /* */ /* C must have space for set->digits digits. */ /* */ /* Unless there is an error or the result is infinite, the exponent */ /* after the operation is guaranteed to be that requested. */ /* ------------------------------------------------------------------ */ static decNumber * decQuantizeOp(decNumber *res, const decNumber *lhs, const decNumber *rhs, decContext *set, Flag quant, uInt *status) { #if DECSUBSET decNumber *alloclhs=NULL; // non-NULL if rounded lhs allocated decNumber *allocrhs=NULL; // .., rhs #endif const decNumber *inrhs=rhs; // save original rhs Int reqdigits=set->digits; // requested DIGITS Int reqexp; // requested exponent [-scale] Int residue=0; // rounding residue Int etiny=set->emin-(reqdigits-1); #if DECCHECK if (decCheckOperands(res, lhs, rhs, set)) return res; #endif do { // protect allocated storage #if DECSUBSET if (!set->extended) { // reduce operands and set lostDigits status, as needed if (lhs->digits>reqdigits) { alloclhs=decRoundOperand(lhs, set, status); if (alloclhs==NULL) break; lhs=alloclhs; } if (rhs->digits>reqdigits) { // [this only checks lostDigits] allocrhs=decRoundOperand(rhs, set, status); if (allocrhs==NULL) break; rhs=allocrhs; } } #endif // [following code does not require input rounding] // Handle special values if (SPECIALARGS) { // NaNs get usual processing if (SPECIALARGS & (DECSNAN | DECNAN)) decNaNs(res, lhs, rhs, set, status); // one infinity but not both is bad else if ((lhs->bits ^ rhs->bits) & DECINF) *status|=DEC_Invalid_operation; // both infinity: return lhs else decNumberCopy(res, lhs); // [nop if in place] break; } // set requested exponent if (quant) reqexp=inrhs->exponent; // quantize -- match exponents else { // rescale -- use value of rhs 
// Original rhs must be an integer that fits and is in range, // which could be from -1999999997 to +999999999, thanks to // subnormals reqexp=decGetInt(inrhs); // [cannot fail] } #if DECSUBSET if (!set->extended) etiny=set->emin; // no subnormals #endif if (reqexp==BADINT // bad (rescale only) or .. || reqexp==BIGODD || reqexp==BIGEVEN // very big (ditto) or .. || (reqexpset->emax)) { // > emax *status|=DEC_Invalid_operation; break;} // the RHS has been processed, so it can be overwritten now if necessary if (ISZERO(lhs)) { // zero coefficient unchanged decNumberCopy(res, lhs); // [nop if in place] res->exponent=reqexp; // .. just set exponent #if DECSUBSET if (!set->extended) res->bits=0; // subset specification; no -0 #endif } else { // non-zero lhs Int adjust=reqexp-lhs->exponent; // digit adjustment needed // if adjusted coefficient will definitely not fit, give up now if ((lhs->digits-adjust)>reqdigits) { *status|=DEC_Invalid_operation; break; } if (adjust>0) { // increasing exponent // this will decrease the length of the coefficient by adjust // digits, and must round as it does so decContext workset; // work workset=*set; // clone rounding, etc. workset.digits=lhs->digits-adjust; // set requested length // [note that the latter can be <1, here] decCopyFit(res, lhs, &workset, &residue, status); // fit to result decApplyRound(res, &workset, residue, status); // .. and round residue=0; // [used] // If just rounded a 999s case, exponent will be off by one; // adjust back (after checking space), if so. if (res->exponent>reqexp) { // re-check needed, e.g., for quantize(0.9999, 0.001) under // set->digits==3 if (res->digits==reqdigits) { // cannot shift by 1 *status&=~(DEC_Inexact | DEC_Rounded); // [clean these] *status|=DEC_Invalid_operation; break; } res->digits=decShiftToMost(res->lsu, res->digits, 1); // shift res->exponent--; // (re)adjust the exponent. 
} #if DECSUBSET if (ISZERO(res) && !set->extended) res->bits=0; // subset; no -0 #endif } // increase else /* adjust<=0 */ { // decreasing or = exponent // this will increase the length of the coefficient by -adjust // digits, by adding zero or more trailing zeros; this is // already checked for fit, above decNumberCopy(res, lhs); // [it will fit] // if padding needed (adjust<0), add it now... if (adjust<0) { res->digits=decShiftToMost(res->lsu, res->digits, -adjust); res->exponent+=adjust; // adjust the exponent } } // decrease } // non-zero // Check for overflow [do not use Finalize in this case, as an // overflow here is a "don't fit" situation] if (res->exponent>set->emax-res->digits+1) { // too big *status|=DEC_Invalid_operation; break; } else { decFinalize(res, set, &residue, status); // set subnormal flags *status&=~DEC_Underflow; // suppress Underflow [as per 754] } } while(0); // end protected #if DECSUBSET if (allocrhs!=NULL) free(allocrhs); // drop any storage used if (alloclhs!=NULL) free(alloclhs); // .. #endif return res; } // decQuantizeOp /* ------------------------------------------------------------------ */ /* decCompareOp -- compare, min, or max two Numbers */ /* */ /* This computes C = A ? B and carries out one of four operations: */ /* COMPARE -- returns the signum (as a number) giving the */ /* result of a comparison unless one or both */ /* operands is a NaN (in which case a NaN results) */ /* COMPSIG -- as COMPARE except that a quiet NaN raises */ /* Invalid operation. */ /* COMPMAX -- returns the larger of the operands, using the */ /* 754 maxnum operation */ /* COMPMAXMAG -- ditto, comparing absolute values */ /* COMPMIN -- the 754 minnum operation */ /* COMPMINMAG -- ditto, comparing absolute values */ /* COMTOTAL -- returns the signum (as a number) giving the */ /* result of a comparison using 754 total ordering */ /* */ /* res is C, the result. 
C may be A and/or B (e.g., X=X?X) */ /* lhs is A */ /* rhs is B */ /* set is the context */ /* op is the operation flag */ /* status is the usual accumulator */ /* */ /* C must have space for one digit for COMPARE or set->digits for */ /* COMPMAX, COMPMIN, COMPMAXMAG, or COMPMINMAG. */ /* ------------------------------------------------------------------ */ /* The emphasis here is on speed for common cases, and avoiding */ /* coefficient comparison if possible. */ /* ------------------------------------------------------------------ */ decNumber * decCompareOp(decNumber *res, const decNumber *lhs, const decNumber *rhs, decContext *set, Flag op, uInt *status) { #if DECSUBSET decNumber *alloclhs=NULL; // non-NULL if rounded lhs allocated decNumber *allocrhs=NULL; // .., rhs #endif Int result=0; // default result value uByte merged; // work #if DECCHECK if (decCheckOperands(res, lhs, rhs, set)) return res; #endif do { // protect allocated storage #if DECSUBSET if (!set->extended) { // reduce operands and set lostDigits status, as needed if (lhs->digits>set->digits) { alloclhs=decRoundOperand(lhs, set, status); if (alloclhs==NULL) {result=BADINT; break;} lhs=alloclhs; } if (rhs->digits>set->digits) { allocrhs=decRoundOperand(rhs, set, status); if (allocrhs==NULL) {result=BADINT; break;} rhs=allocrhs; } } #endif // [following code does not require input rounding] // If total ordering then handle differing signs 'up front' if (op==COMPTOTAL) { // total ordering if (decNumberIsNegative(lhs) & !decNumberIsNegative(rhs)) { result=-1; break; } if (!decNumberIsNegative(lhs) & decNumberIsNegative(rhs)) { result=+1; break; } } // handle NaNs specially; let infinities drop through // This assumes sNaN (even just one) leads to NaN. 
merged=(lhs->bits | rhs->bits) & (DECSNAN | DECNAN); if (merged) { // a NaN bit set if (op==COMPARE); // result will be NaN else if (op==COMPSIG) // treat qNaN as sNaN *status|=DEC_Invalid_operation | DEC_sNaN; else if (op==COMPTOTAL) { // total ordering, always finite // signs are known to be the same; compute the ordering here // as if the signs are both positive, then invert for negatives if (!decNumberIsNaN(lhs)) result=-1; else if (!decNumberIsNaN(rhs)) result=+1; // here if both NaNs else if (decNumberIsSNaN(lhs) && decNumberIsQNaN(rhs)) result=-1; else if (decNumberIsQNaN(lhs) && decNumberIsSNaN(rhs)) result=+1; else { // both NaN or both sNaN // now it just depends on the payload result=decUnitCompare(lhs->lsu, D2U(lhs->digits), rhs->lsu, D2U(rhs->digits), 0); // [Error not possible, as these are 'aligned'] } // both same NaNs if (decNumberIsNegative(lhs)) result=-result; break; } // total order else if (merged & DECSNAN); // sNaN -> qNaN else { // here if MIN or MAX and one or two quiet NaNs // min or max -- 754 rules ignore single NaN if (!decNumberIsNaN(lhs) || !decNumberIsNaN(rhs)) { // just one NaN; force choice to be the non-NaN operand op=COMPMAX; if (lhs->bits & DECNAN) result=-1; // pick rhs else result=+1; // pick lhs break; } } // max or min op=COMPNAN; // use special path decNaNs(res, lhs, rhs, set, status); // propagate NaN break; } // have numbers if (op==COMPMAXMAG || op==COMPMINMAG) result=decCompare(lhs, rhs, 1); else result=decCompare(lhs, rhs, 0); // sign matters } while(0); // end protected if (result==BADINT) *status|=DEC_Insufficient_storage; // rare else { if (op==COMPARE || op==COMPSIG ||op==COMPTOTAL) { // returning signum if (op==COMPTOTAL && result==0) { // operands are numerically equal or same NaN (and same sign, // tested first); if identical, leave result 0 if (lhs->exponent!=rhs->exponent) { if (lhs->exponentexponent) result=-1; else result=+1; if (decNumberIsNegative(lhs)) result=-result; } // lexp!=rexp } // total-order by 
exponent decNumberZero(res); // [always a valid result] if (result!=0) { // must be -1 or +1 *res->lsu=1; if (result<0) res->bits=DECNEG; } } else if (op==COMPNAN); // special, drop through else { // MAX or MIN, non-NaN result Int residue=0; // rounding accumulator // choose the operand for the result const decNumber *choice; if (result==0) { // operands are numerically equal // choose according to sign then exponent (see 754) uByte slhs=(lhs->bits & DECNEG); uByte srhs=(rhs->bits & DECNEG); #if DECSUBSET if (!set->extended) { // subset: force left-hand op=COMPMAX; result=+1; } else #endif if (slhs!=srhs) { // signs differ if (slhs) result=-1; // rhs is max else result=+1; // lhs is max } else if (slhs && srhs) { // both negative if (lhs->exponentexponent) result=+1; else result=-1; // [if equal, use lhs, technically identical] } else { // both positive if (lhs->exponent>rhs->exponent) result=+1; else result=-1; // [ditto] } } // numerically equal // here result will be non-0; reverse if looking for MIN if (op==COMPMIN || op==COMPMINMAG) result=-result; choice=(result>0 ? lhs : rhs); // choose // copy chosen to result, rounding if need be decCopyFit(res, choice, set, &residue, status); decFinish(res, set, &residue, status); } } #if DECSUBSET if (allocrhs!=NULL) free(allocrhs); // free any storage used if (alloclhs!=NULL) free(alloclhs); // .. #endif return res; } // decCompareOp /* ------------------------------------------------------------------ */ /* decCompare -- compare two decNumbers by numerical value */ /* */ /* This routine compares A ? B without altering them. 
*/ /* */ /* Arg1 is A, a decNumber which is not a NaN */ /* Arg2 is B, a decNumber which is not a NaN */ /* Arg3 is 1 for a sign-independent compare, 0 otherwise */ /* */ /* returns -1, 0, or 1 for AB, or BADINT if failure */ /* (the only possible failure is an allocation error) */ /* ------------------------------------------------------------------ */ static Int decCompare(const decNumber *lhs, const decNumber *rhs, Flag abs) { Int result; // result value Int sigr; // rhs signum Int compare; // work result=1; // assume signum(lhs) if (ISZERO(lhs)) result=0; if (abs) { if (ISZERO(rhs)) return result; // LHS wins or both 0 // RHS is non-zero if (result==0) return -1; // LHS is 0; RHS wins // [here, both non-zero, result=1] } else { // signs matter if (result && decNumberIsNegative(lhs)) result=-1; sigr=1; // compute signum(rhs) if (ISZERO(rhs)) sigr=0; else if (decNumberIsNegative(rhs)) sigr=-1; if (result > sigr) return +1; // L > R, return 1 if (result < sigr) return -1; // L < R, return -1 if (result==0) return 0; // both 0 } // signums are the same; both are non-zero if ((lhs->bits | rhs->bits) & DECINF) { // one or more infinities if (decNumberIsInfinite(rhs)) { if (decNumberIsInfinite(lhs)) result=0;// both infinite else result=-result; // only rhs infinite } return result; } // must compare the coefficients, allowing for exponents if (lhs->exponent>rhs->exponent) { // LHS exponent larger // swap sides, and sign const decNumber *temp=lhs; lhs=rhs; rhs=temp; result=-result; } compare=decUnitCompare(lhs->lsu, D2U(lhs->digits), rhs->lsu, D2U(rhs->digits), rhs->exponent-lhs->exponent); if (compare!=BADINT) compare*=result; // comparison succeeded return compare; } // decCompare /* ------------------------------------------------------------------ */ /* decUnitCompare -- compare two >=0 integers in Unit arrays */ /* */ /* This routine compares A ? 
B*10**E where A and B are unit arrays */ /* A is a plain integer */ /* B has an exponent of E (which must be non-negative) */ /* */ /* Arg1 is A first Unit (lsu) */ /* Arg2 is A length in Units */ /* Arg3 is B first Unit (lsu) */ /* Arg4 is B length in Units */ /* Arg5 is E (0 if the units are aligned) */ /* */ /* returns -1, 0, or 1 for AB, or BADINT if failure */ /* (the only possible failure is an allocation error, which can */ /* only occur if E!=0) */ /* ------------------------------------------------------------------ */ static Int decUnitCompare(const Unit *a, Int alength, const Unit *b, Int blength, Int exp) { Unit *acc; // accumulator for result Unit accbuff[SD2U(DECBUFFER*2+1)]; // local buffer Unit *allocacc=NULL; // -> allocated acc buffer, iff allocated Int accunits, need; // units in use or needed for acc const Unit *l, *r, *u; // work Int expunits, exprem, result; // .. if (exp==0) { // aligned; fastpath if (alength>blength) return 1; if (alength=a; l--, r--) { if (*l>*r) return 1; if (*l<*r) return -1; } return 0; // all units match } // aligned // Unaligned. If one is >1 unit longer than the other, padded // approximately, then can return easily if (alength>blength+(Int)D2U(exp)) return 1; if (alength+1sizeof(accbuff)) { allocacc=(Unit *)malloc(need*sizeof(Unit)); if (allocacc==NULL) return BADINT; // hopeless -- abandon acc=allocacc; } // Calculate units and remainder from exponent. expunits=exp/DECDPUN; exprem=exp%DECDPUN; // subtract [A+B*(-m)] accunits=decUnitAddSub(a, alength, b, blength, expunits, acc, -(Int)powers[exprem]); // [UnitAddSub result may have leading zeros, even on zero] if (accunits<0) result=-1; // negative result else { // non-negative result // check units of the result before freeing any storage for (u=acc; u=0 integers in Unit arrays */ /* */ /* This routine performs the calculation: */ /* */ /* C=A+(B*M) */ /* */ /* Where M is in the range -DECDPUNMAX through +DECDPUNMAX. */ /* */ /* A may be shorter or longer than B. 
*/ /* */ /* Leading zeros are not removed after a calculation. The result is */ /* either the same length as the longer of A and B (adding any */ /* shift), or one Unit longer than that (if a Unit carry occurred). */ /* */ /* A and B content are not altered unless C is also A or B. */ /* C may be the same array as A or B, but only if no zero padding is */ /* requested (that is, C may be B only if bshift==0). */ /* C is filled from the lsu; only those units necessary to complete */ /* the calculation are referenced. */ /* */ /* Arg1 is A first Unit (lsu) */ /* Arg2 is A length in Units */ /* Arg3 is B first Unit (lsu) */ /* Arg4 is B length in Units */ /* Arg5 is B shift in Units (>=0; pads with 0 units if positive) */ /* Arg6 is C first Unit (lsu) */ /* Arg7 is M, the multiplier */ /* */ /* returns the count of Units written to C, which will be non-zero */ /* and negated if the result is negative. That is, the sign of the */ /* returned Int is the sign of the result (positive for zero) and */ /* the absolute value of the Int is the count of Units. */ /* */ /* It is the caller's responsibility to make sure that C size is */ /* safe, allowing space if necessary for a one-Unit carry. */ /* */ /* This routine is severely performance-critical; *any* change here */ /* must be measured (timed) to assure no performance degradation. */ /* In particular, trickery here tends to be counter-productive, as */ /* increased complexity of code hurts register optimizations on */ /* register-poor architectures. Avoiding divisions is nearly */ /* always a Good Idea, however. */ /* */ /* Special thanks to Rick McGuire (IBM Cambridge, MA) and Dave Clark */ /* (IBM Warwick, UK) for some of the ideas used in this routine. 
*/ /* ------------------------------------------------------------------ */ static Int decUnitAddSub(const Unit *a, Int alength, const Unit *b, Int blength, Int bshift, Unit *c, Int m) { const Unit *alsu=a; // A lsu [need to remember it] Unit *clsu=c; // C ditto Unit *minC; // low water mark for C Unit *maxC; // high water mark for C eInt carry=0; // carry integer (could be Long) Int add; // work #if DECDPUN<=4 // myriadal, millenary, etc. Int est; // estimated quotient #endif #if DECTRACE if (alength<1 || blength<1) printf("decUnitAddSub: alen blen m %ld %ld [%ld]\n", alength, blength, m); #endif maxC=c+alength; // A is usually the longer minC=c+blength; // .. and B the shorter if (bshift!=0) { // B is shifted; low As copy across minC+=bshift; // if in place [common], skip copy unless there's a gap [rare] if (a==c && bshift<=alength) { c+=bshift; a+=bshift; } else for (; cmaxC) { // swap Unit *hold=minC; minC=maxC; maxC=hold; } // For speed, do the addition as two loops; the first where both A // and B contribute, and the second (if necessary) where only one or // other of the numbers contribute. // Carry handling is the same (i.e., duplicated) in each case. 
for (; c=0) { est=(((ueInt)carry>>11)*53687)>>18; *c=(Unit)(carry-est*(DECDPUNMAX+1)); // remainder carry=est; // likely quotient [89%] if (*c>11)*53687)>>18; *c=(Unit)(carry-est*(DECDPUNMAX+1)); carry=est-(DECDPUNMAX+1); // correctly negative if (*c=0) { est=(((ueInt)carry>>3)*16777)>>21; *c=(Unit)(carry-est*(DECDPUNMAX+1)); // remainder carry=est; // likely quotient [99%] if (*c>3)*16777)>>21; *c=(Unit)(carry-est*(DECDPUNMAX+1)); carry=est-(DECDPUNMAX+1); // correctly negative if (*c=0) { est=QUOT10(carry, DECDPUN); *c=(Unit)(carry-est*(DECDPUNMAX+1)); // remainder carry=est; // quotient continue; } // negative case carry=carry+(eInt)(DECDPUNMAX+1)*(DECDPUNMAX+1); // make positive est=QUOT10(carry, DECDPUN); *c=(Unit)(carry-est*(DECDPUNMAX+1)); carry=est-(DECDPUNMAX+1); // correctly negative #else // remainder operator is undefined if negative, so must test if ((ueInt)carry<(DECDPUNMAX+1)*2) { // fastpath carry +1 *c=(Unit)(carry-(DECDPUNMAX+1)); // [helps additions] carry=1; continue; } if (carry>=0) { *c=(Unit)(carry%(DECDPUNMAX+1)); carry=carry/(DECDPUNMAX+1); continue; } // negative case carry=carry+(eInt)(DECDPUNMAX+1)*(DECDPUNMAX+1); // make positive *c=(Unit)(carry%(DECDPUNMAX+1)); carry=carry/(DECDPUNMAX+1)-(DECDPUNMAX+1); #endif } // c // now may have one or other to complete // [pretest to avoid loop setup/shutdown] if (cDECDPUNMAX #if DECDPUN==4 // use divide-by-multiply if (carry>=0) { est=(((ueInt)carry>>11)*53687)>>18; *c=(Unit)(carry-est*(DECDPUNMAX+1)); // remainder carry=est; // likely quotient [79.7%] if (*c>11)*53687)>>18; *c=(Unit)(carry-est*(DECDPUNMAX+1)); carry=est-(DECDPUNMAX+1); // correctly negative if (*c=0) { est=(((ueInt)carry>>3)*16777)>>21; *c=(Unit)(carry-est*(DECDPUNMAX+1)); // remainder carry=est; // likely quotient [99%] if (*c>3)*16777)>>21; *c=(Unit)(carry-est*(DECDPUNMAX+1)); carry=est-(DECDPUNMAX+1); // correctly negative if (*c=0) { est=QUOT10(carry, DECDPUN); *c=(Unit)(carry-est*(DECDPUNMAX+1)); // remainder carry=est; // 
quotient continue; } // negative case carry=carry+(eInt)(DECDPUNMAX+1)*(DECDPUNMAX+1); // make positive est=QUOT10(carry, DECDPUN); *c=(Unit)(carry-est*(DECDPUNMAX+1)); carry=est-(DECDPUNMAX+1); // correctly negative #else if ((ueInt)carry<(DECDPUNMAX+1)*2){ // fastpath carry 1 *c=(Unit)(carry-(DECDPUNMAX+1)); carry=1; continue; } // remainder operator is undefined if negative, so must test if (carry>=0) { *c=(Unit)(carry%(DECDPUNMAX+1)); carry=carry/(DECDPUNMAX+1); continue; } // negative case carry=carry+(eInt)(DECDPUNMAX+1)*(DECDPUNMAX+1); // make positive *c=(Unit)(carry%(DECDPUNMAX+1)); carry=carry/(DECDPUNMAX+1)-(DECDPUNMAX+1); #endif } // c // OK, all A and B processed; might still have carry or borrow // return number of Units in the result, negated if a borrow if (carry==0) return c-clsu; // no carry, so no more to do if (carry>0) { // positive carry *c=(Unit)carry; // place as new unit c++; // .. return c-clsu; } // -ve carry: it's a borrow; complement needed add=1; // temporary carry... for (c=clsu; c current Unit #if DECCHECK if (decCheckOperands(dn, DECUNUSED, DECUNUSED, DECUNCONT)) return dn; #endif *dropped=0; // assume no zeros dropped if ((dn->bits & DECSPECIAL) // fast exit if special .. || (*dn->lsu & 0x01)) return dn; // .. or odd if (ISZERO(dn)) { // .. 
or 0 dn->exponent=0; // (sign is preserved) return dn; } // have a finite number which is even exp=dn->exponent; cut=1; // digit (1-DECDPUN) in Unit up=dn->lsu; // -> current Unit for (d=0; ddigits-1; d++) { // [don't strip the final digit] // slice by powers #if DECDPUN<=4 uInt quot=QUOT10(*up, cut); if ((*up-quot*powers[cut])!=0) break; // found non-0 digit #else if (*up%powers[cut]!=0) break; // found non-0 digit #endif // have a trailing 0 if (!all) { // trimming // [if exp>0 then all trailing 0s are significant for trim] if (exp<=0) { // if digit might be significant if (exp==0) break; // then quit exp++; // next digit might be significant } } cut++; // next power if (cut>DECDPUN) { // need new Unit up++; cut=1; } } // d if (d==0) return dn; // none to drop // may need to limit drop if clamping if (set->clamp && !noclamp) { Int maxd=set->emax-set->digits+1-dn->exponent; if (maxd<=0) return dn; // nothing possible if (d>maxd) d=maxd; } // effect the drop decShiftToLeast(dn->lsu, D2U(dn->digits), d); dn->exponent+=d; // maintain numerical value dn->digits-=d; // new length *dropped=d; // report the count return dn; } // decTrim /* ------------------------------------------------------------------ */ /* decReverse -- reverse a Unit array in place */ /* */ /* ulo is the start of the array */ /* uhi is the end of the array (highest Unit to include) */ /* */ /* The units ulo through uhi are reversed in place (if the number */ /* of units is odd, the middle one is untouched). Note that the */ /* digit(s) in each unit are unaffected. 
*/ /* ------------------------------------------------------------------ */ static void decReverse(Unit *ulo, Unit *uhi) { Unit temp; for (; ulo=uar; source--, target--) *target=*source; } else { first=uar+D2U(digits+shift)-1; // where msu of source will end up for (; source>=uar; source--, target--) { // split the source Unit and accumulate remainder for next #if DECDPUN<=4 uInt quot=QUOT10(*source, cut); uInt rem=*source-quot*powers[cut]; next+=quot; #else uInt rem=*source%powers[cut]; next+=*source/powers[cut]; #endif if (target<=first) *target=(Unit)next; // write to target iff valid next=rem*powers[DECDPUN-cut]; // save remainder for next Unit } } // shift-move // propagate any partial unit to one below and clear the rest for (; target>=uar; target--) { *target=(Unit)next; next=0; } return digits+shift; } // decShiftToMost /* ------------------------------------------------------------------ */ /* decShiftToLeast -- shift digits in array towards least significant */ /* */ /* uar is the array */ /* units is length of the array, in units */ /* shift is the number of digits to remove from the lsu end; it */ /* must be zero or positive and <= than units*DECDPUN. */ /* */ /* returns the new length of the integer in the array, in units */ /* */ /* Removed digits are discarded (lost). Units not required to hold */ /* the final result are unchanged. 
*/ /* ------------------------------------------------------------------ */ static Int decShiftToLeast(Unit *uar, Int units, Int shift) { Unit *target, *up; // work Int cut, count; // work Int quot, rem; // for division if (shift==0) return units; // [fastpath] nothing to do if (shift==units*DECDPUN) { // [fastpath] little to do *uar=0; // all digits cleared gives zero return 1; // leaves just the one } target=uar; // both paths cut=MSUDIGITS(shift); if (cut==DECDPUN) { // unit-boundary case; easy up=uar+D2U(shift); for (; updigits is > set->digits) */ /* set is the relevant context */ /* status is the status accumulator */ /* */ /* returns an allocated decNumber with the rounded result. */ /* */ /* lostDigits and other status may be set by this. */ /* */ /* Since the input is an operand, it must not be modified. */ /* Instead, return an allocated decNumber, rounded as required. */ /* It is the caller's responsibility to free the allocated storage. */ /* */ /* If no storage is available then the result cannot be used, so NULL */ /* is returned. */ /* ------------------------------------------------------------------ */ static decNumber *decRoundOperand(const decNumber *dn, decContext *set, uInt *status) { decNumber *res; // result structure uInt newstatus=0; // status from round Int residue=0; // rounding accumulator // Allocate storage for the returned decNumber, big enough for the // length specified by the context res=(decNumber *)malloc(sizeof(decNumber) +(D2U(set->digits)-1)*sizeof(Unit)); if (res==NULL) { *status|=DEC_Insufficient_storage; return NULL; } decCopyFit(res, dn, set, &residue, &newstatus); decApplyRound(res, set, residue, &newstatus); // If that set Inexact then "lost digits" is raised... 
if (newstatus & DEC_Inexact) newstatus|=DEC_Lost_digits; *status|=newstatus; return res; } // decRoundOperand #endif /* ------------------------------------------------------------------ */ /* decCopyFit -- copy a number, truncating the coefficient if needed */ /* */ /* dest is the target decNumber */ /* src is the source decNumber */ /* set is the context [used for length (digits) and rounding mode] */ /* residue is the residue accumulator */ /* status contains the current status to be updated */ /* */ /* (dest==src is allowed and will be a no-op if fits) */ /* All fields are updated as required. */ /* ------------------------------------------------------------------ */ static void decCopyFit(decNumber *dest, const decNumber *src, decContext *set, Int *residue, uInt *status) { dest->bits=src->bits; dest->exponent=src->exponent; decSetCoeff(dest, set, src->lsu, src->digits, residue, status); } // decCopyFit /* ------------------------------------------------------------------ */ /* decSetCoeff -- set the coefficient of a number */ /* */ /* dn is the number whose coefficient array is to be set. */ /* It must have space for set->digits digits */ /* set is the context [for size] */ /* lsu -> lsu of the source coefficient [may be dn->lsu] */ /* len is digits in the source coefficient [may be dn->digits] */ /* residue is the residue accumulator. This has values as in */ /* decApplyRound, and will be unchanged unless the */ /* target size is less than len. In this case, the */ /* coefficient is truncated and the residue is updated to */ /* reflect the previous residue and the dropped digits. */ /* status is the status accumulator, as usual */ /* */ /* The coefficient may already be in the number, or it can be an */ /* external intermediate array. If it is in the number, lsu must == */ /* dn->lsu and len must == dn->digits. 
*/ /* */ /* Note that the coefficient length (len) may be < set->digits, and */ /* in this case this merely copies the coefficient (or is a no-op */ /* if dn->lsu==lsu). */ /* */ /* Note also that (only internally, from decQuantizeOp and */ /* decSetSubnormal) the value of set->digits may be less than one, */ /* indicating a round to left. This routine handles that case */ /* correctly; caller ensures space. */ /* */ /* dn->digits, dn->lsu (and as required), and dn->exponent are */ /* updated as necessary. dn->bits (sign) is unchanged. */ /* */ /* DEC_Rounded status is set if any digits are discarded. */ /* DEC_Inexact status is set if any non-zero digits are discarded, or */ /* incoming residue was non-0 (implies rounded) */ /* ------------------------------------------------------------------ */ // mapping array: maps 0-9 to canonical residues, so that a residue // can be adjusted in the range [-1, +1] and achieve correct rounding // 0 1 2 3 4 5 6 7 8 9 static const uByte resmap[10]={0, 3, 3, 3, 3, 5, 7, 7, 7, 7}; static void decSetCoeff(decNumber *dn, decContext *set, const Unit *lsu, Int len, Int *residue, uInt *status) { Int discard; // number of digits to discard uInt cut; // cut point in Unit const Unit *up; // work Unit *target; // .. Int count; // .. #if DECDPUN<=4 uInt temp; // .. #endif discard=len-set->digits; // digits to discard if (discard<=0) { // no digits are being discarded if (dn->lsu!=lsu) { // copy needed // copy the coefficient array to the result number; no shift needed count=len; // avoids D2U up=lsu; for (target=dn->lsu; count>0; target++, up++, count-=DECDPUN) *target=*up; dn->digits=len; // set the new length } // dn->exponent and residue are unchanged, record any inexactitude if (*residue!=0) *status|=(DEC_Inexact | DEC_Rounded); return; } // some digits must be discarded ... 
dn->exponent+=discard; // maintain numerical value *status|=DEC_Rounded; // accumulate Rounded status if (*residue>1) *residue=1; // previous residue now to right, so reduce if (discard>len) { // everything, +1, is being discarded // guard digit is 0 // residue is all the number [NB could be all 0s] if (*residue<=0) { // not already positive count=len; // avoids D2U for (up=lsu; count>0; up++, count-=DECDPUN) if (*up!=0) { // found non-0 *residue=1; break; // no need to check any others } } if (*residue!=0) *status|=DEC_Inexact; // record inexactitude *dn->lsu=0; // coefficient will now be 0 dn->digits=1; // .. return; } // total discard // partial discard [most common case] // here, at least the first (most significant) discarded digit exists // spin up the number, noting residue during the spin, until get to // the Unit with the first discarded digit. When reach it, extract // it and remember its position count=0; for (up=lsu;; up++) { count+=DECDPUN; if (count>=discard) break; // full ones all checked if (*up!=0) *residue=1; } // up // here up -> Unit with first discarded digit cut=discard-(count-DECDPUN)-1; if (cut==DECDPUN-1) { // unit-boundary case (fast) Unit half=(Unit)powers[DECDPUN]>>1; // set residue directly if (*up>=half) { if (*up>half) *residue=7; else *residue+=5; // add sticky bit } else { // digits<=0) { // special for Quantize/Subnormal :-( *dn->lsu=0; // .. result is 0 dn->digits=1; // .. 
} else { // shift to least count=set->digits; // now digits to end up with dn->digits=count; // set the new length up++; // move to next // on unit boundary, so shift-down copy loop is simple for (target=dn->lsu; count>0; target++, up++, count-=DECDPUN) *target=*up; } } // unit-boundary case else { // discard digit is in low digit(s), and not top digit uInt discard1; // first discarded digit uInt quot, rem; // for divisions if (cut==0) quot=*up; // is at bottom of unit else /* cut>0 */ { // it's not at bottom of unit #if DECDPUN<=4 quot=QUOT10(*up, cut); rem=*up-quot*powers[cut]; #else rem=*up%powers[cut]; quot=*up/powers[cut]; #endif if (rem!=0) *residue=1; } // discard digit is now at bottom of quot #if DECDPUN<=4 temp=(quot*6554)>>16; // fast /10 // Vowels algorithm here not a win (9 instructions) discard1=quot-X10(temp); quot=temp; #else discard1=quot%10; quot=quot/10; #endif // here, discard1 is the guard digit, and residue is everything // else [use mapping array to accumulate residue safely] *residue+=resmap[discard1]; cut++; // update cut // here: up -> Unit of the array with bottom digit // cut is the division point for each Unit // quot holds the uncut high-order digits for the current unit if (set->digits<=0) { // special for Quantize/Subnormal :-( *dn->lsu=0; // .. result is 0 dn->digits=1; // .. 
} else { // shift to least needed count=set->digits; // now digits to end up with dn->digits=count; // set the new length // shift-copy the coefficient array to the result number for (target=dn->lsu; ; target++) { *target=(Unit)quot; count-=(DECDPUN-cut); if (count<=0) break; up++; quot=*up; #if DECDPUN<=4 quot=QUOT10(quot, cut); rem=*up-quot*powers[cut]; #else rem=quot%powers[cut]; quot=quot/powers[cut]; #endif *target=(Unit)(*target+rem*powers[DECDPUN-cut]); count-=cut; if (count<=0) break; } // shift-copy loop } // shift to least } // not unit boundary if (*residue!=0) *status|=DEC_Inexact; // record inexactitude return; } // decSetCoeff /* ------------------------------------------------------------------ */ /* decApplyRound -- apply pending rounding to a number */ /* */ /* dn is the number, with space for set->digits digits */ /* set is the context [for size and rounding mode] */ /* residue indicates pending rounding, being any accumulated */ /* guard and sticky information. It may be: */ /* 6-9: rounding digit is >5 */ /* 5: rounding digit is exactly half-way */ /* 1-4: rounding digit is <5 and >0 */ /* 0: the coefficient is exact */ /* -1: as 1, but the hidden digits are subtractive, that */ /* is, of the opposite sign to dn. In this case the */ /* coefficient must be non-0. This case occurs when */ /* subtracting a small number (which can be reduced to */ /* a sticky bit); see decAddOp. */ /* status is the status accumulator, as usual */ /* */ /* This routine applies rounding while keeping the length of the */ /* coefficient constant. The exponent and status are unchanged */ /* except if: */ /* */ /* -- the coefficient was increased and is all nines (in which */ /* case Overflow could occur, and is handled directly here so */ /* the caller does not need to re-test for overflow) */ /* */ /* -- the coefficient was decreased and becomes all nines (in which */ /* case Underflow could occur, and is also handled directly). 
*/
/*                                                                    */
/* All fields in dn are updated as required.                          */
/*                                                                    */
/* ------------------------------------------------------------------ */
static void decApplyRound(decNumber *dn, decContext *set, Int residue, uInt *status) {
  Int bump; // 1 if coefficient needs to be incremented
            // -1 if coefficient needs to be decremented
  if (residue==0) return; // nothing to apply
  bump=0;                 // assume a smooth ride
  // now decide whether, and how, to round, depending on mode
  switch (set->round) {
    case DEC_ROUND_05UP: { // round zero or five up (for reround)
      // This is the same as DEC_ROUND_DOWN unless there is a
      // positive residue and the lsd of dn is 0 or 5, in which case
      // it is bumped; when residue is <0, the number is therefore
      // bumped down unless the final digit was 1 or 6 (in which
      // case it is bumped down and then up -- a no-op)
      Int lsd5=*dn->lsu%5; // get lsd and quintate
      if (residue<0 && lsd5!=1) bump=-1;
      else if (residue>0 && lsd5==0) bump=1;
      // [bump==1 could be applied directly; use common path for clarity]
      break;} // r-05
    case DEC_ROUND_DOWN: {
      // no change, except if negative residue
      if (residue<0) bump=-1;
      break;} // r-d
    case DEC_ROUND_HALF_DOWN: {
      if (residue>5) bump=1;
      break;} // r-h-d
    case DEC_ROUND_HALF_EVEN: {
      if (residue>5) bump=1;   // >0.5 goes up
      else if (residue==5) {   // exactly 0.5000...
        // 0.5 goes up iff [new] lsd is odd
        if (*dn->lsu & 0x01) bump=1;
      }
      break;} // r-h-e
    case DEC_ROUND_HALF_UP: {
      if (residue>=5) bump=1;
      break;} // r-h-u
    case DEC_ROUND_UP: {
      if (residue>0) bump=1;
      break;} // r-u
    case DEC_ROUND_CEILING: {
      // same as _UP for positive numbers, and as _DOWN for negatives
      // [negative residue cannot occur on 0]
      if (decNumberIsNegative(dn)) {
        if (residue<0) bump=-1;
      } else {
        if (residue>0) bump=1;
      }
      break;} // r-c
    case DEC_ROUND_FLOOR: {
      // same as _UP for negative numbers, and as _DOWN for positive
      // [negative residue cannot occur on 0]
      if (!decNumberIsNegative(dn)) {
        if (residue<0) bump=-1;
      } else {
        if (residue>0) bump=1;
      }
      break;} // r-f
    default: { // e.g., DEC_ROUND_MAX
      *status|=DEC_Invalid_context;
      #if DECTRACE || (DECCHECK && DECVERB)
      printf("Unknown rounding mode: %d\n", set->round);
      #endif
      break;}
  } // switch
  // now bump the number, up or down, if need be
  if (bump==0) return; // no action required
  // Simply use decUnitAddSub unless bumping up and the number is
  // all nines.  In this special case set to 100... explicitly
  // and adjust the exponent by one (as otherwise could overflow
  // the array)
  // Similarly handle all-nines result if bumping down.
  if (bump>0) {
    Unit *up;              // work
    uInt count=dn->digits; // digits to be checked
    for (up=dn->lsu; ; up++) {
      if (count<=DECDPUN) {
        // this is the last Unit (the msu)
        if (*up!=powers[count]-1) break; // not still 9s
        // here if it, too, is all nines
        *up=(Unit)powers[count-1];              // here 999 -> 100 etc.
        for (up=up-1; up>=dn->lsu; up--) *up=0; // others all to 0
        dn->exponent++; // and bump exponent
        // [which, very rarely, could cause Overflow...]
        if ((dn->exponent+dn->digits)>set->emax+1) {
          decSetOverflow(dn, set, status);
        }
        return; // done
      }
      // a full unit to check, with more to come
      if (*up!=DECDPUNMAX) break; // not still 9s
      count-=DECDPUN;
    } // up
  } // bump>0
  else { // -1
    // here checking for a pre-bump of 1000... (leading 1, all
    // other digits zero)
    Unit *up, *sup;        // work
    uInt count=dn->digits; // digits to be checked
    for (up=dn->lsu; ; up++) {
      if (count<=DECDPUN) {
        // this is the last Unit (the msu)
        if (*up!=powers[count-1]) break; // not 100..
        // here if have the 1000... case
        sup=up;                    // save msu pointer
        *up=(Unit)powers[count]-1; // here 100 in msu -> 999
        // others all to all-nines, too
        for (up=up-1; up>=dn->lsu; up--) *up=(Unit)powers[DECDPUN]-1;
        dn->exponent--; // and bump exponent
        // iff the number was at the subnormal boundary (exponent=etiny)
        // then the exponent is now out of range, so it will in fact get
        // clamped to etiny and the final 9 dropped.
        // printf(">> emin=%d exp=%d sdig=%d\n", set->emin,
        //        dn->exponent, set->digits);
        if (dn->exponent+1==set->emin-set->digits+1) {
          if (count==1 && dn->digits==1) *sup=0; // here 9 -> 0[.9]
          else {
            *sup=(Unit)powers[count-1]-1; // here 999.. in msu -> 99..
            dn->digits--;
          }
          dn->exponent++;
          *status|=DEC_Underflow | DEC_Subnormal | DEC_Inexact | DEC_Rounded;
        }
        return; // done
      }
      // a full unit to check, with more to come
      if (*up!=0) break; // not still 0s
      count-=DECDPUN;
    } // up
  } // bump<0
  // Actual bump needed.  Do it.
  decUnitAddSub(dn->lsu, D2U(dn->digits), uarrone, 1, 0, dn->lsu, bump);
} // decApplyRound

#if DECSUBSET
/* ------------------------------------------------------------------ */
/* decFinish -- finish processing a number                            */
/*                                                                    */
/*   dn is the number                                                 */
/*   set is the context                                               */
/*   residue is the rounding accumulator (as in decApplyRound)        */
/*   status is the accumulator                                        */
/*                                                                    */
/* This finishes off the current number by:                           */
/*    1. If not extended:                                             */
/*       a. Converting a zero result to clean '0'                     */
/*       b. Reducing positive exponents to 0, if would fit in digits  */
/*    2. Checking for overflow and subnormals (always)                */
/* Note this is just Finalize when no subset arithmetic.              */
/* All fields are updated as required.
*/ /* ------------------------------------------------------------------ */ static void decFinish(decNumber *dn, decContext *set, Int *residue, uInt *status) { if (!set->extended) { if ISZERO(dn) { // value is zero dn->exponent=0; // clean exponent .. dn->bits=0; // .. and sign return; // no error possible } if (dn->exponent>=0) { // non-negative exponent // >0; reduce to integer if possible if (set->digits >= (dn->exponent+dn->digits)) { dn->digits=decShiftToMost(dn->lsu, dn->digits, dn->exponent); dn->exponent=0; } } } // !extended decFinalize(dn, set, residue, status); } // decFinish #endif /* ------------------------------------------------------------------ */ /* decFinalize -- final check, clamp, and round of a number */ /* */ /* dn is the number */ /* set is the context */ /* residue is the rounding accumulator (as in decApplyRound) */ /* status is the status accumulator */ /* */ /* This finishes off the current number by checking for subnormal */ /* results, applying any pending rounding, checking for overflow, */ /* and applying any clamping. */ /* Underflow and overflow conditions are raised as appropriate. */ /* All fields are updated as required. */ /* ------------------------------------------------------------------ */ static void decFinalize(decNumber *dn, decContext *set, Int *residue, uInt *status) { Int shift; // shift needed if clamping Int tinyexp=set->emin-dn->digits+1; // precalculate subnormal boundary // Must be careful, here, when checking the exponent as the // adjusted exponent could overflow 31 bits [because it may already // be up to twice the expected]. // First test for subnormal. This must be done before any final // round as the result could be rounded to Nmin or 0. 
if (dn->exponent<=tinyexp) { // prefilter Int comp; decNumber nmin; // A very nasty case here is dn == Nmin and residue<0 if (dn->exponentemin; comp=decCompare(dn, &nmin, 1); // (signless compare) if (comp==BADINT) { // oops *status|=DEC_Insufficient_storage; // abandon... return; } if (*residue<0 && comp==0) { // neg residue and dn==Nmin decApplyRound(dn, set, *residue, status); // might force down decSetSubnormal(dn, set, residue, status); return; } } // now apply any pending round (this could raise overflow). if (*residue!=0) decApplyRound(dn, set, *residue, status); // Check for overflow [redundant in the 'rare' case] or clamp if (dn->exponent<=set->emax-set->digits+1) return; // neither needed // here when might have an overflow or clamp to do if (dn->exponent>set->emax-dn->digits+1) { // too big decSetOverflow(dn, set, status); return; } // here when the result is normal but in clamp range if (!set->clamp) return; // here when need to apply the IEEE exponent clamp (fold-down) shift=dn->exponent-(set->emax-set->digits+1); // shift coefficient (if non-zero) if (!ISZERO(dn)) { dn->digits=decShiftToMost(dn->lsu, dn->digits, shift); } dn->exponent-=shift; // adjust the exponent to match *status|=DEC_Clamped; // and record the dirty deed return; } // decFinalize /* ------------------------------------------------------------------ */ /* decSetOverflow -- set number to proper overflow value */ /* */ /* dn is the number (used for sign [only] and result) */ /* set is the context [used for the rounding mode, etc.] */ /* status contains the current status to be updated */ /* */ /* This sets the sign of a number and sets its value to either */ /* Infinity or the maximum finite value, depending on the sign of */ /* dn and the rounding mode, following IEEE 754 rules. 
*/
/* ------------------------------------------------------------------ */
static void decSetOverflow(decNumber *dn, decContext *set, uInt *status) {
  Flag needmax=0;             // result is maximum finite value
  uByte sign=dn->bits&DECNEG; // clean and save sign bit
  if (ISZERO(dn)) { // zero does not overflow magnitude
    Int emax=set->emax;                  // limit value
    if (set->clamp) emax-=set->digits-1; // lower if clamping
    if (dn->exponent>emax) {             // clamp required
      dn->exponent=emax;
      *status|=DEC_Clamped;
    }
    return;
  }
  decNumberZero(dn);
  switch (set->round) {
    case DEC_ROUND_DOWN: {
      needmax=1; // never Infinity
      break;} // r-d
    case DEC_ROUND_05UP: {
      needmax=1; // never Infinity
      break;} // r-05
    case DEC_ROUND_CEILING: {
      // rounding toward +Infinity: a negative overflow stops at -Nmax
      if (sign) needmax=1; // Infinity if non-negative
      break;} // r-c
    case DEC_ROUND_FLOOR: {
      // rounding toward -Infinity: a positive overflow stops at +Nmax
      if (!sign) needmax=1; // Infinity if negative
      break;} // r-f
    default: break; // Infinity in all other cases
  }
  if (needmax) {
    decSetMaxValue(dn, set);
    dn->bits=sign; // set sign
  }
  else dn->bits=sign|DECINF; // Value is +/-Infinity
  *status|=DEC_Overflow | DEC_Inexact | DEC_Rounded;
} // decSetOverflow

/* ------------------------------------------------------------------ */
/* decSetMaxValue -- set number to +Nmax (maximum normal value)       */
/*                                                                    */
/*   dn is the number to set                                          */
/*   set is the context [used for digits and emax]                    */
/*                                                                    */
/* This sets the number to the maximum positive value.
*/ /* ------------------------------------------------------------------ */ static void decSetMaxValue(decNumber *dn, decContext *set) { Unit *up; // work Int count=set->digits; // nines to add dn->digits=count; // fill in all nines to set maximum value for (up=dn->lsu; ; up++) { if (count>DECDPUN) *up=DECDPUNMAX; // unit full o'nines else { // this is the msu *up=(Unit)(powers[count]-1); break; } count-=DECDPUN; // filled those digits } // up dn->bits=0; // + sign dn->exponent=set->emax-set->digits+1; } // decSetMaxValue /* ------------------------------------------------------------------ */ /* decSetSubnormal -- process value whose exponent is extended) { decNumberZero(dn); // always full overflow *status|=DEC_Underflow | DEC_Subnormal | DEC_Inexact | DEC_Rounded; return; } #endif // Full arithmetic -- allow subnormals, rounded to minimum exponent // (Etiny) if needed etiny=set->emin-(set->digits-1); // smallest allowed exponent if ISZERO(dn) { // value is zero // residue can never be non-zero here #if DECCHECK if (*residue!=0) { printf("++ Subnormal 0 residue %ld\n", (LI)*residue); *status|=DEC_Invalid_operation; } #endif if (dn->exponentexponent=etiny; *status|=DEC_Clamped; } return; } *status|=DEC_Subnormal; // have a non-zero subnormal adjust=etiny-dn->exponent; // calculate digits to remove if (adjust<=0) { // not out of range; unrounded // residue can never be non-zero here, except in the Nmin-residue // case (which is a subnormal result), so can take fast-path here // it may already be inexact (from setting the coefficient) if (*status&DEC_Inexact) *status|=DEC_Underflow; return; } // adjust>0, so need to rescale the result so exponent becomes Etiny // [this code is similar to that in rescale] workset=*set; // clone rounding, etc. 
workset.digits=dn->digits-adjust; // set requested length workset.emin-=adjust; // and adjust emin to match // [note that the latter can be <1, here, similar to Rescale case] decSetCoeff(dn, &workset, dn->lsu, dn->digits, residue, status); decApplyRound(dn, &workset, *residue, status); // Use 754 default rule: Underflow is set iff Inexact // [independent of whether trapped] if (*status&DEC_Inexact) *status|=DEC_Underflow; // if rounded up a 999s case, exponent will be off by one; adjust // back if so [it will fit, because it was shortened earlier] if (dn->exponent>etiny) { dn->digits=decShiftToMost(dn->lsu, dn->digits, 1); dn->exponent--; // (re)adjust the exponent. } // if rounded to zero, it is by definition clamped... if (ISZERO(dn)) *status|=DEC_Clamped; } // decSetSubnormal /* ------------------------------------------------------------------ */ /* decCheckMath - check entry conditions for a math function */ /* */ /* This checks the context and the operand */ /* */ /* rhs is the operand to check */ /* set is the context to check */ /* status is unchanged if both are good */ /* */ /* returns non-zero if status is changed, 0 otherwise */ /* */ /* Restrictions enforced: */ /* */ /* digits, emax, and -emin in the context must be less than */ /* DEC_MAX_MATH (999999), and A must be within these bounds if */ /* non-zero. Invalid_operation is set in the status if a */ /* restriction is violated. 
*/ /* ------------------------------------------------------------------ */ static uInt decCheckMath(const decNumber *rhs, decContext *set, uInt *status) { uInt save=*status; // record if (set->digits>DEC_MAX_MATH || set->emax>DEC_MAX_MATH || -set->emin>DEC_MAX_MATH) *status|=DEC_Invalid_context; else if ((rhs->digits>DEC_MAX_MATH || rhs->exponent+rhs->digits>DEC_MAX_MATH+1 || rhs->exponent+rhs->digits<2*(1-DEC_MAX_MATH)) && !ISZERO(rhs)) *status|=DEC_Invalid_operation; return (*status!=save); } // decCheckMath /* ------------------------------------------------------------------ */ /* decGetInt -- get integer from a number */ /* */ /* dn is the number [which will not be altered] */ /* */ /* returns one of: */ /* BADINT if there is a non-zero fraction */ /* the converted integer */ /* BIGEVEN if the integer is even and magnitude > 2*10**9 */ /* BIGODD if the integer is odd and magnitude > 2*10**9 */ /* */ /* This checks and gets a whole number from the input decNumber. */ /* The sign can be determined from dn by the caller when BIGEVEN or */ /* BIGODD is returned. 
*/ /* ------------------------------------------------------------------ */ static Int decGetInt(const decNumber *dn) { Int theInt; // result accumulator const Unit *up; // work Int got; // digits (real or not) processed Int ilength=dn->digits+dn->exponent; // integral length Flag neg=decNumberIsNegative(dn); // 1 if -ve // The number must be an integer that fits in 10 digits // Assert, here, that 10 is enough for any rescale Etiny #if DEC_MAX_EMAX > 999999999 #error GetInt may need updating [for Emax] #endif #if DEC_MIN_EMIN < -999999999 #error GetInt may need updating [for Emin] #endif if (ISZERO(dn)) return 0; // zeros are OK, with any exponent up=dn->lsu; // ready for lsu theInt=0; // ready to accumulate if (dn->exponent>=0) { // relatively easy // no fractional part [usual]; allow for positive exponent got=dn->exponent; } else { // -ve exponent; some fractional part to check and discard Int count=-dn->exponent; // digits to discard // spin up whole units until reach the Unit with the unit digit for (; count>=DECDPUN; up++) { if (*up!=0) return BADINT; // non-zero Unit to discard count-=DECDPUN; } if (count==0) got=0; // [a multiple of DECDPUN] else { // [not multiple of DECDPUN] Int rem; // work // slice off fraction digits and check for non-zero #if DECDPUN<=4 theInt=QUOT10(*up, count); rem=*up-theInt*powers[count]; #else rem=*up%powers[count]; // slice off discards theInt=*up/powers[count]; #endif if (rem!=0) return BADINT; // non-zero fraction // it looks good got=DECDPUN-count; // number of digits so far up++; // ready for next } } // now it's known there's no fractional part // tricky code now, to accumulate up to 9.3 digits if (got==0) {theInt=*up; got+=DECDPUN; up++;} // ensure lsu is there if (ilength<11) { Int save=theInt; // collect any remaining unit(s) for (; got1999999997) ilength=11; else if (!neg && theInt>999999999) ilength=11; if (ilength==11) theInt=save; // restore correct low bit } } if (ilength>10) { // too big if (theInt&1) return BIGODD; 
// bottom bit 1 return BIGEVEN; // bottom bit 0 } if (neg) theInt=-theInt; // apply sign return theInt; } // decGetInt /* ------------------------------------------------------------------ */ /* decDecap -- decapitate the coefficient of a number */ /* */ /* dn is the number to be decapitated */ /* drop is the number of digits to be removed from the left of dn; */ /* this must be <= dn->digits (if equal, the coefficient is */ /* set to 0) */ /* */ /* Returns dn; dn->digits will be <= the initial digits less drop */ /* (after removing drop digits there may be leading zero digits */ /* which will also be removed). Only dn->lsu and dn->digits change. */ /* ------------------------------------------------------------------ */ static decNumber *decDecap(decNumber *dn, Int drop) { Unit *msu; // -> target cut point Int cut; // work if (drop>=dn->digits) { // losing the whole thing #if DECCHECK if (drop>dn->digits) printf("decDecap called with drop>digits [%ld>%ld]\n", (LI)drop, (LI)dn->digits); #endif dn->lsu[0]=0; dn->digits=1; return dn; } msu=dn->lsu+D2U(dn->digits-drop)-1; // -> likely msu cut=MSUDIGITS(dn->digits-drop); // digits to be in use in msu if (cut!=DECDPUN) *msu%=powers[cut]; // clear left digits // that may have left leading zero digits, so do a proper count... dn->digits=decGetDigits(dn->lsu, msu-dn->lsu+1); return dn; } // decDecap /* ------------------------------------------------------------------ */ /* decBiStr -- compare string with pairwise options */ /* */ /* targ is the string to compare */ /* str1 is one of the strings to compare against (length may be 0) */ /* str2 is the other; it must be the same length as str1 */ /* */ /* returns 1 if strings compare equal, (that is, it is the same */ /* length as str1 and str2, and each character of targ is in either */ /* str1 or str2 in the corresponding position), or 0 otherwise */ /* */ /* This is used for generic caseless compare, including the awkward */ /* case of the Turkish dotted and dotless Is. 
Use as (for example): */ /* if (decBiStr(test, "mike", "MIKE")) ... */ /* ------------------------------------------------------------------ */ static Flag decBiStr(const char *targ, const char *str1, const char *str2) { for (;;targ++, str1++, str2++) { if (*targ!=*str1 && *targ!=*str2) return 0; // *targ has a match in one (or both, if terminator) if (*targ=='\0') break; } // forever return 1; } // decBiStr /* ------------------------------------------------------------------ */ /* decNaNs -- handle NaN operand or operands */ /* */ /* res is the result number */ /* lhs is the first operand */ /* rhs is the second operand, or NULL if none */ /* context is used to limit payload length */ /* status contains the current status */ /* returns res in case convenient */ /* */ /* Called when one or both operands is a NaN, and propagates the */ /* appropriate result to res. When an sNaN is found, it is changed */ /* to a qNaN and Invalid operation is set. */ /* ------------------------------------------------------------------ */ static decNumber * decNaNs(decNumber *res, const decNumber *lhs, const decNumber *rhs, decContext *set, uInt *status) { // This decision tree ends up with LHS being the source pointer, // and status updated if need be if (lhs->bits & DECSNAN) *status|=DEC_Invalid_operation | DEC_sNaN; else if (rhs==NULL); else if (rhs->bits & DECSNAN) { lhs=rhs; *status|=DEC_Invalid_operation | DEC_sNaN; } else if (lhs->bits & DECNAN); else lhs=rhs; // propagate the payload if (lhs->digits<=set->digits) decNumberCopy(res, lhs); // easy else { // too long const Unit *ul; Unit *ur, *uresp1; // copy safe number of units, then decapitate res->bits=lhs->bits; // need sign etc. uresp1=res->lsu+D2U(set->digits); for (ur=res->lsu, ul=lhs->lsu; urdigits=D2U(set->digits)*DECDPUN; // maybe still too long if (res->digits>set->digits) decDecap(res, res->digits-set->digits); } res->bits&=~DECSNAN; // convert any sNaN to NaN, while res->bits|=DECNAN; // .. 
preserving sign res->exponent=0; // clean exponent // [coefficient was copied/decapitated] return res; } // decNaNs /* ------------------------------------------------------------------ */ /* decStatus -- apply non-zero status */ /* */ /* dn is the number to set if error */ /* status contains the current status (not yet in context) */ /* set is the context */ /* */ /* If the status is an error status, the number is set to a NaN, */ /* unless the error was an overflow, divide-by-zero, or underflow, */ /* in which case the number will have already been set. */ /* */ /* The context status is then updated with the new status. Note that */ /* this may raise a signal, so control may never return from this */ /* routine (hence resources must be recovered before it is called). */ /* ------------------------------------------------------------------ */ static void decStatus(decNumber *dn, uInt status, decContext *set) { if (status & DEC_NaNs) { // error status -> NaN // if cause was an sNaN, clear and propagate [NaN is already set up] if (status & DEC_sNaN) status&=~DEC_sNaN; else { decNumberZero(dn); // other error: clean throughout dn->bits=DECNAN; // and make a quiet NaN } } decContextSetStatus(set, status); // [may not return] return; } // decStatus /* ------------------------------------------------------------------ */ /* decGetDigits -- count digits in a Units array */ /* */ /* uar is the Unit array holding the number (this is often an */ /* accumulator of some sort) */ /* len is the length of the array in units [>=1] */ /* */ /* returns the number of (significant) digits in the array */ /* */ /* All leading zeros are excluded, except the last if the array has */ /* only zero Units. */ /* ------------------------------------------------------------------ */ // This may be called twice during some operations. 
static Int decGetDigits(Unit *uar, Int len) {
  Unit *up=uar+(len-1);          // -> msu
  Int digits=(len-1)*DECDPUN+1;  // possible digits excluding msu
  #if DECDPUN>4
  uInt const *pow; // work
  #endif
  // (at least 1 in final msu)
  #if DECCHECK
  if (len<1) printf("decGetDigits called with len<1 [%ld]\n", (LI)len);
  #endif
  for (; up>=uar; up--) {
    if (*up==0) { // unit is all 0s
      if (digits==1) break; // a zero has one digit
      digits-=DECDPUN;      // adjust for 0 unit
      continue;}
    // found the first (most significant) non-zero Unit
    #if DECDPUN>1 // not done yet
    if (*up<10) break; // is 1-9
    digits++;
    #if DECDPUN>2 // not done yet
    if (*up<100) break; // is 10-99
    digits++;
    #if DECDPUN>3 // not done yet
    if (*up<1000) break; // is 100-999
    digits++;
    #if DECDPUN>4 // count the rest ...
    for (pow=&powers[4]; *up>=*pow; pow++) digits++;
    #endif
    #endif
    #endif
    #endif
    break;
  } // up
  return digits;
} // decGetDigits

#if DECTRACE | DECCHECK
/* ------------------------------------------------------------------ */
/* decNumberShow -- display a number [debug aid]                      */
/*   dn is the number to show                                         */
/*                                                                    */
/* Shows: sign, exponent, coefficient (msu first), digits             */
/*    or: sign, special-value                                         */
/* ------------------------------------------------------------------ */
// this is public so other modules can use it
void decNumberShow(const decNumber *dn) {
  const Unit *up; // work
  uInt u, d;      // ..
  Int cut;        // ..
  char isign='+'; // main sign
  if (dn==NULL) {
    printf("NULL\n");
    return;}
  if (decNumberIsNegative(dn)) isign='-';
  printf(" >> %c ", isign);
  if (dn->bits&DECSPECIAL) { // Is a special value
    if (decNumberIsInfinite(dn)) printf("Infinity");
    else { // a NaN
      if (dn->bits&DECSNAN) printf("sNaN"); // signalling NaN
      else printf("NaN");
    }
    // if coefficient and exponent are 0, no more to do
    if (dn->exponent==0 && dn->digits==1 && *dn->lsu==0) {
      printf("\n");
      return;}
    // drop through to report other information
    printf(" ");
  }
  // now carefully display the coefficient
  up=dn->lsu+D2U(dn->digits)-1; // msu
  printf("%ld", (LI)*up);
  for (up=up-1; up>=dn->lsu; up--) {
    u=*up;
    printf(":");
    for (cut=DECDPUN-1; cut>=0; cut--) {
      d=u/powers[cut];
      u-=d*powers[cut];
      printf("%ld", (LI)d);
    } // cut
  } // up
  if (dn->exponent!=0) {
    char esign='+';
    if (dn->exponent<0) esign='-';
    printf(" E%c%ld", esign, (LI)abs(dn->exponent));
  }
  printf(" [%ld]\n", (LI)dn->digits);
} // decNumberShow
#endif

#if DECTRACE || DECCHECK
/* ------------------------------------------------------------------ */
/* decDumpAr -- display a unit array [debug/check aid]                */
/*   name is a single-character tag name                              */
/*   ar   is the array to display                                     */
/*   len  is the length of the array in Units                         */
/* ------------------------------------------------------------------ */
static void decDumpAr(char name, const Unit *ar, Int len) {
  Int i;
  const char *spec; // printf format sized to DECDPUN digits per unit
  #if DECDPUN==9
    spec="%09d ";
  #elif DECDPUN==8
    spec="%08d ";
  #elif DECDPUN==7
    spec="%07d ";
  #elif DECDPUN==6
    spec="%06d ";
  #elif DECDPUN==5
    spec="%05d ";
  #elif DECDPUN==4
    spec="%04d ";
  #elif DECDPUN==3
    spec="%03d ";
  #elif DECDPUN==2
    spec="%02d ";
  #else
    spec="%d ";
  #endif
  printf(" :%c: ", name);
  for (i=len-1; i>=0; i--) {
    if (i==len-1) printf("%ld ", (LI)ar[i]);
    else printf(spec, ar[i]);
  }
  printf("\n");
  return;}
#endif

#if DECCHECK
/* ------------------------------------------------------------------ */
/* decCheckOperands -- check operand(s) to a routine                  */
/*   res is the result structure
(not checked; it will be set to */ /* quiet NaN if error found (and it is not NULL)) */ /* lhs is the first operand (may be DECUNRESU) */ /* rhs is the second (may be DECUNUSED) */ /* set is the context (may be DECUNCONT) */ /* returns 0 if both operands, and the context are clean, or 1 */ /* otherwise (in which case the context will show an error, */ /* unless NULL). Note that res is not cleaned; caller should */ /* handle this so res=NULL case is safe. */ /* The caller is expected to abandon immediately if 1 is returned. */ /* ------------------------------------------------------------------ */ static Flag decCheckOperands(decNumber *res, const decNumber *lhs, const decNumber *rhs, decContext *set) { Flag bad=0; if (set==NULL) { // oops; hopeless #if DECTRACE || DECVERB printf("Reference to context is NULL.\n"); #endif bad=1; return 1;} else if (set!=DECUNCONT && (set->digits<1 || set->round>=DEC_ROUND_MAX)) { bad=1; #if DECTRACE || DECVERB printf("Bad context [digits=%ld round=%ld].\n", (LI)set->digits, (LI)set->round); #endif } else { if (res==NULL) { bad=1; #if DECTRACE // this one not DECVERB as standard tests include NULL printf("Reference to result is NULL.\n"); #endif } if (!bad && lhs!=DECUNUSED) bad=(decCheckNumber(lhs)); if (!bad && rhs!=DECUNUSED) bad=(decCheckNumber(rhs)); } if (bad) { if (set!=DECUNCONT) decContextSetStatus(set, DEC_Invalid_operation); if (res!=DECUNRESU && res!=NULL) { decNumberZero(res); res->bits=DECNAN; // qNaN } } return bad; } // decCheckOperands /* ------------------------------------------------------------------ */ /* decCheckNumber -- check a number */ /* dn is the number to check */ /* returns 0 if the number is clean, or 1 otherwise */ /* */ /* The number is considered valid if it could be a result from some */ /* operation in some valid context. */ /* ------------------------------------------------------------------ */ static Flag decCheckNumber(const decNumber *dn) { const Unit *up; // work uInt maxuint; // .. 
Int ae, d, digits; // .. Int emin, emax; // .. if (dn==NULL) { // hopeless #if DECTRACE // this one not DECVERB as standard tests include NULL printf("Reference to decNumber is NULL.\n"); #endif return 1;} // check special values if (dn->bits & DECSPECIAL) { if (dn->exponent!=0) { #if DECTRACE || DECVERB printf("Exponent %ld (not 0) for a special value [%02x].\n", (LI)dn->exponent, dn->bits); #endif return 1;} // 2003.09.08: NaNs may now have coefficients, so next tests Inf only if (decNumberIsInfinite(dn)) { if (dn->digits!=1) { #if DECTRACE || DECVERB printf("Digits %ld (not 1) for an infinity.\n", (LI)dn->digits); #endif return 1;} if (*dn->lsu!=0) { #if DECTRACE || DECVERB printf("LSU %ld (not 0) for an infinity.\n", (LI)*dn->lsu); #endif decDumpAr('I', dn->lsu, D2U(dn->digits)); return 1;} } // Inf // 2002.12.26: negative NaNs can now appear through proposed IEEE // concrete formats (decimal64, etc.). return 0; } // check the coefficient if (dn->digits<1 || dn->digits>DECNUMMAXP) { #if DECTRACE || DECVERB printf("Digits %ld in number.\n", (LI)dn->digits); #endif return 1;} d=dn->digits; for (up=dn->lsu; d>0; up++) { if (d>DECDPUN) maxuint=DECDPUNMAX; else { // reached the msu maxuint=powers[d]-1; if (dn->digits>1 && *upmaxuint) { #if DECTRACE || DECVERB printf("Bad Unit [%08lx] in %ld-digit number at offset %ld [maxuint %ld].\n", (LI)*up, (LI)dn->digits, (LI)(up-dn->lsu), (LI)maxuint); #endif return 1;} d-=DECDPUN; } // check the exponent. Note that input operands can have exponents // which are out of the set->emin/set->emax and set->digits range // (just as they can have more digits than set->digits). 
ae=dn->exponent+dn->digits-1; // adjusted exponent emax=DECNUMMAXE; emin=DECNUMMINE; digits=DECNUMMAXP; if (ae+emax) { #if DECTRACE || DECVERB printf("Adjusted exponent overflow [%ld].\n", (LI)ae); decNumberShow(dn); #endif return 1;} return 0; // it's OK } // decCheckNumber /* ------------------------------------------------------------------ */ /* decCheckInexact -- check a normal finite inexact result has digits */ /* dn is the number to check */ /* set is the context (for status and precision) */ /* sets Invalid operation, etc., if some digits are missing */ /* [this check is not made for DECSUBSET compilation or when */ /* subnormal is not set] */ /* ------------------------------------------------------------------ */ static void decCheckInexact(const decNumber *dn, decContext *set) { #if !DECSUBSET && DECEXTFLAG if ((set->status & (DEC_Inexact|DEC_Subnormal))==DEC_Inexact && (set->digits!=dn->digits) && !(dn->bits & DECSPECIAL)) { #if DECTRACE || DECVERB printf("Insufficient digits [%ld] on normal Inexact result.\n", (LI)dn->digits); decNumberShow(dn); #endif decContextSetStatus(set, DEC_Invalid_operation); } #else // next is a noop for quiet compiler if (dn!=NULL && dn->digits==0) set->status|=DEC_Invalid_operation; #endif return; } // decCheckInexact #endif #if DECALLOC #undef malloc #undef free /* ------------------------------------------------------------------ */ /* decMalloc -- accountable allocation routine */ /* n is the number of bytes to allocate */ /* */ /* Semantics is the same as the stdlib malloc routine, but bytes */ /* allocated are accounted for globally, and corruption fences are */ /* added before and after the 'actual' storage. 
*/ /* ------------------------------------------------------------------ */ /* This routine allocates storage with an extra twelve bytes; 8 are */ /* at the start and hold: */ /* 0-3 the original length requested */ /* 4-7 buffer corruption detection fence (DECFENCE, x4) */ /* The 4 bytes at the end also hold a corruption fence (DECFENCE, x4) */ /* ------------------------------------------------------------------ */ static void *decMalloc(size_t n) { uInt size=n+12; // true size void *alloc; // -> allocated storage uByte *b, *b0; // work uInt uiwork; // for macros alloc=malloc(size); // -> allocated storage if (alloc==NULL) return NULL; // out of strorage b0=(uByte *)alloc; // as bytes decAllocBytes+=n; // account for storage UBFROMUI(alloc, n); // save n // printf(" alloc ++ dAB: %ld (%ld)\n", (LI)decAllocBytes, (LI)n); for (b=b0+4; b play area } // decMalloc /* ------------------------------------------------------------------ */ /* decFree -- accountable free routine */ /* alloc is the storage to free */ /* */ /* Semantics is the same as the stdlib malloc routine, except that */ /* the global storage accounting is updated and the fences are */ /* checked to ensure that no routine has written 'out of bounds'. */ /* ------------------------------------------------------------------ */ /* This routine first checks that the fences have not been corrupted. */ /* It then frees the storage using the 'truw' storage address (that */ /* is, offset by 8). */ /* ------------------------------------------------------------------ */ static void decFree(void *alloc) { uInt n; // original length uByte *b, *b0; // work uInt uiwork; // for macros if (alloc==NULL) return; // allowed; it's a nop b0=(uByte *)alloc; // as bytes b0-=8; // -> true start of storage n=UBTOUI(b0); // lift length for (b=b0+4; b0 */ /* and <10; 3 or powers of 2 are best]. */ /* DECNUMDIGITS is the default number of digits that can be held in */ /* the structure. 
If undefined, 1 is assumed and it is assumed                          */
/* that the structure will be immediately followed by extra space,    */
/* as required.  DECNUMDIGITS is always >0.                           */
#if !defined(DECNUMDIGITS)
  #define DECNUMDIGITS 1
#endif

/* The size (integer data type) of each unit is determined by the     */
/* number of digits it will hold.                                     */
#if DECDPUN<=2
  #define decNumberUnit uint8_t
#elif DECDPUN<=4
  #define decNumberUnit uint16_t
#else
  #define decNumberUnit uint32_t
#endif

/* The number of units needed is ceil(DECNUMDIGITS/DECDPUN)           */
#define DECNUMUNITS ((DECNUMDIGITS+DECDPUN-1)/DECDPUN)

/* The data structure...                                              */
typedef struct decNumber {
  int32_t digits;   /* Count of digits in the coefficient; >0         */
  int32_t exponent; /* Unadjusted exponent, unbiased, in              */
                    /* range: -1999999997 through 999999999           */
  uint8_t bits;     /* Indicator bits (see above)                     */
  /* Coefficient, from least significant unit                         */
  decNumberUnit lsu[DECNUMUNITS];
} decNumber;

/* Notes:                                                             */
/* 1. If digits is > DECDPUN then there will one or more              */
/*    decNumberUnits immediately following the first element of lsu.  */
/*    These contain the remaining (more significant) digits of the    */
/*    number, and may be in the lsu array, or may be guaranteed by    */
/*    some other mechanism (such as being contained in another        */
/*    structure, or being overlaid on dynamically allocated           */
/*    storage).                                                       */
/*                                                                    */
/*    Each integer of the coefficient (except potentially the last)   */
/*    contains DECDPUN digits (e.g., a value in the range 0 through   */
/*    99999999 if DECDPUN is 8, or 0 through 999 if DECDPUN is 3).    */
/*                                                                    */
/* 2. A decNumber converted to a string may need up to digits+14      */
/*    characters.
The worst cases (non-exponential and exponential */ /* formats) are -0.00000{9...}# and -9.{9...}E+999999999# */ /* (where # is '\0') */ /* ---------------------------------------------------------------- */ /* decNumber public functions and macros */ /* ---------------------------------------------------------------- */ /* Conversions */ decNumber * decNumberFromInt32(decNumber *, int32_t); decNumber * decNumberFromUInt32(decNumber *, uint32_t); decNumber * decNumberFromString(decNumber *, const char *, decContext *); char * decNumberToString(const decNumber *, char *); char * decNumberToEngString(const decNumber *, char *); uint32_t decNumberToUInt32(const decNumber *, decContext *); int32_t decNumberToInt32(const decNumber *, decContext *); uint8_t * decNumberGetBCD(const decNumber *, uint8_t *); decNumber * decNumberSetBCD(decNumber *, const uint8_t *, uint32_t); /* Operators and elementary functions */ decNumber * decNumberAbs(decNumber *, const decNumber *, decContext *); decNumber * decNumberAdd(decNumber *, const decNumber *, const decNumber *, decContext *); decNumber * decNumberAnd(decNumber *, const decNumber *, const decNumber *, decContext *); decNumber * decNumberCompare(decNumber *, const decNumber *, const decNumber *, decContext *); decNumber * decNumberCompareSignal(decNumber *, const decNumber *, const decNumber *, decContext *); decNumber * decNumberCompareTotal(decNumber *, const decNumber *, const decNumber *, decContext *); decNumber * decNumberCompareTotalMag(decNumber *, const decNumber *, const decNumber *, decContext *); decNumber * decNumberDivide(decNumber *, const decNumber *, const decNumber *, decContext *); decNumber * decNumberDivideInteger(decNumber *, const decNumber *, const decNumber *, decContext *); decNumber * decNumberExp(decNumber *, const decNumber *, decContext *); decNumber * decNumberFMA(decNumber *, const decNumber *, const decNumber *, const decNumber *, decContext *); decNumber * decNumberInvert(decNumber *, const 
decNumber *, decContext *); decNumber * decNumberLn(decNumber *, const decNumber *, decContext *); decNumber * decNumberLogB(decNumber *, const decNumber *, decContext *); decNumber * decNumberLog10(decNumber *, const decNumber *, decContext *); decNumber * decNumberMax(decNumber *, const decNumber *, const decNumber *, decContext *); decNumber * decNumberMaxMag(decNumber *, const decNumber *, const decNumber *, decContext *); decNumber * decNumberMin(decNumber *, const decNumber *, const decNumber *, decContext *); decNumber * decNumberMinMag(decNumber *, const decNumber *, const decNumber *, decContext *); decNumber * decNumberMinus(decNumber *, const decNumber *, decContext *); decNumber * decNumberMultiply(decNumber *, const decNumber *, const decNumber *, decContext *); decNumber * decNumberNormalize(decNumber *, const decNumber *, decContext *); decNumber * decNumberOr(decNumber *, const decNumber *, const decNumber *, decContext *); decNumber * decNumberPlus(decNumber *, const decNumber *, decContext *); decNumber * decNumberPower(decNumber *, const decNumber *, const decNumber *, decContext *); decNumber * decNumberQuantize(decNumber *, const decNumber *, const decNumber *, decContext *); decNumber * decNumberReduce(decNumber *, const decNumber *, decContext *); decNumber * decNumberRemainder(decNumber *, const decNumber *, const decNumber *, decContext *); decNumber * decNumberRemainderNear(decNumber *, const decNumber *, const decNumber *, decContext *); decNumber * decNumberRescale(decNumber *, const decNumber *, const decNumber *, decContext *); decNumber * decNumberRotate(decNumber *, const decNumber *, const decNumber *, decContext *); decNumber * decNumberSameQuantum(decNumber *, const decNumber *, const decNumber *); decNumber * decNumberScaleB(decNumber *, const decNumber *, const decNumber *, decContext *); decNumber * decNumberShift(decNumber *, const decNumber *, const decNumber *, decContext *); decNumber * decNumberSquareRoot(decNumber *, 
const decNumber *, decContext *); decNumber * decNumberSubtract(decNumber *, const decNumber *, const decNumber *, decContext *); decNumber * decNumberToIntegralExact(decNumber *, const decNumber *, decContext *); decNumber * decNumberToIntegralValue(decNumber *, const decNumber *, decContext *); decNumber * decNumberXor(decNumber *, const decNumber *, const decNumber *, decContext *); /* Utilities */ enum decClass decNumberClass(const decNumber *, decContext *); const char * decNumberClassToString(enum decClass); decNumber * decNumberCopy(decNumber *, const decNumber *); decNumber * decNumberCopyAbs(decNumber *, const decNumber *); decNumber * decNumberCopyNegate(decNumber *, const decNumber *); decNumber * decNumberCopySign(decNumber *, const decNumber *, const decNumber *); decNumber * decNumberNextMinus(decNumber *, const decNumber *, decContext *); decNumber * decNumberNextPlus(decNumber *, const decNumber *, decContext *); decNumber * decNumberNextToward(decNumber *, const decNumber *, const decNumber *, decContext *); decNumber * decNumberTrim(decNumber *); const char * decNumberVersion(void); decNumber * decNumberZero(decNumber *); /* Functions for testing decNumbers (normality depends on context) */ int32_t decNumberIsNormal(const decNumber *, decContext *); int32_t decNumberIsSubnormal(const decNumber *, decContext *); /* Macros for testing decNumber *dn */ #define decNumberIsCanonical(dn) (1) /* All decNumbers are saintly */ #define decNumberIsFinite(dn) (((dn)->bits&DECSPECIAL)==0) #define decNumberIsInfinite(dn) (((dn)->bits&DECINF)!=0) #define decNumberIsNaN(dn) (((dn)->bits&(DECNAN|DECSNAN))!=0) #define decNumberIsNegative(dn) (((dn)->bits&DECNEG)!=0) #define decNumberIsQNaN(dn) (((dn)->bits&(DECNAN))!=0) #define decNumberIsSNaN(dn) (((dn)->bits&(DECSNAN))!=0) #define decNumberIsSpecial(dn) (((dn)->bits&DECSPECIAL)!=0) #define decNumberIsZero(dn) (*(dn)->lsu==0 \ && (dn)->digits==1 \ && (((dn)->bits&DECSPECIAL)==0)) #define decNumberRadix(dn) (10) 
#endif luametatex-2.10.08/source/libraries/decnumber/decNumberLocal.h000066400000000000000000001142051442250314700243100ustar00rootroot00000000000000/* ------------------------------------------------------------------ */ /* decNumber package local type, tuning, and macro definitions */ /* ------------------------------------------------------------------ */ /* Copyright (c) IBM Corporation, 2000, 2010. All rights reserved. */ /* */ /* This software is made available under the terms of the */ /* ICU License -- ICU 1.8.1 and later. */ /* */ /* The description and User's Guide ("The decNumber C Library") for */ /* this software is called decNumber.pdf. This document is */ /* available, together with arithmetic and format specifications, */ /* testcases, and Web links, on the General Decimal Arithmetic page. */ /* */ /* Please send comments, suggestions, and corrections to the author: */ /* mfc@uk.ibm.com */ /* Mike Cowlishaw, IBM Fellow */ /* IBM UK, PO Box 31, Birmingham Road, Warwick CV34 5JL, UK */ /* ------------------------------------------------------------------ */ /* This header file is included by all modules in the decNumber */ /* library, and contains local type definitions, tuning parameters, */ /* etc. It should not need to be used by application programs. */ /* decNumber.h or one of decDouble (etc.) must be included first. */ /* ------------------------------------------------------------------ */ #if !defined(DECNUMBERLOC) #define DECNUMBERLOC #define DECVERSION "decNumber 3.68" /* Package Version [16 max.] 
*/ #define DECNLAUTHOR "Mike Cowlishaw" /* Who to blame */ #include /* for abs */ #include /* for memset, strcpy */ /* Conditional code flag -- set this to match hardware platform */ #if !defined(DECLITEND) #define DECLITEND 1 /* 1=little-endian, 0=big-endian */ #endif /* Conditional code flag -- set this to 1 for best performance */ #if !defined(DECUSE64) #define DECUSE64 1 /* 1=use int64s, 0=int32 & smaller only */ #endif /* Conditional code flag -- set this to 0 to exclude printf calls */ #if !defined(DECPRINT) #define DECPRINT 1 /* 1=allow printf calls; 0=no printf */ #endif /* Conditional check flags -- set these to 0 for best performance */ #if !defined(DECCHECK) #define DECCHECK 0 /* 1 to enable robust checking */ #endif #if !defined(DECALLOC) #define DECALLOC 0 /* 1 to enable memory accounting */ #endif #if !defined(DECTRACE) #define DECTRACE 0 /* 1 to trace certain internals, etc. */ #endif /* Tuning parameter for decNumber (arbitrary precision) module */ #if !defined(DECBUFFER) #define DECBUFFER 36 /* Size basis for local buffers. This */ /* should be a common maximum precision */ /* rounded up to a multiple of 4; must */ /* be zero or positive. */ #endif /* ---------------------------------------------------------------- */ /* Check parameter dependencies */ /* ---------------------------------------------------------------- */ #if DECCHECK & !DECPRINT #error DECCHECK needs DECPRINT to be useful #endif #if DECALLOC & !DECPRINT #error DECALLOC needs DECPRINT to be useful #endif #if DECTRACE & !DECPRINT #error DECTRACE needs DECPRINT to be useful #endif /* ---------------------------------------------------------------- */ /* Definitions for all modules (general-purpose) */ /* ---------------------------------------------------------------- */ /* Local names for common types -- for safety, decNumber modules do */ /* not use int or long directly. 
*/ #define Flag uint8_t #define Byte int8_t #define uByte uint8_t #define Short int16_t #define uShort uint16_t #define Int int32_t #define uInt uint32_t #define Unit decNumberUnit #if DECUSE64 #define Long int64_t #define uLong uint64_t #endif /* Development-use definitions */ typedef long int LI; /* for printf arguments only */ #define DECNOINT 0 /* 1 to check no internal use of 'int' */ /* or stdint types */ #if DECNOINT /* if these interfere with your C includes, do not set DECNOINT */ #define int ? /* enable to ensure that plain C 'int' */ #define long ?? /* .. or 'long' types are not used */ #endif /* Shared lookup tables */ extern const uByte DECSTICKYTAB[10]; /* re-round digits if sticky */ extern const uInt DECPOWERS[10]; /* powers of ten table */ /* The following are included from decDPD.h */ extern const uShort DPD2BIN[1024]; /* DPD -> 0-999 */ extern const uShort BIN2DPD[1000]; /* 0-999 -> DPD */ extern const uInt DPD2BINK[1024]; /* DPD -> 0-999000 */ extern const uInt DPD2BINM[1024]; /* DPD -> 0-999000000 */ extern const uByte DPD2BCD8[4096]; /* DPD -> ddd + len */ extern const uByte BIN2BCD8[4000]; /* 0-999 -> ddd + len */ extern const uShort BCD2DPD[2458]; /* 0-0x999 -> DPD (0x999=2457)*/ /* LONGMUL32HI -- set w=(u*v)>>32, where w, u, and v are uInts */ /* (that is, sets w to be the high-order word of the 64-bit result; */ /* the low-order word is simply u*v.) 
*/ /* This version is derived from Knuth via Hacker's Delight; */ /* it seems to optimize better than some others tried */ #define LONGMUL32HI(w, u, v) { \ uInt u0, u1, v0, v1, w0, w1, w2, t; \ u0=u & 0xffff; u1=u>>16; \ v0=v & 0xffff; v1=v>>16; \ w0=u0*v0; \ t=u1*v0 + (w0>>16); \ w1=t & 0xffff; w2=t>>16; \ w1=u0*v1 + w1; \ (w)=u1*v1 + w2 + (w1>>16);} /* ROUNDUP -- round an integer up to a multiple of n */ #define ROUNDUP(i, n) ((((i)+(n)-1)/n)*n) #define ROUNDUP4(i) (((i)+3)&~3) /* special for n=4 */ /* ROUNDDOWN -- round an integer down to a multiple of n */ #define ROUNDDOWN(i, n) (((i)/n)*n) #define ROUNDDOWN4(i) ((i)&~3) /* special for n=4 */ /* References to multi-byte sequences under different sizes; these */ /* require locally declared variables, but do not violate strict */ /* aliasing or alignment (as did the UINTAT simple cast to uInt). */ /* Variables needed are uswork, uiwork, etc. [so do not use at same */ /* level in an expression, e.g., UBTOUI(x)==UBTOUI(y) may fail]. */ /* Return a uInt, etc., from bytes starting at a char* or uByte* */ #define UBTOUS(b) (memcpy((void *)&uswork, b, 2), uswork) #define UBTOUI(b) (memcpy((void *)&uiwork, b, 4), uiwork) /* Store a uInt, etc., into bytes starting at a char* or uByte*. */ /* Returns i, evaluated, for convenience; has to use uiwork because */ /* i may be an expression. 
*/ #define UBFROMUS(b, i) (uswork=(i), memcpy(b, (void *)&uswork, 2), uswork) #define UBFROMUI(b, i) (uiwork=(i), memcpy(b, (void *)&uiwork, 4), uiwork) /* X10 and X100 -- multiply integer i by 10 or 100 */ /* [shifts are usually faster than multiply; could be conditional] */ #define X10(i) (((i)<<1)+((i)<<3)) #define X100(i) (((i)<<2)+((i)<<5)+((i)<<6)) /* MAXI and MINI -- general max & min (not in ANSI) for integers */ #define MAXI(x,y) ((x)<(y)?(y):(x)) #define MINI(x,y) ((x)>(y)?(y):(x)) /* Useful constants */ #define BILLION 1000000000 /* 10**9 */ /* CHARMASK: 0x30303030 for ASCII/UTF8; 0xF0F0F0F0 for EBCDIC */ #define CHARMASK ((((((((uInt)'0')<<8)+'0')<<8)+'0')<<8)+'0') /* ---------------------------------------------------------------- */ /* Definitions for arbitary-precision modules (only valid after */ /* decNumber.h has been included) */ /* ---------------------------------------------------------------- */ /* Limits and constants */ #define DECNUMMAXP 999999999 /* maximum precision code can handle */ #define DECNUMMAXE 999999999 /* maximum adjusted exponent ditto */ #define DECNUMMINE -999999999 /* minimum adjusted exponent ditto */ #if (DECNUMMAXP != DEC_MAX_DIGITS) #error Maximum digits mismatch #endif #if (DECNUMMAXE != DEC_MAX_EMAX) #error Maximum exponent mismatch #endif #if (DECNUMMINE != DEC_MIN_EMIN) #error Minimum exponent mismatch #endif /* Set DECDPUNMAX -- the maximum integer that fits in DECDPUN */ /* digits, and D2UTABLE -- the initializer for the D2U table */ #if DECDPUN==1 #define DECDPUNMAX 9 #define D2UTABLE {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17, \ 18,19,20,21,22,23,24,25,26,27,28,29,30,31,32, \ 33,34,35,36,37,38,39,40,41,42,43,44,45,46,47, \ 48,49} #elif DECDPUN==2 #define DECDPUNMAX 99 #define D2UTABLE {0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10, \ 11,11,12,12,13,13,14,14,15,15,16,16,17,17,18, \ 18,19,19,20,20,21,21,22,22,23,23,24,24,25} #elif DECDPUN==3 #define DECDPUNMAX 999 #define D2UTABLE 
{0,1,1,1,2,2,2,3,3,3,4,4,4,5,5,5,6,6,6,7,7,7, \ 8,8,8,9,9,9,10,10,10,11,11,11,12,12,12,13,13, \ 13,14,14,14,15,15,15,16,16,16,17} #elif DECDPUN==4 #define DECDPUNMAX 9999 #define D2UTABLE {0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,6, \ 6,6,6,7,7,7,7,8,8,8,8,9,9,9,9,10,10,10,10,11, \ 11,11,11,12,12,12,12,13} #elif DECDPUN==5 #define DECDPUNMAX 99999 #define D2UTABLE {0,1,1,1,1,1,2,2,2,2,2,3,3,3,3,3,4,4,4,4,4,5, \ 5,5,5,5,6,6,6,6,6,7,7,7,7,7,8,8,8,8,8,9,9,9, \ 9,9,10,10,10,10} #elif DECDPUN==6 #define DECDPUNMAX 999999 #define D2UTABLE {0,1,1,1,1,1,1,2,2,2,2,2,2,3,3,3,3,3,3,4,4,4, \ 4,4,4,5,5,5,5,5,5,6,6,6,6,6,6,7,7,7,7,7,7,8, \ 8,8,8,8,8,9} #elif DECDPUN==7 #define DECDPUNMAX 9999999 #define D2UTABLE {0,1,1,1,1,1,1,1,2,2,2,2,2,2,2,3,3,3,3,3,3,3, \ 4,4,4,4,4,4,4,5,5,5,5,5,5,5,6,6,6,6,6,6,6,7, \ 7,7,7,7,7,7} #elif DECDPUN==8 #define DECDPUNMAX 99999999 #define D2UTABLE {0,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,3,3,3,3,3, \ 3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,6,6,6, \ 6,6,6,6,6,7} #elif DECDPUN==9 #define DECDPUNMAX 999999999 #define D2UTABLE {0,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,3,3,3, \ 3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5, \ 5,5,6,6,6,6} #elif defined(DECDPUN) #error DECDPUN must be in the range 1-9 #endif /* ----- Shared data (in decNumber.c) ----- */ /* Public lookup table used by the D2U macro (see below) */ #define DECMAXD2U 49 extern const uByte d2utable[DECMAXD2U+1]; /* ----- Macros ----- */ /* ISZERO -- return true if decNumber dn is a zero */ /* [performance-critical in some situations] */ #define ISZERO(dn) decNumberIsZero(dn) /* now just a local name */ /* D2U -- return the number of Units needed to hold d digits */ /* (runtime version, with table lookaside for small d) */ #if DECDPUN==8 #define D2U(d) ((unsigned)((d)<=DECMAXD2U?d2utable[d]:((d)+7)>>3)) #elif DECDPUN==4 #define D2U(d) ((unsigned)((d)<=DECMAXD2U?d2utable[d]:((d)+3)>>2)) #else #define D2U(d) ((d)<=DECMAXD2U?d2utable[d]:((d)+DECDPUN-1)/DECDPUN) #endif /* SD2U -- static D2U macro (for 
compile-time calculation) */ #define SD2U(d) (((d)+DECDPUN-1)/DECDPUN) /* MSUDIGITS -- returns digits in msu, from digits, calculated */ /* using D2U */ #define MSUDIGITS(d) ((d)-(D2U(d)-1)*DECDPUN) /* D2N -- return the number of decNumber structs that would be */ /* needed to contain that number of digits (and the initial */ /* decNumber struct) safely. Note that one Unit is included in the */ /* initial structure. Used for allocating space that is aligned on */ /* a decNumber struct boundary. */ #define D2N(d) \ ((((SD2U(d)-1)*sizeof(Unit))+sizeof(decNumber)*2-1)/sizeof(decNumber)) /* TODIGIT -- macro to remove the leading digit from the unsigned */ /* integer u at column cut (counting from the right, LSD=0) and */ /* place it as an ASCII character into the character pointed to by */ /* c. Note that cut must be <= 9, and the maximum value for u is */ /* 2,000,000,000 (as is needed for negative exponents of */ /* subnormals). The unsigned integer pow is used as a temporary */ /* variable. */ #define TODIGIT(u, cut, c, pow) { \ *(c)='0'; \ pow=DECPOWERS[cut]*2; \ if ((u)>pow) { \ pow*=4; \ if ((u)>=pow) {(u)-=pow; *(c)+=8;} \ pow/=2; \ if ((u)>=pow) {(u)-=pow; *(c)+=4;} \ pow/=2; \ } \ if ((u)>=pow) {(u)-=pow; *(c)+=2;} \ pow/=2; \ if ((u)>=pow) {(u)-=pow; *(c)+=1;} \ } /* ---------------------------------------------------------------- */ /* Definitions for fixed-precision modules (only valid after */ /* decSingle.h, decDouble.h, or decQuad.h has been included) */ /* ---------------------------------------------------------------- */ /* bcdnum -- a structure describing a format-independent finite */ /* number, whose coefficient is a string of bcd8 uBytes */ typedef struct bcdnum { uByte *msd; /* -> most significant digit */ uByte *lsd; /* -> least ditto */ uInt sign; /* 0=positive, DECFLOAT_Sign=negative */ Int exponent; /* Unadjusted signed exponent (q), or */ /* DECFLOAT_NaN etc. 
for a special */ } bcdnum; /* Test if exponent or bcdnum exponent must be a special, etc. */ #define EXPISSPECIAL(exp) ((exp)>=DECFLOAT_MinSp) #define EXPISINF(exp) (exp==DECFLOAT_Inf) #define EXPISNAN(exp) (exp==DECFLOAT_qNaN || exp==DECFLOAT_sNaN) #define NUMISSPECIAL(num) (EXPISSPECIAL((num)->exponent)) /* Refer to a 32-bit word or byte in a decFloat (df) by big-endian */ /* (array) notation (the 0 word or byte contains the sign bit), */ /* automatically adjusting for endianness; similarly address a word */ /* in the next-wider format (decFloatWider, or dfw) */ #define DECWORDS (DECBYTES/4) #define DECWWORDS (DECWBYTES/4) #if DECLITEND #define DFBYTE(df, off) ((df)->bytes[DECBYTES-1-(off)]) #define DFWORD(df, off) ((df)->words[DECWORDS-1-(off)]) #define DFWWORD(dfw, off) ((dfw)->words[DECWWORDS-1-(off)]) #else #define DFBYTE(df, off) ((df)->bytes[off]) #define DFWORD(df, off) ((df)->words[off]) #define DFWWORD(dfw, off) ((dfw)->words[off]) #endif /* Tests for sign or specials, directly on DECFLOATs */ #define DFISSIGNED(df) ((DFWORD(df, 0)&0x80000000)!=0) #define DFISSPECIAL(df) ((DFWORD(df, 0)&0x78000000)==0x78000000) #define DFISINF(df) ((DFWORD(df, 0)&0x7c000000)==0x78000000) #define DFISNAN(df) ((DFWORD(df, 0)&0x7c000000)==0x7c000000) #define DFISQNAN(df) ((DFWORD(df, 0)&0x7e000000)==0x7c000000) #define DFISSNAN(df) ((DFWORD(df, 0)&0x7e000000)==0x7e000000) /* Shared lookup tables */ extern const uInt DECCOMBMSD[64]; /* Combination field -> MSD */ extern const uInt DECCOMBFROM[48]; /* exp+msd -> Combination */ /* Private generic (utility) routine */ #if DECCHECK || DECTRACE extern void decShowNum(const bcdnum *, const char *); #endif /* Format-dependent macros and constants */ #if defined(DECPMAX) /* Useful constants */ #define DECPMAX9 (ROUNDUP(DECPMAX, 9)/9) /* 'Pmax' in 10**9s */ /* Top words for a zero */ #define SINGLEZERO 0x22500000 #define DOUBLEZERO 0x22380000 #define QUADZERO 0x22080000 /* [ZEROWORD is defined to be one of these in the DFISZERO 
macro] */ /* Format-dependent common tests: */ /* DFISZERO -- test for (any) zero */ /* DFISCCZERO -- test for coefficient continuation being zero */ /* DFISCC01 -- test for coefficient contains only 0s and 1s */ /* DFISINT -- test for finite and exponent q=0 */ /* DFISUINT01 -- test for sign=0, finite, exponent q=0, and */ /* MSD=0 or 1 */ /* ZEROWORD is also defined here. */ /* */ /* In DFISZERO the first test checks the least-significant word */ /* (most likely to be non-zero); the penultimate tests MSD and */ /* DPDs in the signword, and the final test excludes specials and */ /* MSD>7. DFISINT similarly has to allow for the two forms of */ /* MSD codes. DFISUINT01 only has to allow for one form of MSD */ /* code. */ #if DECPMAX==7 #define ZEROWORD SINGLEZERO /* [test macros not needed except for Zero] */ #define DFISZERO(df) ((DFWORD(df, 0)&0x1c0fffff)==0 \ && (DFWORD(df, 0)&0x60000000)!=0x60000000) #elif DECPMAX==16 #define ZEROWORD DOUBLEZERO #define DFISZERO(df) ((DFWORD(df, 1)==0 \ && (DFWORD(df, 0)&0x1c03ffff)==0 \ && (DFWORD(df, 0)&0x60000000)!=0x60000000)) #define DFISINT(df) ((DFWORD(df, 0)&0x63fc0000)==0x22380000 \ ||(DFWORD(df, 0)&0x7bfc0000)==0x6a380000) #define DFISUINT01(df) ((DFWORD(df, 0)&0xfbfc0000)==0x22380000) #define DFISCCZERO(df) (DFWORD(df, 1)==0 \ && (DFWORD(df, 0)&0x0003ffff)==0) #define DFISCC01(df) ((DFWORD(df, 0)&~0xfffc9124)==0 \ && (DFWORD(df, 1)&~0x49124491)==0) #elif DECPMAX==34 #define ZEROWORD QUADZERO #define DFISZERO(df) ((DFWORD(df, 3)==0 \ && DFWORD(df, 2)==0 \ && DFWORD(df, 1)==0 \ && (DFWORD(df, 0)&0x1c003fff)==0 \ && (DFWORD(df, 0)&0x60000000)!=0x60000000)) #define DFISINT(df) ((DFWORD(df, 0)&0x63ffc000)==0x22080000 \ ||(DFWORD(df, 0)&0x7bffc000)==0x6a080000) #define DFISUINT01(df) ((DFWORD(df, 0)&0xfbffc000)==0x22080000) #define DFISCCZERO(df) (DFWORD(df, 3)==0 \ && DFWORD(df, 2)==0 \ && DFWORD(df, 1)==0 \ && (DFWORD(df, 0)&0x00003fff)==0) #define DFISCC01(df) ((DFWORD(df, 0)&~0xffffc912)==0 \ && (DFWORD(df, 
1)&~0x44912449)==0 \ && (DFWORD(df, 2)&~0x12449124)==0 \ && (DFWORD(df, 3)&~0x49124491)==0) #endif /* Macros to test if a certain 10 bits of a uInt or pair of uInts */ /* are a canonical declet [higher or lower bits are ignored]. */ /* declet is at offset 0 (from the right) in a uInt: */ #define CANONDPD(dpd) (((dpd)&0x300)==0 || ((dpd)&0x6e)!=0x6e) /* declet is at offset k (a multiple of 2) in a uInt: */ #define CANONDPDOFF(dpd, k) (((dpd)&(0x300<<(k)))==0 \ || ((dpd)&(((uInt)0x6e)<<(k)))!=(((uInt)0x6e)<<(k))) /* declet is at offset k (a multiple of 2) in a pair of uInts: */ /* [the top 2 bits will always be in the more-significant uInt] */ #define CANONDPDTWO(hi, lo, k) (((hi)&(0x300>>(32-(k))))==0 \ || ((hi)&(0x6e>>(32-(k))))!=(0x6e>>(32-(k))) \ || ((lo)&(((uInt)0x6e)<<(k)))!=(((uInt)0x6e)<<(k))) /* Macro to test whether a full-length (length DECPMAX) BCD8 */ /* coefficient, starting at uByte u, is all zeros */ /* Test just the LSWord first, then the remainder as a sequence */ /* of tests in order to avoid same-level use of UBTOUI */ #if DECPMAX==7 #define ISCOEFFZERO(u) ( \ UBTOUI((u)+DECPMAX-4)==0 \ && UBTOUS((u)+DECPMAX-6)==0 \ && *(u)==0) #elif DECPMAX==16 #define ISCOEFFZERO(u) ( \ UBTOUI((u)+DECPMAX-4)==0 \ && UBTOUI((u)+DECPMAX-8)==0 \ && UBTOUI((u)+DECPMAX-12)==0 \ && UBTOUI(u)==0) #elif DECPMAX==34 #define ISCOEFFZERO(u) ( \ UBTOUI((u)+DECPMAX-4)==0 \ && UBTOUI((u)+DECPMAX-8)==0 \ && UBTOUI((u)+DECPMAX-12)==0 \ && UBTOUI((u)+DECPMAX-16)==0 \ && UBTOUI((u)+DECPMAX-20)==0 \ && UBTOUI((u)+DECPMAX-24)==0 \ && UBTOUI((u)+DECPMAX-28)==0 \ && UBTOUI((u)+DECPMAX-32)==0 \ && UBTOUS(u)==0) #endif /* Macros and masks for the sign, exponent continuation, and MSD */ /* Get the sign as DECFLOAT_Sign or 0 */ #define GETSIGN(df) (DFWORD(df, 0)&0x80000000) /* Get the exponent continuation from a decFloat *df as an Int */ #define GETECON(df) ((Int)((DFWORD((df), 0)&0x03ffffff)>>(32-6-DECECONL))) /* Ditto, from the next-wider format */ #define GETWECON(df) 
((Int)((DFWWORD((df), 0)&0x03ffffff)>>(32-6-DECWECONL))) /* Get the biased exponent similarly */ #define GETEXP(df) ((Int)(DECCOMBEXP[DFWORD((df), 0)>>26]+GETECON(df))) /* Get the unbiased exponent similarly */ #define GETEXPUN(df) ((Int)GETEXP(df)-DECBIAS) /* Get the MSD similarly (as uInt) */ #define GETMSD(df) (DECCOMBMSD[DFWORD((df), 0)>>26]) /* Compile-time computes of the exponent continuation field masks */ /* full exponent continuation field: */ #define ECONMASK ((0x03ffffff>>(32-6-DECECONL))<<(32-6-DECECONL)) /* same, not including its first digit (the qNaN/sNaN selector): */ #define ECONNANMASK ((0x01ffffff>>(32-6-DECECONL))<<(32-6-DECECONL)) /* Macros to decode the coefficient in a finite decFloat *df into */ /* a BCD string (uByte *bcdin) of length DECPMAX uBytes. */ /* In-line sequence to convert least significant 10 bits of uInt */ /* dpd to three BCD8 digits starting at uByte u. Note that an */ /* extra byte is written to the right of the three digits because */ /* four bytes are moved at a time for speed; the alternative */ /* macro moves exactly three bytes (usually slower). */ #define dpd2bcd8(u, dpd) memcpy(u, &DPD2BCD8[((dpd)&0x3ff)*4], 4) #define dpd2bcd83(u, dpd) memcpy(u, &DPD2BCD8[((dpd)&0x3ff)*4], 3) /* Decode the declets. After extracting each one, it is decoded */ /* to BCD8 using a table lookup (also used for variable-length */ /* decode). Each DPD decode is 3 bytes BCD8 plus a one-byte */ /* length which is not used, here). Fixed-length 4-byte moves */ /* are fast, however, almost everywhere, and so are used except */ /* for the final three bytes (to avoid overrun). The code below */ /* is 36 instructions for Doubles and about 70 for Quads, even */ /* on IA32. */ /* Two macros are defined for each format: */ /* GETCOEFF extracts the coefficient of the current format */ /* GETWCOEFF extracts the coefficient of the next-wider format. */ /* The latter is a copy of the next-wider GETCOEFF using DFWWORD. 
*/ #if DECPMAX==7 #define GETCOEFF(df, bcd) { \ uInt sourhi=DFWORD(df, 0); \ *(bcd)=(uByte)DECCOMBMSD[sourhi>>26]; \ dpd2bcd8(bcd+1, sourhi>>10); \ dpd2bcd83(bcd+4, sourhi);} #define GETWCOEFF(df, bcd) { \ uInt sourhi=DFWWORD(df, 0); \ uInt sourlo=DFWWORD(df, 1); \ *(bcd)=(uByte)DECCOMBMSD[sourhi>>26]; \ dpd2bcd8(bcd+1, sourhi>>8); \ dpd2bcd8(bcd+4, (sourhi<<2) | (sourlo>>30)); \ dpd2bcd8(bcd+7, sourlo>>20); \ dpd2bcd8(bcd+10, sourlo>>10); \ dpd2bcd83(bcd+13, sourlo);} #elif DECPMAX==16 #define GETCOEFF(df, bcd) { \ uInt sourhi=DFWORD(df, 0); \ uInt sourlo=DFWORD(df, 1); \ *(bcd)=(uByte)DECCOMBMSD[sourhi>>26]; \ dpd2bcd8(bcd+1, sourhi>>8); \ dpd2bcd8(bcd+4, (sourhi<<2) | (sourlo>>30)); \ dpd2bcd8(bcd+7, sourlo>>20); \ dpd2bcd8(bcd+10, sourlo>>10); \ dpd2bcd83(bcd+13, sourlo);} #define GETWCOEFF(df, bcd) { \ uInt sourhi=DFWWORD(df, 0); \ uInt sourmh=DFWWORD(df, 1); \ uInt sourml=DFWWORD(df, 2); \ uInt sourlo=DFWWORD(df, 3); \ *(bcd)=(uByte)DECCOMBMSD[sourhi>>26]; \ dpd2bcd8(bcd+1, sourhi>>4); \ dpd2bcd8(bcd+4, ((sourhi)<<6) | (sourmh>>26)); \ dpd2bcd8(bcd+7, sourmh>>16); \ dpd2bcd8(bcd+10, sourmh>>6); \ dpd2bcd8(bcd+13, ((sourmh)<<4) | (sourml>>28)); \ dpd2bcd8(bcd+16, sourml>>18); \ dpd2bcd8(bcd+19, sourml>>8); \ dpd2bcd8(bcd+22, ((sourml)<<2) | (sourlo>>30)); \ dpd2bcd8(bcd+25, sourlo>>20); \ dpd2bcd8(bcd+28, sourlo>>10); \ dpd2bcd83(bcd+31, sourlo);} #elif DECPMAX==34 #define GETCOEFF(df, bcd) { \ uInt sourhi=DFWORD(df, 0); \ uInt sourmh=DFWORD(df, 1); \ uInt sourml=DFWORD(df, 2); \ uInt sourlo=DFWORD(df, 3); \ *(bcd)=(uByte)DECCOMBMSD[sourhi>>26]; \ dpd2bcd8(bcd+1, sourhi>>4); \ dpd2bcd8(bcd+4, ((sourhi)<<6) | (sourmh>>26)); \ dpd2bcd8(bcd+7, sourmh>>16); \ dpd2bcd8(bcd+10, sourmh>>6); \ dpd2bcd8(bcd+13, ((sourmh)<<4) | (sourml>>28)); \ dpd2bcd8(bcd+16, sourml>>18); \ dpd2bcd8(bcd+19, sourml>>8); \ dpd2bcd8(bcd+22, ((sourml)<<2) | (sourlo>>30)); \ dpd2bcd8(bcd+25, sourlo>>20); \ dpd2bcd8(bcd+28, sourlo>>10); \ dpd2bcd83(bcd+31, sourlo);} #define GETWCOEFF(df, 
bcd) {??} /* [should never be used] */ #endif /* Macros to decode the coefficient in a finite decFloat *df into */ /* a base-billion uInt array, with the least-significant */ /* 0-999999999 'digit' at offset 0. */ /* Decode the declets. After extracting each one, it is decoded */ /* to binary using a table lookup. Three tables are used; one */ /* the usual DPD to binary, the other two pre-multiplied by 1000 */ /* and 1000000 to avoid multiplication during decode. These */ /* tables can also be used for multiplying up the MSD as the DPD */ /* code for 0 through 9 is the identity. */ #define DPD2BIN0 DPD2BIN /* for prettier code */ #if DECPMAX==7 #define GETCOEFFBILL(df, buf) { \ uInt sourhi=DFWORD(df, 0); \ (buf)[0]=DPD2BIN0[sourhi&0x3ff] \ +DPD2BINK[(sourhi>>10)&0x3ff] \ +DPD2BINM[DECCOMBMSD[sourhi>>26]];} #elif DECPMAX==16 #define GETCOEFFBILL(df, buf) { \ uInt sourhi, sourlo; \ sourlo=DFWORD(df, 1); \ (buf)[0]=DPD2BIN0[sourlo&0x3ff] \ +DPD2BINK[(sourlo>>10)&0x3ff] \ +DPD2BINM[(sourlo>>20)&0x3ff]; \ sourhi=DFWORD(df, 0); \ (buf)[1]=DPD2BIN0[((sourhi<<2) | (sourlo>>30))&0x3ff] \ +DPD2BINK[(sourhi>>8)&0x3ff] \ +DPD2BINM[DECCOMBMSD[sourhi>>26]];} #elif DECPMAX==34 #define GETCOEFFBILL(df, buf) { \ uInt sourhi, sourmh, sourml, sourlo; \ sourlo=DFWORD(df, 3); \ (buf)[0]=DPD2BIN0[sourlo&0x3ff] \ +DPD2BINK[(sourlo>>10)&0x3ff] \ +DPD2BINM[(sourlo>>20)&0x3ff]; \ sourml=DFWORD(df, 2); \ (buf)[1]=DPD2BIN0[((sourml<<2) | (sourlo>>30))&0x3ff] \ +DPD2BINK[(sourml>>8)&0x3ff] \ +DPD2BINM[(sourml>>18)&0x3ff]; \ sourmh=DFWORD(df, 1); \ (buf)[2]=DPD2BIN0[((sourmh<<4) | (sourml>>28))&0x3ff] \ +DPD2BINK[(sourmh>>6)&0x3ff] \ +DPD2BINM[(sourmh>>16)&0x3ff]; \ sourhi=DFWORD(df, 0); \ (buf)[3]=DPD2BIN0[((sourhi<<6) | (sourmh>>26))&0x3ff] \ +DPD2BINK[(sourhi>>4)&0x3ff] \ +DPD2BINM[DECCOMBMSD[sourhi>>26]];} #endif /* Macros to decode the coefficient in a finite decFloat *df into */ /* a base-thousand uInt array (of size DECLETS+1, to allow for */ /* the MSD), with the least-significant 0-999 
'digit' at offset 0.*/ /* Decode the declets. After extracting each one, it is decoded */ /* to binary using a table lookup. */ #if DECPMAX==7 #define GETCOEFFTHOU(df, buf) { \ uInt sourhi=DFWORD(df, 0); \ (buf)[0]=DPD2BIN[sourhi&0x3ff]; \ (buf)[1]=DPD2BIN[(sourhi>>10)&0x3ff]; \ (buf)[2]=DECCOMBMSD[sourhi>>26];} #elif DECPMAX==16 #define GETCOEFFTHOU(df, buf) { \ uInt sourhi, sourlo; \ sourlo=DFWORD(df, 1); \ (buf)[0]=DPD2BIN[sourlo&0x3ff]; \ (buf)[1]=DPD2BIN[(sourlo>>10)&0x3ff]; \ (buf)[2]=DPD2BIN[(sourlo>>20)&0x3ff]; \ sourhi=DFWORD(df, 0); \ (buf)[3]=DPD2BIN[((sourhi<<2) | (sourlo>>30))&0x3ff]; \ (buf)[4]=DPD2BIN[(sourhi>>8)&0x3ff]; \ (buf)[5]=DECCOMBMSD[sourhi>>26];} #elif DECPMAX==34 #define GETCOEFFTHOU(df, buf) { \ uInt sourhi, sourmh, sourml, sourlo; \ sourlo=DFWORD(df, 3); \ (buf)[0]=DPD2BIN[sourlo&0x3ff]; \ (buf)[1]=DPD2BIN[(sourlo>>10)&0x3ff]; \ (buf)[2]=DPD2BIN[(sourlo>>20)&0x3ff]; \ sourml=DFWORD(df, 2); \ (buf)[3]=DPD2BIN[((sourml<<2) | (sourlo>>30))&0x3ff]; \ (buf)[4]=DPD2BIN[(sourml>>8)&0x3ff]; \ (buf)[5]=DPD2BIN[(sourml>>18)&0x3ff]; \ sourmh=DFWORD(df, 1); \ (buf)[6]=DPD2BIN[((sourmh<<4) | (sourml>>28))&0x3ff]; \ (buf)[7]=DPD2BIN[(sourmh>>6)&0x3ff]; \ (buf)[8]=DPD2BIN[(sourmh>>16)&0x3ff]; \ sourhi=DFWORD(df, 0); \ (buf)[9]=DPD2BIN[((sourhi<<6) | (sourmh>>26))&0x3ff]; \ (buf)[10]=DPD2BIN[(sourhi>>4)&0x3ff]; \ (buf)[11]=DECCOMBMSD[sourhi>>26];} #endif /* Macros to decode the coefficient in a finite decFloat *df and */ /* add to a base-thousand uInt array (as for GETCOEFFTHOU). */ /* After the addition then most significant 'digit' in the array */ /* might have a value larger then 10 (with a maximum of 19). 
*/ #if DECPMAX==7 #define ADDCOEFFTHOU(df, buf) { \ uInt sourhi=DFWORD(df, 0); \ (buf)[0]+=DPD2BIN[sourhi&0x3ff]; \ if (buf[0]>999) {buf[0]-=1000; buf[1]++;} \ (buf)[1]+=DPD2BIN[(sourhi>>10)&0x3ff]; \ if (buf[1]>999) {buf[1]-=1000; buf[2]++;} \ (buf)[2]+=DECCOMBMSD[sourhi>>26];} #elif DECPMAX==16 #define ADDCOEFFTHOU(df, buf) { \ uInt sourhi, sourlo; \ sourlo=DFWORD(df, 1); \ (buf)[0]+=DPD2BIN[sourlo&0x3ff]; \ if (buf[0]>999) {buf[0]-=1000; buf[1]++;} \ (buf)[1]+=DPD2BIN[(sourlo>>10)&0x3ff]; \ if (buf[1]>999) {buf[1]-=1000; buf[2]++;} \ (buf)[2]+=DPD2BIN[(sourlo>>20)&0x3ff]; \ if (buf[2]>999) {buf[2]-=1000; buf[3]++;} \ sourhi=DFWORD(df, 0); \ (buf)[3]+=DPD2BIN[((sourhi<<2) | (sourlo>>30))&0x3ff]; \ if (buf[3]>999) {buf[3]-=1000; buf[4]++;} \ (buf)[4]+=DPD2BIN[(sourhi>>8)&0x3ff]; \ if (buf[4]>999) {buf[4]-=1000; buf[5]++;} \ (buf)[5]+=DECCOMBMSD[sourhi>>26];} #elif DECPMAX==34 #define ADDCOEFFTHOU(df, buf) { \ uInt sourhi, sourmh, sourml, sourlo; \ sourlo=DFWORD(df, 3); \ (buf)[0]+=DPD2BIN[sourlo&0x3ff]; \ if (buf[0]>999) {buf[0]-=1000; buf[1]++;} \ (buf)[1]+=DPD2BIN[(sourlo>>10)&0x3ff]; \ if (buf[1]>999) {buf[1]-=1000; buf[2]++;} \ (buf)[2]+=DPD2BIN[(sourlo>>20)&0x3ff]; \ if (buf[2]>999) {buf[2]-=1000; buf[3]++;} \ sourml=DFWORD(df, 2); \ (buf)[3]+=DPD2BIN[((sourml<<2) | (sourlo>>30))&0x3ff]; \ if (buf[3]>999) {buf[3]-=1000; buf[4]++;} \ (buf)[4]+=DPD2BIN[(sourml>>8)&0x3ff]; \ if (buf[4]>999) {buf[4]-=1000; buf[5]++;} \ (buf)[5]+=DPD2BIN[(sourml>>18)&0x3ff]; \ if (buf[5]>999) {buf[5]-=1000; buf[6]++;} \ sourmh=DFWORD(df, 1); \ (buf)[6]+=DPD2BIN[((sourmh<<4) | (sourml>>28))&0x3ff]; \ if (buf[6]>999) {buf[6]-=1000; buf[7]++;} \ (buf)[7]+=DPD2BIN[(sourmh>>6)&0x3ff]; \ if (buf[7]>999) {buf[7]-=1000; buf[8]++;} \ (buf)[8]+=DPD2BIN[(sourmh>>16)&0x3ff]; \ if (buf[8]>999) {buf[8]-=1000; buf[9]++;} \ sourhi=DFWORD(df, 0); \ (buf)[9]+=DPD2BIN[((sourhi<<6) | (sourmh>>26))&0x3ff]; \ if (buf[9]>999) {buf[9]-=1000; buf[10]++;} \ (buf)[10]+=DPD2BIN[(sourhi>>4)&0x3ff]; \ if 
(buf[10]>999) {buf[10]-=1000; buf[11]++;} \ (buf)[11]+=DECCOMBMSD[sourhi>>26];} #endif /* Set a decFloat to the maximum positive finite number (Nmax) */ #if DECPMAX==7 #define DFSETNMAX(df) \ {DFWORD(df, 0)=0x77f3fcff;} #elif DECPMAX==16 #define DFSETNMAX(df) \ {DFWORD(df, 0)=0x77fcff3f; \ DFWORD(df, 1)=0xcff3fcff;} #elif DECPMAX==34 #define DFSETNMAX(df) \ {DFWORD(df, 0)=0x77ffcff3; \ DFWORD(df, 1)=0xfcff3fcf; \ DFWORD(df, 2)=0xf3fcff3f; \ DFWORD(df, 3)=0xcff3fcff;} #endif /* [end of format-dependent macros and constants] */ #endif #else #error decNumberLocal included more than once #endif luametatex-2.10.08/source/libraries/hnj/000077500000000000000000000000001442250314700200705ustar00rootroot00000000000000luametatex-2.10.08/source/libraries/hnj/hnjhyphen.c000066400000000000000000000465271442250314700222450ustar00rootroot00000000000000/* See license.txt in the root of this project. */ /* This file is derived from libhnj which is is dual licensed under LGPL and MPL. Boilerplate for both licenses follows. LibHnj - a library for high quality hyphenation and justification (C) 1998 Raph Levien, (C) 2001 ALTLinux, Moscow (http://www.alt-linux.org), (C) 2001 Peter Novodvorsky (nidd@cs.msu.su) This library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public License for more details. You should have received a copy of the GNU Library General Public License along with this library; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307 USA. 
The contents of this file are subject to the Mozilla Public License Version 1.0 (the "MPL"); you may not use this file except in compliance with the MPL. You may obtain a copy of the MPL at http://www.mozilla.org/MPL/ Software distributed under the MPL is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the MPL for the specific language governing rights and limitations under the MPL. Remark: I'm not sure if something fundamental was adapted in the perspective of using this library in LuaTeX. However, for instance error reporting has been hooked into the Lua(Meta)TeX error reporting mechanisms. Also a bit of reformatting was done. This module won't change. Also, the code has been adapted a little in order to fit in the rest (function names etc) because it is more exposed. We use the alternative memory allocator. */ /*tex We need the warning subsystem, so: */ # include "luametatex.h" /*tex A few helpers (from |hnjalloc|): */ static void *hnj_malloc(int size) { void *p = lmt_memory_malloc((size_t) size); if (! p) { tex_formatted_error("hyphenation", "allocating %d bytes failed\n", size); } return p; } static void *hnj_realloc(void *p, int size) { void *n = lmt_memory_realloc(p, (size_t) size); if (! n) { tex_formatted_error("hyphenation", "reallocating %d bytes failed\n", size); } return n; } static void hnj_free(void *p) { lmt_memory_free(p); } static unsigned char *hnj_strdup(const unsigned char *s) { size_t l = strlen((const char *) s); unsigned char *n = hnj_malloc((int) l + 1); memcpy(n, s, l); n[l] = 0; return n; } /*tex Combine two right-aligned number patterns, 04000 + 020 becomes 04020. 
This works also for utf8 sequences because the substring is identical to the last |substring - length| bytes of expr except for the (single byte) hyphenation encoders */ static char *combine_patterns(char *expr, const char *subexpr) { size_t l1 = strlen(expr); size_t l2 = strlen(subexpr); size_t off = l1 - l2; for (unsigned j = 0; j < l2; j++) { if (expr[off + j] < subexpr[j]) { expr[off + j] = subexpr[j]; } } return expr; } /*tex Some original code: */ static hjn_hashiterator *new_hashiterator(hjn_hashtable *h) { hjn_hashiterator *i = hnj_malloc(sizeof(hjn_hashiterator)); i->e = h->entries; i->cur = NULL; i->ndx = -1; return i; } static int nexthashstealpattern(hjn_hashiterator *i, unsigned char **word, char **pattern) { while (i->cur == NULL) { if (i->ndx >= HNJ_HASH_SIZE - 1) { return 0; } else { i->cur = i->e[++i->ndx]; } } *word = i->cur->key; *pattern = i->cur->u.hyppat; i->cur->u.hyppat = NULL; i->cur = i->cur->next; return 1; } static int nexthash(hjn_hashiterator *i, unsigned char **word) { while (! i->cur) { if (i->ndx >= HNJ_HASH_SIZE - 1) { return 0; } else { i->cur = i->e[++i->ndx]; } } *word = i->cur->key; i->cur = i->cur->next; return 1; } static int eachhash(hjn_hashiterator *i, unsigned char **word, char **pattern) { while (! i->cur) { if (i->ndx >= HNJ_HASH_SIZE - 1) { return 0; } else { i->cur = i->e[++i->ndx]; } } *word = i->cur->key; *pattern = i->cur->u.hyppat; i->cur = i->cur->next; return 1; } static void delete_hashiterator(hjn_hashiterator *i) { hnj_free(i); } /*tex A |char*| hash function from ASU, adapted from |Gtk+|: */ static unsigned int string_hash(const unsigned char *s) { const unsigned char *p = s; unsigned int h = 0, g; for (; *p != '\0'; p += 1) { h = (h << 4) + *p; g = h & 0xf0000000; if (g) { h = h ^ (g >> 24); h = h ^ g; } } return h; } /*tex This assumes that key is not already present! 
*/ static void state_insert(hjn_hashtable *hashtab, unsigned char *key, int state) { int i = (int) (string_hash(key) % HNJ_HASH_SIZE); hjn_hashentry* e = hnj_malloc(sizeof(hjn_hashentry)); e->next = hashtab->entries[i]; e->key = key; e->u.state = state; hashtab->entries[i] = e; } /*tex This also assumes that key is not already present! */ static void insert_pattern(hjn_hashtable *hashtab, unsigned char *key, char *hyppat, int trace) { hjn_hashentry *e; int i = (int) (string_hash(key) % HNJ_HASH_SIZE); for (e = hashtab->entries[i]; e; e = e->next) { if (strcmp((char *) e->key, (char *) key) == 0) { if (e->u.hyppat) { if (trace && hyppat && strcmp((char *) e->u.hyppat, (char *) hyppat) != 0) { tex_formatted_warning("hyphenation", "a conflicting pattern '%s' has been ignored", hyppat); } hnj_free(e->u.hyppat); } e->u.hyppat = hyppat; hnj_free(key); return; } } e = hnj_malloc(sizeof(hjn_hashentry)); e->next = hashtab->entries[i]; e->key = key; e->u.hyppat = hyppat; hashtab->entries[i] = e; } /*tex We return |state| if found, otherwise |-1|. */ static int state_lookup(hjn_hashtable *hashtab, const unsigned char *key) { int i = (int) (string_hash(key) % HNJ_HASH_SIZE); for (hjn_hashentry *e = hashtab->entries[i]; e; e = e->next) { if (! strcmp((const char *) key, (const char *) e->key)) { return e->u.state; } } return -1; } /*tex We return |state| if found, otherwise |-1|. The 256 should be enough. */ static char *lookup_pattern(hjn_hashtable * hashtab, const unsigned char *chars, int l) { int i; unsigned char key[256]; strncpy((char *) key, (const char *) chars, (size_t) l); key[l] = 0; i = (int) (string_hash(key) % HNJ_HASH_SIZE); for (hjn_hashentry *e = hashtab->entries[i]; e; e = e->next) { if (! strcmp((char *) key, (char *) e->key)) { return e->u.hyppat; } } return NULL; } /*tex Get the state number, allocating a new state if necessary. 
*/ static int hnj_get_state(hjn_dictionary *dict, const unsigned char *str, int *state_num) { *state_num = state_lookup(dict->state_num, str); if (*state_num >= 0) { return *state_num; } else { state_insert(dict->state_num, hnj_strdup(str), dict->num_states); /*tex The predicate is true if |dict->num_states| is a power of two: */ if (! (dict->num_states & (dict->num_states - 1))) { dict->states = hnj_realloc(dict->states, (int) ((dict->num_states << 1) * (int) sizeof(hjn_state))); } dict->states[dict->num_states] = (hjn_state) { .match = NULL, .fallback_state = -1, .num_trans = 0, .trans = NULL }; return dict->num_states++; } } /*tex Add a transition from state1 to state2 through ch - assumes that the transition does not already exist. */ static void hnj_add_trans(hjn_dictionary *dict, int state1, int state2, int chr) { /*tex This test was a bit too strict, it is quite normal for old patterns to have chars in the range 0-31 or 127-159 (inclusive). To ease the transition, let's only disallow |nul| for now, which probably is a requirement of the code anyway. */ if (chr) { int num_trans = dict->states[state1].num_trans; if (num_trans == 0) { dict->states[state1].trans = hnj_malloc(sizeof(hjn_transition)); } else { /*tex The old version did: \starttyping } else if (!(num_trans & (num_trans - 1))) { ... = hnj_realloc(dict->states[state1].trans, (int) ((num_trans << 1) * sizeof(HyphenTrans))); \stoptyping but that is incredibly nasty when adding patters one-at-a-time. Controlled growth would be nicer than the current +1, but if no one complains, and no one did in a decade, this is good enough. 
*/ dict->states[state1].trans = hnj_realloc(dict->states[state1].trans, (int) ((num_trans + 1) * sizeof(hjn_transition))); } dict->states[state1].trans[num_trans].uni_ch = chr; dict->states[state1].trans[num_trans].new_state = state2; dict->states[state1].num_trans++; } else { tex_normal_error("hyphenation","a nul character is not permited"); } } /*tex We did change the semantics a bit here: |hnj_hyphen_load| used to operate on a file, but now the argument is a string buffer. */ /* define tex_isspace(c) (c == ' ' || c == '\t') */ # define tex_isspace(c) (c == ' ') static const unsigned char *next_pattern(size_t* length, const unsigned char** buf) { const unsigned char *here, *rover = *buf; while (*rover && tex_isspace(*rover)) { rover++; } here = rover; while (*rover) { if (tex_isspace(*rover)) { *length = (size_t) (rover - here); *buf = rover; return here; } else { rover++; } } *length = (size_t) (rover - here); *buf = rover; return *length ? here : NULL; } static void init_hash(hjn_hashtable **h) { if (! 
*h) { *h = hnj_malloc(sizeof(hjn_hashtable)); for (int i = 0; i < HNJ_HASH_SIZE; i++) { (*h)->entries[i] = NULL; } } } static void clear_state_hash(hjn_hashtable **h) { if (*h) { for (int i = 0; i < HNJ_HASH_SIZE; i++) { hjn_hashentry *e, *next; for (e = (*h)->entries[i]; e; e = next) { next = e->next; hnj_free(e->key); hnj_free(e); } } hnj_free(*h); *h = NULL; } } static void clear_pattern_hash(hjn_hashtable **h) { if (*h) { for (int i = 0; i < HNJ_HASH_SIZE; i++) { hjn_hashentry *e, *next; for (e = (*h)->entries[i]; e; e = next) { next = e->next; hnj_free(e->key); if (e->u.hyppat) { hnj_free(e->u.hyppat); } hnj_free(e); } } hnj_free(*h); *h = NULL; } } static void init_dictionary(hjn_dictionary *dict) { dict->num_states = 1; dict->pat_length = 0; dict->states = hnj_malloc(sizeof(hjn_state)); dict->states[0] = (hjn_state) { .match = NULL, .fallback_state = -1, .num_trans = 0, .trans = NULL }; dict->patterns = NULL; dict->merged = NULL; dict->state_num = NULL; init_hash(&dict->patterns); } static void clear_dictionary(hjn_dictionary *dict) { for (int state_num = 0; state_num < dict->num_states; state_num++) { hjn_state *hstate = &dict->states[state_num]; if (hstate->match) { hnj_free(hstate->match); } if (hstate->trans) { hnj_free(hstate->trans); } } hnj_free(dict->states); clear_pattern_hash(&dict->patterns); clear_pattern_hash(&dict->merged); clear_state_hash(&dict->state_num); } hjn_dictionary *hnj_dictionary_new(void) { hjn_dictionary *dict = hnj_malloc(sizeof(hjn_dictionary)); init_dictionary(dict); return dict; } void hnj_dictionary_clear(hjn_dictionary *dict) { clear_dictionary(dict); init_dictionary(dict); } void hnj_dictionary_free(hjn_dictionary *dict) { clear_dictionary(dict); hnj_free(dict); } unsigned char *hnj_dictionary_tostring(hjn_dictionary *dict) { unsigned char *word; char *pattern; unsigned char *buf = hnj_malloc(dict->pat_length); unsigned char *cur = buf; hjn_hashiterator *v = new_hashiterator(dict->patterns); while (eachhash(v, &word, 
&pattern)) { int i = 0; int e = 0; while (word[e + i]) { if (pattern[i] != '0') { *cur++ = (unsigned char) pattern[i]; } *cur++ = word[e + i++]; while (is_utf8_follow(word[e + i])) { *cur++ = word[i + e++]; } } if (pattern[i] != '0') { *cur++ = (unsigned char) pattern[i]; } *cur++ = ' '; } delete_hashiterator(v); *cur = 0; return buf; } /*tex In hyphenation patterns we use signed bytes where |0|, or actually any negative number, indicates end: \starttyping prio(1+),startpos,length,len1,[replace],len2,[replace] \starttyping A basic example is: \starttyping p n 0 0 0 \starttyping for a hyphenation point between characters. */ void hnj_dictionary_load(hjn_dictionary *dict, const unsigned char *f, int trace) { int state_num, last_state; int ch; int found; hjn_hashiterator *v; unsigned char *word; char *pattern; size_t l = 0; const unsigned char *format; const unsigned char *begin = f; while ((format = next_pattern(&l, &f)) != NULL) { if (l > 0 && l < 255) { int i, j, e1; for (i = 0, j = 0, e1 = 0; (unsigned) i < l; i++) { if (format[i] >= '0' && format[i] <= '9') { j++; } if (is_utf8_follow(format[i])) { e1++; } } /*tex Here |l-e1| is the number of {\em characters} not {\em bytes}, |l-j| the number of pattern bytes and |l-e1-j| the number of pattern characters. */ { unsigned char *pat = (unsigned char *) hnj_malloc((1 + (int) l - j)); char *org = (char *) hnj_malloc(2 + (int) l - e1 - j); /*tex Remove hyphenation encoders (digits) from pat. */ org[0] = '0'; for (i = 0, j = 0, e1 = 0; (unsigned) i < l; i++) { unsigned char c = format[i]; if (is_utf8_follow(c)) { pat[j + e1++] = c; } else if (c < '0' || c > '9') { pat[e1 + j++] = c; org[j] = '0'; } else { org[j] = (char) c; } } pat[e1 + j] = 0; org[j + 1] = 0; insert_pattern(dict->patterns, pat, org, trace); } } else { tex_normal_warning("hyphenation", "a pattern of more than 254 bytes is ignored"); } } /*tex We add 2 bytes for spurious spaces. 
*/ dict->pat_length += (int) ((f - begin) + 2); init_hash(&dict->merged); v = new_hashiterator(dict->patterns); while (nexthash(v, &word)) { int wordsize = (int) strlen((char *) word); for (int l1 = 1; l1 <= wordsize; l1++) { if (is_utf8_follow(word[l1])) { /*tex Do not clip an utf8 sequence. */ } else { for (int j1 = 1; j1 <= l1; j1++) { int i1 = l1 - j1; if (is_utf8_follow(word[i1])) { /*tex Do not start halfway an utf8 sequence. */ } else { char *subpat_pat = lookup_pattern(dict->patterns, word + i1, j1); if (subpat_pat) { char *newpat_pat = lookup_pattern(dict->merged, word, l1); if (! newpat_pat) { char *neworg; unsigned char *newword = (unsigned char *) hnj_malloc(l1 + 1); int e1 = 0; strncpy((char *) newword, (char *) word, (size_t) l1); newword[l1] = 0; for (i1 = 0; i1 < l1; i1++) { if (is_utf8_follow(newword[i1])) { e1++; } } neworg = hnj_malloc(l1 + 2 - e1); /*tex Fill with right amount of zeros: */ sprintf(neworg, "%0*d", l1 + 1 - e1, 0); insert_pattern(dict->merged, newword, combine_patterns(neworg, subpat_pat), trace); } else { combine_patterns(newpat_pat, subpat_pat); } } } } } } } delete_hashiterator(v); init_hash(&dict->state_num); state_insert(dict->state_num, hnj_strdup((const unsigned char *) ""), 0); v = new_hashiterator(dict->merged); while (nexthashstealpattern(v, &word, &pattern)) { static unsigned char mask[] = { 0x3F, 0x1F, 0xF, 0x7 }; int j1 = (int) strlen((char *) word); state_num = hnj_get_state(dict, word, &found); dict->states[state_num].match = pattern; /*tex Now, put in the prefix transitions. 
*/ while (found < 0) { j1--; last_state = state_num; ch = word[j1]; if (ch >= 0x80) { /* why not is_utf8_follow(ch) here */ int m; int i1 = 1; while (is_utf8_follow(word[j1 - i1])) { i1++; } ch = word[j1 - i1] & mask[i1]; m = j1 - i1; while (i1--) { ch = (ch << 6) + (0x3F & word[j1 - i1]); } j1 = m; } word[j1] = '\0'; state_num = hnj_get_state(dict, word, &found); hnj_add_trans(dict, state_num, last_state, ch); } } delete_hashiterator(v); clear_pattern_hash(&dict->merged); /*tex Put in the fallback states. */ for (int i = 0; i < HNJ_HASH_SIZE; i++) { for (hjn_hashentry *e = dict->state_num->entries[i]; e; e = e->next) { /*tex Do not do |state == 0| otherwise things get confused. */ if (e->u.state) { for (int j = 1; 1; j++) { state_num = state_lookup(dict->state_num, e->key + j); if (state_num >= 0) { break; } } dict->states[e->u.state].fallback_state = state_num; } } } clear_state_hash(&dict->state_num); } luametatex-2.10.08/source/libraries/hnj/hnjhyphen.h000066400000000000000000000070571442250314700222450ustar00rootroot00000000000000/* See license.txt in the root of this project. */ /* The code is derived from LibHnj which is is dual licensed under LGPL and MPL. Boilerplate for both licenses follows. */ /* LibHnj - a library for high quality hyphenation and justification Copyright (C) 1998 Raph Levien, (C) 2001 ALTLinux, Moscow This library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public License for more details. 
You should have received a copy of the GNU Library General Public License along with this library; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307 USA. */ /* The contents of this file are subject to the Mozilla Public License Version 1.0 (the "MPL"); you may not use this file except in compliance with the MPL. You may obtain a copy of the MPL at http://www.mozilla.org/MPL/ Software distributed under the MPL is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the MPL for the specific language governing rights and limitations under the MPL. */ # ifndef LMT_HNJHYPHEN_H # define LMT_HNJHYPHEN_H /*tex First some type definitions and a little bit of a hash table implementation. This simply maps strings to state numbers. In \LUATEX\ we have node related code in |hnjhyphen.c| but in \LUAMETATEX\ we moved that to |texlanguage.c| so we need to make some type definitions public. */ # define HNJ_MAXPATHS 40960 # define HNJ_HASH_SIZE 31627 # define HNJ_MAX_CHARS 256 # define HNJ_MAX_NAME 24 typedef struct _hjn_hashtable hjn_hashtable; typedef struct _hjn_hashentry hjn_hashentry; typedef struct _hjn_hashiterator hjn_hashiterator; typedef union _hjn_hashvalue hjn_hashvalue; /*tex A cheap, but effective, hack. */ struct _hjn_hashtable { hjn_hashentry *entries[HNJ_HASH_SIZE]; }; union _hjn_hashvalue { char *hyppat; int state; int padding; }; struct _hjn_hashentry { hjn_hashentry *next; unsigned char *key; hjn_hashvalue u; }; struct _hjn_hashiterator { hjn_hashentry **e; hjn_hashentry *cur; int ndx; int padding; }; /*tex The state state machine. 
*/ typedef struct _hjn_transition hjn_transition; typedef struct _hjn_state hjn_state; typedef struct _hjn_dictionary hjn_dictionary; struct _hjn_transition { int uni_ch; int new_state; }; struct _hjn_state { char *match; int fallback_state; int num_trans; hjn_transition *trans; }; struct _hjn_dictionary { int num_states; int pat_length; char cset[HNJ_MAX_NAME]; hjn_state *states; hjn_hashtable *patterns; hjn_hashtable *merged; hjn_hashtable *state_num; }; extern hjn_dictionary *hnj_dictionary_new (void); extern void hnj_dictionary_load (hjn_dictionary *dict, const unsigned char *fn, int trace); extern void hnj_dictionary_free (hjn_dictionary *dict); extern void hnj_dictionary_clear (hjn_dictionary *dict); extern unsigned char *hnj_dictionary_tostring (hjn_dictionary *dict); # endif luametatex-2.10.08/source/libraries/libcerf/000077500000000000000000000000001442250314700207175ustar00rootroot00000000000000luametatex-2.10.08/source/libraries/libcerf/CHANGELOG000066400000000000000000000131511442250314700221320ustar00rootroot00000000000000== Revision history of libcerf, maintained by Joachim Wuttke == Homepage moved to https://jugit.fz-juelich.de/mlz/libcerf, 17mar19 libcerf-1.13, released 28feb19: - Further adjustments for compilation under Windows libcerf-1.12, released 7feb19: - Require CMake 3.6, outcomment code that requires 3.13. - Relative paths in CMake sources, for use as subproject. - When compiling as CPP, then #include, not ; revise the entire C-vs-CPP machinery. - Remove tests with different inf or nan results on different systems or under different compilers. libcerf-1.11, released 28dec18: - Cover voigt by test_voigt. - Implement new function voigt_hwhm. - Restore libcerf.pc. - Add INSTALL instructions, and other minor adjustments for use of libcerf in C++ projects. - Support 'ctest', which runs the numeric accuracy tests from test1.c. - Rename type cmplx into _cerf_cmplx to avoid name clash with Gnuplot pre 5.3. 
libcerf-1.8 [2oct18], libcerf-1.9 [16oct18] and libcerf-1.10 [20dec18] MUST NOT BE USED - A bug introduced in v1.8 had broken the normalization of the Voigt function. - The git history leading to v1.10 has been rewritten, starting anew from v1.7 libcerf-1.7, released 26sep18: - Option -DCERF_CPP allows to choose C++ compilation, which is useful because MS VisualStudio supports C++14, but not yet C99, and in particular does not support complex.h under C. libcerf-1.6, released 20sep18: - Migrated from automake to CMake. - Corrected typos in man pages. libcerf-1.5, released 12oct16: - Removed unused inline function (detected by clang-1.3., reported by Luke Benes) libcerf-1.4, released 27aug14: - HTML version of man pages no longer installs to man/html. - More concise man pages. - Delete a few unused include's. - Autotools script corrected (suggestions by Christoph Junghans). libcerf-1.3, released 17jul13: - Now supporting pkg-config (suggested by Mojca Miklavec). libcerf-1.2, released 16jul13: - Test programs no longer install to $bindir (reported by Mojca Miklavec). libcerf-1.1, released 12may13: - Added Fortran binding by Antonio Cervellino. libcerf-1.0, released 31jan13 by Joachim Wuttke: - Based on http://ab-initio.mit.edu/Faddeeva as of 28jan13. - Verified accuracy using double-exponential transform. - Simplified function names; use leading 'c' for complex functions (except in w_of_z). - Added function voigt(x,sigma,gamma). - Added configure.ac, Makefile.am &c to allow for autotools standard installation (commands ./configure, make, sudo make install). - Splitted source code into directories lib/ and test/. - Eliminated unused alternate code (!USE_CONTINUED_FRACTION). - Eliminated relerr arguments. - Replaced "complex" by "_Complex" for C++ compatibility. - Wrote man pages w_of_z(3), dawson(3), voigt(3), cerf(3), erfcx(3), erfi(3). - Created project home page http://apps.jcns.fz-juelich.de/libcerf. - Registered project "libcerf" at sourceforge.net. 
== Revision history of Faddeeva.cc by Steven G. Johnson == Project at http://ab-initio.mit.edu/Faddeeva 4 October 2012: Initial public release (SGJ) 5 October 2012: Revised (SGJ) to fix spelling error, start summation for large x at round(x/a) (> 1) rather than ceil(x/a) as in the original paper, which should slightly improve performance (and, apparently, slightly improves accuracy) 19 October 2012: Revised (SGJ) to fix bugs for large x, large -y, and 15 1e154. Set relerr argument to min(relerr,0.1). 27 October 2012: Enhance accuracy in Re[w(z)] taken by itself, by switching to Alg. 916 in a region near the real-z axis where continued fractions have poor relative accuracy in Re[w(z)]. Thanks to M. Zaghloul for the tip. 29 October 2012: Replace SLATEC-derived erfcx routine with completely rewritten code by me, using a very different algorithm which is much faster. 30 October 2012: Implemented special-case code for real z (where real part is exp(-x^2) and imag part is Dawson integral), using algorithm similar to erfx. Export ImFaddeeva_w function to make Dawson's integral directly accessible. 3 November 2012: Provide implementations of erf, erfc, erfcx, and Dawson functions in Faddeeva:: namespace, in addition to Faddeeva::w. Provide header file Faddeeva.hh. 4 November 2012: Slightly faster erf for real arguments. Updated MATLAB and Octave plugins. 27 November 2012: Support compilation with either C++ or plain C (using C99 complex numbers). For real x, use standard-library erf(x) and erfc(x) if available (for C99 or C++11). #include "config.h" if HAVE_CONFIG_H is #defined. 15 December 2012: Portability fixes (copysign, Inf/NaN creation), use CMPLX/__builtin_complex if available in C, slight accuracy improvements to erf and dawson functions near the origin. Use gnulib functions if GNULIB_NAMESPACE is defined. 
18 December 2012: Slight tweaks (remove recomputation of x*x in Dawson) luametatex-2.10.08/source/libraries/libcerf/LICENSE000066400000000000000000000022531442250314700217260ustar00rootroot00000000000000/* Copyright (c) 2012 Massachusetts Institute of Technology * Copyright (c) 2013 Forschungszentrum Jülich GmbH * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ luametatex-2.10.08/source/libraries/libcerf/README.md000066400000000000000000000135251442250314700222040ustar00rootroot00000000000000# libcerf This is the home page of **libcerf**, a self-contained numeric library that provides an efficient and accurate implementation of complex error functions, along with Dawson, Faddeeva, and Voigt functions. # User Documentation ## Synopsis In the following, "complex" stands for the C99 data type "double _Complex": * complex [cerf](http://apps.jcns.fz-juelich.de/man/cerf.html) (complex): The complex error function erf(z). 
* complex [cerfc](http://apps.jcns.fz-juelich.de/man/cerf.html) (complex): The complex complementary error function erfc(z) = 1 - erf(z). * complex [cerfcx](http://apps.jcns.fz-juelich.de/man/erfcx.html) (complex z): The underflow-compensating function erfcx(z) = exp(z^2) erfc(z). * double [erfcx](http://apps.jcns.fz-juelich.de/man/erfcx.html) (double x): The same for real x. * complex [cerfi](http://apps.jcns.fz-juelich.de/man/erfi.html) (complex z): The imaginary error function erfi(z) = -i erf(iz). * double [erfi](http://apps.jcns.fz-juelich.de/man/erfi.html) (double x): The same for real x. * complex [w_of_z](http://apps.jcns.fz-juelich.de/man/w_of_z.html) (complex z): Faddeeva's scaled complex error function w(z) = exp(-z^2) erfc(-iz). * double [im_w_of_x](http://apps.jcns.fz-juelich.de/man/w_of_z.html) (double x): The same for real x, returning the purely imaginary result as a real number. * complex [cdawson](http://apps.jcns.fz-juelich.de/man/dawson.html) (complex z): Dawson's integral D(z) = sqrt(pi)/2 * exp(-z^2) * erfi(z). * double [dawson](http://apps.jcns.fz-juelich.de/man/dawson.html) (double x): The same for real x. * double [voigt](http://apps.jcns.fz-juelich.de/man/voigt.html) (double x, double sigma, double gamma): The convolution of a Gaussian and a Lorentzian. * double [voigt_hwhm](http://apps.jcns.fz-juelich.de/man/voigt_hwhm.html) (double sigma, double gamma): The half width at half maximum of the Voigt profile. ## Accuracy By construction, it is expected that the relative accuracy is generally better than 1E-13. This has been confirmed by comparison with high-precision Maple computations and with a *long double* computation using Fourier transform representation and double-exponential transform. ## Copyright and Citation Copyright (C) [Steven G. Johnson](http:*math.mit.edu/~stevenj), Massachusetts Institute of Technology, 2012; [Joachim Wuttke](http:*www.fz-juelich.de/SharedDocs/Personen/JCNS/EN/Wuttke_J.html), Forschungszentrum Jülich, 2013. 
License: [MIT License](http://opensource.org/licenses/MIT) When using libcerf in scientific work, please cite as follows: * S. G. Johnson, A. Cervellino, J. Wuttke: libcerf, numeric library for complex error functions, version [...], http://apps.jcns.fz-juelich.de/libcerf Please send bug reports to the authors, or submit them through the Gitlab issue tracker. ## Further references Most function evaluations in this library rely on Faddeeva's function w(z). This function has been reimplemented from scratch by [Steven G. Johnson](http://math.mit.edu/~stevenj); project web site http://ab-initio.mit.edu/Faddeeva. The implementation partly relies on algorithms from the following publications: * Walter Gautschi, *Efficient computation of the complex error function,* SIAM J. Numer. Anal. 7, 187 (1970). * G. P. M. Poppe and C. M. J. Wijers, *More efficient computation of the complex error function,* ACM Trans. Math. Soft. 16, 38 (1990). * Mofreh R. Zaghloul and Ahmed N. Ali, *Algorithm 916: Computing the Faddeyeva and Voigt Functions,* ACM Trans. Math. Soft. 38, 15 (2011). # Installation ## From source Download location: http://apps.jcns.fz-juelich.de/src/libcerf/ Build&install are based on CMake. Out-of-source build is enforced. After unpacking the source, go to the source directory and do: mkdir build cd build cmake .. make make install To test, run the programs in directory test/. The library has been developed using gcc-4.7. Reports about successful compilation with older versions of gcc would be welcome. For correct support of complex numbers it seems that at least gcc-4.3 is required. Compilation with gcc-4.2 works after removing of the "-Werror" flag from *configure*. 
## Binary packages * Linux: * [rpm package](https://build.opensuse.org/package/show/science/libcerf) by Christoph Junghans * [Gentoo package](http://packages.gentoo.org/package/sci-libs/libcerf) by Christoph Junghans * [Debian package](https://packages.debian.org/jessie/libs/libcerf1) by Eugen Wintersberger * OS X: * [MacPorts::libcerf](http://www.macports.org/ports.php?by=name&substr=libcerf), by Mojca Miklavec * [Homebrew/homebrew-science/libcerf.rb](https://formulae.brew.sh/formula/libcerf), by Roman Garnett # Code structure The code consists of - the library's C source (directory lib/), - test code (directory test/), - manual pages (directory man/), - build utilities (aclocal.m4, build-aux/, config*, m4/, Makefile*). ## Compilation The library libcerf is written in C. It can be compiled as C code (default) or as C++ code (with option -DCERF_CPP=ON). Compilation as C++ is useful especially under MS Windows because as per 2018 the C compiler of Visual Studio does not support C90, nor any newer language standard, and is unable to cope with complex numbers. Otherwise, the library is self-contained, and installation should be straightforward, using the usual command sequence ./configure make sudo make install The command ./configure takes various options that are explained in the file INSTALL. ## Language bindings For use with other programming languages, libcerf should be either linked directly, or provided with a trivial wrapper. Such language bindings are added to the libcerf package as contributed by their authors. The following bindings are available: * **fortran**, by Antonio Cervellino (Paul Scherrer Institut) Further contributions will be highly welcome. Please report bugs to the package maintainer. luametatex-2.10.08/source/libraries/libcerf/cerf.h000066400000000000000000000113051442250314700220070ustar00rootroot00000000000000/* Library libcerf: * Compute complex error functions, based on a new implementation of * Faddeeva's w_of_z. 
Also provide Dawson and Voigt functions. * * File cerf.h: * Declare exported functions. * * Copyright: * (C) 2012 Massachusetts Institute of Technology * (C) 2013 Forschungszentrum Jülich GmbH * * Licence: * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * Authors: * Steven G. Johnson, Massachusetts Institute of Technology, 2012, core author * Joachim Wuttke, Forschungszentrum Jülich, 2013, package maintainer * * Website: * http://apps.jcns.fz-juelich.de/libcerf * * Revision history: * ../CHANGELOG * * Man pages: * w_of_z(3), dawson(3), voigt(3), cerf(3), erfcx(3), erfi(3) */ /* This file is patched by Mojca Miklavec and Hans Hagen for usage in LuaMetaTeX where we use only C and also want to compile with the Microsoft compiler. So, when updating this library one has to check for changes. Not that we expect many as this is a rather stable library. In the other files there are a few macros used that deal with the multiplication and addition of complex and real numbers. 
Of course the original code is kept as-is. */ # ifndef __CERF_H # define __CERF_H # include # if (_MSC_VER) # define _cerf_cmplx _Dcomplex # else typedef double _Complex _cerf_cmplx; # endif # define EXPORT extern _cerf_cmplx w_of_z (_cerf_cmplx z); /* compute w(z) = exp(-z^2) erfc(-iz), Faddeeva's scaled complex error function */ extern double im_w_of_x (double x); /* special case Im[w(x)] of real x */ extern double re_w_of_z (double x, double y); extern double im_w_of_z (double x, double y); extern _cerf_cmplx cerf (_cerf_cmplx z); /* compute erf(z), the error function of complex arguments */ extern _cerf_cmplx cerfc (_cerf_cmplx z); /* compute erfc(z) = 1 - erf(z), the complementary error function */ extern _cerf_cmplx cerfcx (_cerf_cmplx z); /* compute erfcx(z) = exp(z^2) erfc(z), an underflow-compensated version of erfc */ extern double erfcx (double x); /* special case for real x */ extern _cerf_cmplx cerfi (_cerf_cmplx z); /* compute erfi(z) = -i erf(iz), the imaginary error function */ extern double erfi (double x); /* special case for real x */ extern _cerf_cmplx cdawson (_cerf_cmplx z); /* compute dawson(z) = sqrt(pi)/2 * exp(-z^2) * erfi(z), Dawson's integral */ extern double dawson (double x); /* special case for real x */ extern double voigt (double x, double sigma, double gamma); /* compute voigt(x,...), the convolution of a Gaussian and a Lorentzian */ extern double voigt_hwhm (double sigma, double gamma, int *error); /* compute the full width at half maximum of the Voigt function */ extern double cerf_experimental_imw (double x, double y); extern double cerf_experimental_rew (double x, double y); #endif luametatex-2.10.08/source/libraries/libcerf/defs.h000066400000000000000000000064651442250314700220240ustar00rootroot00000000000000/* Library libcerf: * compute complex error functions, * along with Dawson, Faddeeva and Voigt functions * * File defs.h: * Language-dependent includes. 
* * Copyright: * (C) 2012 Massachusetts Institute of Technology * (C) 2013 Forschungszentrum Jülich GmbH * * Licence: * MIT Licence. * See ../COPYING * * Authors: * Steven G. Johnson, Massachusetts Institute of Technology, 2012, core author * Joachim Wuttke, Forschungszentrum Jülich, 2013, package maintainer * * Website: * http://apps.jcns.fz-juelich.de/libcerf */ /* This file is patched by Mojca Miklavec and Hans Hagen for usage in LuaMetaTeX where we use only C and also want to compile with the Microsoft compiler. So, when updating this library one has to check for changes. Not that we expect many as this is a rather stable library. In the other files there are a few macros used that deal with the multiplication and addition of complex and real nmbers. Of course the original code is kept as-is. */ # ifndef __CERF_C_H # define __CERF_C_H # define _GNU_SOURCE // enable GNU libc NAN extension if possible /* Constructing complex numbers like 0+i*NaN is problematic in C99 without the C11 CMPLX macro, because 0.+I*NAN may give NaN+i*NAN if I is a complex (rather than imaginary) constant. For some reason, however, it works fine in (pre-4.7) gcc if I define Inf and NaN as 1/0 and 0/0 (and only if I compile with optimization -O1 or more), but not if I use the INFINITY or NAN macros. */ /* __builtin_complex was introduced in gcc 4.7, but the C11 CMPLX macro may not be defined unless we are using a recent (2012) version of glibc and compile with -std=c11... note that icc lies about being gcc and probably doesn't have this builtin(?), so exclude icc explicitly. */ # if (_MSC_VER) # define C(a,b) _Cbuild((double)(a), (double)(b)) # define Inf INFINITY # define NaN NAN # else # define C(a,b) ((a) + I*(b)) # define Inf (1./0.) # define NaN (0./0.) 
# endif # include # if (_MSC_VER) # define _cerf_cmplx _Dcomplex static _Dcomplex complex_neg (_Dcomplex x) { return _Cmulcr(x, -1.0); } static _Dcomplex complex_add_cc(_Dcomplex x, _Dcomplex y) { return _Cbuild(creal(x) + creal(y), cimag(x) + cimag(y)); } static _Dcomplex complex_add_rc(double x, _Dcomplex y) { return _Cbuild(x + creal(y), x + cimag(y)); } static _Dcomplex complex_sub_cc(_Dcomplex x, _Dcomplex y) { return _Cbuild(creal(x) - creal(y), cimag(x) - cimag(y)); } static _Dcomplex complex_sub_rc(double x, _Dcomplex y) { return _Cbuild(x - creal(y), x - cimag(y)); } static _Dcomplex complex_mul_cc(_Dcomplex x, _Dcomplex y) { return _Cmulcc((y), (x)); } static _Dcomplex complex_mul_rc(double x, _Dcomplex y) { return _Cmulcr((y), (x)); } static _Dcomplex complex_mul_cr(_Dcomplex x, double y) { return _Cmulcr((x), (y)); } # else typedef double _Complex _cerf_cmplx; # define complex_neg(x) (-x) # define complex_add_cc(x,y) (x+y) # define complex_add_rc(x,y) (x+y) # define complex_sub_cc(x,y) (x-y) # define complex_sub_rc(x,y) (x-y) # define complex_mul_cc(x,y) (x*y) # define complex_mul_rc(x,y) (x*y) # define complex_mul_cr(x,y) (x*y) # endif # endif luametatex-2.10.08/source/libraries/libcerf/erfcx.c000066400000000000000000001046541442250314700222040ustar00rootroot00000000000000/* Library libcerf: * Compute complex error functions, based on a new implementation of * Faddeeva's w_of_z. Also provide Dawson and Voigt functions. * * File erfcx.c: * Compute erfcx(x) = exp(x^2) erfc(x) function, for real x, * using a novel algorithm that is much faster than DERFC of SLATEC. * This function is used in the computation of Faddeeva, Dawson, and * other complex error functions. 
* * Copyright: * (C) 2012 Massachusetts Institute of Technology * (C) 2013 Forschungszentrum Jülich GmbH * * Licence: * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * Authors: * Steven G. Johnson, Massachusetts Institute of Technology, 2012, core author * Joachim Wuttke, Forschungszentrum Jülich, 2013, package maintainer * * Website: * http://apps.jcns.fz-juelich.de/libcerf * * Revision history: * ../CHANGELOG * * Manual page: * man 3 erfcx */ #include "cerf.h" #include #include "defs.h" // defines _cerf_cmplx, NaN, C, cexp, ... /******************************************************************************/ /* Lookup-table for Chebyshev polynomials for smaller |x| */ /******************************************************************************/ static double erfcx_y100(double y100) { // Steven G. Johnson, October 2012. // Given y100=100*y, where y = 4/(4+x) for x >= 0, compute erfc(x). 
// Uses a look-up table of 100 different Chebyshev polynomials // for y intervals [0,0.01], [0.01,0.02], ...., [0.99,1], generated // with the help of Maple and a little shell script. This allows // the Chebyshev polynomials to be of significantly lower degree (about 1/4) // compared to fitting the whole [0,1] interval with a single polynomial. switch ((int) y100) { case 0: { double t = 2*y100 - 1; return 0.70878032454106438663e-3 + (0.71234091047026302958e-3 + (0.35779077297597742384e-5 + (0.17403143962587937815e-7 + (0.81710660047307788845e-10 + (0.36885022360434957634e-12 + 0.15917038551111111111e-14 * t) * t) * t) * t) * t) * t; } case 1: { double t = 2*y100 - 3; return 0.21479143208285144230e-2 + (0.72686402367379996033e-3 + (0.36843175430938995552e-5 + (0.18071841272149201685e-7 + (0.85496449296040325555e-10 + (0.38852037518534291510e-12 + 0.16868473576888888889e-14 * t) * t) * t) * t) * t) * t; } case 2: { double t = 2*y100 - 5; return 0.36165255935630175090e-2 + (0.74182092323555510862e-3 + (0.37948319957528242260e-5 + (0.18771627021793087350e-7 + (0.89484715122415089123e-10 + (0.40935858517772440862e-12 + 0.17872061464888888889e-14 * t) * t) * t) * t) * t) * t; } case 3: { double t = 2*y100 - 7; return 0.51154983860031979264e-2 + (0.75722840734791660540e-3 + (0.39096425726735703941e-5 + (0.19504168704300468210e-7 + (0.93687503063178993915e-10 + (0.43143925959079664747e-12 + 0.18939926435555555556e-14 * t) * t) * t) * t) * t) * t; } case 4: { double t = 2*y100 - 9; return 0.66457513172673049824e-2 + (0.77310406054447454920e-3 + (0.40289510589399439385e-5 + (0.20271233238288381092e-7 + (0.98117631321709100264e-10 + (0.45484207406017752971e-12 + 0.20076352213333333333e-14 * t) * t) * t) * t) * t) * t; } case 5: { double t = 2*y100 - 11; return 0.82082389970241207883e-2 + (0.78946629611881710721e-3 + (0.41529701552622656574e-5 + (0.21074693344544655714e-7 + (0.10278874108587317989e-9 + (0.47965201390613339638e-12 + 0.21285907413333333333e-14 * t) * t) * t) * 
t) * t) * t; } case 6: { double t = 2*y100 - 13; return 0.98039537275352193165e-2 + (0.80633440108342840956e-3 + (0.42819241329736982942e-5 + (0.21916534346907168612e-7 + (0.10771535136565470914e-9 + (0.50595972623692822410e-12 + 0.22573462684444444444e-14 * t) * t) * t) * t) * t) * t; } case 7: { double t = 2*y100 - 15; return 0.11433927298290302370e-1 + (0.82372858383196561209e-3 + (0.44160495311765438816e-5 + (0.22798861426211986056e-7 + (0.11291291745879239736e-9 + (0.53386189365816880454e-12 + 0.23944209546666666667e-14 * t) * t) * t) * t) * t) * t; } case 8: { double t = 2*y100 - 17; return 0.13099232878814653979e-1 + (0.84167002467906968214e-3 + (0.45555958988457506002e-5 + (0.23723907357214175198e-7 + (0.11839789326602695603e-9 + (0.56346163067550237877e-12 + 0.25403679644444444444e-14 * t) * t) * t) * t) * t) * t; } case 9: { double t = 2*y100 - 19; return 0.14800987015587535621e-1 + (0.86018092946345943214e-3 + (0.47008265848816866105e-5 + (0.24694040760197315333e-7 + (0.12418779768752299093e-9 + (0.59486890370320261949e-12 + 0.26957764568888888889e-14 * t) * t) * t) * t) * t) * t; } case 10: { double t = 2*y100 - 21; return 0.16540351739394069380e-1 + (0.87928458641241463952e-3 + (0.48520195793001753903e-5 + (0.25711774900881709176e-7 + (0.13030128534230822419e-9 + (0.62820097586874779402e-12 + 0.28612737351111111111e-14 * t) * t) * t) * t) * t) * t; } case 11: { double t = 2*y100 - 23; return 0.18318536789842392647e-1 + (0.89900542647891721692e-3 + (0.50094684089553365810e-5 + (0.26779777074218070482e-7 + (0.13675822186304615566e-9 + (0.66358287745352705725e-12 + 0.30375273884444444444e-14 * t) * t) * t) * t) * t) * t; } case 12: { double t = 2*y100 - 25; return 0.20136801964214276775e-1 + (0.91936908737673676012e-3 + (0.51734830914104276820e-5 + (0.27900878609710432673e-7 + (0.14357976402809042257e-9 + (0.70114790311043728387e-12 + 0.32252476000000000000e-14 * t) * t) * t) * t) * t) * t; } case 13: { double t = 2*y100 - 27; return 
0.21996459598282740954e-1 + (0.94040248155366777784e-3 + (0.53443911508041164739e-5 + (0.29078085538049374673e-7 + (0.15078844500329731137e-9 + (0.74103813647499204269e-12 + 0.34251892320000000000e-14 * t) * t) * t) * t) * t) * t; } case 14: { double t = 2*y100 - 29; return 0.23898877187226319502e-1 + (0.96213386835900177540e-3 + (0.55225386998049012752e-5 + (0.30314589961047687059e-7 + (0.15840826497296335264e-9 + (0.78340500472414454395e-12 + 0.36381553564444444445e-14 * t) * t) * t) * t) * t) * t; } case 15: { double t = 2*y100 - 31; return 0.25845480155298518485e-1 + (0.98459293067820123389e-3 + (0.57082915920051843672e-5 + (0.31613782169164830118e-7 + (0.16646478745529630813e-9 + (0.82840985928785407942e-12 + 0.38649975768888888890e-14 * t) * t) * t) * t) * t) * t; } case 16: { double t = 2*y100 - 33; return 0.27837754783474696598e-1 + (0.10078108563256892757e-2 + (0.59020366493792212221e-5 + (0.32979263553246520417e-7 + (0.17498524159268458073e-9 + (0.87622459124842525110e-12 + 0.41066206488888888890e-14 * t) * t) * t) * t) * t) * t; } case 17: { double t = 2*y100 - 35; return 0.29877251304899307550e-1 + (0.10318204245057349310e-2 + (0.61041829697162055093e-5 + (0.34414860359542720579e-7 + (0.18399863072934089607e-9 + (0.92703227366365046533e-12 + 0.43639844053333333334e-14 * t) * t) * t) * t) * t) * t; } case 18: { double t = 2*y100 - 37; return 0.31965587178596443475e-1 + (0.10566560976716574401e-2 + (0.63151633192414586770e-5 + (0.35924638339521924242e-7 + (0.19353584758781174038e-9 + (0.98102783859889264382e-12 + 0.46381060817777777779e-14 * t) * t) * t) * t) * t) * t; } case 19: { double t = 2*y100 - 39; return 0.34104450552588334840e-1 + (0.10823541191350532574e-2 + (0.65354356159553934436e-5 + (0.37512918348533521149e-7 + (0.20362979635817883229e-9 + (0.10384187833037282363e-11 + 0.49300625262222222221e-14 * t) * t) * t) * t) * t) * t; } case 20: { double t = 2*y100 - 41; return 0.36295603928292425716e-1 + (0.11089526167995268200e-2 + 
(0.67654845095518363577e-5 + (0.39184292949913591646e-7 + (0.21431552202133775150e-9 + (0.10994259106646731797e-11 + 0.52409949102222222221e-14 * t) * t) * t) * t) * t) * t; } case 21: { double t = 2*y100 - 43; return 0.38540888038840509795e-1 + (0.11364917134175420009e-2 + (0.70058230641246312003e-5 + (0.40943644083718586939e-7 + (0.22563034723692881631e-9 + (0.11642841011361992885e-11 + 0.55721092871111111110e-14 * t) * t) * t) * t) * t) * t; } case 22: { double t = 2*y100 - 45; return 0.40842225954785960651e-1 + (0.11650136437945673891e-2 + (0.72569945502343006619e-5 + (0.42796161861855042273e-7 + (0.23761401711005024162e-9 + (0.12332431172381557035e-11 + 0.59246802364444444445e-14 * t) * t) * t) * t) * t) * t; } case 23: { double t = 2*y100 - 47; return 0.43201627431540222422e-1 + (0.11945628793917272199e-2 + (0.75195743532849206263e-5 + (0.44747364553960993492e-7 + (0.25030885216472953674e-9 + (0.13065684400300476484e-11 + 0.63000532853333333334e-14 * t) * t) * t) * t) * t) * t; } case 24: { double t = 2*y100 - 49; return 0.45621193513810471438e-1 + (0.12251862608067529503e-2 + (0.77941720055551920319e-5 + (0.46803119830954460212e-7 + (0.26375990983978426273e-9 + (0.13845421370977119765e-11 + 0.66996477404444444445e-14 * t) * t) * t) * t) * t) * t; } case 25: { double t = 2*y100 - 51; return 0.48103121413299865517e-1 + (0.12569331386432195113e-2 + (0.80814333496367673980e-5 + (0.48969667335682018324e-7 + (0.27801515481905748484e-9 + (0.14674637611609884208e-11 + 0.71249589351111111110e-14 * t) * t) * t) * t) * t) * t; } case 26: { double t = 2*y100 - 53; return 0.50649709676983338501e-1 + (0.12898555233099055810e-2 + (0.83820428414568799654e-5 + (0.51253642652551838659e-7 + (0.29312563849675507232e-9 + (0.15556512782814827846e-11 + 0.75775607822222222221e-14 * t) * t) * t) * t) * t) * t; } case 27: { double t = 2*y100 - 55; return 0.53263363664388864181e-1 + (0.13240082443256975769e-2 + (0.86967260015007658418e-5 + (0.53662102750396795566e-7 + 
(0.30914568786634796807e-9 + (0.16494420240828493176e-11 + 0.80591079644444444445e-14 * t) * t) * t) * t) * t) * t; } case 28: { double t = 2*y100 - 57; return 0.55946601353500013794e-1 + (0.13594491197408190706e-2 + (0.90262520233016380987e-5 + (0.56202552975056695376e-7 + (0.32613310410503135996e-9 + (0.17491936862246367398e-11 + 0.85713381688888888890e-14 * t) * t) * t) * t) * t) * t; } case 29: { double t = 2*y100 - 59; return 0.58702059496154081813e-1 + (0.13962391363223647892e-2 + (0.93714365487312784270e-5 + (0.58882975670265286526e-7 + (0.34414937110591753387e-9 + (0.18552853109751857859e-11 + 0.91160736711111111110e-14 * t) * t) * t) * t) * t) * t; } case 30: { double t = 2*y100 - 61; return 0.61532500145144778048e-1 + (0.14344426411912015247e-2 + (0.97331446201016809696e-5 + (0.61711860507347175097e-7 + (0.36325987418295300221e-9 + (0.19681183310134518232e-11 + 0.96952238400000000000e-14 * t) * t) * t) * t) * t) * t; } case 31: { double t = 2*y100 - 63; return 0.64440817576653297993e-1 + (0.14741275456383131151e-2 + (0.10112293819576437838e-4 + (0.64698236605933246196e-7 + (0.38353412915303665586e-9 + (0.20881176114385120186e-11 + 0.10310784480000000000e-13 * t) * t) * t) * t) * t) * t; } case 32: { double t = 2*y100 - 65; return 0.67430045633130393282e-1 + (0.15153655418916540370e-2 + (0.10509857606888328667e-4 + (0.67851706529363332855e-7 + (0.40504602194811140006e-9 + (0.22157325110542534469e-11 + 0.10964842115555555556e-13 * t) * t) * t) * t) * t) * t; } case 33: { double t = 2*y100 - 67; return 0.70503365513338850709e-1 + (0.15582323336495709827e-2 + (0.10926868866865231089e-4 + (0.71182482239613507542e-7 + (0.42787405890153386710e-9 + (0.23514379522274416437e-11 + 0.11659571751111111111e-13 * t) * t) * t) * t) * t) * t; } case 34: { double t = 2*y100 - 69; return 0.73664114037944596353e-1 + (0.16028078812438820413e-2 + (0.11364423678778207991e-4 + (0.74701423097423182009e-7 + (0.45210162777476488324e-9 + (0.24957355004088569134e-11 + 
0.12397238257777777778e-13 * t) * t) * t) * t) * t) * t; } case 35: { double t = 2*y100 - 71; return 0.76915792420819562379e-1 + (0.16491766623447889354e-2 + (0.11823685320041302169e-4 + (0.78420075993781544386e-7 + (0.47781726956916478925e-9 + (0.26491544403815724749e-11 + 0.13180196462222222222e-13 * t) * t) * t) * t) * t) * t; } case 36: { double t = 2*y100 - 73; return 0.80262075578094612819e-1 + (0.16974279491709504117e-2 + (0.12305888517309891674e-4 + (0.82350717698979042290e-7 + (0.50511496109857113929e-9 + (0.28122528497626897696e-11 + 0.14010889635555555556e-13 * t) * t) * t) * t) * t) * t; } case 37: { double t = 2*y100 - 75; return 0.83706822008980357446e-1 + (0.17476561032212656962e-2 + (0.12812343958540763368e-4 + (0.86506399515036435592e-7 + (0.53409440823869467453e-9 + (0.29856186620887555043e-11 + 0.14891851591111111111e-13 * t) * t) * t) * t) * t) * t; } case 38: { double t = 2*y100 - 77; return 0.87254084284461718231e-1 + (0.17999608886001962327e-2 + (0.13344443080089492218e-4 + (0.90900994316429008631e-7 + (0.56486134972616465316e-9 + (0.31698707080033956934e-11 + 0.15825697795555555556e-13 * t) * t) * t) * t) * t) * t; } case 39: { double t = 2*y100 - 79; return 0.90908120182172748487e-1 + (0.18544478050657699758e-2 + (0.13903663143426120077e-4 + (0.95549246062549906177e-7 + (0.59752787125242054315e-9 + (0.33656597366099099413e-11 + 0.16815130613333333333e-13 * t) * t) * t) * t) * t) * t; } case 40: { double t = 2*y100 - 81; return 0.94673404508075481121e-1 + (0.19112284419887303347e-2 + (0.14491572616545004930e-4 + (0.10046682186333613697e-6 + (0.63221272959791000515e-9 + (0.35736693975589130818e-11 + 0.17862931591111111111e-13 * t) * t) * t) * t) * t) * t; } case 41: { double t = 2*y100 - 83; return 0.98554641648004456555e-1 + (0.19704208544725622126e-2 + (0.15109836875625443935e-4 + (0.10567036667675984067e-6 + (0.66904168640019354565e-9 + (0.37946171850824333014e-11 + 0.18971959040000000000e-13 * t) * t) * t) * t) * t) * t; } case 42: { 
double t = 2*y100 - 85; return 0.10255677889470089531e0 + (0.20321499629472857418e-2 + (0.15760224242962179564e-4 + (0.11117756071353507391e-6 + (0.70814785110097658502e-9 + (0.40292553276632563925e-11 + 0.20145143075555555556e-13 * t) * t) * t) * t) * t) * t; } case 43: { double t = 2*y100 - 87; return 0.10668502059865093318e0 + (0.20965479776148731610e-2 + (0.16444612377624983565e-4 + (0.11700717962026152749e-6 + (0.74967203250938418991e-9 + (0.42783716186085922176e-11 + 0.21385479360000000000e-13 * t) * t) * t) * t) * t) * t; } case 44: { double t = 2*y100 - 89; return 0.11094484319386444474e0 + (0.21637548491908170841e-2 + (0.17164995035719657111e-4 + (0.12317915750735938089e-6 + (0.79376309831499633734e-9 + (0.45427901763106353914e-11 + 0.22696025653333333333e-13 * t) * t) * t) * t) * t) * t; } case 45: { double t = 2*y100 - 91; return 0.11534201115268804714e0 + (0.22339187474546420375e-2 + (0.17923489217504226813e-4 + (0.12971465288245997681e-6 + (0.84057834180389073587e-9 + (0.48233721206418027227e-11 + 0.24079890062222222222e-13 * t) * t) * t) * t) * t) * t; } case 46: { double t = 2*y100 - 93; return 0.11988259392684094740e0 + (0.23071965691918689601e-2 + (0.18722342718958935446e-4 + (0.13663611754337957520e-6 + (0.89028385488493287005e-9 + (0.51210161569225846701e-11 + 0.25540227111111111111e-13 * t) * t) * t) * t) * t) * t; } case 47: { double t = 2*y100 - 95; return 0.12457298393509812907e0 + (0.23837544771809575380e-2 + (0.19563942105711612475e-4 + (0.14396736847739470782e-6 + (0.94305490646459247016e-9 + (0.54366590583134218096e-11 + 0.27080225920000000000e-13 * t) * t) * t) * t) * t) * t; } case 48: { double t = 2*y100 - 97; return 0.12941991566142438816e0 + (0.24637684719508859484e-2 + (0.20450821127475879816e-4 + (0.15173366280523906622e-6 + (0.99907632506389027739e-9 + (0.57712760311351625221e-11 + 0.28703099555555555556e-13 * t) * t) * t) * t) * t) * t; } case 49: { double t = 2*y100 - 99; return 0.13443048593088696613e0 + 
(0.25474249981080823877e-2 + (0.21385669591362915223e-4 + (0.15996177579900443030e-6 + (0.10585428844575134013e-8 + (0.61258809536787882989e-11 + 0.30412080142222222222e-13 * t) * t) * t) * t) * t) * t; } case 50: { double t = 2*y100 - 101; return 0.13961217543434561353e0 + (0.26349215871051761416e-2 + (0.22371342712572567744e-4 + (0.16868008199296822247e-6 + (0.11216596910444996246e-8 + (0.65015264753090890662e-11 + 0.32210394506666666666e-13 * t) * t) * t) * t) * t) * t; } case 51: { double t = 2*y100 - 103; return 0.14497287157673800690e0 + (0.27264675383982439814e-2 + (0.23410870961050950197e-4 + (0.17791863939526376477e-6 + (0.11886425714330958106e-8 + (0.68993039665054288034e-11 + 0.34101266222222222221e-13 * t) * t) * t) * t) * t) * t; } case 52: { double t = 2*y100 - 105; return 0.15052089272774618151e0 + (0.28222846410136238008e-2 + (0.24507470422713397006e-4 + (0.18770927679626136909e-6 + (0.12597184587583370712e-8 + (0.73203433049229821618e-11 + 0.36087889048888888890e-13 * t) * t) * t) * t) * t) * t; } case 53: { double t = 2*y100 - 107; return 0.15626501395774612325e0 + (0.29226079376196624949e-2 + (0.25664553693768450545e-4 + (0.19808568415654461964e-6 + (0.13351257759815557897e-8 + (0.77658124891046760667e-11 + 0.38173420035555555555e-13 * t) * t) * t) * t) * t) * t; } case 54: { double t = 2*y100 - 109; return 0.16221449434620737567e0 + (0.30276865332726475672e-2 + (0.26885741326534564336e-4 + (0.20908350604346384143e-6 + (0.14151148144240728728e-8 + (0.82369170665974313027e-11 + 0.40360957457777777779e-13 * t) * t) * t) * t) * t) * t; } case 55: { double t = 2*y100 - 111; return 0.16837910595412130659e0 + (0.31377844510793082301e-2 + (0.28174873844911175026e-4 + (0.22074043807045782387e-6 + (0.14999481055996090039e-8 + (0.87348993661930809254e-11 + 0.42653528977777777779e-13 * t) * t) * t) * t) * t) * t; } case 56: { double t = 2*y100 - 113; return 0.17476916455659369953e0 + (0.32531815370903068316e-2 + (0.29536024347344364074e-4 + 
(0.23309632627767074202e-6 + (0.15899007843582444846e-8 + (0.92610375235427359475e-11 + 0.45054073102222222221e-13 * t) * t) * t) * t) * t) * t; } case 57: { double t = 2*y100 - 115; return 0.18139556223643701364e0 + (0.33741744168096996041e-2 + (0.30973511714709500836e-4 + (0.24619326937592290996e-6 + (0.16852609412267750744e-8 + (0.98166442942854895573e-11 + 0.47565418097777777779e-13 * t) * t) * t) * t) * t) * t; } case 58: { double t = 2*y100 - 117; return 0.18826980194443664549e0 + (0.35010775057740317997e-2 + (0.32491914440014267480e-4 + (0.26007572375886319028e-6 + (0.17863299617388376116e-8 + (0.10403065638343878679e-10 + 0.50190265831111111110e-13 * t) * t) * t) * t) * t) * t; } case 59: { double t = 2*y100 - 119; return 0.19540403413693967350e0 + (0.36342240767211326315e-2 + (0.34096085096200907289e-4 + (0.27479061117017637474e-6 + (0.18934228504790032826e-8 + (0.11021679075323598664e-10 + 0.52931171733333333334e-13 * t) * t) * t) * t) * t) * t; } case 60: { double t = 2*y100 - 121; return 0.20281109560651886959e0 + (0.37739673859323597060e-2 + (0.35791165457592409054e-4 + (0.29038742889416172404e-6 + (0.20068685374849001770e-8 + (0.11673891799578381999e-10 + 0.55790523093333333334e-13 * t) * t) * t) * t) * t) * t; } case 61: { double t = 2*y100 - 123; return 0.21050455062669334978e0 + (0.39206818613925652425e-2 + (0.37582602289680101704e-4 + (0.30691836231886877385e-6 + (0.21270101645763677824e-8 + (0.12361138551062899455e-10 + 0.58770520160000000000e-13 * t) * t) * t) * t) * t) * t; } case 62: { double t = 2*y100 - 125; return 0.21849873453703332479e0 + (0.40747643554689586041e-2 + (0.39476163820986711501e-4 + (0.32443839970139918836e-6 + (0.22542053491518680200e-8 + (0.13084879235290858490e-10 + 0.61873153262222222221e-13 * t) * t) * t) * t) * t) * t; } case 63: { double t = 2*y100 - 127; return 0.22680879990043229327e0 + (0.42366354648628516935e-2 + (0.41477956909656896779e-4 + (0.34300544894502810002e-6 + (0.23888264229264067658e-8 + 
(0.13846596292818514601e-10 + 0.65100183751111111110e-13 * t) * t) * t) * t) * t) * t; } case 64: { double t = 2*y100 - 129; return 0.23545076536988703937e0 + (0.44067409206365170888e-2 + (0.43594444916224700881e-4 + (0.36268045617760415178e-6 + (0.25312606430853202748e-8 + (0.14647791812837903061e-10 + 0.68453122631111111110e-13 * t) * t) * t) * t) * t) * t; } case 65: { double t = 2*y100 - 131; return 0.24444156740777432838e0 + (0.45855530511605787178e-2 + (0.45832466292683085475e-4 + (0.38352752590033030472e-6 + (0.26819103733055603460e-8 + (0.15489984390884756993e-10 + 0.71933206364444444445e-13 * t) * t) * t) * t) * t) * t; } case 66: { double t = 2*y100 - 133; return 0.25379911500634264643e0 + (0.47735723208650032167e-2 + (0.48199253896534185372e-4 + (0.40561404245564732314e-6 + (0.28411932320871165585e-8 + (0.16374705736458320149e-10 + 0.75541379822222222221e-13 * t) * t) * t) * t) * t) * t; } case 67: { double t = 2*y100 - 135; return 0.26354234756393613032e0 + (0.49713289477083781266e-2 + (0.50702455036930367504e-4 + (0.42901079254268185722e-6 + (0.30095422058900481753e-8 + (0.17303497025347342498e-10 + 0.79278273368888888890e-13 * t) * t) * t) * t) * t) * t; } case 68: { double t = 2*y100 - 137; return 0.27369129607732343398e0 + (0.51793846023052643767e-2 + (0.53350152258326602629e-4 + (0.45379208848865015485e-6 + (0.31874057245814381257e-8 + (0.18277905010245111046e-10 + 0.83144182364444444445e-13 * t) * t) * t) * t) * t) * t; } case 69: { double t = 2*y100 - 139; return 0.28426714781640316172e0 + (0.53983341916695141966e-2 + (0.56150884865255810638e-4 + (0.48003589196494734238e-6 + (0.33752476967570796349e-8 + (0.19299477888083469086e-10 + 0.87139049137777777779e-13 * t) * t) * t) * t) * t) * t; } case 70: { double t = 2*y100 - 141; return 0.29529231465348519920e0 + (0.56288077305420795663e-2 + (0.59113671189913307427e-4 + (0.50782393781744840482e-6 + (0.35735475025851713168e-8 + (0.20369760937017070382e-10 + 0.91262442613333333334e-13 * t) * t) * t) * 
t) * t) * t; } case 71: { double t = 2*y100 - 143; return 0.30679050522528838613e0 + (0.58714723032745403331e-2 + (0.62248031602197686791e-4 + (0.53724185766200945789e-6 + (0.37827999418960232678e-8 + (0.21490291930444538307e-10 + 0.95513539182222222221e-13 * t) * t) * t) * t) * t) * t; } case 72: { double t = 2*y100 - 145; return 0.31878680111173319425e0 + (0.61270341192339103514e-2 + (0.65564012259707640976e-4 + (0.56837930287837738996e-6 + (0.40035151353392378882e-8 + (0.22662596341239294792e-10 + 0.99891109760000000000e-13 * t) * t) * t) * t) * t) * t; } case 73: { double t = 2*y100 - 147; return 0.33130773722152622027e0 + (0.63962406646798080903e-2 + (0.69072209592942396666e-4 + (0.60133006661885941812e-6 + (0.42362183765883466691e-8 + (0.23888182347073698382e-10 + 0.10439349811555555556e-12 * t) * t) * t) * t) * t) * t; } case 74: { double t = 2*y100 - 149; return 0.34438138658041336523e0 + (0.66798829540414007258e-2 + (0.72783795518603561144e-4 + (0.63619220443228800680e-6 + (0.44814499336514453364e-8 + (0.25168535651285475274e-10 + 0.10901861383111111111e-12 * t) * t) * t) * t) * t) * t; } case 75: { double t = 2*y100 - 151; return 0.35803744972380175583e0 + (0.69787978834882685031e-2 + (0.76710543371454822497e-4 + (0.67306815308917386747e-6 + (0.47397647975845228205e-8 + (0.26505114141143050509e-10 + 0.11376390933333333333e-12 * t) * t) * t) * t) * t) * t; } case 76: { double t = 2*y100 - 153; return 0.37230734890119724188e0 + (0.72938706896461381003e-2 + (0.80864854542670714092e-4 + (0.71206484718062688779e-6 + (0.50117323769745883805e-8 + (0.27899342394100074165e-10 + 0.11862637614222222222e-12 * t) * t) * t) * t) * t) * t; } case 77: { double t = 2*y100 - 155; return 0.38722432730555448223e0 + (0.76260375162549802745e-2 + (0.85259785810004603848e-4 + (0.75329383305171327677e-6 + (0.52979361368388119355e-8 + (0.29352606054164086709e-10 + 0.12360253370666666667e-12 * t) * t) * t) * t) * t) * t; } case 78: { double t = 2*y100 - 157; return 
0.40282355354616940667e0 + (0.79762880915029728079e-2 + (0.89909077342438246452e-4 + (0.79687137961956194579e-6 + (0.55989731807360403195e-8 + (0.30866246101464869050e-10 + 0.12868841946666666667e-12 * t) * t) * t) * t) * t) * t; } case 79: { double t = 2*y100 - 159; return 0.41914223158913787649e0 + (0.83456685186950463538e-2 + (0.94827181359250161335e-4 + (0.84291858561783141014e-6 + (0.59154537751083485684e-8 + (0.32441553034347469291e-10 + 0.13387957943111111111e-12 * t) * t) * t) * t) * t) * t; } case 80: { double t = 2*y100 - 161; return 0.43621971639463786896e0 + (0.87352841828289495773e-2 + (0.10002929142066799966e-3 + (0.89156148280219880024e-6 + (0.62480008150788597147e-8 + (0.34079760983458878910e-10 + 0.13917107176888888889e-12 * t) * t) * t) * t) * t) * t; } case 81: { double t = 2*y100 - 163; return 0.45409763548534330981e0 + (0.91463027755548240654e-2 + (0.10553137232446167258e-3 + (0.94293113464638623798e-6 + (0.65972492312219959885e-8 + (0.35782041795476563662e-10 + 0.14455745872000000000e-12 * t) * t) * t) * t) * t) * t; } case 82: { double t = 2*y100 - 165; return 0.47282001668512331468e0 + (0.95799574408860463394e-2 + (0.11135019058000067469e-3 + (0.99716373005509038080e-6 + (0.69638453369956970347e-8 + (0.37549499088161345850e-10 + 0.15003280712888888889e-12 * t) * t) * t) * t) * t) * t; } case 83: { double t = 2*y100 - 167; return 0.49243342227179841649e0 + (0.10037550043909497071e-1 + (0.11750334542845234952e-3 + (0.10544006716188967172e-5 + (0.73484461168242224872e-8 + (0.39383162326435752965e-10 + 0.15559069118222222222e-12 * t) * t) * t) * t) * t) * t; } case 84: { double t = 2*y100 - 169; return 0.51298708979209258326e0 + (0.10520454564612427224e-1 + (0.12400930037494996655e-3 + (0.11147886579371265246e-5 + (0.77517184550568711454e-8 + (0.41283980931872622611e-10 + 0.16122419680000000000e-12 * t) * t) * t) * t) * t) * t; } case 85: { double t = 2*y100 - 171; return 0.53453307979101369843e0 + (0.11030120618800726938e-1 + 
(0.13088741519572269581e-3 + (0.11784797595374515432e-5 + (0.81743383063044825400e-8 + (0.43252818449517081051e-10 + 0.16692592640000000000e-12 * t) * t) * t) * t) * t) * t; } case 86: { double t = 2*y100 - 173; return 0.55712643071169299478e0 + (0.11568077107929735233e-1 + (0.13815797838036651289e-3 + (0.12456314879260904558e-5 + (0.86169898078969313597e-8 + (0.45290446811539652525e-10 + 0.17268801084444444444e-12 * t) * t) * t) * t) * t) * t; } case 87: { double t = 2*y100 - 175; return 0.58082532122519320968e0 + (0.12135935999503877077e-1 + (0.14584223996665838559e-3 + (0.13164068573095710742e-5 + (0.90803643355106020163e-8 + (0.47397540713124619155e-10 + 0.17850211608888888889e-12 * t) * t) * t) * t) * t) * t; } case 88: { double t = 2*y100 - 177; return 0.60569124025293375554e0 + (0.12735396239525550361e-1 + (0.15396244472258863344e-3 + (0.13909744385382818253e-5 + (0.95651595032306228245e-8 + (0.49574672127669041550e-10 + 0.18435945564444444444e-12 * t) * t) * t) * t) * t) * t; } case 89: { double t = 2*y100 - 179; return 0.63178916494715716894e0 + (0.13368247798287030927e-1 + (0.16254186562762076141e-3 + (0.14695084048334056083e-5 + (0.10072078109604152350e-7 + (0.51822304995680707483e-10 + 0.19025081422222222222e-12 * t) * t) * t) * t) * t) * t; } case 90: { double t = 2*y100 - 181; return 0.65918774689725319200e0 + (0.14036375850601992063e-1 + (0.17160483760259706354e-3 + (0.15521885688723188371e-5 + (0.10601827031535280590e-7 + (0.54140790105837520499e-10 + 0.19616655146666666667e-12 * t) * t) * t) * t) * t) * t; } case 91: { double t = 2*y100 - 183; return 0.68795950683174433822e0 + (0.14741765091365869084e-1 + (0.18117679143520433835e-3 + (0.16392004108230585213e-5 + (0.11155116068018043001e-7 + (0.56530360194925690374e-10 + 0.20209663662222222222e-12 * t) * t) * t) * t) * t) * t; } case 92: { double t = 2*y100 - 185; return 0.71818103808729967036e0 + (0.15486504187117112279e-1 + (0.19128428784550923217e-3 + (0.17307350969359975848e-5 + 
(0.11732656736113607751e-7 + (0.58991125287563833603e-10 + 0.20803065333333333333e-12 * t) * t) * t) * t) * t) * t; } case 93: { double t = 2*y100 - 187; return 0.74993321911726254661e0 + (0.16272790364044783382e-1 + (0.20195505163377912645e-3 + (0.18269894883203346953e-5 + (0.12335161021630225535e-7 + (0.61523068312169087227e-10 + 0.21395783431111111111e-12 * t) * t) * t) * t) * t) * t; } case 94: { double t = 2*y100 - 189; return 0.78330143531283492729e0 + (0.17102934132652429240e-1 + (0.21321800585063327041e-3 + (0.19281661395543913713e-5 + (0.12963340087354341574e-7 + (0.64126040998066348872e-10 + 0.21986708942222222222e-12 * t) * t) * t) * t) * t) * t; } case 95: { double t = 2*y100 - 191; return 0.81837581041023811832e0 + (0.17979364149044223802e-1 + (0.22510330592753129006e-3 + (0.20344732868018175389e-5 + (0.13617902941839949718e-7 + (0.66799760083972474642e-10 + 0.22574701262222222222e-12 * t) * t) * t) * t) * t) * t; } case 96: { double t = 2*y100 - 193; return 0.85525144775685126237e0 + (0.18904632212547561026e-1 + (0.23764237370371255638e-3 + (0.21461248251306387979e-5 + (0.14299555071870523786e-7 + (0.69543803864694171934e-10 + 0.23158593688888888889e-12 * t) * t) * t) * t) * t) * t; } case 97: { double t = 2*y100 - 195; return 0.89402868170849933734e0 + (0.19881418399127202569e-1 + (0.25086793128395995798e-3 + (0.22633402747585233180e-5 + (0.15008997042116532283e-7 + (0.72357609075043941261e-10 + 0.23737194737777777778e-12 * t) * t) * t) * t) * t) * t; } case 98: { double t = 2*y100 - 197; return 0.93481333942870796363e0 + (0.20912536329780368893e-1 + (0.26481403465998477969e-3 + (0.23863447359754921676e-5 + (0.15746923065472184451e-7 + (0.75240468141720143653e-10 + 0.24309291271111111111e-12 * t) * t) * t) * t) * t) * t; } case 99: { double t = 2*y100 - 199; return 0.97771701335885035464e0 + (0.22000938572830479551e-1 + (0.27951610702682383001e-3 + (0.25153688325245314530e-5 + (0.16514019547822821453e-7 + (0.78191526829368231251e-10 + 
0.24873652355555555556e-12 * t) * t) * t) * t) * t) * t; } } // we only get here if y = 1, i.e. |x| < 4*eps, in which case // erfcx is within 1e-15 of 1.. return 1.0; } // erfcx_y100 /******************************************************************************/ /* Library function erfcx */ /******************************************************************************/ double erfcx(double x) { // Steven G. Johnson, October 2012. // This function combines a few different ideas. // First, for x > 50, it uses a continued-fraction expansion (same as // for the Faddeeva function, but with algebraic simplifications for z=i*x). // Second, for 0 <= x <= 50, it uses Chebyshev polynomial approximations, // but with two twists: // // a) It maps x to y = 4 / (4+x) in [0,1]. This simple transformation, // inspired by a similar transformation in the octave-forge/specfun // erfcx by Soren Hauberg, results in much faster Chebyshev convergence // than other simple transformations I have examined. // // b) Instead of using a single Chebyshev polynomial for the entire // [0,1] y interval, we break the interval up into 100 equal // subintervals, with a switch/lookup table, and use much lower // degree Chebyshev polynomials in each subinterval. This greatly // improves performance in my tests. // // For x < 0, we use the relationship erfcx(-x) = 2 exp(x^2) - erfc(x), // with the usual checks for overflow etcetera. // Performance-wise, it seems to be substantially faster than either // the SLATEC DERFC function [or an erfcx function derived therefrom] // or Cody's CALERF function (from netlib.org/specfun), while // retaining near machine precision in accuracy. 
if (x >= 0) { if (x > 50) { // continued-fraction expansion is faster const double ispi = 0.56418958354775628694807945156; // 1 / sqrt(pi) if (x > 5e7) { // 1-term expansion, important to avoid overflow */ return ispi / x; } else { // 5-term expansion (rely on compiler for CSE), simplified from: ispi / (x+0.5/(x+1/(x+1.5/(x+2/x)))) return ispi * ((x*x) * (x*x+4.5) + 2) / (x * ((x*x) * (x*x+5) + 3.75)); } } return erfcx_y100(400/(4+x)); } else { return x < -26.7 ? HUGE_VAL : (x < -6.1 ? 2*exp(x*x) : 2*exp(x*x) - erfcx_y100(400/(4-x))); } } // erfcx luametatex-2.10.08/source/libraries/libcerf/err_fcts.c000066400000000000000000000411021442250314700226700ustar00rootroot00000000000000/* Library libcerf: * Compute complex error functions, based on a new implementation of * Faddeeva's w_of_z. Also provide Dawson and Voigt functions. * * File err_fcts.c: * Computate Dawson, Voigt, and several error functions, * based on erfcx, im_w_of_x, w_of_z as implemented in separate files. * * Given w(z), the error functions are mostly straightforward * to compute, except for certain regions where we have to * switch to Taylor expansions to avoid cancellation errors * [e.g. near the origin for erf(z)]. * * Copyright: * (C) 2012 Massachusetts Institute of Technology * (C) 2013 Forschungszentrum Jülich GmbH * * Licence: * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * Authors: * Steven G. Johnson, Massachusetts Institute of Technology, 2012, core author * Joachim Wuttke, Forschungszentrum Jülich, 2013, package maintainer * * Website: * http://apps.jcns.fz-juelich.de/libcerf * * Revision history: * ../CHANGELOG * * Man pages: * cerf(3), dawson(3), voigt(3) */ #include "cerf.h" #include #include "defs.h" // defines _cerf_cmplx, NaN, C, cexp, ... const double spi2 = 0.8862269254527580136490837416705725913990; // sqrt(pi)/2 const double s2pi = 2.5066282746310005024157652848110; // sqrt(2*pi) const double pi = 3.141592653589793238462643383279503; /******************************************************************************/ /* Simple wrappers: cerfcx, cerfi, erfi, dawson */ /******************************************************************************/ _cerf_cmplx cerfcx(_cerf_cmplx z) { // Compute erfcx(z) = exp(z^2) erfc(z), // the complex underflow-compensated complementary error function, // trivially related to Faddeeva's w_of_z. return w_of_z(C(-cimag(z), creal(z))); } _cerf_cmplx cerfi(_cerf_cmplx z) { // Compute erfi(z) = -i erf(iz), // the rotated complex error function. _cerf_cmplx e = cerf(C(-cimag(z),creal(z))); return C(cimag(e), -creal(e)); } double erfi(double x) { // Compute erfi(x) = -i erf(ix), // the imaginary error function. return x*x > 720 ? (x > 0 ? Inf : -Inf) : exp(x*x) * im_w_of_x(x); } double dawson(double x) { // Compute dawson(x) = sqrt(pi)/2 * exp(-x^2) * erfi(x), // Dawson's integral for a real argument. 
return spi2 * im_w_of_x(x); } double re_w_of_z( double x, double y ) { return creal( w_of_z( C(x,y) ) ); } double im_w_of_z( double x, double y ) { return cimag( w_of_z( C(x,y) ) ); } /******************************************************************************/ /* voigt */ /******************************************************************************/ double voigt( double x, double sigma, double gamma ) { // Joachim Wuttke, January 2013. // Compute Voigt's convolution of a Gaussian // G(x,sigma) = 1/sqrt(2*pi)/|sigma| * exp(-x^2/2/sigma^2) // and a Lorentzian // L(x,gamma) = |gamma| / pi / ( x^2 + gamma^2 ), // namely // voigt(x,sigma,gamma) = // \int_{-infty}^{infty} dx' G(x',sigma) L(x-x',gamma) // using the relation // voigt(x,sigma,gamma) = Re{ w(z) } / sqrt(2*pi) / |sigma| // with // z = (x+i*|gamma|) / sqrt(2) / |sigma|. // Reference: Abramowitz&Stegun (1964), formula (7.4.13). double gam = gamma < 0 ? -gamma : gamma; double sig = sigma < 0 ? -sigma : sigma; if ( gam==0 ) { if ( sig==0 ) { // It's kind of a delta function return x ? 0 : Inf; } else { // It's a pure Gaussian return exp( -x*x/2/(sig*sig) ) / s2pi / sig; } } else { if ( sig==0 ) { // It's a pure Lorentzian return gam / pi / (x*x + gam*gam); } else { // Regular case, both parameters are nonzero _cerf_cmplx z = complex_mul_cr(C(x, gam), 1. / sqrt(2) / sig); return creal( w_of_z(z) ) / s2pi / sig; // TODO: correct and activate the following: // double w = sqrt(gam*gam+sig*sig); // to work in reduced units // _cerf_cmplx z = C(x/w,gam/w) / sqrt(2) / (sig/w); // return creal( w_of_z(z) ) / s2pi / (sig/w); } } } /******************************************************************************/ /* cerf */ /******************************************************************************/ _cerf_cmplx cerf(_cerf_cmplx z) { // Steven G. Johnson, October 2012. // Compute erf(z), the complex error function, // using w_of_z except for certain regions. 
double x = creal(z), y = cimag(z); if (y == 0) return C(erf(x), y); // preserve sign of 0 if (x == 0) // handle separately for speed & handling of y = Inf or NaN return C(x, // preserve sign of 0 /* handle y -> Inf limit manually, since exp(y^2) -> Inf but Im[w(y)] -> 0, so IEEE will give us a NaN when it should be Inf */ y*y > 720 ? (y > 0 ? Inf : -Inf) : exp(y*y) * im_w_of_x(y)); double mRe_z2 = (y - x) * (x + y); // Re(-z^2), being careful of overflow double mIm_z2 = -2*x*y; // Im(-z^2) if (mRe_z2 < -750) // underflow return (x >= 0 ? C(1.0, 0.0) : C(-1.0, 0.0));; /* Handle positive and negative x via different formulas, using the mirror symmetries of w, to avoid overflow/underflow problems from multiplying exponentially large and small quantities. */ if (x >= 0) { if (x < 8e-2) { if (fabs(y) < 1e-2) goto taylor; else if (fabs(mIm_z2) < 5e-3 && x < 5e-3) goto taylor_erfi; } /* don't use complex exp function, since that will produce spurious NaN values when multiplying w in an overflow situation. */ return complex_sub_rc(1.0, complex_mul_rc(exp(mRe_z2), complex_mul_cc(C(cos(mIm_z2), sin(mIm_z2)), w_of_z(C(-y, x))))); } else { // x < 0 if (x > -8e-2) { // duplicate from above to avoid fabs(x) call if (fabs(y) < 1e-2) goto taylor; else if (fabs(mIm_z2) < 5e-3 && x > -5e-3) goto taylor_erfi; } else if (isnan(x)) return C(NaN, y == 0 ? 0 : NaN); /* don't use complex exp function, since that will produce spurious NaN values when multiplying w in an overflow situation. */ return complex_add_rc(-1.0, complex_mul_rc(exp(mRe_z2), complex_mul_cc(C(cos(mIm_z2), sin(mIm_z2)), w_of_z(C(y, -x))))); } // Use Taylor series for small |z|, to avoid cancellation inaccuracy // erf(z) = 2/sqrt(pi) * z * (1 - z^2/3 + z^4/10 - z^6/42 + z^8/216 + ...) 
taylor: { _cerf_cmplx mz2 = C(mRe_z2, mIm_z2); // -z^2 return complex_mul_cc(z, complex_add_rc(1.1283791670955125739, complex_mul_cc(mz2, complex_add_rc(0.37612638903183752464, complex_mul_cc(mz2, complex_add_rc(0.11283791670955125739, complex_mul_cc(mz2, complex_add_rc(0.026866170645131251760, complex_mul_cr(mz2, 0.0052239776254421878422))))))))); } /* for small |x| and small |xy|, use Taylor series to avoid cancellation inaccuracy: erf(x+iy) = erf(iy) + 2*exp(y^2)/sqrt(pi) * [ x * (1 - x^2 * (1+2y^2)/3 + x^4 * (3+12y^2+4y^4)/30 + ... - i * x^2 * y * (1 - x^2 * (3+2y^2)/6 + ...) ] where: erf(iy) = exp(y^2) * Im[w(y)] */ taylor_erfi: { double x2 = x*x, y2 = y*y; double expy2 = exp(y2); return C (expy2 * x * (1.1283791670955125739 - x2 * (0.37612638903183752464 + 0.75225277806367504925*y2) + x2*x2 * (0.11283791670955125739 + y2 * (0.45135166683820502956 + 0.15045055561273500986*y2))), expy2 * (im_w_of_x(y) - x2*y * (1.1283791670955125739 - x2 * (0.56418958354775628695 + 0.37612638903183752464*y2)))); } } // cerf /******************************************************************************/ /* cerfc */ /******************************************************************************/ _cerf_cmplx cerfc(_cerf_cmplx z) { // Steven G. Johnson, October 2012. // Compute erfc(z) = 1 - erf(z), the complex complementary error function, // using w_of_z except for certain regions. double x = creal(z), y = cimag(z); if (x == 0.) return C(1, /* handle y -> Inf limit manually, since exp(y^2) -> Inf but Im[w(y)] -> 0, so IEEE will give us a NaN when it should be Inf */ y*y > 720 ? (y > 0 ? -Inf : Inf) : -exp(y*y) * im_w_of_x(y)); if (y == 0.) { if (x*x > 750) // underflow return C(x >= 0 ? 0.0 : 2.0, -y); // preserve sign of 0 return C(x >= 0 ? exp(-x*x) * erfcx(x) : 2. 
- exp(-x*x) * erfcx(-x), -y); // preserve sign of zero } double mRe_z2 = (y - x) * (x + y); // Re(-z^2), being careful of overflow double mIm_z2 = -2*x*y; // Im(-z^2) if (mRe_z2 < -750) // underflow return C((x >= 0 ? 0.0 : 2.0), 0.0); if (x >= 0) return cexp(complex_mul_cc(C(mRe_z2, mIm_z2), w_of_z(C(-y,x)))); else return complex_sub_rc(2.0, complex_mul_cc(cexp(C(mRe_z2, mIm_z2)), w_of_z(C(y, -x)))); } // cerfc /******************************************************************************/ /* cdawson */ /******************************************************************************/ _cerf_cmplx cdawson(_cerf_cmplx z) { // Steven G. Johnson, October 2012. // Compute Dawson(z) = sqrt(pi)/2 * exp(-z^2) * erfi(z), // Dawson's integral for a complex argument, // using w_of_z except for certain regions. double x = creal(z), y = cimag(z); // handle axes separately for speed & proper handling of x or y = Inf or NaN if (y == 0) return C(spi2 * im_w_of_x(x), -y); // preserve sign of 0 if (x == 0) { double y2 = y*y; if (y2 < 2.5e-5) { // Taylor expansion return C(x, // preserve sign of 0 y * (1. + y2 * (0.6666666666666666666666666666666666666667 + y2 * 0.26666666666666666666666666666666666667))); } return C(x, // preserve sign of 0 spi2 * (y >= 0 ? exp(y2) - erfcx(y) : erfcx(-y) - exp(y2))); } double mRe_z2 = (y - x) * (x + y); // Re(-z^2), being careful of overflow double mIm_z2 = -2*x*y; // Im(-z^2) _cerf_cmplx mz2 = C(mRe_z2, mIm_z2); // -z^2 /* Handle positive and negative x via different formulas, using the mirror symmetries of w, to avoid overflow/underflow problems from multiplying exponentially large and small quantities. 
*/ if (y >= 0) { if (y < 5e-3) { if (fabs(x) < 5e-3) goto taylor; else if (fabs(mIm_z2) < 5e-3) goto taylor_realaxis; } _cerf_cmplx res = complex_sub_cc(cexp(mz2), w_of_z(z)); return complex_mul_rc(spi2, C(-cimag(res), creal(res))); } else { // y < 0 if (y > -5e-3) { // duplicate from above to avoid fabs(x) call if (fabs(x) < 5e-3) goto taylor; else if (fabs(mIm_z2) < 5e-3) goto taylor_realaxis; } else if (isnan(y)) return C(x == 0 ? 0 : NaN, NaN); { _cerf_cmplx res = complex_sub_cc(w_of_z(complex_neg(z)), cexp(mz2)); return complex_mul_rc(spi2, C(-cimag(res), creal(res))); } } // Use Taylor series for small |z|, to avoid cancellation inaccuracy // dawson(z) = z - 2/3 z^3 + 4/15 z^5 + ... taylor: return complex_mul_cc(z, complex_add_rc(1., complex_mul_cc(mz2, complex_add_rc(0.6666666666666666666666666666666666666667, complex_mul_cr(mz2, 0.2666666666666666666666666666666666666667))))); /* for small |y| and small |xy|, use Taylor series to avoid cancellation inaccuracy: dawson(x + iy) = D + y^2 (D + x - 2Dx^2) + y^4 (D/2 + 5x/6 - 2Dx^2 - x^3/3 + 2Dx^4/3) + iy [ (1-2Dx) + 2/3 y^2 (1 - 3Dx - x^2 + 2Dx^3) + y^4/15 (4 - 15Dx - 9x^2 + 20Dx^3 + 2x^4 - 4Dx^5) ] + ... where D = dawson(x) However, for large |x|, 2Dx -> 1 which gives cancellation problems in this series (many of the leading terms cancel). So, for large |x|, we need to substitute a continued-fraction expansion for D. dawson(x) = 0.5 / (x-0.5/(x-1/(x-1.5/(x-2/(x-2.5/(x...)))))) The 6 terms shown here seems to be the minimum needed to be accurate as soon as the simpler Taylor expansion above starts breaking down. 
Using this 6-term expansion, factoring out the denominator, and simplifying with Maple, we obtain: Re dawson(x + iy) * (-15 + 90x^2 - 60x^4 + 8x^6) / x = 33 - 28x^2 + 4x^4 + y^2 (18 - 4x^2) + 4 y^4 Im dawson(x + iy) * (-15 + 90x^2 - 60x^4 + 8x^6) / y = -15 + 24x^2 - 4x^4 + 2/3 y^2 (6x^2 - 15) - 4 y^4 Finally, for |x| > 5e7, we can use a simpler 1-term continued-fraction expansion for the real part, and a 2-term expansion for the imaginary part. (This avoids overflow problems for huge |x|.) This yields: Re dawson(x + iy) = [1 + y^2 (1 + y^2/2 - (xy)^2/3)] / (2x) Im dawson(x + iy) = y [ -1 - 2/3 y^2 + y^4/15 (2x^2 - 4) ] / (2x^2 - 1) */ taylor_realaxis: { double x2 = x*x; if (x2 > 1600) { // |x| > 40 double y2 = y*y; if (x2 > 25e14) {// |x| > 5e7 double xy2 = (x*y)*(x*y); return C((0.5 + y2 * (0.5 + 0.25*y2 - 0.16666666666666666667*xy2)) / x, y * (-1 + y2 * (-0.66666666666666666667 + 0.13333333333333333333*xy2 - 0.26666666666666666667*y2)) / (2*x2 - 1)); } return complex_mul_rc((1. / (-15 + x2 * (90 + x2 * (-60 + 8 * x2)))), C(x * (33 + x2 * (-28 + 4 * x2) + +y2 * (18 - 4 * x2 + 4 * y2)), +y * (-15 + x2 * (24 - 4 * x2) + +y2 * (4 * x2 - 10 - 4 * y2)))); } else { double D = spi2 * im_w_of_x(x); double y2 = y*y; return C (D + y2 * (D + x - 2*D*x2) + y2*y2 * (D * (0.5 - x2 * (2 - 0.66666666666666666667*x2)) + x * (0.83333333333333333333 - 0.33333333333333333333 * x2)), y * (1 - 2*D*x + y2 * 0.66666666666666666667 * (1 - x2 - D*x * (3 - 2*x2)) + y2*y2 * (0.26666666666666666667 - x2 * (0.6 - 0.13333333333333333333 * x2) - D*x * (1 - x2 * (1.3333333333333333333 - 0.26666666666666666667 * x2))))); } } } // cdawson luametatex-2.10.08/source/libraries/libcerf/experimental.c000066400000000000000000000143141442250314700235630ustar00rootroot00000000000000/******************************************************************************/ /* Experimental code */ /******************************************************************************/ /* Compute w_of_z via Fourier integration 
using Ooura-Mori transform. Agreement with Johnson's code usually < 1E-15, so far always < 1E-13. Todo: - sign for negative x or y - determine application limits - more systematical comparison with Johnson's code - comparison with Abrarov&Quine */ #define max_iter_int 10 #define num_range 5 #define PI 3.14159265358979323846L /* pi */ #define SQR(x) ((x)*(x)) #include double cerf_experimental_integration( int kind, double x, double y ) // kind: 0 cos, 1 sin transform (precomputing arrays[2] depend on this) { // unused parameters static int mu = 0; int intgr_debug = 0; static double intgr_delta=2.2e-16, intgr_eps=5.5e-20; if( x<0 || y<0 ) { fprintf( stderr, "negative arguments not yet implemented\n" ); exit( EDOM ); } double w = sqrt(2)*x; double gamma = sqrt(2)*y; int iter; int kaux; int isig; int N; int j; // range long double S=0; // trapezoid sum long double S_last; // - in last iteration long double s; // term contributing to S long double T; // sum of abs(s) // precomputed coefficients static int firstCall=1; static int iterDone[2][num_range]; // Nm,Np,ak,bk are precomputed up to this static int Nm[num_range][max_iter_int]; static int Np[num_range][max_iter_int]; static long double *ak[2][num_range][max_iter_int]; static long double *bk[2][num_range][max_iter_int]; // auxiliary for computing ak and bk long double u; long double e; long double tk; long double chi; long double dchi; long double h; long double k; long double f; long double ahk; long double chk; long double dhk; double p; double q; const double Smin=2e-20; // to assess worst truncation error // dynamic initialization upon first call if ( firstCall ) { for ( j=0; jiterDone[kind][j] ) { if ( N>1e6 ) return -3; // integral limits overflow Nm[j][iter] = N; Np[j][iter] = N; if ( !( ak[kind][j][iter]=malloc((sizeof(long double))* (Nm[j][iter]+1+Np[j][iter])) ) || !( bk[kind][j][iter]=malloc((sizeof(long double))* (Nm[j][iter]+1+Np[j][iter])) ) ) { fprintf( stderr, "Workspace allocation failed\n" ); exit( 
ENOMEM ); } h = logl( logl( 42*N/intgr_delta/Smin ) / p ) / N; // 42=(pi+1)*10 isig=1-2*(Nm[j][iter]&1); for ( kaux=-Nm[j][iter]; kaux<=Np[j][iter]; ++kaux ) { k = kaux; if( !kind ) k -= 0.5; u = k*h; chi = 2*p*sinhl(u) + 2*q*u; dchi = 2*p*coshl(u) + 2*q; if ( u==0 ) { if ( k!=0 ) return -4; // integration variable underflow // special treatment to bridge singularity at u=0 ahk = PI/h/dchi; dhk = 0.5; chk = sin( ahk ); } else { if ( -chi>DBL_MAX_EXP/2 ) return -5; // integral transformation overflow e = expl( -chi ); ahk = PI/h * u/(1-e); dhk = 1/(1-e) - u*e*dchi/SQR(1-e); chk = e>1 ? ( kind ? sinl( PI*k/(1-e) ) : cosl( PI*k/(1-e) ) ) : isig * sinl( PI*k*e/(1-e) ); } ak[kind][j][iter][kaux+Nm[j][iter]] = ahk; bk[kind][j][iter][kaux+Nm[j][iter]] = dhk * chk; isig = -isig; } iterDone[kind][j] = iter; } // integrate according to trapezoidal rule S_last = S; S = 0; T = 0; for ( kaux=-Nm[j][iter]; kaux<=Np[j][iter]; ++kaux ) { tk = ak[kind][j][iter][kaux+Nm[j][iter]] / w; f = expl(-tk*gamma-SQR(tk)/2); // Fourier kernel if ( mu ) f /= tk; // TODO s = bk[kind][j][iter][kaux+Nm[j][iter]] * f; S += s; T += fabsl(s); if( intgr_debug & 2 ) printf( "%2i %6i %12.4Lg %12.4Lg" " %12.4Lg %12.4Lg %12.4Lg %12.4Lg\n", iter, kaux, ak[kind][j][iter][kaux+Nm[j][iter]], bk[kind][j][iter][kaux+Nm[j][iter]], f, s, S, T ); } if( intgr_debug & 1 ) printf( "%23.17Le %23.17Le\n", S, T ); // intgr_num_of_terms += Np[j][iter]-(-Nm[j][iter])+1; // termination criteria if ( intgr_debug & 4 ) return -1; // we want to inspect just one sum else if ( S < 0 ) return -6; // cancelling terms lead to negative S else if ( intgr_eps*T > intgr_delta*fabs(S) ) return -2; // cancellation else if ( iter && fabs(S-S_last) + intgr_eps*T < intgr_delta*fabs(S) ) return S*sqrt(2*PI)/w; // success // factor 2 from int_-infty^+infty = 2 * int_0^+infty // factor pi/w from formula 48 in kww paper // factor 1/sqrt(2*pi) from Gaussian N *= 2; // retry with more points } return -9; // not converged } double 
cerf_experimental_imw( double x, double y ) { return cerf_experimental_integration( 1, x, y ); } double cerf_experimental_rew( double x, double y ) { return cerf_experimental_integration( 0, x, y ); } luametatex-2.10.08/source/libraries/libcerf/im_w_of_x.c000066400000000000000000001112551442250314700230360ustar00rootroot00000000000000/* Library libcerf: * Compute complex error functions, based on a new implementation of * Faddeeva's w_of_z. Also provide Dawson and Voigt functions. * * File im_w_of_x.c: * Compute scaled Dawson integral im_w_of_x(x) = 2*dawson(x)/sqrt(pi), * equivalent to the imaginary part of the Faddeeva function w(x) for real x. * * Copyright: * (C) 2012 Massachusetts Institute of Technology * (C) 2013 Forschungszentrum Jülich GmbH * * Licence: * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * Authors: * Steven G. 
Johnson, Massachusetts Institute of Technology, 2012, core author * Joachim Wuttke, Forschungszentrum Jülich, 2013, package maintainer * * Website: * http://apps.jcns.fz-juelich.de/libcerf * * Revision history: * ../CHANGELOG * * Manual page: * man 3 im_w_of_x */ #include "cerf.h" #include #include "defs.h" // defines _cerf_cmplx, NaN, C, cexp, ... /******************************************************************************/ /* Lookup-table for Chebyshev polynomials for smaller |x| */ /******************************************************************************/ static double w_im_y100(double y100, double x) { // Steven G. Johnson, October 2012. // Given y100=100*y, where y = 1/(1+x) for x >= 0, compute w_im(x). // Uses a look-up table of 100 different Chebyshev polynomials // for y intervals [0,0.01], [0.01,0.02], ...., [0.99,1], generated // with the help of Maple and a little shell script. // This allows the Chebyshev polynomials to be of significantly lower // degree (about 1/30) compared to fitting the whole [0,1] interval // with a single polynomial. 
switch ((int) y100) { case 0: { double t = 2*y100 - 1; return 0.28351593328822191546e-2 + (0.28494783221378400759e-2 + (0.14427470563276734183e-4 + (0.10939723080231588129e-6 + (0.92474307943275042045e-9 + (0.89128907666450075245e-11 + 0.92974121935111111110e-13 * t) * t) * t) * t) * t) * t; } case 1: { double t = 2*y100 - 3; return 0.85927161243940350562e-2 + (0.29085312941641339862e-2 + (0.15106783707725582090e-4 + (0.11716709978531327367e-6 + (0.10197387816021040024e-8 + (0.10122678863073360769e-10 + 0.10917479678400000000e-12 * t) * t) * t) * t) * t) * t; } case 2: { double t = 2*y100 - 5; return 0.14471159831187703054e-1 + (0.29703978970263836210e-2 + (0.15835096760173030976e-4 + (0.12574803383199211596e-6 + (0.11278672159518415848e-8 + (0.11547462300333495797e-10 + 0.12894535335111111111e-12 * t) * t) * t) * t) * t) * t; } case 3: { double t = 2*y100 - 7; return 0.20476320420324610618e-1 + (0.30352843012898665856e-2 + (0.16617609387003727409e-4 + (0.13525429711163116103e-6 + (0.12515095552507169013e-8 + (0.13235687543603382345e-10 + 0.15326595042666666667e-12 * t) * t) * t) * t) * t) * t; } case 4: { double t = 2*y100 - 9; return 0.26614461952489004566e-1 + (0.31034189276234947088e-2 + (0.17460268109986214274e-4 + (0.14582130824485709573e-6 + (0.13935959083809746345e-8 + (0.15249438072998932900e-10 + 0.18344741882133333333e-12 * t) * t) * t) * t) * t) * t; } case 5: { double t = 2*y100 - 11; return 0.32892330248093586215e-1 + (0.31750557067975068584e-2 + (0.18369907582308672632e-4 + (0.15761063702089457882e-6 + (0.15577638230480894382e-8 + (0.17663868462699097951e-10 + (0.22126732680711111111e-12 + 0.30273474177737853668e-14 * t) * t) * t) * t) * t) * t) * t; } case 6: { double t = 2*y100 - 13; return 0.39317207681134336024e-1 + (0.32504779701937539333e-2 + (0.19354426046513400534e-4 + (0.17081646971321290539e-6 + (0.17485733959327106250e-8 + (0.20593687304921961410e-10 + (0.26917401949155555556e-12 + 0.38562123837725712270e-14 * t) * t) * t) * t) * t) * t) * 
t; } case 7: { double t = 2*y100 - 15; return 0.45896976511367738235e-1 + (0.33300031273110976165e-2 + (0.20423005398039037313e-4 + (0.18567412470376467303e-6 + (0.19718038363586588213e-8 + (0.24175006536781219807e-10 + (0.33059982791466666666e-12 + 0.49756574284439426165e-14 * t) * t) * t) * t) * t) * t) * t; } case 8: { double t = 2*y100 - 17; return 0.52640192524848962855e-1 + (0.34139883358846720806e-2 + (0.21586390240603337337e-4 + (0.20247136501568904646e-6 + (0.22348696948197102935e-8 + (0.28597516301950162548e-10 + (0.41045502119111111110e-12 + 0.65151614515238361946e-14 * t) * t) * t) * t) * t) * t) * t; } case 9: { double t = 2*y100 - 19; return 0.59556171228656770456e-1 + (0.35028374386648914444e-2 + (0.22857246150998562824e-4 + (0.22156372146525190679e-6 + (0.25474171590893813583e-8 + (0.34122390890697400584e-10 + (0.51593189879111111110e-12 + 0.86775076853908006938e-14 * t) * t) * t) * t) * t) * t) * t; } case 10: { double t = 2*y100 - 21; return 0.66655089485108212551e-1 + (0.35970095381271285568e-2 + (0.24250626164318672928e-4 + (0.24339561521785040536e-6 + (0.29221990406518411415e-8 + (0.41117013527967776467e-10 + (0.65786450716444444445e-12 + 0.11791885745450623331e-13 * t) * t) * t) * t) * t) * t) * t; } case 11: { double t = 2*y100 - 23; return 0.73948106345519174661e-1 + (0.36970297216569341748e-2 + (0.25784588137312868792e-4 + (0.26853012002366752770e-6 + (0.33763958861206729592e-8 + (0.50111549981376976397e-10 + (0.85313857496888888890e-12 + 0.16417079927706899860e-13 * t) * t) * t) * t) * t) * t) * t; } case 12: { double t = 2*y100 - 25; return 0.81447508065002963203e-1 + (0.38035026606492705117e-2 + (0.27481027572231851896e-4 + (0.29769200731832331364e-6 + (0.39336816287457655076e-8 + (0.61895471132038157624e-10 + (0.11292303213511111111e-11 + 0.23558532213703884304e-13 * t) * t) * t) * t) * t) * t) * t; } case 13: { double t = 2*y100 - 27; return 0.89166884027582716628e-1 + (0.39171301322438946014e-2 + (0.29366827260422311668e-4 + 
(0.33183204390350724895e-6 + (0.46276006281647330524e-8 + (0.77692631378169813324e-10 + (0.15335153258844444444e-11 + 0.35183103415916026911e-13 * t) * t) * t) * t) * t) * t) * t; } case 14: { double t = 2*y100 - 29; return 0.97121342888032322019e-1 + (0.40387340353207909514e-2 + (0.31475490395950776930e-4 + (0.37222714227125135042e-6 + (0.55074373178613809996e-8 + (0.99509175283990337944e-10 + (0.21552645758222222222e-11 + 0.55728651431872687605e-13 * t) * t) * t) * t) * t) * t) * t; } case 15: { double t = 2*y100 - 31; return 0.10532778218603311137e0 + (0.41692873614065380607e-2 + (0.33849549774889456984e-4 + (0.42064596193692630143e-6 + (0.66494579697622432987e-8 + (0.13094103581931802337e-9 + (0.31896187409777777778e-11 + 0.97271974184476560742e-13 * t) * t) * t) * t) * t) * t) * t; } case 16: { double t = 2*y100 - 33; return 0.11380523107427108222e0 + (0.43099572287871821013e-2 + (0.36544324341565929930e-4 + (0.47965044028581857764e-6 + (0.81819034238463698796e-8 + (0.17934133239549647357e-9 + (0.50956666166186293627e-11 + (0.18850487318190638010e-12 + 0.79697813173519853340e-14 * t) * t) * t) * t) * t) * t) * t) * t; } case 17: { double t = 2*y100 - 35; return 0.12257529703447467345e0 + (0.44621675710026986366e-2 + (0.39634304721292440285e-4 + (0.55321553769873381819e-6 + (0.10343619428848520870e-7 + (0.26033830170470368088e-9 + (0.87743837749108025357e-11 + (0.34427092430230063401e-12 + 0.10205506615709843189e-13 * t) * t) * t) * t) * t) * t) * t) * t; } case 18: { double t = 2*y100 - 37; return 0.13166276955656699478e0 + (0.46276970481783001803e-2 + (0.43225026380496399310e-4 + (0.64799164020016902656e-6 + (0.13580082794704641782e-7 + (0.39839800853954313927e-9 + (0.14431142411840000000e-10 + 0.42193457308830027541e-12 * t) * t) * t) * t) * t) * t) * t; } case 19: { double t = 2*y100 - 39; return 0.14109647869803356475e0 + (0.48088424418545347758e-2 + (0.47474504753352150205e-4 + (0.77509866468724360352e-6 + (0.18536851570794291724e-7 + 
(0.60146623257887570439e-9 + (0.18533978397305276318e-10 + (0.41033845938901048380e-13 - 0.46160680279304825485e-13 * t) * t) * t) * t) * t) * t) * t) * t; } case 20: { double t = 2*y100 - 41; return 0.15091057940548936603e0 + (0.50086864672004685703e-2 + (0.52622482832192230762e-4 + (0.95034664722040355212e-6 + (0.25614261331144718769e-7 + (0.80183196716888606252e-9 + (0.12282524750534352272e-10 + (-0.10531774117332273617e-11 - 0.86157181395039646412e-13 * t) * t) * t) * t) * t) * t) * t) * t; } case 21: { double t = 2*y100 - 43; return 0.16114648116017010770e0 + (0.52314661581655369795e-2 + (0.59005534545908331315e-4 + (0.11885518333915387760e-5 + (0.33975801443239949256e-7 + (0.82111547144080388610e-9 + (-0.12357674017312854138e-10 + (-0.24355112256914479176e-11 - 0.75155506863572930844e-13 * t) * t) * t) * t) * t) * t) * t) * t; } case 22: { double t = 2*y100 - 45; return 0.17185551279680451144e0 + (0.54829002967599420860e-2 + (0.67013226658738082118e-4 + (0.14897400671425088807e-5 + (0.40690283917126153701e-7 + (0.44060872913473778318e-9 + (-0.52641873433280000000e-10 - 0.30940587864543343124e-11 * t) * t) * t) * t) * t) * t) * t; } case 23: { double t = 2*y100 - 47; return 0.18310194559815257381e0 + (0.57701559375966953174e-2 + (0.76948789401735193483e-4 + (0.18227569842290822512e-5 + (0.41092208344387212276e-7 + (-0.44009499965694442143e-9 + (-0.92195414685628803451e-10 + (-0.22657389705721753299e-11 + 0.10004784908106839254e-12 * t) * t) * t) * t) * t) * t) * t) * t; } case 24: { double t = 2*y100 - 49; return 0.19496527191546630345e0 + (0.61010853144364724856e-2 + (0.88812881056342004864e-4 + (0.21180686746360261031e-5 + (0.30652145555130049203e-7 + (-0.16841328574105890409e-8 + (-0.11008129460612823934e-9 + (-0.12180794204544515779e-12 + 0.15703325634590334097e-12 * t) * t) * t) * t) * t) * t) * t) * t; } case 25: { double t = 2*y100 - 51; return 0.20754006813966575720e0 + (0.64825787724922073908e-2 + (0.10209599627522311893e-3 + 
(0.22785233392557600468e-5 + (0.73495224449907568402e-8 + (-0.29442705974150112783e-8 + (-0.94082603434315016546e-10 + (0.23609990400179321267e-11 + 0.14141908654269023788e-12 * t) * t) * t) * t) * t) * t) * t) * t; } case 26: { double t = 2*y100 - 53; return 0.22093185554845172146e0 + (0.69182878150187964499e-2 + (0.11568723331156335712e-3 + (0.22060577946323627739e-5 + (-0.26929730679360840096e-7 + (-0.38176506152362058013e-8 + (-0.47399503861054459243e-10 + (0.40953700187172127264e-11 + 0.69157730376118511127e-13 * t) * t) * t) * t) * t) * t) * t) * t; } case 27: { double t = 2*y100 - 55; return 0.23524827304057813918e0 + (0.74063350762008734520e-2 + (0.12796333874615790348e-3 + (0.18327267316171054273e-5 + (-0.66742910737957100098e-7 + (-0.40204740975496797870e-8 + (0.14515984139495745330e-10 + (0.44921608954536047975e-11 - 0.18583341338983776219e-13 * t) * t) * t) * t) * t) * t) * t) * t; } case 28: { double t = 2*y100 - 57; return 0.25058626331812744775e0 + (0.79377285151602061328e-2 + (0.13704268650417478346e-3 + (0.11427511739544695861e-5 + (-0.10485442447768377485e-6 + (-0.34850364756499369763e-8 + (0.72656453829502179208e-10 + (0.36195460197779299406e-11 - 0.84882136022200714710e-13 * t) * t) * t) * t) * t) * t) * t) * t; } case 29: { double t = 2*y100 - 59; return 0.26701724900280689785e0 + (0.84959936119625864274e-2 + (0.14112359443938883232e-3 + (0.17800427288596909634e-6 + (-0.13443492107643109071e-6 + (-0.23512456315677680293e-8 + (0.11245846264695936769e-9 + (0.19850501334649565404e-11 - 0.11284666134635050832e-12 * t) * t) * t) * t) * t) * t) * t) * t; } case 30: { double t = 2*y100 - 61; return 0.28457293586253654144e0 + (0.90581563892650431899e-2 + (0.13880520331140646738e-3 + (-0.97262302362522896157e-6 + (-0.15077100040254187366e-6 + (-0.88574317464577116689e-9 + (0.12760311125637474581e-9 + (0.20155151018282695055e-12 - 0.10514169375181734921e-12 * t) * t) * t) * t) * t) * t) * t) * t; } case 31: { double t = 2*y100 - 63; return 
0.30323425595617385705e0 + (0.95968346790597422934e-2 + (0.12931067776725883939e-3 + (-0.21938741702795543986e-5 + (-0.15202888584907373963e-6 + (0.61788350541116331411e-9 + (0.11957835742791248256e-9 + (-0.12598179834007710908e-11 - 0.75151817129574614194e-13 * t) * t) * t) * t) * t) * t) * t) * t; } case 32: { double t = 2*y100 - 65; return 0.32292521181517384379e0 + (0.10082957727001199408e-1 + (0.11257589426154962226e-3 + (-0.33670890319327881129e-5 + (-0.13910529040004008158e-6 + (0.19170714373047512945e-8 + (0.94840222377720494290e-10 + (-0.21650018351795353201e-11 - 0.37875211678024922689e-13 * t) * t) * t) * t) * t) * t) * t) * t; } case 33: { double t = 2*y100 - 67; return 0.34351233557911753862e0 + (0.10488575435572745309e-1 + (0.89209444197248726614e-4 + (-0.43893459576483345364e-5 + (-0.11488595830450424419e-6 + (0.28599494117122464806e-8 + (0.61537542799857777779e-10 - 0.24935749227658002212e-11 * t) * t) * t) * t) * t) * t) * t; } case 34: { double t = 2*y100 - 69; return 0.36480946642143669093e0 + (0.10789304203431861366e-1 + (0.60357993745283076834e-4 + (-0.51855862174130669389e-5 + (-0.83291664087289801313e-7 + (0.33898011178582671546e-8 + (0.27082948188277716482e-10 + (-0.23603379397408694974e-11 + 0.19328087692252869842e-13 * t) * t) * t) * t) * t) * t) * t) * t; } case 35: { double t = 2*y100 - 71; return 0.38658679935694939199e0 + (0.10966119158288804999e-1 + (0.27521612041849561426e-4 + (-0.57132774537670953638e-5 + (-0.48404772799207914899e-7 + (0.35268354132474570493e-8 + (-0.32383477652514618094e-11 + (-0.19334202915190442501e-11 + 0.32333189861286460270e-13 * t) * t) * t) * t) * t) * t) * t) * t; } case 36: { double t = 2*y100 - 73; return 0.40858275583808707870e0 + (0.11006378016848466550e-1 + (-0.76396376685213286033e-5 + (-0.59609835484245791439e-5 + (-0.13834610033859313213e-7 + (0.33406952974861448790e-8 + (-0.26474915974296612559e-10 + (-0.13750229270354351983e-11 + 0.36169366979417390637e-13 * t) * t) * t) * t) * t) * t) * t) * t; } 
case 37: { double t = 2*y100 - 75; return 0.43051714914006682977e0 + (0.10904106549500816155e-1 + (-0.43477527256787216909e-4 + (-0.59429739547798343948e-5 + (0.17639200194091885949e-7 + (0.29235991689639918688e-8 + (-0.41718791216277812879e-10 + (-0.81023337739508049606e-12 + 0.33618915934461994428e-13 * t) * t) * t) * t) * t) * t) * t) * t; } case 38: { double t = 2*y100 - 77; return 0.45210428135559607406e0 + (0.10659670756384400554e-1 + (-0.78488639913256978087e-4 + (-0.56919860886214735936e-5 + (0.44181850467477733407e-7 + (0.23694306174312688151e-8 + (-0.49492621596685443247e-10 + (-0.31827275712126287222e-12 + 0.27494438742721623654e-13 * t) * t) * t) * t) * t) * t) * t) * t; } case 39: { double t = 2*y100 - 79; return 0.47306491195005224077e0 + (0.10279006119745977570e-1 + (-0.11140268171830478306e-3 + (-0.52518035247451432069e-5 + (0.64846898158889479518e-7 + (0.17603624837787337662e-8 + (-0.51129481592926104316e-10 + (0.62674584974141049511e-13 + 0.20055478560829935356e-13 * t) * t) * t) * t) * t) * t) * t) * t; } case 40: { double t = 2*y100 - 81; return 0.49313638965719857647e0 + (0.97725799114772017662e-2 + (-0.14122854267291533334e-3 + (-0.46707252568834951907e-5 + (0.79421347979319449524e-7 + (0.11603027184324708643e-8 + (-0.48269605844397175946e-10 + (0.32477251431748571219e-12 + 0.12831052634143527985e-13 * t) * t) * t) * t) * t) * t) * t) * t; } case 41: { double t = 2*y100 - 83; return 0.51208057433416004042e0 + (0.91542422354009224951e-2 + (-0.16726530230228647275e-3 + (-0.39964621752527649409e-5 + (0.88232252903213171454e-7 + (0.61343113364949928501e-9 + (-0.42516755603130443051e-10 + (0.47910437172240209262e-12 + 0.66784341874437478953e-14 * t) * t) * t) * t) * t) * t) * t) * t; } case 42: { double t = 2*y100 - 85; return 0.52968945458607484524e0 + (0.84400880445116786088e-2 + (-0.18908729783854258774e-3 + (-0.32725905467782951931e-5 + (0.91956190588652090659e-7 + (0.14593989152420122909e-9 + (-0.35239490687644444445e-10 + 
0.54613829888448694898e-12 * t) * t) * t) * t) * t) * t) * t; } case 43: { double t = 2*y100 - 87; return 0.54578857454330070965e0 + (0.76474155195880295311e-2 + (-0.20651230590808213884e-3 + (-0.25364339140543131706e-5 + (0.91455367999510681979e-7 + (-0.23061359005297528898e-9 + (-0.27512928625244444444e-10 + 0.54895806008493285579e-12 * t) * t) * t) * t) * t) * t) * t; } case 44: { double t = 2*y100 - 89; return 0.56023851910298493910e0 + (0.67938321739997196804e-2 + (-0.21956066613331411760e-3 + (-0.18181127670443266395e-5 + (0.87650335075416845987e-7 + (-0.51548062050366615977e-9 + (-0.20068462174044444444e-10 + 0.50912654909758187264e-12 * t) * t) * t) * t) * t) * t) * t; } case 45: { double t = 2*y100 - 91; return 0.57293478057455721150e0 + (0.58965321010394044087e-2 + (-0.22841145229276575597e-3 + (-0.11404605562013443659e-5 + (0.81430290992322326296e-7 + (-0.71512447242755357629e-9 + (-0.13372664928000000000e-10 + 0.44461498336689298148e-12 * t) * t) * t) * t) * t) * t) * t; } case 46: { double t = 2*y100 - 93; return 0.58380635448407827360e0 + (0.49717469530842831182e-2 + (-0.23336001540009645365e-3 + (-0.51952064448608850822e-6 + (0.73596577815411080511e-7 + (-0.84020916763091566035e-9 + (-0.76700972702222222221e-11 + 0.36914462807972467044e-12 * t) * t) * t) * t) * t) * t) * t; } case 47: { double t = 2*y100 - 95; return 0.59281340237769489597e0 + (0.40343592069379730568e-2 + (-0.23477963738658326185e-3 + (0.34615944987790224234e-7 + (0.64832803248395814574e-7 + (-0.90329163587627007971e-9 + (-0.30421940400000000000e-11 + 0.29237386653743536669e-12 * t) * t) * t) * t) * t) * t) * t; } case 48: { double t = 2*y100 - 97; return 0.59994428743114271918e0 + (0.30976579788271744329e-2 + (-0.23308875765700082835e-3 + (0.51681681023846925160e-6 + (0.55694594264948268169e-7 + (-0.91719117313243464652e-9 + (0.53982743680000000000e-12 + 0.22050829296187771142e-12 * t) * t) * t) * t) * t) * t) * t; } case 49: { double t = 2*y100 - 99; return 0.60521224471819875444e0 
+ (0.21732138012345456060e-2 + (-0.22872428969625997456e-3 + (0.92588959922653404233e-6 + (0.46612665806531930684e-7 + (-0.89393722514414153351e-9 + (0.31718550353777777778e-11 + 0.15705458816080549117e-12 * t) * t) * t) * t) * t) * t) * t; } case 50: { double t = 2*y100 - 101; return 0.60865189969791123620e0 + (0.12708480848877451719e-2 + (-0.22212090111534847166e-3 + (0.12636236031532793467e-5 + (0.37904037100232937574e-7 + (-0.84417089968101223519e-9 + (0.49843180828444444445e-11 + 0.10355439441049048273e-12 * t) * t) * t) * t) * t) * t) * t; } case 51: { double t = 2*y100 - 103; return 0.61031580103499200191e0 + (0.39867436055861038223e-3 + (-0.21369573439579869291e-3 + (0.15339402129026183670e-5 + (0.29787479206646594442e-7 + (-0.77687792914228632974e-9 + (0.61192452741333333334e-11 + 0.60216691829459295780e-13 * t) * t) * t) * t) * t) * t) * t; } case 52: { double t = 2*y100 - 105; return 0.61027109047879835868e0 + (-0.43680904508059878254e-3 + (-0.20383783788303894442e-3 + (0.17421743090883439959e-5 + (0.22400425572175715576e-7 + (-0.69934719320045128997e-9 + (0.67152759655111111110e-11 + 0.26419960042578359995e-13 * t) * t) * t) * t) * t) * t) * t; } case 53: { double t = 2*y100 - 107; return 0.60859639489217430521e0 + (-0.12305921390962936873e-2 + (-0.19290150253894682629e-3 + (0.18944904654478310128e-5 + (0.15815530398618149110e-7 + (-0.61726850580964876070e-9 + 0.68987888999111111110e-11 * t) * t) * t) * t) * t) * t; } case 54: { double t = 2*y100 - 109; return 0.60537899426486075181e0 + (-0.19790062241395705751e-2 + (-0.18120271393047062253e-3 + (0.19974264162313241405e-5 + (0.10055795094298172492e-7 + (-0.53491997919318263593e-9 + (0.67794550295111111110e-11 - 0.17059208095741511603e-13 * t) * t) * t) * t) * t) * t) * t; } case 55: { double t = 2*y100 - 111; return 0.60071229457904110537e0 + (-0.26795676776166354354e-2 + (-0.16901799553627508781e-3 + (0.20575498324332621581e-5 + (0.51077165074461745053e-8 + (-0.45536079828057221858e-9 + 
(0.64488005516444444445e-11 - 0.29311677573152766338e-13 * t) * t) * t) * t) * t) * t) * t; } case 56: { double t = 2*y100 - 113; return 0.59469361520112714738e0 + (-0.33308208190600993470e-2 + (-0.15658501295912405679e-3 + (0.20812116912895417272e-5 + (0.93227468760614182021e-9 + (-0.38066673740116080415e-9 + (0.59806790359111111110e-11 - 0.36887077278950440597e-13 * t) * t) * t) * t) * t) * t) * t; } case 57: { double t = 2*y100 - 115; return 0.58742228631775388268e0 + (-0.39321858196059227251e-2 + (-0.14410441141450122535e-3 + (0.20743790018404020716e-5 + (-0.25261903811221913762e-8 + (-0.31212416519526924318e-9 + (0.54328422462222222221e-11 - 0.40864152484979815972e-13 * t) * t) * t) * t) * t) * t) * t; } case 58: { double t = 2*y100 - 117; return 0.57899804200033018447e0 + (-0.44838157005618913447e-2 + (-0.13174245966501437965e-3 + (0.20425306888294362674e-5 + (-0.53330296023875447782e-8 + (-0.25041289435539821014e-9 + (0.48490437205333333334e-11 - 0.42162206939169045177e-13 * t) * t) * t) * t) * t) * t) * t; } case 59: { double t = 2*y100 - 119; return 0.56951968796931245974e0 + (-0.49864649488074868952e-2 + (-0.11963416583477567125e-3 + (0.19906021780991036425e-5 + (-0.75580140299436494248e-8 + (-0.19576060961919820491e-9 + (0.42613011928888888890e-11 - 0.41539443304115604377e-13 * t) * t) * t) * t) * t) * t) * t; } case 60: { double t = 2*y100 - 121; return 0.55908401930063918964e0 + (-0.54413711036826877753e-2 + (-0.10788661102511914628e-3 + (0.19229663322982839331e-5 + (-0.92714731195118129616e-8 + (-0.14807038677197394186e-9 + (0.36920870298666666666e-11 - 0.39603726688419162617e-13 * t) * t) * t) * t) * t) * t) * t; } case 61: { double t = 2*y100 - 123; return 0.54778496152925675315e0 + (-0.58501497933213396670e-2 + (-0.96582314317855227421e-4 + (0.18434405235069270228e-5 + (-0.10541580254317078711e-7 + (-0.10702303407788943498e-9 + (0.31563175582222222222e-11 - 0.36829748079110481422e-13 * t) * t) * t) * t) * t) * t) * t; } case 62: { double t = 2*y100 
- 125; return 0.53571290831682823999e0 + (-0.62147030670760791791e-2 + (-0.85782497917111760790e-4 + (0.17553116363443470478e-5 + (-0.11432547349815541084e-7 + (-0.72157091369041330520e-10 + (0.26630811607111111111e-11 - 0.33578660425893164084e-13 * t) * t) * t) * t) * t) * t) * t; } case 63: { double t = 2*y100 - 127; return 0.52295422962048434978e0 + (-0.65371404367776320720e-2 + (-0.75530164941473343780e-4 + (0.16613725797181276790e-5 + (-0.12003521296598910761e-7 + (-0.42929753689181106171e-10 + (0.22170894940444444444e-11 - 0.30117697501065110505e-13 * t) * t) * t) * t) * t) * t) * t; } case 64: { double t = 2*y100 - 129; return 0.50959092577577886140e0 + (-0.68197117603118591766e-2 + (-0.65852936198953623307e-4 + (0.15639654113906716939e-5 + (-0.12308007991056524902e-7 + (-0.18761997536910939570e-10 + (0.18198628922666666667e-11 - 0.26638355362285200932e-13 * t) * t) * t) * t) * t) * t) * t; } case 65: { double t = 2*y100 - 131; return 0.49570040481823167970e0 + (-0.70647509397614398066e-2 + (-0.56765617728962588218e-4 + (0.14650274449141448497e-5 + (-0.12393681471984051132e-7 + (0.92904351801168955424e-12 + (0.14706755960177777778e-11 - 0.23272455351266325318e-13 * t) * t) * t) * t) * t) * t) * t; } case 66: { double t = 2*y100 - 133; return 0.48135536250935238066e0 + (-0.72746293327402359783e-2 + (-0.48272489495730030780e-4 + (0.13661377309113939689e-5 + (-0.12302464447599382189e-7 + (0.16707760028737074907e-10 + (0.11672928324444444444e-11 - 0.20105801424709924499e-13 * t) * t) * t) * t) * t) * t) * t; } case 67: { double t = 2*y100 - 135; return 0.46662374675511439448e0 + (-0.74517177649528487002e-2 + (-0.40369318744279128718e-4 + (0.12685621118898535407e-5 + (-0.12070791463315156250e-7 + (0.29105507892605823871e-10 + (0.90653314645333333334e-12 - 0.17189503312102982646e-13 * t) * t) * t) * t) * t) * t) * t; } case 68: { double t = 2*y100 - 137; return 0.45156879030168268778e0 + (-0.75983560650033817497e-2 + (-0.33045110380705139759e-4 + 
(0.11732956732035040896e-5 + (-0.11729986947158201869e-7 + (0.38611905704166441308e-10 + (0.68468768305777777779e-12 - 0.14549134330396754575e-13 * t) * t) * t) * t) * t) * t) * t; } case 69: { double t = 2*y100 - 139; return 0.43624909769330896904e0 + (-0.77168291040309554679e-2 + (-0.26283612321339907756e-4 + (0.10811018836893550820e-5 + (-0.11306707563739851552e-7 + (0.45670446788529607380e-10 + (0.49782492549333333334e-12 - 0.12191983967561779442e-13 * t) * t) * t) * t) * t) * t) * t; } case 70: { double t = 2*y100 - 141; return 0.42071877443548481181e0 + (-0.78093484015052730097e-2 + (-0.20064596897224934705e-4 + (0.99254806680671890766e-6 + (-0.10823412088884741451e-7 + (0.50677203326904716247e-10 + (0.34200547594666666666e-12 - 0.10112698698356194618e-13 * t) * t) * t) * t) * t) * t) * t; } case 71: { double t = 2*y100 - 143; return 0.40502758809710844280e0 + (-0.78780384460872937555e-2 + (-0.14364940764532853112e-4 + (0.90803709228265217384e-6 + (-0.10298832847014466907e-7 + (0.53981671221969478551e-10 + (0.21342751381333333333e-12 - 0.82975901848387729274e-14 * t) * t) * t) * t) * t) * t) * t; } case 72: { double t = 2*y100 - 145; return 0.38922115269731446690e0 + (-0.79249269708242064120e-2 + (-0.91595258799106970453e-5 + (0.82783535102217576495e-6 + (-0.97484311059617744437e-8 + (0.55889029041660225629e-10 + (0.10851981336888888889e-12 - 0.67278553237853459757e-14 * t) * t) * t) * t) * t) * t) * t; } case 73: { double t = 2*y100 - 147; return 0.37334112915460307335e0 + (-0.79519385109223148791e-2 + (-0.44219833548840469752e-5 + (0.75209719038240314732e-6 + (-0.91848251458553190451e-8 + (0.56663266668051433844e-10 + (0.23995894257777777778e-13 - 0.53819475285389344313e-14 * t) * t) * t) * t) * t) * t) * t; } case 74: { double t = 2*y100 - 149; return 0.35742543583374223085e0 + (-0.79608906571527956177e-2 + (-0.12530071050975781198e-6 + (0.68088605744900552505e-6 + (-0.86181844090844164075e-8 + (0.56530784203816176153e-10 + (-0.43120012248888888890e-13 - 
0.42372603392496813810e-14 * t) * t) * t) * t) * t) * t) * t; } case 75: { double t = 2*y100 - 151; return 0.34150846431979618536e0 + (-0.79534924968773806029e-2 + (0.37576885610891515813e-5 + (0.61419263633090524326e-6 + (-0.80565865409945960125e-8 + (0.55684175248749269411e-10 + (-0.95486860764444444445e-13 - 0.32712946432984510595e-14 * t) * t) * t) * t) * t) * t) * t; } case 76: { double t = 2*y100 - 153; return 0.32562129649136346824e0 + (-0.79313448067948884309e-2 + (0.72539159933545300034e-5 + (0.55195028297415503083e-6 + (-0.75063365335570475258e-8 + (0.54281686749699595941e-10 - 0.13545424295111111111e-12 * t) * t) * t) * t) * t) * t; } case 77: { double t = 2*y100 - 155; return 0.30979191977078391864e0 + (-0.78959416264207333695e-2 + (0.10389774377677210794e-4 + (0.49404804463196316464e-6 + (-0.69722488229411164685e-8 + (0.52469254655951393842e-10 - 0.16507860650666666667e-12 * t) * t) * t) * t) * t) * t; } case 78: { double t = 2*y100 - 157; return 0.29404543811214459904e0 + (-0.78486728990364155356e-2 + (0.13190885683106990459e-4 + (0.44034158861387909694e-6 + (-0.64578942561562616481e-8 + (0.50354306498006928984e-10 - 0.18614473550222222222e-12 * t) * t) * t) * t) * t) * t; } case 79: { double t = 2*y100 - 159; return 0.27840427686253660515e0 + (-0.77908279176252742013e-2 + (0.15681928798708548349e-4 + (0.39066226205099807573e-6 + (-0.59658144820660420814e-8 + (0.48030086420373141763e-10 - 0.20018995173333333333e-12 * t) * t) * t) * t) * t) * t; } case 80: { double t = 2*y100 - 161; return 0.26288838011163800908e0 + (-0.77235993576119469018e-2 + (0.17886516796198660969e-4 + (0.34482457073472497720e-6 + (-0.54977066551955420066e-8 + (0.45572749379147269213e-10 - 0.20852924954666666667e-12 * t) * t) * t) * t) * t) * t; } case 81: { double t = 2*y100 - 163; return 0.24751539954181029717e0 + (-0.76480877165290370975e-2 + (0.19827114835033977049e-4 + (0.30263228619976332110e-6 + (-0.50545814570120129947e-8 + (0.43043879374212005966e-10 - 
0.21228012028444444444e-12 * t) * t) * t) * t) * t) * t; } case 82: { double t = 2*y100 - 165; return 0.23230087411688914593e0 + (-0.75653060136384041587e-2 + (0.21524991113020016415e-4 + (0.26388338542539382413e-6 + (-0.46368974069671446622e-8 + (0.40492715758206515307e-10 - 0.21238627815111111111e-12 * t) * t) * t) * t) * t) * t; } case 83: { double t = 2*y100 - 167; return 0.21725840021297341931e0 + (-0.74761846305979730439e-2 + (0.23000194404129495243e-4 + (0.22837400135642906796e-6 + (-0.42446743058417541277e-8 + (0.37958104071765923728e-10 - 0.20963978568888888889e-12 * t) * t) * t) * t) * t) * t; } case 84: { double t = 2*y100 - 169; return 0.20239979200788191491e0 + (-0.73815761980493466516e-2 + (0.24271552727631854013e-4 + (0.19590154043390012843e-6 + (-0.38775884642456551753e-8 + (0.35470192372162901168e-10 - 0.20470131678222222222e-12 * t) * t) * t) * t) * t) * t; } case 85: { double t = 2*y100 - 171; return 0.18773523211558098962e0 + (-0.72822604530339834448e-2 + (0.25356688567841293697e-4 + (0.16626710297744290016e-6 + (-0.35350521468015310830e-8 + (0.33051896213898864306e-10 - 0.19811844544000000000e-12 * t) * t) * t) * t) * t) * t; } case 86: { double t = 2*y100 - 173; return 0.17327341258479649442e0 + (-0.71789490089142761950e-2 + (0.26272046822383820476e-4 + (0.13927732375657362345e-6 + (-0.32162794266956859603e-8 + (0.30720156036105652035e-10 - 0.19034196304000000000e-12 * t) * t) * t) * t) * t) * t; } case 87: { double t = 2*y100 - 175; return 0.15902166648328672043e0 + (-0.70722899934245504034e-2 + (0.27032932310132226025e-4 + (0.11474573347816568279e-6 + (-0.29203404091754665063e-8 + (0.28487010262547971859e-10 - 0.18174029063111111111e-12 * t) * t) * t) * t) * t) * t; } case 88: { double t = 2*y100 - 177; return 0.14498609036610283865e0 + (-0.69628725220045029273e-2 + (0.27653554229160596221e-4 + (0.92493727167393036470e-7 + (-0.26462055548683583849e-8 + (0.26360506250989943739e-10 - 0.17261211260444444444e-12 * t) * t) * t) * t) * t) * t; } 
case 89: { double t = 2*y100 - 179; return 0.13117165798208050667e0 + (-0.68512309830281084723e-2 + (0.28147075431133863774e-4 + (0.72351212437979583441e-7 + (-0.23927816200314358570e-8 + (0.24345469651209833155e-10 - 0.16319736960000000000e-12 * t) * t) * t) * t) * t) * t; } case 90: { double t = 2*y100 - 181; return 0.11758232561160626306e0 + (-0.67378491192463392927e-2 + (0.28525664781722907847e-4 + (0.54156999310046790024e-7 + (-0.21589405340123827823e-8 + (0.22444150951727334619e-10 - 0.15368675584000000000e-12 * t) * t) * t) * t) * t) * t; } case 91: { double t = 2*y100 - 183; return 0.10422112945361673560e0 + (-0.66231638959845581564e-2 + (0.28800551216363918088e-4 + (0.37758983397952149613e-7 + (-0.19435423557038933431e-8 + (0.20656766125421362458e-10 - 0.14422990012444444444e-12 * t) * t) * t) * t) * t) * t; } case 92: { double t = 2*y100 - 185; return 0.91090275493541084785e-1 + (-0.65075691516115160062e-2 + (0.28982078385527224867e-4 + (0.23014165807643012781e-7 + (-0.17454532910249875958e-8 + (0.18981946442680092373e-10 - 0.13494234691555555556e-12 * t) * t) * t) * t) * t) * t; } case 93: { double t = 2*y100 - 187; return 0.78191222288771379358e-1 + (-0.63914190297303976434e-2 + (0.29079759021299682675e-4 + (0.97885458059415717014e-8 + (-0.15635596116134296819e-8 + (0.17417110744051331974e-10 - 0.12591151763555555556e-12 * t) * t) * t) * t) * t) * t; } case 94: { double t = 2*y100 - 189; return 0.65524757106147402224e-1 + (-0.62750311956082444159e-2 + (0.29102328354323449795e-4 + (-0.20430838882727954582e-8 + (-0.13967781903855367270e-8 + (0.15958771833747057569e-10 - 0.11720175765333333333e-12 * t) * t) * t) * t) * t) * t; } case 95: { double t = 2*y100 - 191; return 0.53091065838453612773e-1 + (-0.61586898417077043662e-2 + (0.29057796072960100710e-4 + (-0.12597414620517987536e-7 + (-0.12440642607426861943e-8 + (0.14602787128447932137e-10 - 0.10885859114666666667e-12 * t) * t) * t) * t) * t) * t; } case 96: { double t = 2*y100 - 193; return 
0.40889797115352738582e-1 + (-0.60426484889413678200e-2 + (0.28953496450191694606e-4 + (-0.21982952021823718400e-7 + (-0.11044169117553026211e-8 + (0.13344562332430552171e-10 - 0.10091231402844444444e-12 * t) * t) * t) * t) * t) * t; } case 97: case 98: case 99: case 100: { // use Taylor expansion for small x (|x| <= 0.0309...) // (2/sqrt(pi)) * (x - 2/3 x^3 + 4/15 x^5 - 8/105 x^7 + 16/945 x^9) double x2 = x*x; return x * (1.1283791670955125739 - x2 * (0.75225277806367504925 - x2 * (0.30090111122547001970 - x2 * (0.085971746064420005629 - x2 * 0.016931216931216931217)))); } } /* Since 0 <= y100 < 101, this is only reached if x is NaN, in which case we should return NaN. */ return NaN; } // w_im_y100 /******************************************************************************/ /* Library function im_w_of_z */ /******************************************************************************/ double im_w_of_x(double x) { // Steven G. Johnson, October 2012. // Uses methods similar to the erfcx calculation: // continued fractions for large |x|, // a lookup table of Chebyshev polynomials for smaller |x|, // and finally a Taylor expansion for |x|<0.01. 
const double ispi = 0.56418958354775628694807945156; // 1 / sqrt(pi) if (x >= 0) { if (x > 45) { // continued-fraction expansion is faster if (x > 5e7) { // 1-term expansion, important to avoid overflow return ispi / x; } else { // 5-term expansion (rely on compiler for CSE), simplified from: ispi / (x-0.5/(x-1/(x-1.5/(x-2/x)))) return ispi*((x*x) * (x*x-4.5) + 2) / (x * ((x*x) * (x*x-5) + 3.75)); } } else { return w_im_y100(100/(1+x), x); } } else { // = -im_w_of_x(-x) if (x < -45) { // continued-fraction expansion is faster if (x < -5e7) { // 1-term expansion, important to avoid overflow return ispi / x; } else { // 5-term expansion (rely on compiler for CSE), simplified from: ispi / (x-0.5/(x-1/(x-1.5/(x-2/x)))) return ispi*((x*x) * (x*x-4.5) + 2) / (x * ((x*x) * (x*x-5) + 3.75)); } } else { return -w_im_y100(100/(1-x), -x); } } } // im_w_of_z luametatex-2.10.08/source/libraries/libcerf/readme-luametatex.txt000066400000000000000000000023731442250314700250710ustar00rootroot00000000000000LS, In the following files you can find the comment below. We don't want to bother or burden the original authors with our problems. The cerf code is mostly used in MetaFun macros (by Alan Braslau). The c.h and cpp.h files are gone. defs.h cerf.h --------------------------------------------------------------------------------------------- This file is patched by Mojca Miklavec and Hans Hagen for usage in LuaMetaTeX where we use only C and also want to compile with the Microsoft compiler. So, when updating this library one has to check for changes. Not that we expect many as this is a rather stable library. In the other files there are a few macros used that deal with the multiplication and addition of complex and real nmbers. Of course the original code is kept as-is. 
--------------------------------------------------------------------------------------------- So, when updating the library you need to diff for the changes that are needed in order to compile the files with the Microsoft compiler. At some point I might patch the files so that we can intercept error messages in a way that permits recovery and also plugs them into our normal message handlers. Maybe I should also merge the code into just one file because it doesn't change. Hans luametatex-2.10.08/source/libraries/libcerf/w_of_z.c000066400000000000000000000364031442250314700223540ustar00rootroot00000000000000/* Library libcerf: * Compute complex error functions, based on a new implementation of * Faddeeva's w_of_z. Also provide Dawson and Voigt functions. * * File w_of_z.c: * Computation of Faddeeva's complex scaled error function, * w(z) = exp(-z^2) * erfc(-i*z), * nameless function (7.1.3) of Abramowitz&Stegun (1964), * also known as the plasma dispersion function. * * This implementation uses a combination of different algorithms. * See man 3 w_of_z for references. * * Copyright: * (C) 2012 Massachusetts Institute of Technology * (C) 2013 Forschungszentrum Jülich GmbH * * Licence: * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * Authors: * Steven G. Johnson, Massachusetts Institute of Technology, 2012, core author * Joachim Wuttke, Forschungszentrum Jülich, 2013, package maintainer * * Website: * http://apps.jcns.fz-juelich.de/libcerf * * Revision history: * ../CHANGELOG * * Man page: * w_of_z(3) */ /* Todo: use local declarations (older compilers) (HH). */ /* Computes various error functions (erf, erfc, erfi, erfcx), including the Dawson integral, in the complex plane, based on algorithms for the computation of the Faddeeva function w(z) = exp(-z^2) * erfc(-i*z). Given w(z), the error functions are mostly straightforward to compute, except for certain regions where we have to switch to Taylor expansions to avoid cancellation errors [e.g. near the origin for erf(z)]. */ #include "cerf.h" #include #include #include "defs.h" // defines _cerf_cmplx, NaN, C, cexp, ... // for analysing the algorithm: EXPORT int faddeeva_algorithm; EXPORT int faddeeva_nofterms; /******************************************************************************/ /* auxiliary functions */ /******************************************************************************/ static inline double sinc(double x, double sinx) { // return sinc(x) = sin(x)/x, given both x and sin(x) // [since we only use this in cases where sin(x) has already been computed] return fabs(x) < 1e-4 ? 
1 - (0.1666666666666666666667)*x*x : sinx / x; } static inline double sinh_taylor(double x) { // sinh(x) via Taylor series, accurate to machine precision for |x| < 1e-2 return x * (1 + (x*x) * (0.1666666666666666666667 + 0.00833333333333333333333 * (x*x))); } static inline double sqr(double x) { return x*x; } /******************************************************************************/ /* precomputed table of expa2n2[n-1] = exp(-a2*n*n) */ /* for double-precision a2 = 0.26865... in w_of_z, below. */ /******************************************************************************/ static const double expa2n2[] = { 7.64405281671221563e-01, 3.41424527166548425e-01, 8.91072646929412548e-02, 1.35887299055460086e-02, 1.21085455253437481e-03, 6.30452613933449404e-05, 1.91805156577114683e-06, 3.40969447714832381e-08, 3.54175089099469393e-10, 2.14965079583260682e-12, 7.62368911833724354e-15, 1.57982797110681093e-17, 1.91294189103582677e-20, 1.35344656764205340e-23, 5.59535712428588720e-27, 1.35164257972401769e-30, 1.90784582843501167e-34, 1.57351920291442930e-38, 7.58312432328032845e-43, 2.13536275438697082e-47, 3.51352063787195769e-52, 3.37800830266396920e-57, 1.89769439468301000e-62, 6.22929926072668851e-68, 1.19481172006938722e-73, 1.33908181133005953e-79, 8.76924303483223939e-86, 3.35555576166254986e-92, 7.50264110688173024e-99, 9.80192200745410268e-106, 7.48265412822268959e-113, 3.33770122566809425e-120, 8.69934598159861140e-128, 1.32486951484088852e-135, 1.17898144201315253e-143, 6.13039120236180012e-152, 1.86258785950822098e-160, 3.30668408201432783e-169, 3.43017280887946235e-178, 2.07915397775808219e-187, 7.36384545323984966e-197, 1.52394760394085741e-206, 1.84281935046532100e-216, 1.30209553802992923e-226, 5.37588903521080531e-237, 1.29689584599763145e-247, 1.82813078022866562e-258, 1.50576355348684241e-269, 7.24692320799294194e-281, 2.03797051314726829e-292, 3.34880215927873807e-304, 0.0 // underflow (also prevents reads past array end, below) }; // expa2n2 
/******************************************************************************/ /* w_of_z, Faddeeva's scaled complex error function */ /******************************************************************************/ _cerf_cmplx w_of_z(_cerf_cmplx z) { faddeeva_nofterms = 0; // Steven G. Johnson, October 2012. if (creal(z) == 0.0) { // Purely imaginary input, purely real output. // However, use creal(z) to give correct sign of 0 in cimag(w). return C(erfcx(cimag(z)), creal(z)); } if (cimag(z) == 0) { // Purely real input, complex output. return C(exp(-sqr(creal(z))), im_w_of_x(creal(z))); } const double relerr = DBL_EPSILON; const double a = 0.518321480430085929872; // pi / sqrt(-log(eps*0.5)) const double c = 0.329973702884629072537; // (2/pi) * a; const double a2 = 0.268657157075235951582; // a^2 const double x = fabs(creal(z)); const double y = cimag(z); const double ya = fabs(y); _cerf_cmplx ret = C(0., 0.); // return value double sum1 = 0, sum2 = 0, sum3 = 0, sum4 = 0, sum5 = 0; if (ya > 7 || (x > 6 // continued fraction is faster /* As pointed out by M. Zaghloul, the continued fraction seems to give a large relative error in Re w(z) for |x| ~ 6 and small |y|, so use algorithm 816 in this region: */ && (ya > 0.1 || (x > 8 && ya > 1e-10) || x > 28))) { faddeeva_algorithm = 100; /* Poppe & Wijers suggest using a number of terms nu = 3 + 1442 / (26*rho + 77) where rho = sqrt((x/x0)^2 + (y/y0)^2) where x0=6.3, y0=4.4. (They only use this expansion for rho >= 1, but rho a little less than 1 seems okay too.) Instead, I did my own fit to a slightly different function that avoids the hypotenuse calculation, using NLopt to minimize the sum of the squares of the errors in nu with the constraint that the estimated nu be >= minimum nu to attain machine precision. I also separate the regions where nu == 2 and nu == 1. */ const double ispi = 0.56418958354775628694807945156; // 1 / sqrt(pi) double xs = y < 0 ? 
-creal(z) : creal(z); // compute for -z if y < 0 if (x + ya > 4000) { // nu <= 2 if (x + ya > 1e7) { // nu == 1, w(z) = i/sqrt(pi) / z // scale to avoid overflow if (x > ya) { faddeeva_algorithm += 1; double yax = ya / xs; faddeeva_algorithm = 100; double denom = ispi / (xs + yax*ya); ret = C(denom*yax, denom); } else if (isinf(ya)) { faddeeva_algorithm += 2; return ((isnan(x) || y < 0) ? C(NaN,NaN) : C(0,0)); } else { faddeeva_algorithm += 3; double xya = xs / ya; double denom = ispi / (xya*xs + ya); ret = C(denom, denom*xya); } } else { // nu == 2, w(z) = i/sqrt(pi) * z / (z*z - 0.5) faddeeva_algorithm += 4; double dr = xs*xs - ya*ya - 0.5, di = 2*xs*ya; double denom = ispi / (dr*dr + di*di); ret = C(denom * (xs*di-ya*dr), denom * (xs*dr+ya*di)); } } else { // compute nu(z) estimate and do general continued fraction faddeeva_algorithm += 5; const double c0=3.9, c1=11.398, c2=0.08254, c3=0.1421, c4=0.2023; // fit double nu = floor(c0 + c1 / (c2*x + c3*ya + c4)); double wr = xs, wi = ya; for (nu = 0.5 * (nu - 1); nu > 0.4; nu -= 0.5) { // w <- z - nu/w: double denom = nu / (wr*wr + wi*wi); wr = xs - wr * denom; wi = ya + wi * denom; } { // w(z) = i/sqrt(pi) / w: double denom = ispi / (wr*wr + wi*wi); ret = C(denom*wi, denom*wr); } } if (y < 0) { faddeeva_algorithm += 10; // use w(z) = 2.0*exp(-z*z) - w(-z), // but be careful of overflow in exp(-z*z) // = exp(-(xs*xs-ya*ya) -2*i*xs*ya) return complex_sub_cc(complex_mul_rc(2.0,cexp(C((ya-xs)*(xs+ya), 2*xs*y))), ret); } else return ret; } /* Note: The test that seems to be suggested in the paper is x < sqrt(-log(DBL_MIN)), about 26.6, since otherwise exp(-x^2) underflows to zero and sum1,sum2,sum4 are zero. However, long before this occurs, the sum1,sum2,sum4 contributions are negligible in double precision; I find that this happens for x > about 6, for all y. On the other hand, I find that the case where we compute all of the sums is faster (at least with the precomputed expa2n2 table) until about x=10. 
Furthermore, if we try to compute all of the sums for x > 20, I find that we sometimes run into numerical problems because underflow/overflow problems start to appear in the various coefficients of the sums, below. Therefore, we use x < 10 here. */ else if (x < 10) { faddeeva_algorithm = 200; double prod2ax = 1, prodm2ax = 1; double expx2; if (isnan(y)) { faddeeva_algorithm += 99; return C(y,y); } if (x < 5e-4) { // compute sum4 and sum5 together as sum5-sum4 // This special case is needed for accuracy. faddeeva_algorithm += 1; const double x2 = x*x; expx2 = 1 - x2 * (1 - 0.5*x2); // exp(-x*x) via Taylor // compute exp(2*a*x) and exp(-2*a*x) via Taylor, to double precision const double ax2 = 1.036642960860171859744*x; // 2*a*x const double exp2ax = 1 + ax2 * (1 + ax2 * (0.5 + 0.166666666666666666667*ax2)); const double expm2ax = 1 - ax2 * (1 - ax2 * (0.5 - 0.166666666666666666667*ax2)); for (int n = 1; ; ++n) { ++faddeeva_nofterms; const double coef = expa2n2[n-1] * expx2 / (a2*(n*n) + y*y); prod2ax *= exp2ax; prodm2ax *= expm2ax; sum1 += coef; sum2 += coef * prodm2ax; sum3 += coef * prod2ax; // really = sum5 - sum4 sum5 += coef * (2*a) * n * sinh_taylor((2*a)*n*x); // test convergence via sum3 if (coef * prod2ax < relerr * sum3) break; } } else { // x > 5e-4, compute sum4 and sum5 separately faddeeva_algorithm += 2; expx2 = exp(-x*x); const double exp2ax = exp((2*a)*x), expm2ax = 1 / exp2ax; for (int n = 1; ; ++n) { ++faddeeva_nofterms; const double coef = expa2n2[n-1] * expx2 / (a2*(n*n) + y*y); prod2ax *= exp2ax; prodm2ax *= expm2ax; sum1 += coef; sum2 += coef * prodm2ax; sum4 += (coef * prodm2ax) * (a*n); sum3 += coef * prod2ax; sum5 += (coef * prod2ax) * (a*n); // test convergence via sum5, since this sum has the slowest decay if ((coef * prod2ax) * (a*n) < relerr * sum5) break; } } const double expx2erfcxy = // avoid spurious overflow for large negative y y > -6 // for y < -6, erfcx(y) = 2*exp(y*y) to double precision ? 
expx2*erfcx(y) : 2*exp(y*y-x*x); if (y > 5) { // imaginary terms cancel faddeeva_algorithm += 10; const double sinxy = sin(x*y); ret = C((expx2erfcxy - c*y*sum1) * cos(2*x*y) + (c*x*expx2) * sinxy * sinc(x*y, sinxy), 0.0); } else { faddeeva_algorithm += 20; double xs = creal(z); const double sinxy = sin(xs*y); const double sin2xy = sin(2*xs*y), cos2xy = cos(2*xs*y); const double coef1 = expx2erfcxy - c*y*sum1; const double coef2 = c*xs*expx2; ret = C(coef1 * cos2xy + coef2 * sinxy * sinc(xs*y, sinxy), coef2 * sinc(2*xs*y, sin2xy) - coef1 * sin2xy); } } else { // x large: only sum3 & sum5 contribute (see above note) faddeeva_algorithm = 300; if (isnan(x)) return C(x,x); if (isnan(y)) return C(y,y); ret = C(exp(-x*x),0.0); // |y| < 1e-10, so we only need exp(-x*x) term // (round instead of ceil as in original paper; note that x/a > 1 here) double n0 = floor(x/a + 0.5); // sum in both directions, starting at n0 double dx = a*n0 - x; sum3 = exp(-dx*dx) / (a2*(n0*n0) + y*y); sum5 = a*n0 * sum3; double exp1 = exp(4*a*dx), exp1dn = 1; int dn; for (dn = 1; n0 - dn > 0; ++dn) { // loop over n0-dn and n0+dn terms double np = n0 + dn, nm = n0 - dn; double tp = exp(-sqr(a*dn+dx)); double tm = tp * (exp1dn *= exp1); // trick to get tm from tp tp /= (a2*(np*np) + y*y); tm /= (a2*(nm*nm) + y*y); sum3 += tp + tm; sum5 += a * (np * tp + nm * tm); if (a * (np * tp + nm * tm) < relerr * sum5) goto finish; } while (1) { // loop over n0+dn terms only (since n0-dn <= 0) double np = n0 + dn++; double tp = exp(-sqr(a*dn+dx)) / (a2*(np*np) + y*y); sum3 += tp; sum5 += a * np * tp; if (a * np * tp < relerr * sum5) goto finish; } } finish: return complex_add_cc(ret, C((0.5*c)*y*(sum2+sum3),(0.5*c)*copysign(sum5-sum4, creal(z)))); } // w_of_z luametatex-2.10.08/source/libraries/libcerf/width.c000066400000000000000000000070271442250314700222100ustar00rootroot00000000000000/* Library libcerf: * Compute complex error functions, based on a new implementation of * Faddeeva's w_of_z. 
Also provide Dawson and Voigt functions. * * File width.c: * Computate voigt_hwhm, using Newton's iteration. * * Copyright: * (C) 2018 Forschungszentrum Jülich GmbH * * Licence: * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * Authors: * Joachim Wuttke, Forschungszentrum Jülich, 2018 * * Website: * http://apps.jcns.fz-juelich.de/libcerf * * Revision history: * ../CHANGELOG * * Man pages: * voigt_fwhm(3) */ /* This file is patched by Hans Hagen for usage in LuaMetaTeX where we don't want to exit on an error so we intercept it. 
*/ #include "cerf.h" #include #include #include double dvoigt(double x, double sigma, double gamma, double v0) { return voigt(x, sigma, gamma)/v0 - .5; } double voigt_hwhm(double sigma, double gamma, int *error) { *error = 0; if (sigma == 0 && gamma == 0) { return 0; } else if (isnan(sigma) || isnan(gamma)) { *error = 1; return 0; // return NAN; } else { // start from an excellent approximation [Olivero & Longbothum, J Quant Spec Rad Transf 1977]: const double eps = 1e-14; const double hwhm0 = .5*(1.06868*gamma+sqrt(0.86743*gamma*gamma+4*2*log(2)*sigma*sigma)); const double del = eps*hwhm0; double ret = hwhm0; const double v0 = voigt(0, sigma, gamma); for (int i=0; i<300; ++i) { double val = dvoigt(ret, sigma, gamma, v0); if (fabs(val) < 1e-15) { return ret; } else { double step = -del/(dvoigt(ret+del, sigma, gamma, v0)/val-1); double nxt = ret + step; if (nxt < ret/3) { *error = 2; // fprintf(stderr, "voigt_fwhm terminated because of huge deviation from 1st approx\n"); nxt = ret/3; } else if (nxt > 2*ret) { *error = 2; // fprintf(stderr, "voigt_fwhm terminated because of huge deviation from 1st approx\n"); nxt = 2*ret; } if (fabs(ret-nxt) < del) { return nxt; } else { ret = nxt; } } } *error = 3; // fprintf(stderr, "voigt_fwhm failed: Newton's iteration did not converge with sigma = %f and gamma = %f\n", sigma, gamma); exit(-1); return 0; } } luametatex-2.10.08/source/libraries/mimalloc/000077500000000000000000000000001442250314700211065ustar00rootroot00000000000000luametatex-2.10.08/source/libraries/mimalloc/CMakeLists.txt000066400000000000000000000514431442250314700236550ustar00rootroot00000000000000cmake_minimum_required(VERSION 3.13) project(libmimalloc C CXX) set(CMAKE_C_STANDARD 11) set(CMAKE_CXX_STANDARD 17) option(MI_SECURE "Use full security mitigations (like guard pages, allocation randomization, double-free mitigation, and free-list corruption detection)" OFF) option(MI_DEBUG_FULL "Use full internal heap invariant checking in DEBUG mode (expensive)" 
OFF) option(MI_PADDING "Enable padding to detect heap block overflow (always on in DEBUG or SECURE mode, or with Valgrind/ASAN)" OFF) option(MI_OVERRIDE "Override the standard malloc interface (e.g. define entry points for malloc() etc)" ON) option(MI_XMALLOC "Enable abort() call on memory allocation failure by default" OFF) option(MI_SHOW_ERRORS "Show error and warning messages by default (only enabled by default in DEBUG mode)" OFF) option(MI_TRACK_VALGRIND "Compile with Valgrind support (adds a small overhead)" OFF) option(MI_TRACK_ASAN "Compile with address sanitizer support (adds a small overhead)" OFF) option(MI_TRACK_ETW "Compile with Windows event tracing (ETW) support (adds a small overhead)" OFF) option(MI_USE_CXX "Use the C++ compiler to compile the library (instead of the C compiler)" OFF) option(MI_SEE_ASM "Generate assembly files" OFF) option(MI_OSX_INTERPOSE "Use interpose to override standard malloc on macOS" ON) option(MI_OSX_ZONE "Use malloc zone to override standard malloc on macOS" ON) option(MI_WIN_REDIRECT "Use redirection module ('mimalloc-redirect') on Windows if compiling mimalloc as a DLL" ON) option(MI_LOCAL_DYNAMIC_TLS "Use slightly slower, dlopen-compatible TLS mechanism (Unix)" OFF) option(MI_BUILD_SHARED "Build shared library" ON) option(MI_BUILD_STATIC "Build static library" ON) option(MI_BUILD_OBJECT "Build object library" ON) option(MI_BUILD_TESTS "Build test executables" ON) option(MI_DEBUG_TSAN "Build with thread sanitizer (needs clang)" OFF) option(MI_DEBUG_UBSAN "Build with undefined-behavior sanitizer (needs clang++)" OFF) option(MI_SKIP_COLLECT_ON_EXIT "Skip collecting memory on program exit" OFF) option(MI_NO_PADDING "Force no use of padding even in DEBUG mode etc." 
OFF) # deprecated options option(MI_CHECK_FULL "Use full internal invariant checking in DEBUG mode (deprecated, use MI_DEBUG_FULL instead)" OFF) option(MI_INSTALL_TOPLEVEL "Install directly into $CMAKE_INSTALL_PREFIX instead of PREFIX/lib/mimalloc-version (deprecated)" OFF) option(MI_USE_LIBATOMIC "Explicitly link with -latomic (on older systems) (deprecated and detected automatically)" OFF) include(CheckIncludeFiles) include(GNUInstallDirs) include("cmake/mimalloc-config-version.cmake") set(mi_sources src/alloc.c src/alloc-aligned.c src/alloc-posix.c src/arena.c src/bitmap.c src/heap.c src/init.c src/options.c src/os.c src/page.c src/random.c src/segment.c src/segment-map.c src/stats.c src/prim/prim.c) set(mi_cflags "") set(mi_libraries "") # ----------------------------------------------------------------------------- # Convenience: set default build type depending on the build directory # ----------------------------------------------------------------------------- message(STATUS "") if (NOT CMAKE_BUILD_TYPE) if ("${CMAKE_BINARY_DIR}" MATCHES ".*(D|d)ebug$" OR MI_DEBUG_FULL) message(STATUS "No build type selected, default to: Debug") set(CMAKE_BUILD_TYPE "Debug") else() message(STATUS "No build type selected, default to: Release") set(CMAKE_BUILD_TYPE "Release") endif() endif() if("${CMAKE_BINARY_DIR}" MATCHES ".*(S|s)ecure$") message(STATUS "Default to secure build") set(MI_SECURE "ON") endif() # ----------------------------------------------------------------------------- # Process options # ----------------------------------------------------------------------------- if(CMAKE_C_COMPILER_ID MATCHES "MSVC|Intel") set(MI_USE_CXX "ON") endif() if(MI_OVERRIDE) message(STATUS "Override standard malloc (MI_OVERRIDE=ON)") if(APPLE) if(MI_OSX_ZONE) # use zone's on macOS message(STATUS " Use malloc zone to override malloc (MI_OSX_ZONE=ON)") list(APPEND mi_sources src/prim/osx/alloc-override-zone.c) list(APPEND mi_defines MI_OSX_ZONE=1) if (NOT MI_OSX_INTERPOSE) 
message(STATUS " WARNING: zone overriding usually also needs interpose (use -DMI_OSX_INTERPOSE=ON)") endif() endif() if(MI_OSX_INTERPOSE) # use interpose on macOS message(STATUS " Use interpose to override malloc (MI_OSX_INTERPOSE=ON)") list(APPEND mi_defines MI_OSX_INTERPOSE=1) if (NOT MI_OSX_ZONE) message(STATUS " WARNING: interpose usually also needs zone overriding (use -DMI_OSX_INTERPOSE=ON)") endif() endif() if(MI_USE_CXX AND MI_OSX_INTERPOSE) message(STATUS " WARNING: if dynamically overriding malloc/free, it is more reliable to build mimalloc as C code (use -DMI_USE_CXX=OFF)") endif() endif() endif() if(WIN32) if (MI_WIN_REDIRECT) if (MSVC_C_ARCHITECTURE_ID MATCHES "ARM") message(STATUS "Cannot use redirection on Windows ARM (MI_WIN_REDIRECT=OFF)") set(MI_WIN_REDIRECT OFF) endif() endif() if (NOT MI_WIN_REDIRECT) # use a negative define for backward compatibility list(APPEND mi_defines MI_WIN_NOREDIRECT=1) endif() endif() if(MI_SECURE) message(STATUS "Set full secure build (MI_SECURE=ON)") list(APPEND mi_defines MI_SECURE=4) endif() if(MI_TRACK_VALGRIND) CHECK_INCLUDE_FILES("valgrind/valgrind.h;valgrind/memcheck.h" MI_HAS_VALGRINDH) if (NOT MI_HAS_VALGRINDH) set(MI_TRACK_VALGRIND OFF) message(WARNING "Cannot find the 'valgrind/valgrind.h' and 'valgrind/memcheck.h' -- install valgrind first") message(STATUS "Compile **without** Valgrind support (MI_TRACK_VALGRIND=OFF)") else() message(STATUS "Compile with Valgrind support (MI_TRACK_VALGRIND=ON)") list(APPEND mi_defines MI_TRACK_VALGRIND=1) endif() endif() if(MI_TRACK_ASAN) if (APPLE AND MI_OVERRIDE) set(MI_TRACK_ASAN OFF) message(WARNING "Cannot enable address sanitizer support on macOS if MI_OVERRIDE is ON (MI_TRACK_ASAN=OFF)") endif() if (MI_TRACK_VALGRIND) set(MI_TRACK_ASAN OFF) message(WARNING "Cannot enable address sanitizer support with also Valgrind support enabled (MI_TRACK_ASAN=OFF)") endif() if(MI_TRACK_ASAN) CHECK_INCLUDE_FILES("sanitizer/asan_interface.h" MI_HAS_ASANH) if (NOT MI_HAS_ASANH) 
set(MI_TRACK_ASAN OFF) message(WARNING "Cannot find the 'sanitizer/asan_interface.h' -- install address sanitizer support first") message(STATUS "Compile **without** address sanitizer support (MI_TRACK_ASAN=OFF)") else() message(STATUS "Compile with address sanitizer support (MI_TRACK_ASAN=ON)") list(APPEND mi_defines MI_TRACK_ASAN=1) list(APPEND mi_cflags -fsanitize=address) list(APPEND mi_libraries -fsanitize=address) endif() endif() endif() if(MI_TRACK_ETW) if(NOT WIN32) set(MI_TRACK_ETW OFF) message(WARNING "Can only enable ETW support on Windows (MI_TRACK_ETW=OFF)") endif() if (MI_TRACK_VALGRIND OR MI_TRACK_ASAN) set(MI_TRACK_ETW OFF) message(WARNING "Cannot enable ETW support with also Valgrind or ASAN support enabled (MI_TRACK_ETW=OFF)") endif() if(MI_TRACK_ETW) message(STATUS "Compile with Windows event tracing support (MI_TRACK_ETW=ON)") list(APPEND mi_defines MI_TRACK_ETW=1) endif() endif() if(MI_SEE_ASM) message(STATUS "Generate assembly listings (MI_SEE_ASM=ON)") list(APPEND mi_cflags -save-temps) endif() if(MI_CHECK_FULL) message(STATUS "The MI_CHECK_FULL option is deprecated, use MI_DEBUG_FULL instead") set(MI_DEBUG_FULL "ON") endif() if (MI_SKIP_COLLECT_ON_EXIT) message(STATUS "Skip collecting memory on program exit (MI_SKIP_COLLECT_ON_EXIT=ON)") list(APPEND mi_defines MI_SKIP_COLLECT_ON_EXIT=1) endif() if(MI_DEBUG_FULL) message(STATUS "Set debug level to full internal invariant checking (MI_DEBUG_FULL=ON)") list(APPEND mi_defines MI_DEBUG=3) # full invariant checking endif() if(MI_NO_PADDING) message(STATUS "Suppress any padding of heap blocks (MI_NO_PADDING=ON)") list(APPEND mi_defines MI_PADDING=0) else() if(MI_PADDING) message(STATUS "Enable explicit padding of heap blocks (MI_PADDING=ON)") list(APPEND mi_defines MI_PADDING=1) endif() endif() if(MI_XMALLOC) message(STATUS "Enable abort() calls on memory allocation failure (MI_XMALLOC=ON)") list(APPEND mi_defines MI_XMALLOC=1) endif() if(MI_SHOW_ERRORS) message(STATUS "Enable printing of error and 
warning messages by default (MI_SHOW_ERRORS=ON)") list(APPEND mi_defines MI_SHOW_ERRORS=1) endif() if(MI_DEBUG_TSAN) if(CMAKE_C_COMPILER_ID MATCHES "Clang") message(STATUS "Build with thread sanitizer (MI_DEBUG_TSAN=ON)") list(APPEND mi_defines MI_TSAN=1) list(APPEND mi_cflags -fsanitize=thread -g -O1) list(APPEND mi_libraries -fsanitize=thread) else() message(WARNING "Can only use thread sanitizer with clang (MI_DEBUG_TSAN=ON but ignored)") endif() endif() if(MI_DEBUG_UBSAN) if(CMAKE_BUILD_TYPE MATCHES "Debug") if(CMAKE_CXX_COMPILER_ID MATCHES "Clang") message(STATUS "Build with undefined-behavior sanitizer (MI_DEBUG_UBSAN=ON)") list(APPEND mi_cflags -fsanitize=undefined -g -fno-sanitize-recover=undefined) list(APPEND mi_libraries -fsanitize=undefined) if (NOT MI_USE_CXX) message(STATUS "(switch to use C++ due to MI_DEBUG_UBSAN)") set(MI_USE_CXX "ON") endif() else() message(WARNING "Can only use undefined-behavior sanitizer with clang++ (MI_DEBUG_UBSAN=ON but ignored)") endif() else() message(WARNING "Can only use thread sanitizer with a debug build (CMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE})") endif() endif() if(MI_USE_CXX) message(STATUS "Use the C++ compiler to compile (MI_USE_CXX=ON)") set_source_files_properties(${mi_sources} PROPERTIES LANGUAGE CXX ) set_source_files_properties(src/static.c test/test-api.c test/test-api-fill test/test-stress PROPERTIES LANGUAGE CXX ) if(CMAKE_CXX_COMPILER_ID MATCHES "AppleClang|Clang") list(APPEND mi_cflags -Wno-deprecated) endif() if(CMAKE_CXX_COMPILER_ID MATCHES "Intel" AND NOT CMAKE_CXX_COMPILER_ID MATCHES "IntelLLVM") list(APPEND mi_cflags -Kc++) endif() endif() if(CMAKE_SYSTEM_NAME MATCHES "Haiku") SET(CMAKE_INSTALL_LIBDIR ~/config/non-packaged/lib) SET(CMAKE_INSTALL_INCLUDEDIR ~/config/non-packaged/headers) endif() # Compiler flags if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang|GNU") list(APPEND mi_cflags -Wall -Wextra -Wno-unknown-pragmas -fvisibility=hidden) if(NOT MI_USE_CXX) list(APPEND mi_cflags -Wstrict-prototypes) 
endif() if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang") list(APPEND mi_cflags -Wpedantic -Wno-static-in-inline) endif() endif() if(CMAKE_C_COMPILER_ID MATCHES "Intel") list(APPEND mi_cflags -Wall -fvisibility=hidden) endif() if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang|GNU|Intel" AND NOT CMAKE_SYSTEM_NAME MATCHES "Haiku") if(MI_LOCAL_DYNAMIC_TLS) list(APPEND mi_cflags -ftls-model=local-dynamic) else() list(APPEND mi_cflags -ftls-model=initial-exec) endif() if(MI_OVERRIDE) list(APPEND mi_cflags -fno-builtin-malloc) endif() endif() if (MSVC AND MSVC_VERSION GREATER_EQUAL 1914) list(APPEND mi_cflags /Zc:__cplusplus) endif() # extra needed libraries if(WIN32) list(APPEND mi_libraries psapi shell32 user32 advapi32 bcrypt) set(pc_libraries "-lpsapi -lshell32 -luser32 -ladvapi32 -lbcrypt") else() set(pc_libraries "") find_library(MI_LIBPTHREAD pthread) if (MI_LIBPTHREAD) list(APPEND mi_libraries ${MI_LIBPTHREAD}) set(pc_libraries "${pc_libraries} -pthread") endif() find_library(MI_LIBRT rt) if(MI_LIBRT) list(APPEND mi_libraries ${MI_LIBRT}) set(pc_libraries "${pc_libraries} -lrt") endif() find_library(MI_LIBATOMIC atomic) if (NOT MI_LIBATOMIC AND MI_USE_LIBATOMIC) set(MI_LIBATOMIC atomic) endif() if (MI_LIBATOMIC) list(APPEND mi_libraries ${MI_LIBATOMIC}) set(pc_libraries "${pc_libraries} -latomic") endif() endif() # ----------------------------------------------------------------------------- # Install and output names # ----------------------------------------------------------------------------- # dynamic/shared library and symlinks always go to /usr/local/lib equivalent set(mi_install_libdir "${CMAKE_INSTALL_LIBDIR}") # static libraries and object files, includes, and cmake config files # are either installed at top level, or use versioned directories for side-by-side installation (default) if (MI_INSTALL_TOPLEVEL) set(mi_install_objdir "${CMAKE_INSTALL_LIBDIR}") set(mi_install_incdir "${CMAKE_INSTALL_INCLUDEDIR}") set(mi_install_cmakedir 
"${CMAKE_INSTALL_LIBDIR}/cmake/mimalloc") else() set(mi_install_objdir "${CMAKE_INSTALL_LIBDIR}/mimalloc-${mi_version}") # for static library and object files set(mi_install_incdir "${CMAKE_INSTALL_INCLUDEDIR}/mimalloc-${mi_version}") # for includes set(mi_install_cmakedir "${CMAKE_INSTALL_LIBDIR}/cmake/mimalloc-${mi_version}") # for cmake package info endif() set(mi_basename "mimalloc") if(MI_SECURE) set(mi_basename "${mi_basename}-secure") endif() if(MI_TRACK_VALGRIND) set(mi_basename "${mi_basename}-valgrind") endif() if(MI_TRACK_ASAN) set(mi_basename "${mi_basename}-asan") endif() string(TOLOWER "${CMAKE_BUILD_TYPE}" CMAKE_BUILD_TYPE_LC) if(NOT(CMAKE_BUILD_TYPE_LC MATCHES "^(release|relwithdebinfo|minsizerel|none)$")) set(mi_basename "${mi_basename}-${CMAKE_BUILD_TYPE_LC}") #append build type (e.g. -debug) if not a release version endif() if(MI_BUILD_SHARED) list(APPEND mi_build_targets "shared") endif() if(MI_BUILD_STATIC) list(APPEND mi_build_targets "static") endif() if(MI_BUILD_OBJECT) list(APPEND mi_build_targets "object") endif() if(MI_BUILD_TESTS) list(APPEND mi_build_targets "tests") endif() message(STATUS "") message(STATUS "Library base name: ${mi_basename}") message(STATUS "Version : ${mi_version}") message(STATUS "Build type : ${CMAKE_BUILD_TYPE_LC}") if(MI_USE_CXX) message(STATUS "C++ Compiler : ${CMAKE_CXX_COMPILER}") else() message(STATUS "C Compiler : ${CMAKE_C_COMPILER}") endif() message(STATUS "Compiler flags : ${mi_cflags}") message(STATUS "Compiler defines : ${mi_defines}") message(STATUS "Link libraries : ${mi_libraries}") message(STATUS "Build targets : ${mi_build_targets}") message(STATUS "") # ----------------------------------------------------------------------------- # Main targets # ----------------------------------------------------------------------------- # shared library if(MI_BUILD_SHARED) add_library(mimalloc SHARED ${mi_sources}) set_target_properties(mimalloc PROPERTIES VERSION ${mi_version} SOVERSION ${mi_version_major} 
OUTPUT_NAME ${mi_basename} ) target_compile_definitions(mimalloc PRIVATE ${mi_defines} MI_SHARED_LIB MI_SHARED_LIB_EXPORT) target_compile_options(mimalloc PRIVATE ${mi_cflags}) target_link_libraries(mimalloc PRIVATE ${mi_libraries}) target_include_directories(mimalloc PUBLIC $ $ ) if(WIN32 AND MI_WIN_REDIRECT) # On windows, link and copy the mimalloc redirection dll too. if(CMAKE_SIZEOF_VOID_P EQUAL 4) set(MIMALLOC_REDIRECT_SUFFIX "32") else() set(MIMALLOC_REDIRECT_SUFFIX "") endif() target_link_libraries(mimalloc PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/bin/mimalloc-redirect${MIMALLOC_REDIRECT_SUFFIX}.lib) add_custom_command(TARGET mimalloc POST_BUILD COMMAND "${CMAKE_COMMAND}" -E copy "${CMAKE_CURRENT_SOURCE_DIR}/bin/mimalloc-redirect${MIMALLOC_REDIRECT_SUFFIX}.dll" $ COMMENT "Copy mimalloc-redirect${MIMALLOC_REDIRECT_SUFFIX}.dll to output directory") install(FILES "$/mimalloc-redirect${MIMALLOC_REDIRECT_SUFFIX}.dll" DESTINATION ${mi_install_libdir}) endif() install(TARGETS mimalloc EXPORT mimalloc DESTINATION ${mi_install_libdir} LIBRARY) install(EXPORT mimalloc DESTINATION ${mi_install_cmakedir}) endif() # static library if (MI_BUILD_STATIC) add_library(mimalloc-static STATIC ${mi_sources}) set_property(TARGET mimalloc-static PROPERTY POSITION_INDEPENDENT_CODE ON) target_compile_definitions(mimalloc-static PRIVATE ${mi_defines} MI_STATIC_LIB) target_compile_options(mimalloc-static PRIVATE ${mi_cflags}) target_link_libraries(mimalloc-static PRIVATE ${mi_libraries}) target_include_directories(mimalloc-static PUBLIC $ $ ) if(WIN32) # When building both static and shared libraries on Windows, a static library should use a # different output name to avoid the conflict with the import library of a shared one. 
string(REPLACE "mimalloc" "mimalloc-static" mi_output_name ${mi_basename}) set_target_properties(mimalloc-static PROPERTIES OUTPUT_NAME ${mi_output_name}) else() set_target_properties(mimalloc-static PROPERTIES OUTPUT_NAME ${mi_basename}) endif() install(TARGETS mimalloc-static EXPORT mimalloc DESTINATION ${mi_install_objdir} LIBRARY) install(EXPORT mimalloc DESTINATION ${mi_install_cmakedir}) endif() # install include files install(FILES include/mimalloc.h DESTINATION ${mi_install_incdir}) install(FILES include/mimalloc-override.h DESTINATION ${mi_install_incdir}) install(FILES include/mimalloc-new-delete.h DESTINATION ${mi_install_incdir}) install(FILES cmake/mimalloc-config.cmake DESTINATION ${mi_install_cmakedir}) install(FILES cmake/mimalloc-config-version.cmake DESTINATION ${mi_install_cmakedir}) # single object file for more predictable static overriding if (MI_BUILD_OBJECT) add_library(mimalloc-obj OBJECT src/static.c) set_property(TARGET mimalloc-obj PROPERTY POSITION_INDEPENDENT_CODE ON) target_compile_definitions(mimalloc-obj PRIVATE ${mi_defines}) target_compile_options(mimalloc-obj PRIVATE ${mi_cflags}) target_include_directories(mimalloc-obj PUBLIC $ $ ) # Copy the generated object file (`static.o`) to the output directory (as `mimalloc.o`) if(NOT WIN32) set(mimalloc-obj-static "${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/mimalloc-obj.dir/src/static.c${CMAKE_C_OUTPUT_EXTENSION}") set(mimalloc-obj-out "${CMAKE_CURRENT_BINARY_DIR}/${mi_basename}${CMAKE_C_OUTPUT_EXTENSION}") add_custom_command(OUTPUT ${mimalloc-obj-out} DEPENDS mimalloc-obj COMMAND "${CMAKE_COMMAND}" -E copy "${mimalloc-obj-static}" "${mimalloc-obj-out}") add_custom_target(mimalloc-obj-target ALL DEPENDS ${mimalloc-obj-out}) endif() # the following seems to lead to cmake warnings/errors on some systems, disable for now :-( # install(TARGETS mimalloc-obj EXPORT mimalloc DESTINATION ${mi_install_objdir}) # the FILES expression can also be: $ # but that fails cmake versions less than 3.10 so we 
leave it as is for now install(FILES ${mimalloc-obj-static} DESTINATION ${mi_install_objdir} RENAME ${mi_basename}${CMAKE_C_OUTPUT_EXTENSION} ) endif() # pkg-config file support include("cmake/JoinPaths.cmake") join_paths(includedir_for_pc_file "\${prefix}" "${CMAKE_INSTALL_INCLUDEDIR}") join_paths(libdir_for_pc_file "\${prefix}" "${CMAKE_INSTALL_LIBDIR}") configure_file(mimalloc.pc.in mimalloc.pc @ONLY) install(FILES "${CMAKE_CURRENT_BINARY_DIR}/mimalloc.pc" DESTINATION "${CMAKE_INSTALL_LIBDIR}/pkgconfig/") # ----------------------------------------------------------------------------- # API surface testing # ----------------------------------------------------------------------------- if (MI_BUILD_TESTS) enable_testing() foreach(TEST_NAME api api-fill stress) add_executable(mimalloc-test-${TEST_NAME} test/test-${TEST_NAME}.c) target_compile_definitions(mimalloc-test-${TEST_NAME} PRIVATE ${mi_defines}) target_compile_options(mimalloc-test-${TEST_NAME} PRIVATE ${mi_cflags}) target_include_directories(mimalloc-test-${TEST_NAME} PRIVATE include) target_link_libraries(mimalloc-test-${TEST_NAME} PRIVATE mimalloc ${mi_libraries}) add_test(NAME test-${TEST_NAME} COMMAND mimalloc-test-${TEST_NAME}) endforeach() endif() # ----------------------------------------------------------------------------- # Set override properties # ----------------------------------------------------------------------------- if (MI_OVERRIDE) if (MI_BUILD_SHARED) target_compile_definitions(mimalloc PRIVATE MI_MALLOC_OVERRIDE) endif() if(NOT WIN32) # It is only possible to override malloc on Windows when building as a DLL. 
if (MI_BUILD_STATIC) target_compile_definitions(mimalloc-static PRIVATE MI_MALLOC_OVERRIDE) endif() if (MI_BUILD_OBJECT) target_compile_definitions(mimalloc-obj PRIVATE MI_MALLOC_OVERRIDE) endif() endif() endif() luametatex-2.10.08/source/libraries/mimalloc/LICENSE000066400000000000000000000021101442250314700221050ustar00rootroot00000000000000MIT License Copyright (c) 2018-2021 Microsoft Corporation, Daan Leijen Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
luametatex-2.10.08/source/libraries/mimalloc/cmake/000077500000000000000000000000001442250314700221665ustar00rootroot00000000000000luametatex-2.10.08/source/libraries/mimalloc/cmake/JoinPaths.cmake000066400000000000000000000014611442250314700250710ustar00rootroot00000000000000# This module provides function for joining paths # known from most languages # # SPDX-License-Identifier: (MIT OR CC0-1.0) # Copyright 2020 Jan Tojnar # https://github.com/jtojnar/cmake-snips # # Modelled after Python’s os.path.join # https://docs.python.org/3.7/library/os.path.html#os.path.join # Windows not supported function(join_paths joined_path first_path_segment) set(temp_path "${first_path_segment}") foreach(current_segment IN LISTS ARGN) if(NOT ("${current_segment}" STREQUAL "")) if(IS_ABSOLUTE "${current_segment}") set(temp_path "${current_segment}") else() set(temp_path "${temp_path}/${current_segment}") endif() endif() endforeach() set(${joined_path} "${temp_path}" PARENT_SCOPE) endfunction() luametatex-2.10.08/source/libraries/mimalloc/cmake/mimalloc-config-version.cmake000066400000000000000000000012241442250314700277120ustar00rootroot00000000000000set(mi_version_major 2) set(mi_version_minor 1) set(mi_version_patch 2) set(mi_version ${mi_version_major}.${mi_version_minor}) set(PACKAGE_VERSION ${mi_version}) if(PACKAGE_FIND_VERSION_MAJOR) if("${PACKAGE_FIND_VERSION_MAJOR}" EQUAL "${mi_version_major}") if ("${PACKAGE_FIND_VERSION_MINOR}" EQUAL "${mi_version_minor}") set(PACKAGE_VERSION_EXACT TRUE) elseif("${PACKAGE_FIND_VERSION_MINOR}" LESS "${mi_version_minor}") set(PACKAGE_VERSION_COMPATIBLE TRUE) else() set(PACKAGE_VERSION_UNSUITABLE TRUE) endif() else() set(PACKAGE_VERSION_UNSUITABLE TRUE) endif() endif() luametatex-2.10.08/source/libraries/mimalloc/cmake/mimalloc-config.cmake000066400000000000000000000015011442250314700262250ustar00rootroot00000000000000include(${CMAKE_CURRENT_LIST_DIR}/mimalloc.cmake) get_filename_component(MIMALLOC_CMAKE_DIR "${CMAKE_CURRENT_LIST_DIR}" 
PATH) # one up from the cmake dir, e.g. /usr/local/lib/cmake/mimalloc-2.0 get_filename_component(MIMALLOC_VERSION_DIR "${CMAKE_CURRENT_LIST_DIR}" NAME) string(REPLACE "/lib/cmake" "/lib" MIMALLOC_LIBRARY_DIR "${MIMALLOC_CMAKE_DIR}") if("${MIMALLOC_VERSION_DIR}" EQUAL "mimalloc") # top level install string(REPLACE "/lib/cmake" "/include" MIMALLOC_INCLUDE_DIR "${MIMALLOC_CMAKE_DIR}") set(MIMALLOC_OBJECT_DIR "${MIMALLOC_LIBRARY_DIR}") else() # versioned string(REPLACE "/lib/cmake/" "/include/" MIMALLOC_INCLUDE_DIR "${CMAKE_CURRENT_LIST_DIR}") string(REPLACE "/lib/cmake/" "/lib/" MIMALLOC_OBJECT_DIR "${CMAKE_CURRENT_LIST_DIR}") endif() set(MIMALLOC_TARGET_DIR "${MIMALLOC_LIBRARY_DIR}") # legacy luametatex-2.10.08/source/libraries/mimalloc/include/000077500000000000000000000000001442250314700225315ustar00rootroot00000000000000luametatex-2.10.08/source/libraries/mimalloc/include/mimalloc-new-delete.h000066400000000000000000000077141442250314700265370ustar00rootroot00000000000000/* ---------------------------------------------------------------------------- Copyright (c) 2018-2020 Microsoft Research, Daan Leijen This is free software; you can redistribute it and/or modify it under the terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. -----------------------------------------------------------------------------*/ #pragma once #ifndef MIMALLOC_NEW_DELETE_H #define MIMALLOC_NEW_DELETE_H // ---------------------------------------------------------------------------- // This header provides convenient overrides for the new and // delete operations in C++. // // This header should be included in only one source file! // // On Windows, or when linking dynamically with mimalloc, these // can be more performant than the standard new-delete operations. 
// See // --------------------------------------------------------------------------- #if defined(__cplusplus) #include #include #if defined(_MSC_VER) && defined(_Ret_notnull_) && defined(_Post_writable_byte_size_) // stay consistent with VCRT definitions #define mi_decl_new(n) mi_decl_nodiscard mi_decl_restrict _Ret_notnull_ _Post_writable_byte_size_(n) #define mi_decl_new_nothrow(n) mi_decl_nodiscard mi_decl_restrict _Ret_maybenull_ _Success_(return != NULL) _Post_writable_byte_size_(n) #else #define mi_decl_new(n) mi_decl_nodiscard mi_decl_restrict #define mi_decl_new_nothrow(n) mi_decl_nodiscard mi_decl_restrict #endif void operator delete(void* p) noexcept { mi_free(p); }; void operator delete[](void* p) noexcept { mi_free(p); }; void operator delete (void* p, const std::nothrow_t&) noexcept { mi_free(p); } void operator delete[](void* p, const std::nothrow_t&) noexcept { mi_free(p); } mi_decl_new(n) void* operator new(std::size_t n) noexcept(false) { return mi_new(n); } mi_decl_new(n) void* operator new[](std::size_t n) noexcept(false) { return mi_new(n); } mi_decl_new_nothrow(n) void* operator new (std::size_t n, const std::nothrow_t& tag) noexcept { (void)(tag); return mi_new_nothrow(n); } mi_decl_new_nothrow(n) void* operator new[](std::size_t n, const std::nothrow_t& tag) noexcept { (void)(tag); return mi_new_nothrow(n); } #if (__cplusplus >= 201402L || _MSC_VER >= 1916) void operator delete (void* p, std::size_t n) noexcept { mi_free_size(p,n); }; void operator delete[](void* p, std::size_t n) noexcept { mi_free_size(p,n); }; #endif #if (__cplusplus > 201402L || defined(__cpp_aligned_new)) void operator delete (void* p, std::align_val_t al) noexcept { mi_free_aligned(p, static_cast(al)); } void operator delete[](void* p, std::align_val_t al) noexcept { mi_free_aligned(p, static_cast(al)); } void operator delete (void* p, std::size_t n, std::align_val_t al) noexcept { mi_free_size_aligned(p, n, static_cast(al)); }; void operator delete[](void* p, 
std::size_t n, std::align_val_t al) noexcept { mi_free_size_aligned(p, n, static_cast(al)); }; void operator delete (void* p, std::align_val_t al, const std::nothrow_t&) noexcept { mi_free_aligned(p, static_cast(al)); } void operator delete[](void* p, std::align_val_t al, const std::nothrow_t&) noexcept { mi_free_aligned(p, static_cast(al)); } void* operator new (std::size_t n, std::align_val_t al) noexcept(false) { return mi_new_aligned(n, static_cast(al)); } void* operator new[](std::size_t n, std::align_val_t al) noexcept(false) { return mi_new_aligned(n, static_cast(al)); } void* operator new (std::size_t n, std::align_val_t al, const std::nothrow_t&) noexcept { return mi_new_aligned_nothrow(n, static_cast(al)); } void* operator new[](std::size_t n, std::align_val_t al, const std::nothrow_t&) noexcept { return mi_new_aligned_nothrow(n, static_cast(al)); } #endif #endif #endif // MIMALLOC_NEW_DELETE_H luametatex-2.10.08/source/libraries/mimalloc/include/mimalloc-override.h000066400000000000000000000060471442250314700263230ustar00rootroot00000000000000/* ---------------------------------------------------------------------------- Copyright (c) 2018-2020 Microsoft Research, Daan Leijen This is free software; you can redistribute it and/or modify it under the terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. -----------------------------------------------------------------------------*/ #pragma once #ifndef MIMALLOC_OVERRIDE_H #define MIMALLOC_OVERRIDE_H /* ---------------------------------------------------------------------------- This header can be used to statically redirect malloc/free and new/delete to the mimalloc variants. This can be useful if one can include this file on each source file in a project (but be careful when using external code to not accidentally mix pointers from different allocators). 
-----------------------------------------------------------------------------*/ #include // Standard C allocation #define malloc(n) mi_malloc(n) #define calloc(n,c) mi_calloc(n,c) #define realloc(p,n) mi_realloc(p,n) #define free(p) mi_free(p) #define strdup(s) mi_strdup(s) #define strndup(s,n) mi_strndup(s,n) #define realpath(f,n) mi_realpath(f,n) // Microsoft extensions #define _expand(p,n) mi_expand(p,n) #define _msize(p) mi_usable_size(p) #define _recalloc(p,n,c) mi_recalloc(p,n,c) #define _strdup(s) mi_strdup(s) #define _strndup(s,n) mi_strndup(s,n) #define _wcsdup(s) (wchar_t*)mi_wcsdup((const unsigned short*)(s)) #define _mbsdup(s) mi_mbsdup(s) #define _dupenv_s(b,n,v) mi_dupenv_s(b,n,v) #define _wdupenv_s(b,n,v) mi_wdupenv_s((unsigned short*)(b),n,(const unsigned short*)(v)) // Various Posix and Unix variants #define reallocf(p,n) mi_reallocf(p,n) #define malloc_size(p) mi_usable_size(p) #define malloc_usable_size(p) mi_usable_size(p) #define cfree(p) mi_free(p) #define valloc(n) mi_valloc(n) #define pvalloc(n) mi_pvalloc(n) #define reallocarray(p,s,n) mi_reallocarray(p,s,n) #define reallocarr(p,s,n) mi_reallocarr(p,s,n) #define memalign(a,n) mi_memalign(a,n) #define aligned_alloc(a,n) mi_aligned_alloc(a,n) #define posix_memalign(p,a,n) mi_posix_memalign(p,a,n) #define _posix_memalign(p,a,n) mi_posix_memalign(p,a,n) // Microsoft aligned variants #define _aligned_malloc(n,a) mi_malloc_aligned(n,a) #define _aligned_realloc(p,n,a) mi_realloc_aligned(p,n,a) #define _aligned_recalloc(p,s,n,a) mi_aligned_recalloc(p,s,n,a) #define _aligned_msize(p,a,o) mi_usable_size(p) #define _aligned_free(p) mi_free(p) #define _aligned_offset_malloc(n,a,o) mi_malloc_aligned_at(n,a,o) #define _aligned_offset_realloc(p,n,a,o) mi_realloc_aligned_at(p,n,a,o) #define _aligned_offset_recalloc(p,s,n,a,o) mi_recalloc_aligned_at(p,s,n,a,o) #endif // MIMALLOC_OVERRIDE_H 
luametatex-2.10.08/source/libraries/mimalloc/include/mimalloc.h000066400000000000000000001064021442250314700245020ustar00rootroot00000000000000/* ---------------------------------------------------------------------------- Copyright (c) 2018-2023, Microsoft Research, Daan Leijen This is free software; you can redistribute it and/or modify it under the terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. -----------------------------------------------------------------------------*/ #pragma once #ifndef MIMALLOC_H #define MIMALLOC_H #define MI_MALLOC_VERSION 212 // major + 2 digits minor // ------------------------------------------------------ // Compiler specific attributes // ------------------------------------------------------ #ifdef __cplusplus #if (__cplusplus >= 201103L) || (_MSC_VER > 1900) // C++11 #define mi_attr_noexcept noexcept #else #define mi_attr_noexcept throw() #endif #else #define mi_attr_noexcept #endif #if defined(__cplusplus) && (__cplusplus >= 201703) #define mi_decl_nodiscard [[nodiscard]] #elif (defined(__GNUC__) && (__GNUC__ >= 4)) || defined(__clang__) // includes clang, icc, and clang-cl #define mi_decl_nodiscard __attribute__((warn_unused_result)) #elif defined(_HAS_NODISCARD) #define mi_decl_nodiscard _NODISCARD #elif (_MSC_VER >= 1700) #define mi_decl_nodiscard _Check_return_ #else #define mi_decl_nodiscard #endif #if defined(_MSC_VER) || defined(__MINGW32__) #if !defined(MI_SHARED_LIB) #define mi_decl_export #elif defined(MI_SHARED_LIB_EXPORT) #define mi_decl_export __declspec(dllexport) #else #define mi_decl_export __declspec(dllimport) #endif #if defined(__MINGW32__) #define mi_decl_restrict #define mi_attr_malloc __attribute__((malloc)) #else #if (_MSC_VER >= 1900) && !defined(__EDG__) #define mi_decl_restrict __declspec(allocator) __declspec(restrict) #else #define mi_decl_restrict __declspec(restrict) #endif #define mi_attr_malloc #endif #define mi_cdecl __cdecl 
#define mi_attr_alloc_size(s) #define mi_attr_alloc_size2(s1,s2) #define mi_attr_alloc_align(p) #elif defined(__GNUC__) // includes clang and icc #if defined(MI_SHARED_LIB) && defined(MI_SHARED_LIB_EXPORT) #define mi_decl_export __attribute__((visibility("default"))) #else #define mi_decl_export #endif #define mi_cdecl // leads to warnings... __attribute__((cdecl)) #define mi_decl_restrict #define mi_attr_malloc __attribute__((malloc)) #if (defined(__clang_major__) && (__clang_major__ < 4)) || (__GNUC__ < 5) #define mi_attr_alloc_size(s) #define mi_attr_alloc_size2(s1,s2) #define mi_attr_alloc_align(p) #elif defined(__INTEL_COMPILER) #define mi_attr_alloc_size(s) __attribute__((alloc_size(s))) #define mi_attr_alloc_size2(s1,s2) __attribute__((alloc_size(s1,s2))) #define mi_attr_alloc_align(p) #else #define mi_attr_alloc_size(s) __attribute__((alloc_size(s))) #define mi_attr_alloc_size2(s1,s2) __attribute__((alloc_size(s1,s2))) #define mi_attr_alloc_align(p) __attribute__((alloc_align(p))) #endif #else #define mi_cdecl #define mi_decl_export #define mi_decl_restrict #define mi_attr_malloc #define mi_attr_alloc_size(s) #define mi_attr_alloc_size2(s1,s2) #define mi_attr_alloc_align(p) #endif // ------------------------------------------------------ // Includes // ------------------------------------------------------ #include // size_t #include // bool #include // INTPTR_MAX #ifdef __cplusplus extern "C" { #endif // ------------------------------------------------------ // Standard malloc interface // ------------------------------------------------------ mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_malloc(size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1); mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_calloc(size_t count, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(1,2); mi_decl_nodiscard mi_decl_export void* mi_realloc(void* p, size_t newsize) mi_attr_noexcept mi_attr_alloc_size(2); mi_decl_export void* 
mi_expand(void* p, size_t newsize) mi_attr_noexcept mi_attr_alloc_size(2); mi_decl_export void mi_free(void* p) mi_attr_noexcept; mi_decl_nodiscard mi_decl_export mi_decl_restrict char* mi_strdup(const char* s) mi_attr_noexcept mi_attr_malloc; mi_decl_nodiscard mi_decl_export mi_decl_restrict char* mi_strndup(const char* s, size_t n) mi_attr_noexcept mi_attr_malloc; mi_decl_nodiscard mi_decl_export mi_decl_restrict char* mi_realpath(const char* fname, char* resolved_name) mi_attr_noexcept mi_attr_malloc; // ------------------------------------------------------ // Extended functionality // ------------------------------------------------------ #define MI_SMALL_WSIZE_MAX (128) #define MI_SMALL_SIZE_MAX (MI_SMALL_WSIZE_MAX*sizeof(void*)) mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_malloc_small(size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1); mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_zalloc_small(size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1); mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_zalloc(size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1); mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_mallocn(size_t count, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(1,2); mi_decl_nodiscard mi_decl_export void* mi_reallocn(void* p, size_t count, size_t size) mi_attr_noexcept mi_attr_alloc_size2(2,3); mi_decl_nodiscard mi_decl_export void* mi_reallocf(void* p, size_t newsize) mi_attr_noexcept mi_attr_alloc_size(2); mi_decl_nodiscard mi_decl_export size_t mi_usable_size(const void* p) mi_attr_noexcept; mi_decl_nodiscard mi_decl_export size_t mi_good_size(size_t size) mi_attr_noexcept; // ------------------------------------------------------ // Internals // ------------------------------------------------------ typedef void (mi_cdecl mi_deferred_free_fun)(bool force, unsigned long long heartbeat, void* arg); mi_decl_export void 
mi_register_deferred_free(mi_deferred_free_fun* deferred_free, void* arg) mi_attr_noexcept; typedef void (mi_cdecl mi_output_fun)(const char* msg, void* arg); mi_decl_export void mi_register_output(mi_output_fun* out, void* arg) mi_attr_noexcept; typedef void (mi_cdecl mi_error_fun)(int err, void* arg); mi_decl_export void mi_register_error(mi_error_fun* fun, void* arg); mi_decl_export void mi_collect(bool force) mi_attr_noexcept; mi_decl_export int mi_version(void) mi_attr_noexcept; mi_decl_export void mi_stats_reset(void) mi_attr_noexcept; mi_decl_export void mi_stats_merge(void) mi_attr_noexcept; mi_decl_export void mi_stats_print(void* out) mi_attr_noexcept; // backward compatibility: `out` is ignored and should be NULL mi_decl_export void mi_stats_print_out(mi_output_fun* out, void* arg) mi_attr_noexcept; mi_decl_export void mi_process_init(void) mi_attr_noexcept; mi_decl_export void mi_thread_init(void) mi_attr_noexcept; mi_decl_export void mi_thread_done(void) mi_attr_noexcept; mi_decl_export void mi_thread_stats_print_out(mi_output_fun* out, void* arg) mi_attr_noexcept; mi_decl_export void mi_process_info(size_t* elapsed_msecs, size_t* user_msecs, size_t* system_msecs, size_t* current_rss, size_t* peak_rss, size_t* current_commit, size_t* peak_commit, size_t* page_faults) mi_attr_noexcept; // ------------------------------------------------------------------------------------- // Aligned allocation // Note that `alignment` always follows `size` for consistency with unaligned // allocation, but unfortunately this differs from `posix_memalign` and `aligned_alloc`. 
// ------------------------------------------------------------------------------------- mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_malloc_aligned(size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1) mi_attr_alloc_align(2); mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_malloc_aligned_at(size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1); mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_zalloc_aligned(size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1) mi_attr_alloc_align(2); mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_zalloc_aligned_at(size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1); mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_calloc_aligned(size_t count, size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(1,2) mi_attr_alloc_align(3); mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_calloc_aligned_at(size_t count, size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(1,2); mi_decl_nodiscard mi_decl_export void* mi_realloc_aligned(void* p, size_t newsize, size_t alignment) mi_attr_noexcept mi_attr_alloc_size(2) mi_attr_alloc_align(3); mi_decl_nodiscard mi_decl_export void* mi_realloc_aligned_at(void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_alloc_size(2); // ------------------------------------------------------------------------------------- // Heaps: first-class, but can only allocate from the same thread that created it. 
// ------------------------------------------------------------------------------------- struct mi_heap_s; typedef struct mi_heap_s mi_heap_t; mi_decl_nodiscard mi_decl_export mi_heap_t* mi_heap_new(void); mi_decl_export void mi_heap_delete(mi_heap_t* heap); mi_decl_export void mi_heap_destroy(mi_heap_t* heap); mi_decl_export mi_heap_t* mi_heap_set_default(mi_heap_t* heap); mi_decl_export mi_heap_t* mi_heap_get_default(void); mi_decl_export mi_heap_t* mi_heap_get_backing(void); mi_decl_export void mi_heap_collect(mi_heap_t* heap, bool force) mi_attr_noexcept; mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_malloc(mi_heap_t* heap, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_zalloc(mi_heap_t* heap, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_calloc(mi_heap_t* heap, size_t count, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(2, 3); mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_mallocn(mi_heap_t* heap, size_t count, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(2, 3); mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_malloc_small(mi_heap_t* heap, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); mi_decl_nodiscard mi_decl_export void* mi_heap_realloc(mi_heap_t* heap, void* p, size_t newsize) mi_attr_noexcept mi_attr_alloc_size(3); mi_decl_nodiscard mi_decl_export void* mi_heap_reallocn(mi_heap_t* heap, void* p, size_t count, size_t size) mi_attr_noexcept mi_attr_alloc_size2(3,4); mi_decl_nodiscard mi_decl_export void* mi_heap_reallocf(mi_heap_t* heap, void* p, size_t newsize) mi_attr_noexcept mi_attr_alloc_size(3); mi_decl_nodiscard mi_decl_export mi_decl_restrict char* mi_heap_strdup(mi_heap_t* heap, const char* s) mi_attr_noexcept mi_attr_malloc; mi_decl_nodiscard mi_decl_export 
mi_decl_restrict char* mi_heap_strndup(mi_heap_t* heap, const char* s, size_t n) mi_attr_noexcept mi_attr_malloc; mi_decl_nodiscard mi_decl_export mi_decl_restrict char* mi_heap_realpath(mi_heap_t* heap, const char* fname, char* resolved_name) mi_attr_noexcept mi_attr_malloc; mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_malloc_aligned(mi_heap_t* heap, size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2) mi_attr_alloc_align(3); mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_malloc_aligned_at(mi_heap_t* heap, size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_zalloc_aligned(mi_heap_t* heap, size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2) mi_attr_alloc_align(3); mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_zalloc_aligned_at(mi_heap_t* heap, size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_calloc_aligned(mi_heap_t* heap, size_t count, size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(2, 3) mi_attr_alloc_align(4); mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_calloc_aligned_at(mi_heap_t* heap, size_t count, size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(2, 3); mi_decl_nodiscard mi_decl_export void* mi_heap_realloc_aligned(mi_heap_t* heap, void* p, size_t newsize, size_t alignment) mi_attr_noexcept mi_attr_alloc_size(3) mi_attr_alloc_align(4); mi_decl_nodiscard mi_decl_export void* mi_heap_realloc_aligned_at(mi_heap_t* heap, void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_alloc_size(3); // -------------------------------------------------------------------------------- // Zero initialized 
re-allocation. // Only valid on memory that was originally allocated with zero initialization too. // e.g. `mi_calloc`, `mi_zalloc`, `mi_zalloc_aligned` etc. // see // -------------------------------------------------------------------------------- mi_decl_nodiscard mi_decl_export void* mi_rezalloc(void* p, size_t newsize) mi_attr_noexcept mi_attr_alloc_size(2); mi_decl_nodiscard mi_decl_export void* mi_recalloc(void* p, size_t newcount, size_t size) mi_attr_noexcept mi_attr_alloc_size2(2,3); mi_decl_nodiscard mi_decl_export void* mi_rezalloc_aligned(void* p, size_t newsize, size_t alignment) mi_attr_noexcept mi_attr_alloc_size(2) mi_attr_alloc_align(3); mi_decl_nodiscard mi_decl_export void* mi_rezalloc_aligned_at(void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_alloc_size(2); mi_decl_nodiscard mi_decl_export void* mi_recalloc_aligned(void* p, size_t newcount, size_t size, size_t alignment) mi_attr_noexcept mi_attr_alloc_size2(2,3) mi_attr_alloc_align(4); mi_decl_nodiscard mi_decl_export void* mi_recalloc_aligned_at(void* p, size_t newcount, size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_alloc_size2(2,3); mi_decl_nodiscard mi_decl_export void* mi_heap_rezalloc(mi_heap_t* heap, void* p, size_t newsize) mi_attr_noexcept mi_attr_alloc_size(3); mi_decl_nodiscard mi_decl_export void* mi_heap_recalloc(mi_heap_t* heap, void* p, size_t newcount, size_t size) mi_attr_noexcept mi_attr_alloc_size2(3,4); mi_decl_nodiscard mi_decl_export void* mi_heap_rezalloc_aligned(mi_heap_t* heap, void* p, size_t newsize, size_t alignment) mi_attr_noexcept mi_attr_alloc_size(3) mi_attr_alloc_align(4); mi_decl_nodiscard mi_decl_export void* mi_heap_rezalloc_aligned_at(mi_heap_t* heap, void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_alloc_size(3); mi_decl_nodiscard mi_decl_export void* mi_heap_recalloc_aligned(mi_heap_t* heap, void* p, size_t newcount, size_t size, size_t alignment) mi_attr_noexcept 
mi_attr_alloc_size2(3,4) mi_attr_alloc_align(5); mi_decl_nodiscard mi_decl_export void* mi_heap_recalloc_aligned_at(mi_heap_t* heap, void* p, size_t newcount, size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_alloc_size2(3,4); // ------------------------------------------------------ // Analysis // ------------------------------------------------------ mi_decl_export bool mi_heap_contains_block(mi_heap_t* heap, const void* p); mi_decl_export bool mi_heap_check_owned(mi_heap_t* heap, const void* p); mi_decl_export bool mi_check_owned(const void* p); // An area of heap space contains blocks of a single size. typedef struct mi_heap_area_s { void* blocks; // start of the area containing heap blocks size_t reserved; // bytes reserved for this area (virtual) size_t committed; // current available bytes for this area size_t used; // number of allocated blocks size_t block_size; // size in bytes of each block size_t full_block_size; // size in bytes of a full block including padding and metadata. 
} mi_heap_area_t; typedef bool (mi_cdecl mi_block_visit_fun)(const mi_heap_t* heap, const mi_heap_area_t* area, void* block, size_t block_size, void* arg); mi_decl_export bool mi_heap_visit_blocks(const mi_heap_t* heap, bool visit_all_blocks, mi_block_visit_fun* visitor, void* arg); // Experimental mi_decl_nodiscard mi_decl_export bool mi_is_in_heap_region(const void* p) mi_attr_noexcept; mi_decl_nodiscard mi_decl_export bool mi_is_redirected(void) mi_attr_noexcept; mi_decl_export int mi_reserve_huge_os_pages_interleave(size_t pages, size_t numa_nodes, size_t timeout_msecs) mi_attr_noexcept; mi_decl_export int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msecs) mi_attr_noexcept; mi_decl_export int mi_reserve_os_memory(size_t size, bool commit, bool allow_large) mi_attr_noexcept; mi_decl_export bool mi_manage_os_memory(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node) mi_attr_noexcept; mi_decl_export void mi_debug_show_arenas(void) mi_attr_noexcept; // Experimental: heaps associated with specific memory arena's typedef int mi_arena_id_t; mi_decl_export void* mi_arena_area(mi_arena_id_t arena_id, size_t* size); mi_decl_export int mi_reserve_huge_os_pages_at_ex(size_t pages, int numa_node, size_t timeout_msecs, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept; mi_decl_export int mi_reserve_os_memory_ex(size_t size, bool commit, bool allow_large, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept; mi_decl_export bool mi_manage_os_memory_ex(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept; #if MI_MALLOC_VERSION >= 182 // Create a heap that only allocates in the specified arena mi_decl_nodiscard mi_decl_export mi_heap_t* mi_heap_new_in_arena(mi_arena_id_t arena_id); #endif // deprecated mi_decl_export int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) 
mi_attr_noexcept; // ------------------------------------------------------ // Convenience // ------------------------------------------------------ #define mi_malloc_tp(tp) ((tp*)mi_malloc(sizeof(tp))) #define mi_zalloc_tp(tp) ((tp*)mi_zalloc(sizeof(tp))) #define mi_calloc_tp(tp,n) ((tp*)mi_calloc(n,sizeof(tp))) #define mi_mallocn_tp(tp,n) ((tp*)mi_mallocn(n,sizeof(tp))) #define mi_reallocn_tp(p,tp,n) ((tp*)mi_reallocn(p,n,sizeof(tp))) #define mi_recalloc_tp(p,tp,n) ((tp*)mi_recalloc(p,n,sizeof(tp))) #define mi_heap_malloc_tp(hp,tp) ((tp*)mi_heap_malloc(hp,sizeof(tp))) #define mi_heap_zalloc_tp(hp,tp) ((tp*)mi_heap_zalloc(hp,sizeof(tp))) #define mi_heap_calloc_tp(hp,tp,n) ((tp*)mi_heap_calloc(hp,n,sizeof(tp))) #define mi_heap_mallocn_tp(hp,tp,n) ((tp*)mi_heap_mallocn(hp,n,sizeof(tp))) #define mi_heap_reallocn_tp(hp,p,tp,n) ((tp*)mi_heap_reallocn(hp,p,n,sizeof(tp))) #define mi_heap_recalloc_tp(hp,p,tp,n) ((tp*)mi_heap_recalloc(hp,p,n,sizeof(tp))) // ------------------------------------------------------ // Options // ------------------------------------------------------ typedef enum mi_option_e { // stable options mi_option_show_errors, // print error messages mi_option_show_stats, // print statistics on termination mi_option_verbose, // print verbose messages // the following options are experimental (see src/options.h) mi_option_eager_commit, // eager commit segments? (after `eager_commit_delay` segments) (=1) mi_option_arena_eager_commit, // eager commit arenas? 
Use 2 to enable just on overcommit systems (=2) mi_option_purge_decommits, // should a memory purge decommit (or only reset) (=1) mi_option_allow_large_os_pages, // allow large (2MiB) OS pages, implies eager commit mi_option_reserve_huge_os_pages, // reserve N huge OS pages (1GiB/page) at startup mi_option_reserve_huge_os_pages_at, // reserve huge OS pages at a specific NUMA node mi_option_reserve_os_memory, // reserve specified amount of OS memory in an arena at startup mi_option_deprecated_segment_cache, mi_option_deprecated_page_reset, mi_option_abandoned_page_purge, // immediately purge delayed purges on thread termination mi_option_deprecated_segment_reset, mi_option_eager_commit_delay, mi_option_purge_delay, // memory purging is delayed by N milli seconds; use 0 for immediate purging or -1 for no purging at all. mi_option_use_numa_nodes, // 0 = use all available numa nodes, otherwise use at most N nodes. mi_option_limit_os_alloc, // 1 = do not use OS memory for allocation (but only programmatically reserved arenas) mi_option_os_tag, // tag used for OS logging (macOS only for now) mi_option_max_errors, // issue at most N error messages mi_option_max_warnings, // issue at most N warning messages mi_option_max_segment_reclaim, mi_option_destroy_on_exit, // if set, release all memory on exit; sometimes used for dynamic unloading but can be unsafe. 
mi_option_arena_reserve, // initial memory size in KiB for arena reservation (1GiB on 64-bit) mi_option_arena_purge_mult, mi_option_purge_extend_delay, _mi_option_last, // legacy option names mi_option_large_os_pages = mi_option_allow_large_os_pages, mi_option_eager_region_commit = mi_option_arena_eager_commit, mi_option_reset_decommits = mi_option_purge_decommits, mi_option_reset_delay = mi_option_purge_delay, mi_option_abandoned_page_reset = mi_option_abandoned_page_purge } mi_option_t; mi_decl_nodiscard mi_decl_export bool mi_option_is_enabled(mi_option_t option); mi_decl_export void mi_option_enable(mi_option_t option); mi_decl_export void mi_option_disable(mi_option_t option); mi_decl_export void mi_option_set_enabled(mi_option_t option, bool enable); mi_decl_export void mi_option_set_enabled_default(mi_option_t option, bool enable); mi_decl_nodiscard mi_decl_export long mi_option_get(mi_option_t option); mi_decl_nodiscard mi_decl_export long mi_option_get_clamp(mi_option_t option, long min, long max); mi_decl_nodiscard mi_decl_export size_t mi_option_get_size(mi_option_t option); mi_decl_export void mi_option_set(mi_option_t option, long value); mi_decl_export void mi_option_set_default(mi_option_t option, long value); // ------------------------------------------------------------------------------------------------------- // "mi" prefixed implementations of various posix, Unix, Windows, and C++ allocation functions. // (This can be convenient when providing overrides of these functions as done in `mimalloc-override.h`.) // note: we use `mi_cfree` as "checked free" and it checks if the pointer is in our heap before free-ing. 
// ------------------------------------------------------------------------------------------------------- mi_decl_export void mi_cfree(void* p) mi_attr_noexcept; mi_decl_export void* mi__expand(void* p, size_t newsize) mi_attr_noexcept; mi_decl_nodiscard mi_decl_export size_t mi_malloc_size(const void* p) mi_attr_noexcept; mi_decl_nodiscard mi_decl_export size_t mi_malloc_good_size(size_t size) mi_attr_noexcept; mi_decl_nodiscard mi_decl_export size_t mi_malloc_usable_size(const void *p) mi_attr_noexcept; mi_decl_export int mi_posix_memalign(void** p, size_t alignment, size_t size) mi_attr_noexcept; mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_memalign(size_t alignment, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2) mi_attr_alloc_align(1); mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_valloc(size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1); mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_pvalloc(size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1); mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_aligned_alloc(size_t alignment, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2) mi_attr_alloc_align(1); mi_decl_nodiscard mi_decl_export void* mi_reallocarray(void* p, size_t count, size_t size) mi_attr_noexcept mi_attr_alloc_size2(2,3); mi_decl_nodiscard mi_decl_export int mi_reallocarr(void* p, size_t count, size_t size) mi_attr_noexcept; mi_decl_nodiscard mi_decl_export void* mi_aligned_recalloc(void* p, size_t newcount, size_t size, size_t alignment) mi_attr_noexcept; mi_decl_nodiscard mi_decl_export void* mi_aligned_offset_recalloc(void* p, size_t newcount, size_t size, size_t alignment, size_t offset) mi_attr_noexcept; mi_decl_nodiscard mi_decl_export mi_decl_restrict unsigned short* mi_wcsdup(const unsigned short* s) mi_attr_noexcept mi_attr_malloc; mi_decl_nodiscard mi_decl_export mi_decl_restrict unsigned char* mi_mbsdup(const unsigned char* s) 
mi_attr_noexcept mi_attr_malloc; mi_decl_export int mi_dupenv_s(char** buf, size_t* size, const char* name) mi_attr_noexcept; mi_decl_export int mi_wdupenv_s(unsigned short** buf, size_t* size, const unsigned short* name) mi_attr_noexcept; mi_decl_export void mi_free_size(void* p, size_t size) mi_attr_noexcept; mi_decl_export void mi_free_size_aligned(void* p, size_t size, size_t alignment) mi_attr_noexcept; mi_decl_export void mi_free_aligned(void* p, size_t alignment) mi_attr_noexcept; // The `mi_new` wrappers implement C++ semantics on out-of-memory instead of directly returning `NULL`. // (and call `std::get_new_handler` and potentially raise a `std::bad_alloc` exception). mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_new(size_t size) mi_attr_malloc mi_attr_alloc_size(1); mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_new_aligned(size_t size, size_t alignment) mi_attr_malloc mi_attr_alloc_size(1) mi_attr_alloc_align(2); mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_new_nothrow(size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1); mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_new_aligned_nothrow(size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1) mi_attr_alloc_align(2); mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_new_n(size_t count, size_t size) mi_attr_malloc mi_attr_alloc_size2(1, 2); mi_decl_nodiscard mi_decl_export void* mi_new_realloc(void* p, size_t newsize) mi_attr_alloc_size(2); mi_decl_nodiscard mi_decl_export void* mi_new_reallocn(void* p, size_t newcount, size_t size) mi_attr_alloc_size2(2, 3); mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_alloc_new(mi_heap_t* heap, size_t size) mi_attr_malloc mi_attr_alloc_size(2); mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_alloc_new_n(mi_heap_t* heap, size_t count, size_t size) mi_attr_malloc mi_attr_alloc_size2(2, 3); #ifdef __cplusplus } #endif // 
--------------------------------------------------------------------------------------------- // Implement the C++ std::allocator interface for use in STL containers. // (note: see `mimalloc-new-delete.h` for overriding the new/delete operators globally) // --------------------------------------------------------------------------------------------- #ifdef __cplusplus #include // std::size_t #include // PTRDIFF_MAX #if (__cplusplus >= 201103L) || (_MSC_VER > 1900) // C++11 #include // std::true_type #include // std::forward #endif template struct _mi_stl_allocator_common { typedef T value_type; typedef std::size_t size_type; typedef std::ptrdiff_t difference_type; typedef value_type& reference; typedef value_type const& const_reference; typedef value_type* pointer; typedef value_type const* const_pointer; #if ((__cplusplus >= 201103L) || (_MSC_VER > 1900)) // C++11 using propagate_on_container_copy_assignment = std::true_type; using propagate_on_container_move_assignment = std::true_type; using propagate_on_container_swap = std::true_type; template void construct(U* p, Args&& ...args) { ::new(p) U(std::forward(args)...); } template void destroy(U* p) mi_attr_noexcept { p->~U(); } #else void construct(pointer p, value_type const& val) { ::new(p) value_type(val); } void destroy(pointer p) { p->~value_type(); } #endif size_type max_size() const mi_attr_noexcept { return (PTRDIFF_MAX/sizeof(value_type)); } pointer address(reference x) const { return &x; } const_pointer address(const_reference x) const { return &x; } }; template struct mi_stl_allocator : public _mi_stl_allocator_common { using typename _mi_stl_allocator_common::size_type; using typename _mi_stl_allocator_common::value_type; using typename _mi_stl_allocator_common::pointer; template struct rebind { typedef mi_stl_allocator other; }; mi_stl_allocator() mi_attr_noexcept = default; mi_stl_allocator(const mi_stl_allocator&) mi_attr_noexcept = default; template mi_stl_allocator(const mi_stl_allocator&) 
mi_attr_noexcept { } mi_stl_allocator select_on_container_copy_construction() const { return *this; } void deallocate(T* p, size_type) { mi_free(p); } #if (__cplusplus >= 201703L) // C++17 mi_decl_nodiscard T* allocate(size_type count) { return static_cast(mi_new_n(count, sizeof(T))); } mi_decl_nodiscard T* allocate(size_type count, const void*) { return allocate(count); } #else mi_decl_nodiscard pointer allocate(size_type count, const void* = 0) { return static_cast(mi_new_n(count, sizeof(value_type))); } #endif #if ((__cplusplus >= 201103L) || (_MSC_VER > 1900)) // C++11 using is_always_equal = std::true_type; #endif }; template bool operator==(const mi_stl_allocator& , const mi_stl_allocator& ) mi_attr_noexcept { return true; } template bool operator!=(const mi_stl_allocator& , const mi_stl_allocator& ) mi_attr_noexcept { return false; } #if (__cplusplus >= 201103L) || (_MSC_VER >= 1900) // C++11 #define MI_HAS_HEAP_STL_ALLOCATOR 1 #include // std::shared_ptr // Common base class for STL allocators in a specific heap template struct _mi_heap_stl_allocator_common : public _mi_stl_allocator_common { using typename _mi_stl_allocator_common::size_type; using typename _mi_stl_allocator_common::value_type; using typename _mi_stl_allocator_common::pointer; _mi_heap_stl_allocator_common(mi_heap_t* hp) : heap(hp) { } /* will not delete nor destroy the passed in heap */ #if (__cplusplus >= 201703L) // C++17 mi_decl_nodiscard T* allocate(size_type count) { return static_cast(mi_heap_alloc_new_n(this->heap.get(), count, sizeof(T))); } mi_decl_nodiscard T* allocate(size_type count, const void*) { return allocate(count); } #else mi_decl_nodiscard pointer allocate(size_type count, const void* = 0) { return static_cast(mi_heap_alloc_new_n(this->heap.get(), count, sizeof(value_type))); } #endif #if ((__cplusplus >= 201103L) || (_MSC_VER > 1900)) // C++11 using is_always_equal = std::false_type; #endif void collect(bool force) { mi_heap_collect(this->heap.get(), force); } 
template bool is_equal(const _mi_heap_stl_allocator_common& x) const { return (this->heap == x.heap); } protected: std::shared_ptr heap; template friend struct _mi_heap_stl_allocator_common; _mi_heap_stl_allocator_common() { mi_heap_t* hp = mi_heap_new(); this->heap.reset(hp, (_mi_destroy ? &heap_destroy : &heap_delete)); /* calls heap_delete/destroy when the refcount drops to zero */ } _mi_heap_stl_allocator_common(const _mi_heap_stl_allocator_common& x) mi_attr_noexcept : heap(x.heap) { } template _mi_heap_stl_allocator_common(const _mi_heap_stl_allocator_common& x) mi_attr_noexcept : heap(x.heap) { } private: static void heap_delete(mi_heap_t* hp) { if (hp != NULL) { mi_heap_delete(hp); } } static void heap_destroy(mi_heap_t* hp) { if (hp != NULL) { mi_heap_destroy(hp); } } }; // STL allocator allocation in a specific heap template struct mi_heap_stl_allocator : public _mi_heap_stl_allocator_common { using typename _mi_heap_stl_allocator_common::size_type; mi_heap_stl_allocator() : _mi_heap_stl_allocator_common() { } // creates fresh heap that is deleted when the destructor is called mi_heap_stl_allocator(mi_heap_t* hp) : _mi_heap_stl_allocator_common(hp) { } // no delete nor destroy on the passed in heap template mi_heap_stl_allocator(const mi_heap_stl_allocator& x) mi_attr_noexcept : _mi_heap_stl_allocator_common(x) { } mi_heap_stl_allocator select_on_container_copy_construction() const { return *this; } void deallocate(T* p, size_type) { mi_free(p); } template struct rebind { typedef mi_heap_stl_allocator other; }; }; template bool operator==(const mi_heap_stl_allocator& x, const mi_heap_stl_allocator& y) mi_attr_noexcept { return (x.is_equal(y)); } template bool operator!=(const mi_heap_stl_allocator& x, const mi_heap_stl_allocator& y) mi_attr_noexcept { return (!x.is_equal(y)); } // STL allocator allocation in a specific heap, where `free` does nothing and // the heap is destroyed in one go on destruction -- use with care! 
template struct mi_heap_destroy_stl_allocator : public _mi_heap_stl_allocator_common { using typename _mi_heap_stl_allocator_common::size_type; mi_heap_destroy_stl_allocator() : _mi_heap_stl_allocator_common() { } // creates fresh heap that is destroyed when the destructor is called mi_heap_destroy_stl_allocator(mi_heap_t* hp) : _mi_heap_stl_allocator_common(hp) { } // no delete nor destroy on the passed in heap template mi_heap_destroy_stl_allocator(const mi_heap_destroy_stl_allocator& x) mi_attr_noexcept : _mi_heap_stl_allocator_common(x) { } mi_heap_destroy_stl_allocator select_on_container_copy_construction() const { return *this; } void deallocate(T*, size_type) { /* do nothing as we destroy the heap on destruct. */ } template struct rebind { typedef mi_heap_destroy_stl_allocator other; }; }; template bool operator==(const mi_heap_destroy_stl_allocator& x, const mi_heap_destroy_stl_allocator& y) mi_attr_noexcept { return (x.is_equal(y)); } template bool operator!=(const mi_heap_destroy_stl_allocator& x, const mi_heap_destroy_stl_allocator& y) mi_attr_noexcept { return (!x.is_equal(y)); } #endif // C++11 #endif // __cplusplus #endif luametatex-2.10.08/source/libraries/mimalloc/include/mimalloc/000077500000000000000000000000001442250314700243265ustar00rootroot00000000000000luametatex-2.10.08/source/libraries/mimalloc/include/mimalloc/atomic.h000066400000000000000000000406061442250314700257610ustar00rootroot00000000000000/* ---------------------------------------------------------------------------- Copyright (c) 2018-2023 Microsoft Research, Daan Leijen This is free software; you can redistribute it and/or modify it under the terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. 
-----------------------------------------------------------------------------*/ #pragma once #ifndef MIMALLOC_ATOMIC_H #define MIMALLOC_ATOMIC_H // -------------------------------------------------------------------------------------------- // Atomics // We need to be portable between C, C++, and MSVC. // We base the primitives on the C/C++ atomics and create a mimimal wrapper for MSVC in C compilation mode. // This is why we try to use only `uintptr_t` and `*` as atomic types. // To gain better insight in the range of used atomics, we use explicitly named memory order operations // instead of passing the memory order as a parameter. // ----------------------------------------------------------------------------------------------- #if defined(__cplusplus) // Use C++ atomics #include #define _Atomic(tp) std::atomic #define mi_atomic(name) std::atomic_##name #define mi_memory_order(name) std::memory_order_##name #if !defined(ATOMIC_VAR_INIT) || (__cplusplus >= 202002L) // c++20, see issue #571 #define MI_ATOMIC_VAR_INIT(x) x #else #define MI_ATOMIC_VAR_INIT(x) ATOMIC_VAR_INIT(x) #endif #elif defined(_MSC_VER) // Use MSVC C wrapper for C11 atomics #define _Atomic(tp) tp #define MI_ATOMIC_VAR_INIT(x) x #define mi_atomic(name) mi_atomic_##name #define mi_memory_order(name) mi_memory_order_##name #else // Use C11 atomics #include #define mi_atomic(name) atomic_##name #define mi_memory_order(name) memory_order_##name #if !defined(ATOMIC_VAR_INIT) || (__STDC_VERSION__ >= 201710L) // c17, see issue #735 #define MI_ATOMIC_VAR_INIT(x) x #else #define MI_ATOMIC_VAR_INIT(x) ATOMIC_VAR_INIT(x) #endif #endif // Various defines for all used memory orders in mimalloc #define mi_atomic_cas_weak(p,expected,desired,mem_success,mem_fail) \ mi_atomic(compare_exchange_weak_explicit)(p,expected,desired,mem_success,mem_fail) #define mi_atomic_cas_strong(p,expected,desired,mem_success,mem_fail) \ mi_atomic(compare_exchange_strong_explicit)(p,expected,desired,mem_success,mem_fail) #define 
mi_atomic_load_acquire(p) mi_atomic(load_explicit)(p,mi_memory_order(acquire)) #define mi_atomic_load_relaxed(p) mi_atomic(load_explicit)(p,mi_memory_order(relaxed)) #define mi_atomic_store_release(p,x) mi_atomic(store_explicit)(p,x,mi_memory_order(release)) #define mi_atomic_store_relaxed(p,x) mi_atomic(store_explicit)(p,x,mi_memory_order(relaxed)) #define mi_atomic_exchange_release(p,x) mi_atomic(exchange_explicit)(p,x,mi_memory_order(release)) #define mi_atomic_exchange_acq_rel(p,x) mi_atomic(exchange_explicit)(p,x,mi_memory_order(acq_rel)) #define mi_atomic_cas_weak_release(p,exp,des) mi_atomic_cas_weak(p,exp,des,mi_memory_order(release),mi_memory_order(relaxed)) #define mi_atomic_cas_weak_acq_rel(p,exp,des) mi_atomic_cas_weak(p,exp,des,mi_memory_order(acq_rel),mi_memory_order(acquire)) #define mi_atomic_cas_strong_release(p,exp,des) mi_atomic_cas_strong(p,exp,des,mi_memory_order(release),mi_memory_order(relaxed)) #define mi_atomic_cas_strong_acq_rel(p,exp,des) mi_atomic_cas_strong(p,exp,des,mi_memory_order(acq_rel),mi_memory_order(acquire)) #define mi_atomic_add_relaxed(p,x) mi_atomic(fetch_add_explicit)(p,x,mi_memory_order(relaxed)) #define mi_atomic_sub_relaxed(p,x) mi_atomic(fetch_sub_explicit)(p,x,mi_memory_order(relaxed)) #define mi_atomic_add_acq_rel(p,x) mi_atomic(fetch_add_explicit)(p,x,mi_memory_order(acq_rel)) #define mi_atomic_sub_acq_rel(p,x) mi_atomic(fetch_sub_explicit)(p,x,mi_memory_order(acq_rel)) #define mi_atomic_and_acq_rel(p,x) mi_atomic(fetch_and_explicit)(p,x,mi_memory_order(acq_rel)) #define mi_atomic_or_acq_rel(p,x) mi_atomic(fetch_or_explicit)(p,x,mi_memory_order(acq_rel)) #define mi_atomic_increment_relaxed(p) mi_atomic_add_relaxed(p,(uintptr_t)1) #define mi_atomic_decrement_relaxed(p) mi_atomic_sub_relaxed(p,(uintptr_t)1) #define mi_atomic_increment_acq_rel(p) mi_atomic_add_acq_rel(p,(uintptr_t)1) #define mi_atomic_decrement_acq_rel(p) mi_atomic_sub_acq_rel(p,(uintptr_t)1) static inline void mi_atomic_yield(void); static inline 
intptr_t mi_atomic_addi(_Atomic(intptr_t)*p, intptr_t add); static inline intptr_t mi_atomic_subi(_Atomic(intptr_t)*p, intptr_t sub); #if defined(__cplusplus) || !defined(_MSC_VER) // In C++/C11 atomics we have polymorphic atomics so can use the typed `ptr` variants (where `tp` is the type of atomic value) // We use these macros so we can provide a typed wrapper in MSVC in C compilation mode as well #define mi_atomic_load_ptr_acquire(tp,p) mi_atomic_load_acquire(p) #define mi_atomic_load_ptr_relaxed(tp,p) mi_atomic_load_relaxed(p) // In C++ we need to add casts to help resolve templates if NULL is passed #if defined(__cplusplus) #define mi_atomic_store_ptr_release(tp,p,x) mi_atomic_store_release(p,(tp*)x) #define mi_atomic_store_ptr_relaxed(tp,p,x) mi_atomic_store_relaxed(p,(tp*)x) #define mi_atomic_cas_ptr_weak_release(tp,p,exp,des) mi_atomic_cas_weak_release(p,exp,(tp*)des) #define mi_atomic_cas_ptr_weak_acq_rel(tp,p,exp,des) mi_atomic_cas_weak_acq_rel(p,exp,(tp*)des) #define mi_atomic_cas_ptr_strong_release(tp,p,exp,des) mi_atomic_cas_strong_release(p,exp,(tp*)des) #define mi_atomic_exchange_ptr_release(tp,p,x) mi_atomic_exchange_release(p,(tp*)x) #define mi_atomic_exchange_ptr_acq_rel(tp,p,x) mi_atomic_exchange_acq_rel(p,(tp*)x) #else #define mi_atomic_store_ptr_release(tp,p,x) mi_atomic_store_release(p,x) #define mi_atomic_store_ptr_relaxed(tp,p,x) mi_atomic_store_relaxed(p,x) #define mi_atomic_cas_ptr_weak_release(tp,p,exp,des) mi_atomic_cas_weak_release(p,exp,des) #define mi_atomic_cas_ptr_weak_acq_rel(tp,p,exp,des) mi_atomic_cas_weak_acq_rel(p,exp,des) #define mi_atomic_cas_ptr_strong_release(tp,p,exp,des) mi_atomic_cas_strong_release(p,exp,des) #define mi_atomic_exchange_ptr_release(tp,p,x) mi_atomic_exchange_release(p,x) #define mi_atomic_exchange_ptr_acq_rel(tp,p,x) mi_atomic_exchange_acq_rel(p,x) #endif // These are used by the statistics static inline int64_t mi_atomic_addi64_relaxed(volatile int64_t* p, int64_t add) { return 
mi_atomic(fetch_add_explicit)((_Atomic(int64_t)*)p, add, mi_memory_order(relaxed)); } static inline void mi_atomic_maxi64_relaxed(volatile int64_t* p, int64_t x) { int64_t current = mi_atomic_load_relaxed((_Atomic(int64_t)*)p); while (current < x && !mi_atomic_cas_weak_release((_Atomic(int64_t)*)p, ¤t, x)) { /* nothing */ }; } // Used by timers #define mi_atomic_loadi64_acquire(p) mi_atomic(load_explicit)(p,mi_memory_order(acquire)) #define mi_atomic_loadi64_relaxed(p) mi_atomic(load_explicit)(p,mi_memory_order(relaxed)) #define mi_atomic_storei64_release(p,x) mi_atomic(store_explicit)(p,x,mi_memory_order(release)) #define mi_atomic_storei64_relaxed(p,x) mi_atomic(store_explicit)(p,x,mi_memory_order(relaxed)) #define mi_atomic_casi64_strong_acq_rel(p,e,d) mi_atomic_cas_strong_acq_rel(p,e,d) #define mi_atomic_addi64_acq_rel(p,i) mi_atomic_add_acq_rel(p,i) #elif defined(_MSC_VER) // MSVC C compilation wrapper that uses Interlocked operations to model C11 atomics. #define WIN32_LEAN_AND_MEAN #include #include #ifdef _WIN64 typedef LONG64 msc_intptr_t; #define MI_64(f) f##64 #else typedef LONG msc_intptr_t; #define MI_64(f) f #endif typedef enum mi_memory_order_e { mi_memory_order_relaxed, mi_memory_order_consume, mi_memory_order_acquire, mi_memory_order_release, mi_memory_order_acq_rel, mi_memory_order_seq_cst } mi_memory_order; static inline uintptr_t mi_atomic_fetch_add_explicit(_Atomic(uintptr_t)*p, uintptr_t add, mi_memory_order mo) { (void)(mo); return (uintptr_t)MI_64(_InterlockedExchangeAdd)((volatile msc_intptr_t*)p, (msc_intptr_t)add); } static inline uintptr_t mi_atomic_fetch_sub_explicit(_Atomic(uintptr_t)*p, uintptr_t sub, mi_memory_order mo) { (void)(mo); return (uintptr_t)MI_64(_InterlockedExchangeAdd)((volatile msc_intptr_t*)p, -((msc_intptr_t)sub)); } static inline uintptr_t mi_atomic_fetch_and_explicit(_Atomic(uintptr_t)*p, uintptr_t x, mi_memory_order mo) { (void)(mo); return (uintptr_t)MI_64(_InterlockedAnd)((volatile msc_intptr_t*)p, 
(msc_intptr_t)x); } static inline uintptr_t mi_atomic_fetch_or_explicit(_Atomic(uintptr_t)*p, uintptr_t x, mi_memory_order mo) { (void)(mo); return (uintptr_t)MI_64(_InterlockedOr)((volatile msc_intptr_t*)p, (msc_intptr_t)x); } static inline bool mi_atomic_compare_exchange_strong_explicit(_Atomic(uintptr_t)*p, uintptr_t* expected, uintptr_t desired, mi_memory_order mo1, mi_memory_order mo2) { (void)(mo1); (void)(mo2); uintptr_t read = (uintptr_t)MI_64(_InterlockedCompareExchange)((volatile msc_intptr_t*)p, (msc_intptr_t)desired, (msc_intptr_t)(*expected)); if (read == *expected) { return true; } else { *expected = read; return false; } } static inline bool mi_atomic_compare_exchange_weak_explicit(_Atomic(uintptr_t)*p, uintptr_t* expected, uintptr_t desired, mi_memory_order mo1, mi_memory_order mo2) { return mi_atomic_compare_exchange_strong_explicit(p, expected, desired, mo1, mo2); } static inline uintptr_t mi_atomic_exchange_explicit(_Atomic(uintptr_t)*p, uintptr_t exchange, mi_memory_order mo) { (void)(mo); return (uintptr_t)MI_64(_InterlockedExchange)((volatile msc_intptr_t*)p, (msc_intptr_t)exchange); } static inline void mi_atomic_thread_fence(mi_memory_order mo) { (void)(mo); _Atomic(uintptr_t) x = 0; mi_atomic_exchange_explicit(&x, 1, mo); } static inline uintptr_t mi_atomic_load_explicit(_Atomic(uintptr_t) const* p, mi_memory_order mo) { (void)(mo); #if defined(_M_IX86) || defined(_M_X64) return *p; #else uintptr_t x = *p; if (mo > mi_memory_order_relaxed) { while (!mi_atomic_compare_exchange_weak_explicit(p, &x, x, mo, mi_memory_order_relaxed)) { /* nothing */ }; } return x; #endif } static inline void mi_atomic_store_explicit(_Atomic(uintptr_t)*p, uintptr_t x, mi_memory_order mo) { (void)(mo); #if defined(_M_IX86) || defined(_M_X64) *p = x; #else mi_atomic_exchange_explicit(p, x, mo); #endif } static inline int64_t mi_atomic_loadi64_explicit(_Atomic(int64_t)*p, mi_memory_order mo) { (void)(mo); #if defined(_M_X64) return *p; #else int64_t old = *p; 
int64_t x = old; while ((old = InterlockedCompareExchange64(p, x, old)) != x) { x = old; } return x; #endif } static inline void mi_atomic_storei64_explicit(_Atomic(int64_t)*p, int64_t x, mi_memory_order mo) { (void)(mo); #if defined(x_M_IX86) || defined(_M_X64) *p = x; #else InterlockedExchange64(p, x); #endif } // These are used by the statistics static inline int64_t mi_atomic_addi64_relaxed(volatile _Atomic(int64_t)*p, int64_t add) { #ifdef _WIN64 return (int64_t)mi_atomic_addi((int64_t*)p, add); #else int64_t current; int64_t sum; do { current = *p; sum = current + add; } while (_InterlockedCompareExchange64(p, sum, current) != current); return current; #endif } static inline void mi_atomic_maxi64_relaxed(volatile _Atomic(int64_t)*p, int64_t x) { int64_t current; do { current = *p; } while (current < x && _InterlockedCompareExchange64(p, x, current) != current); } static inline void mi_atomic_addi64_acq_rel(volatile _Atomic(int64_t*)p, int64_t i) { mi_atomic_addi64_relaxed(p, i); } static inline bool mi_atomic_casi64_strong_acq_rel(volatile _Atomic(int64_t*)p, int64_t* exp, int64_t des) { int64_t read = _InterlockedCompareExchange64(p, des, *exp); if (read == *exp) { return true; } else { *exp = read; return false; } } // The pointer macros cast to `uintptr_t`. 
#define mi_atomic_load_ptr_acquire(tp,p) (tp*)mi_atomic_load_acquire((_Atomic(uintptr_t)*)(p)) #define mi_atomic_load_ptr_relaxed(tp,p) (tp*)mi_atomic_load_relaxed((_Atomic(uintptr_t)*)(p)) #define mi_atomic_store_ptr_release(tp,p,x) mi_atomic_store_release((_Atomic(uintptr_t)*)(p),(uintptr_t)(x)) #define mi_atomic_store_ptr_relaxed(tp,p,x) mi_atomic_store_relaxed((_Atomic(uintptr_t)*)(p),(uintptr_t)(x)) #define mi_atomic_cas_ptr_weak_release(tp,p,exp,des) mi_atomic_cas_weak_release((_Atomic(uintptr_t)*)(p),(uintptr_t*)exp,(uintptr_t)des) #define mi_atomic_cas_ptr_weak_acq_rel(tp,p,exp,des) mi_atomic_cas_weak_acq_rel((_Atomic(uintptr_t)*)(p),(uintptr_t*)exp,(uintptr_t)des) #define mi_atomic_cas_ptr_strong_release(tp,p,exp,des) mi_atomic_cas_strong_release((_Atomic(uintptr_t)*)(p),(uintptr_t*)exp,(uintptr_t)des) #define mi_atomic_exchange_ptr_release(tp,p,x) (tp*)mi_atomic_exchange_release((_Atomic(uintptr_t)*)(p),(uintptr_t)x) #define mi_atomic_exchange_ptr_acq_rel(tp,p,x) (tp*)mi_atomic_exchange_acq_rel((_Atomic(uintptr_t)*)(p),(uintptr_t)x) #define mi_atomic_loadi64_acquire(p) mi_atomic(loadi64_explicit)(p,mi_memory_order(acquire)) #define mi_atomic_loadi64_relaxed(p) mi_atomic(loadi64_explicit)(p,mi_memory_order(relaxed)) #define mi_atomic_storei64_release(p,x) mi_atomic(storei64_explicit)(p,x,mi_memory_order(release)) #define mi_atomic_storei64_relaxed(p,x) mi_atomic(storei64_explicit)(p,x,mi_memory_order(relaxed)) #endif // Atomically add a signed value; returns the previous value. static inline intptr_t mi_atomic_addi(_Atomic(intptr_t)*p, intptr_t add) { return (intptr_t)mi_atomic_add_acq_rel((_Atomic(uintptr_t)*)p, (uintptr_t)add); } // Atomically subtract a signed value; returns the previous value. 
static inline intptr_t mi_atomic_subi(_Atomic(intptr_t)*p, intptr_t sub) { return (intptr_t)mi_atomic_addi(p, -sub); } typedef _Atomic(uintptr_t) mi_atomic_once_t; // Returns true only on the first invocation static inline bool mi_atomic_once( mi_atomic_once_t* once ) { if (mi_atomic_load_relaxed(once) != 0) return false; // quick test uintptr_t expected = 0; return mi_atomic_cas_strong_acq_rel(once, &expected, (uintptr_t)1); // try to set to 1 } typedef _Atomic(uintptr_t) mi_atomic_guard_t; // Allows only one thread to execute at a time #define mi_atomic_guard(guard) \ uintptr_t _mi_guard_expected = 0; \ for(bool _mi_guard_once = true; \ _mi_guard_once && mi_atomic_cas_strong_acq_rel(guard,&_mi_guard_expected,(uintptr_t)1); \ (mi_atomic_store_release(guard,(uintptr_t)0), _mi_guard_once = false) ) // Yield #if defined(__cplusplus) #include static inline void mi_atomic_yield(void) { std::this_thread::yield(); } #elif defined(_WIN32) #define WIN32_LEAN_AND_MEAN #include static inline void mi_atomic_yield(void) { YieldProcessor(); } #elif defined(__SSE2__) #include static inline void mi_atomic_yield(void) { _mm_pause(); } #elif (defined(__GNUC__) || defined(__clang__)) && \ (defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__armel__) || defined(__ARMEL__) || \ defined(__aarch64__) || defined(__powerpc__) || defined(__ppc__) || defined(__PPC__)) || defined(__POWERPC__) #if defined(__x86_64__) || defined(__i386__) static inline void mi_atomic_yield(void) { __asm__ volatile ("pause" ::: "memory"); } #elif defined(__aarch64__) static inline void mi_atomic_yield(void) { __asm__ volatile("wfe"); } #elif (defined(__arm__) && __ARM_ARCH__ >= 7) static inline void mi_atomic_yield(void) { __asm__ volatile("yield" ::: "memory"); } #elif defined(__powerpc__) || defined(__ppc__) || defined(__PPC__) || defined(__POWERPC__) #ifdef __APPLE__ static inline void mi_atomic_yield(void) { __asm__ volatile ("or r27,r27,r27" ::: "memory"); } #else static inline void 
mi_atomic_yield(void) { __asm__ __volatile__ ("or 27,27,27" ::: "memory"); } #endif #elif defined(__armel__) || defined(__ARMEL__) static inline void mi_atomic_yield(void) { __asm__ volatile ("nop" ::: "memory"); } #endif #elif defined(__sun) // Fallback for other archs #include static inline void mi_atomic_yield(void) { smt_pause(); } #elif defined(__wasi__) #include static inline void mi_atomic_yield(void) { sched_yield(); } #else #include static inline void mi_atomic_yield(void) { sleep(0); } #endif #endif // __MIMALLOC_ATOMIC_H luametatex-2.10.08/source/libraries/mimalloc/include/mimalloc/internal.h000066400000000000000000001100621442250314700263130ustar00rootroot00000000000000/* ---------------------------------------------------------------------------- Copyright (c) 2018-2023, Microsoft Research, Daan Leijen This is free software; you can redistribute it and/or modify it under the terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. -----------------------------------------------------------------------------*/ #pragma once #ifndef MIMALLOC_INTERNAL_H #define MIMALLOC_INTERNAL_H // -------------------------------------------------------------------------- // This file contains the interal API's of mimalloc and various utility // functions and macros. // -------------------------------------------------------------------------- #include "mimalloc/types.h" #include "mimalloc/track.h" #if (MI_DEBUG>0) #define mi_trace_message(...) _mi_trace_message(__VA_ARGS__) #else #define mi_trace_message(...) 
#endif #define MI_CACHE_LINE 64 #if defined(_MSC_VER) #pragma warning(disable:4127) // suppress constant conditional warning (due to MI_SECURE paths) #pragma warning(disable:26812) // unscoped enum warning #define mi_decl_noinline __declspec(noinline) #define mi_decl_thread __declspec(thread) #define mi_decl_cache_align __declspec(align(MI_CACHE_LINE)) #elif (defined(__GNUC__) && (__GNUC__ >= 3)) || defined(__clang__) // includes clang and icc #define mi_decl_noinline __attribute__((noinline)) #define mi_decl_thread __thread #define mi_decl_cache_align __attribute__((aligned(MI_CACHE_LINE))) #else #define mi_decl_noinline #define mi_decl_thread __thread // hope for the best :-) #define mi_decl_cache_align #endif #if defined(__EMSCRIPTEN__) && !defined(__wasi__) #define __wasi__ #endif #if defined(__cplusplus) #define mi_decl_externc extern "C" #else #define mi_decl_externc #endif // pthreads #if !defined(_WIN32) && !defined(__wasi__) #define MI_USE_PTHREADS #include #endif // "options.c" void _mi_fputs(mi_output_fun* out, void* arg, const char* prefix, const char* message); void _mi_fprintf(mi_output_fun* out, void* arg, const char* fmt, ...); void _mi_warning_message(const char* fmt, ...); void _mi_verbose_message(const char* fmt, ...); void _mi_trace_message(const char* fmt, ...); void _mi_options_init(void); void _mi_error_message(int err, const char* fmt, ...); // random.c void _mi_random_init(mi_random_ctx_t* ctx); void _mi_random_init_weak(mi_random_ctx_t* ctx); void _mi_random_reinit_if_weak(mi_random_ctx_t * ctx); void _mi_random_split(mi_random_ctx_t* ctx, mi_random_ctx_t* new_ctx); uintptr_t _mi_random_next(mi_random_ctx_t* ctx); uintptr_t _mi_heap_random_next(mi_heap_t* heap); uintptr_t _mi_os_random_weak(uintptr_t extra_seed); static inline uintptr_t _mi_random_shuffle(uintptr_t x); // init.c extern mi_decl_cache_align mi_stats_t _mi_stats_main; extern mi_decl_cache_align const mi_page_t _mi_page_empty; bool _mi_is_main_thread(void); size_t 
_mi_current_thread_count(void); bool _mi_preloading(void); // true while the C runtime is not initialized yet mi_threadid_t _mi_thread_id(void) mi_attr_noexcept; mi_heap_t* _mi_heap_main_get(void); // statically allocated main backing heap void _mi_thread_done(mi_heap_t* heap); void _mi_thread_data_collect(void); // os.c void _mi_os_init(void); // called from process init void* _mi_os_alloc(size_t size, mi_memid_t* memid, mi_stats_t* stats); void _mi_os_free(void* p, size_t size, mi_memid_t memid, mi_stats_t* stats); void _mi_os_free_ex(void* p, size_t size, bool still_committed, mi_memid_t memid, mi_stats_t* stats); size_t _mi_os_page_size(void); size_t _mi_os_good_alloc_size(size_t size); bool _mi_os_has_overcommit(void); bool _mi_os_has_virtual_reserve(void); bool _mi_os_purge(void* p, size_t size, mi_stats_t* stats); bool _mi_os_reset(void* addr, size_t size, mi_stats_t* tld_stats); bool _mi_os_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats); bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* stats); bool _mi_os_protect(void* addr, size_t size); bool _mi_os_unprotect(void* addr, size_t size); bool _mi_os_purge(void* p, size_t size, mi_stats_t* stats); bool _mi_os_purge_ex(void* p, size_t size, bool allow_reset, mi_stats_t* stats); void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool allow_large, mi_memid_t* memid, mi_stats_t* stats); void* _mi_os_alloc_aligned_at_offset(size_t size, size_t alignment, size_t align_offset, bool commit, bool allow_large, mi_memid_t* memid, mi_stats_t* tld_stats); void* _mi_os_get_aligned_hint(size_t try_alignment, size_t size); bool _mi_os_use_large_page(size_t size, size_t alignment); size_t _mi_os_large_page_size(void); void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_secs, size_t* pages_reserved, size_t* psize, mi_memid_t* memid); // arena.c mi_arena_id_t _mi_arena_id_none(void); void _mi_arena_free(void* p, size_t size, size_t still_committed_size, 
mi_memid_t memid, mi_stats_t* stats); void* _mi_arena_alloc(size_t size, bool commit, bool allow_large, mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld); void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset, bool commit, bool allow_large, mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld); bool _mi_arena_memid_is_suitable(mi_memid_t memid, mi_arena_id_t request_arena_id); bool _mi_arena_contains(const void* p); void _mi_arena_collect(bool force_purge, mi_stats_t* stats); void _mi_arena_unsafe_destroy_all(mi_stats_t* stats); // "segment-map.c" void _mi_segment_map_allocated_at(const mi_segment_t* segment); void _mi_segment_map_freed_at(const mi_segment_t* segment); // "segment.c" mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_size, size_t page_alignment, mi_segments_tld_t* tld, mi_os_tld_t* os_tld); void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld); void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld); bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segments_tld_t* tld); void _mi_segment_thread_collect(mi_segments_tld_t* tld); #if MI_HUGE_PAGE_ABANDON void _mi_segment_huge_page_free(mi_segment_t* segment, mi_page_t* page, mi_block_t* block); #else void _mi_segment_huge_page_reset(mi_segment_t* segment, mi_page_t* page, mi_block_t* block); #endif uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size); // page start for any page void _mi_abandoned_reclaim_all(mi_heap_t* heap, mi_segments_tld_t* tld); void _mi_abandoned_await_readers(void); void _mi_abandoned_collect(mi_heap_t* heap, bool force, mi_segments_tld_t* tld); // "page.c" void* _mi_malloc_generic(mi_heap_t* heap, size_t size, bool zero, size_t huge_alignment) mi_attr_noexcept mi_attr_malloc; void _mi_page_retire(mi_page_t* page) mi_attr_noexcept; // free the page if there are no other pages with many free blocks 
// "page.c" (continued)
void     _mi_page_unfull(mi_page_t* page);
void     _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force);  // free the page
void     _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq);           // abandon the page, to be picked up by another thread...
void     _mi_heap_delayed_free_all(mi_heap_t* heap);
bool     _mi_heap_delayed_free_partial(mi_heap_t* heap);
void     _mi_heap_collect_retired(mi_heap_t* heap, bool force);

void     _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay, bool override_never);
bool     _mi_page_try_use_delayed_free(mi_page_t* page, mi_delayed_t delay, bool override_never);
size_t   _mi_page_queue_append(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_queue_t* append);
void     _mi_deferred_free(mi_heap_t* heap, bool force);

void     _mi_page_free_collect(mi_page_t* page,bool force);
void     _mi_page_reclaim(mi_heap_t* heap, mi_page_t* page);  // callback from segments

size_t   _mi_bin_size(uint8_t bin);  // for stats
uint8_t  _mi_bin(size_t size);       // for stats

// "heap.c"
void     _mi_heap_destroy_pages(mi_heap_t* heap);
void     _mi_heap_collect_abandon(mi_heap_t* heap);
void     _mi_heap_set_default_direct(mi_heap_t* heap);
bool     _mi_heap_memid_is_suitable(mi_heap_t* heap, mi_memid_t memid);
void     _mi_heap_unsafe_destroy_all(void);

// "stats.c"
void     _mi_stats_done(mi_stats_t* stats);
mi_msecs_t _mi_clock_now(void);
mi_msecs_t _mi_clock_end(mi_msecs_t start);
mi_msecs_t _mi_clock_start(void);

// "alloc.c"
void*    _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size, bool zero) mi_attr_noexcept;  // called from `_mi_malloc_generic`
void*    _mi_heap_malloc_zero(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept;
void*    _mi_heap_malloc_zero_ex(mi_heap_t* heap, size_t size, bool zero, size_t huge_alignment) mi_attr_noexcept;  // called from `_mi_heap_malloc_aligned`
void*    _mi_heap_realloc_zero(mi_heap_t* heap, void* p, size_t newsize, bool zero) mi_attr_noexcept;
mi_block_t* _mi_page_ptr_unalign(const mi_segment_t* segment, const mi_page_t* page, const void* p);
bool     _mi_free_delayed_block(mi_block_t* block);
void     _mi_free_generic(const mi_segment_t* segment, mi_page_t* page, bool is_local, void* p) mi_attr_noexcept;  // for runtime integration
void     _mi_padding_shrink(const mi_page_t* page, const mi_block_t* block, const size_t min_size);

// option.c, c primitives
char     _mi_toupper(char c);
int      _mi_strnicmp(const char* s, const char* t, size_t n);
void     _mi_strlcpy(char* dest, const char* src, size_t dest_size);
void     _mi_strlcat(char* dest, const char* src, size_t dest_size);
size_t   _mi_strlen(const char* s);
size_t   _mi_strnlen(const char* s, size_t max_len);

#if MI_DEBUG>1
bool     _mi_page_is_valid(mi_page_t* page);
#endif


// ------------------------------------------------------
// Branches
// ------------------------------------------------------

#if defined(__GNUC__) || defined(__clang__)
#define mi_unlikely(x)     (__builtin_expect(!!(x),false))
#define mi_likely(x)       (__builtin_expect(!!(x),true))
#elif (defined(__cplusplus) && (__cplusplus >= 202002L)) || (defined(_MSVC_LANG) && _MSVC_LANG >= 202002L)
#define mi_unlikely(x)     (x) [[unlikely]]
#define mi_likely(x)       (x) [[likely]]
#else
#define mi_unlikely(x)     (x)
#define mi_likely(x)       (x)
#endif

#ifndef __has_builtin
#define __has_builtin(x)  0
#endif


/* -----------------------------------------------------------
  Error codes passed to `_mi_fatal_error`
  All are recoverable but EFAULT is a serious error and aborts by
  default in secure mode.
For portability define undefined error codes using common Unix codes: ----------------------------------------------------------- */ #include #ifndef EAGAIN // double free #define EAGAIN (11) #endif #ifndef ENOMEM // out of memory #define ENOMEM (12) #endif #ifndef EFAULT // corrupted free-list or meta-data #define EFAULT (14) #endif #ifndef EINVAL // trying to free an invalid pointer #define EINVAL (22) #endif #ifndef EOVERFLOW // count*size overflow #define EOVERFLOW (75) #endif /* ----------------------------------------------------------- Inlined definitions ----------------------------------------------------------- */ #define MI_UNUSED(x) (void)(x) #if (MI_DEBUG>0) #define MI_UNUSED_RELEASE(x) #else #define MI_UNUSED_RELEASE(x) MI_UNUSED(x) #endif #define MI_INIT4(x) x(),x(),x(),x() #define MI_INIT8(x) MI_INIT4(x),MI_INIT4(x) #define MI_INIT16(x) MI_INIT8(x),MI_INIT8(x) #define MI_INIT32(x) MI_INIT16(x),MI_INIT16(x) #define MI_INIT64(x) MI_INIT32(x),MI_INIT32(x) #define MI_INIT128(x) MI_INIT64(x),MI_INIT64(x) #define MI_INIT256(x) MI_INIT128(x),MI_INIT128(x) #include // initialize a local variable to zero; use memset as compilers optimize constant sized memset's #define _mi_memzero_var(x) memset(&x,0,sizeof(x)) // Is `x` a power of two? (0 is considered a power of two) static inline bool _mi_is_power_of_two(uintptr_t x) { return ((x & (x - 1)) == 0); } // Is a pointer aligned? static inline bool _mi_is_aligned(void* p, size_t alignment) { mi_assert_internal(alignment != 0); return (((uintptr_t)p % alignment) == 0); } // Align upwards static inline uintptr_t _mi_align_up(uintptr_t sz, size_t alignment) { mi_assert_internal(alignment != 0); uintptr_t mask = alignment - 1; if ((alignment & mask) == 0) { // power of two? 
return ((sz + mask) & ~mask); } else { return (((sz + mask)/alignment)*alignment); } } // Align downwards static inline uintptr_t _mi_align_down(uintptr_t sz, size_t alignment) { mi_assert_internal(alignment != 0); uintptr_t mask = alignment - 1; if ((alignment & mask) == 0) { // power of two? return (sz & ~mask); } else { return ((sz / alignment) * alignment); } } // Divide upwards: `s <= _mi_divide_up(s,d)*d < s+d`. static inline uintptr_t _mi_divide_up(uintptr_t size, size_t divider) { mi_assert_internal(divider != 0); return (divider == 0 ? size : ((size + divider - 1) / divider)); } // Is memory zero initialized? static inline bool mi_mem_is_zero(const void* p, size_t size) { for (size_t i = 0; i < size; i++) { if (((uint8_t*)p)[i] != 0) return false; } return true; } // Align a byte size to a size in _machine words_, // i.e. byte size == `wsize*sizeof(void*)`. static inline size_t _mi_wsize_from_size(size_t size) { mi_assert_internal(size <= SIZE_MAX - sizeof(uintptr_t)); return (size + sizeof(uintptr_t) - 1) / sizeof(uintptr_t); } // Overflow detecting multiply #if __has_builtin(__builtin_umul_overflow) || (defined(__GNUC__) && (__GNUC__ >= 5)) #include // UINT_MAX, ULONG_MAX #if defined(_CLOCK_T) // for Illumos #undef _CLOCK_T #endif static inline bool mi_mul_overflow(size_t count, size_t size, size_t* total) { #if (SIZE_MAX == ULONG_MAX) return __builtin_umull_overflow(count, size, (unsigned long *)total); #elif (SIZE_MAX == UINT_MAX) return __builtin_umul_overflow(count, size, (unsigned int *)total); #else return __builtin_umulll_overflow(count, size, (unsigned long long *)total); #endif } #else /* __builtin_umul_overflow is unavailable */ static inline bool mi_mul_overflow(size_t count, size_t size, size_t* total) { #define MI_MUL_NO_OVERFLOW ((size_t)1 << (4*sizeof(size_t))) // sqrt(SIZE_MAX) *total = count * size; // note: gcc/clang optimize this to directly check the overflow flag return ((size >= MI_MUL_NO_OVERFLOW || count >= MI_MUL_NO_OVERFLOW) && 
size > 0 && (SIZE_MAX / size) < count); } #endif // Safe multiply `count*size` into `total`; return `true` on overflow. static inline bool mi_count_size_overflow(size_t count, size_t size, size_t* total) { if (count==1) { // quick check for the case where count is one (common for C++ allocators) *total = size; return false; } else if mi_unlikely(mi_mul_overflow(count, size, total)) { #if MI_DEBUG > 0 _mi_error_message(EOVERFLOW, "allocation request is too large (%zu * %zu bytes)\n", count, size); #endif *total = SIZE_MAX; return true; } else return false; } /*---------------------------------------------------------------------------------------- Heap functions ------------------------------------------------------------------------------------------- */ extern const mi_heap_t _mi_heap_empty; // read-only empty heap, initial value of the thread local default heap static inline bool mi_heap_is_backing(const mi_heap_t* heap) { return (heap->tld->heap_backing == heap); } static inline bool mi_heap_is_initialized(mi_heap_t* heap) { mi_assert_internal(heap != NULL); return (heap != &_mi_heap_empty); } static inline uintptr_t _mi_ptr_cookie(const void* p) { extern mi_heap_t _mi_heap_main; mi_assert_internal(_mi_heap_main.cookie != 0); return ((uintptr_t)p ^ _mi_heap_main.cookie); } /* ----------------------------------------------------------- Pages ----------------------------------------------------------- */ static inline mi_page_t* _mi_heap_get_free_small_page(mi_heap_t* heap, size_t size) { mi_assert_internal(size <= (MI_SMALL_SIZE_MAX + MI_PADDING_SIZE)); const size_t idx = _mi_wsize_from_size(size); mi_assert_internal(idx < MI_PAGES_DIRECT); return heap->pages_free_direct[idx]; } // Segment that contains the pointer // Large aligned blocks may be aligned at N*MI_SEGMENT_SIZE (inside a huge segment > MI_SEGMENT_SIZE), // and we need align "down" to the segment info which is `MI_SEGMENT_SIZE` bytes before it; // therefore we align one byte before `p`. 
static inline mi_segment_t* _mi_ptr_segment(const void* p) {
  mi_assert_internal(p != NULL);
  // mask `p - 1` down to the MI_SEGMENT_SIZE boundary (one byte before `p`, per the note above)
  return (mi_segment_t*)(((uintptr_t)p - 1) & ~MI_SEGMENT_MASK);
}

// Reinterpret a slice as the page it heads (asserts it is a first slice).
static inline mi_page_t* mi_slice_to_page(mi_slice_t* s) {
  mi_assert_internal(s->slice_offset== 0 && s->slice_count > 0);
  return (mi_page_t*)(s);
}

// Inverse of `mi_slice_to_page`.
static inline mi_slice_t* mi_page_to_slice(mi_page_t* p) {
  mi_assert_internal(p->slice_offset== 0 && p->slice_count > 0);
  return (mi_slice_t*)(p);
}

// Segment belonging to a page
static inline mi_segment_t* _mi_page_segment(const mi_page_t* page) {
  mi_segment_t* segment = _mi_ptr_segment(page);
  mi_assert_internal(segment == NULL || ((mi_slice_t*)page >= segment->slices && (mi_slice_t*)page < segment->slices + segment->slice_entries));
  return segment;
}

// First slice of the block that `slice` belongs to (walks back `slice_offset` bytes).
static inline mi_slice_t* mi_slice_first(const mi_slice_t* slice) {
  mi_slice_t* start = (mi_slice_t*)((uint8_t*)slice - slice->slice_offset);
  mi_assert_internal(start >= _mi_ptr_segment(slice)->slices);
  mi_assert_internal(start->slice_offset == 0);
  mi_assert_internal(start + start->slice_count > slice);
  return start;
}

// Get the page containing the pointer (performance critical as it is called in mi_free)
static inline mi_page_t* _mi_segment_page_of(const mi_segment_t* segment, const void* p) {
  mi_assert_internal(p > (void*)segment);
  ptrdiff_t diff = (uint8_t*)p - (uint8_t*)segment;
  mi_assert_internal(diff > 0 && diff <= (ptrdiff_t)MI_SEGMENT_SIZE);
  size_t idx = (size_t)diff >> MI_SEGMENT_SLICE_SHIFT;
  mi_assert_internal(idx <= segment->slice_entries);
  mi_slice_t* slice0 = (mi_slice_t*)&segment->slices[idx];
  mi_slice_t* slice = mi_slice_first(slice0);  // adjust to the block that holds the page data
  mi_assert_internal(slice->slice_offset == 0);
  mi_assert_internal(slice >= segment->slices && slice < segment->slices + segment->slice_entries);
  return mi_slice_to_page(slice);
}

// Quick page start for initialized pages
static inline uint8_t* _mi_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size) {
  return _mi_segment_page_start(segment, page, page_size);
}

// Get the page containing the pointer
static inline mi_page_t* _mi_ptr_page(void* p) {
  return _mi_segment_page_of(_mi_ptr_segment(p), p);
}

// Get the block size of a page (special case for huge objects)
static inline size_t mi_page_block_size(const mi_page_t* page) {
  const size_t bsize = page->xblock_size;
  mi_assert_internal(bsize > 0);
  if mi_likely(bsize < MI_HUGE_BLOCK_SIZE) {
    return bsize;
  }
  else {
    // huge page: the effective block size is the page's usable size
    size_t psize;
    _mi_segment_page_start(_mi_page_segment(page), page, &psize);
    return psize;
  }
}

static inline bool mi_page_is_huge(const mi_page_t* page) {
  return (_mi_page_segment(page)->kind == MI_SEGMENT_HUGE);
}

// Get the usable block size of a page without fixed padding.
// This may still include internal padding due to alignment and rounding up size classes.
static inline size_t mi_page_usable_block_size(const mi_page_t* page) {
  return mi_page_block_size(page) - MI_PADDING_SIZE;
}

// size of a segment
static inline size_t mi_segment_size(mi_segment_t* segment) {
  return segment->segment_slices * MI_SEGMENT_SLICE_SIZE;
}

static inline uint8_t* mi_segment_end(mi_segment_t* segment) {
  return (uint8_t*)segment + mi_segment_size(segment);
}

// Thread free access
// (the low 2 bits of `xthread_free` carry the delayed-free flag, see the `& ~3` / `& 3` below)
static inline mi_block_t* mi_page_thread_free(const mi_page_t* page) {
  return (mi_block_t*)(mi_atomic_load_relaxed(&((mi_page_t*)page)->xthread_free) & ~3);
}

static inline mi_delayed_t mi_page_thread_free_flag(const mi_page_t* page) {
  return (mi_delayed_t)(mi_atomic_load_relaxed(&((mi_page_t*)page)->xthread_free) & 3);
}

// Heap access
static inline mi_heap_t* mi_page_heap(const mi_page_t* page) {
  return (mi_heap_t*)(mi_atomic_load_relaxed(&((mi_page_t*)page)->xheap));
}

static inline void mi_page_set_heap(mi_page_t* page, mi_heap_t* heap) {
  mi_assert_internal(mi_page_thread_free_flag(page) != MI_DELAYED_FREEING);
  mi_atomic_store_release(&page->xheap,(uintptr_t)heap);
}

// Thread free flag helpers
static inline mi_block_t* mi_tf_block(mi_thread_free_t tf) {
  return (mi_block_t*)(tf & ~0x03);
}

static inline mi_delayed_t mi_tf_delayed(mi_thread_free_t tf) {
  return (mi_delayed_t)(tf & 0x03);
}

static inline mi_thread_free_t mi_tf_make(mi_block_t* block, mi_delayed_t delayed) {
  return (mi_thread_free_t)((uintptr_t)block | (uintptr_t)delayed);
}

static inline mi_thread_free_t mi_tf_set_delayed(mi_thread_free_t tf, mi_delayed_t delayed) {
  return mi_tf_make(mi_tf_block(tf),delayed);
}

static inline mi_thread_free_t mi_tf_set_block(mi_thread_free_t tf, mi_block_t* block) {
  return mi_tf_make(block, mi_tf_delayed(tf));
}

// are all blocks in a page freed?
// note: needs up-to-date used count, (as the `xthread_free` list may not be empty). see `_mi_page_collect_free`.
static inline bool mi_page_all_free(const mi_page_t* page) {
  mi_assert_internal(page != NULL);
  return (page->used == 0);
}

// are there any available blocks?
static inline bool mi_page_has_any_available(const mi_page_t* page) {
  mi_assert_internal(page != NULL && page->reserved > 0);
  return (page->used < page->reserved || (mi_page_thread_free(page) != NULL));
}

// are there immediately available blocks, i.e. blocks available on the free list.
static inline bool mi_page_immediate_available(const mi_page_t* page) {
  mi_assert_internal(page != NULL);
  return (page->free != NULL);
}

// is more than 7/8th of a page in use?
static inline bool mi_page_mostly_used(const mi_page_t* page) { if (page==NULL) return true; uint16_t frac = page->reserved / 8U; return (page->reserved - page->used <= frac); } static inline mi_page_queue_t* mi_page_queue(const mi_heap_t* heap, size_t size) { return &((mi_heap_t*)heap)->pages[_mi_bin(size)]; } //----------------------------------------------------------- // Page flags //----------------------------------------------------------- static inline bool mi_page_is_in_full(const mi_page_t* page) { return page->flags.x.in_full; } static inline void mi_page_set_in_full(mi_page_t* page, bool in_full) { page->flags.x.in_full = in_full; } static inline bool mi_page_has_aligned(const mi_page_t* page) { return page->flags.x.has_aligned; } static inline void mi_page_set_has_aligned(mi_page_t* page, bool has_aligned) { page->flags.x.has_aligned = has_aligned; } /* ------------------------------------------------------------------- Encoding/Decoding the free list next pointers This is to protect against buffer overflow exploits where the free list is mutated. Many hardened allocators xor the next pointer `p` with a secret key `k1`, as `p^k1`. This prevents overwriting with known values but might be still too weak: if the attacker can guess the pointer `p` this can reveal `k1` (since `p^k1^p == k1`). Moreover, if multiple blocks can be read as well, the attacker can xor both as `(p1^k1) ^ (p2^k1) == p1^p2` which may reveal a lot about the pointers (and subsequently `k1`). Instead mimalloc uses an extra key `k2` and encodes as `((p^k2)<<> (MI_INTPTR_BITS - shift)))); } static inline uintptr_t mi_rotr(uintptr_t x, uintptr_t shift) { shift %= MI_INTPTR_BITS; return (shift==0 ? x : ((x >> shift) | (x << (MI_INTPTR_BITS - shift)))); } static inline void* mi_ptr_decode(const void* null, const mi_encoded_t x, const uintptr_t* keys) { void* p = (void*)(mi_rotr(x - keys[0], keys[0]) ^ keys[1]); return (p==null ? 
NULL : p); } static inline mi_encoded_t mi_ptr_encode(const void* null, const void* p, const uintptr_t* keys) { uintptr_t x = (uintptr_t)(p==NULL ? null : p); return mi_rotl(x ^ keys[1], keys[0]) + keys[0]; } static inline mi_block_t* mi_block_nextx( const void* null, const mi_block_t* block, const uintptr_t* keys ) { mi_track_mem_defined(block,sizeof(mi_block_t)); mi_block_t* next; #ifdef MI_ENCODE_FREELIST next = (mi_block_t*)mi_ptr_decode(null, block->next, keys); #else MI_UNUSED(keys); MI_UNUSED(null); next = (mi_block_t*)block->next; #endif mi_track_mem_noaccess(block,sizeof(mi_block_t)); return next; } static inline void mi_block_set_nextx(const void* null, mi_block_t* block, const mi_block_t* next, const uintptr_t* keys) { mi_track_mem_undefined(block,sizeof(mi_block_t)); #ifdef MI_ENCODE_FREELIST block->next = mi_ptr_encode(null, next, keys); #else MI_UNUSED(keys); MI_UNUSED(null); block->next = (mi_encoded_t)next; #endif mi_track_mem_noaccess(block,sizeof(mi_block_t)); } static inline mi_block_t* mi_block_next(const mi_page_t* page, const mi_block_t* block) { #ifdef MI_ENCODE_FREELIST mi_block_t* next = mi_block_nextx(page,block,page->keys); // check for free list corruption: is `next` at least in the same page? // TODO: check if `next` is `page->block_size` aligned? 
if mi_unlikely(next!=NULL && !mi_is_in_same_page(block, next)) { _mi_error_message(EFAULT, "corrupted free list entry of size %zub at %p: value 0x%zx\n", mi_page_block_size(page), block, (uintptr_t)next); next = NULL; } return next; #else MI_UNUSED(page); return mi_block_nextx(page,block,NULL); #endif } static inline void mi_block_set_next(const mi_page_t* page, mi_block_t* block, const mi_block_t* next) { #ifdef MI_ENCODE_FREELIST mi_block_set_nextx(page,block,next, page->keys); #else MI_UNUSED(page); mi_block_set_nextx(page,block,next,NULL); #endif } // ------------------------------------------------------------------- // commit mask // ------------------------------------------------------------------- static inline void mi_commit_mask_create_empty(mi_commit_mask_t* cm) { for (size_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) { cm->mask[i] = 0; } } static inline void mi_commit_mask_create_full(mi_commit_mask_t* cm) { for (size_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) { cm->mask[i] = ~((size_t)0); } } static inline bool mi_commit_mask_is_empty(const mi_commit_mask_t* cm) { for (size_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) { if (cm->mask[i] != 0) return false; } return true; } static inline bool mi_commit_mask_is_full(const mi_commit_mask_t* cm) { for (size_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) { if (cm->mask[i] != ~((size_t)0)) return false; } return true; } // defined in `segment.c`: size_t _mi_commit_mask_committed_size(const mi_commit_mask_t* cm, size_t total); size_t _mi_commit_mask_next_run(const mi_commit_mask_t* cm, size_t* idx); #define mi_commit_mask_foreach(cm,idx,count) \ idx = 0; \ while ((count = _mi_commit_mask_next_run(cm,&idx)) > 0) { #define mi_commit_mask_foreach_end() \ idx += count; \ } /* ----------------------------------------------------------- memory id's ----------------------------------------------------------- */ static inline mi_memid_t _mi_memid_create(mi_memkind_t memkind) { mi_memid_t memid; 
_mi_memzero_var(memid); memid.memkind = memkind; return memid; } static inline mi_memid_t _mi_memid_none(void) { return _mi_memid_create(MI_MEM_NONE); } static inline mi_memid_t _mi_memid_create_os(bool committed, bool is_zero, bool is_large) { mi_memid_t memid = _mi_memid_create(MI_MEM_OS); memid.initially_committed = committed; memid.initially_zero = is_zero; memid.is_pinned = is_large; return memid; } // ------------------------------------------------------------------- // Fast "random" shuffle // ------------------------------------------------------------------- static inline uintptr_t _mi_random_shuffle(uintptr_t x) { if (x==0) { x = 17; } // ensure we don't get stuck in generating zeros #if (MI_INTPTR_SIZE==8) // by Sebastiano Vigna, see: x ^= x >> 30; x *= 0xbf58476d1ce4e5b9UL; x ^= x >> 27; x *= 0x94d049bb133111ebUL; x ^= x >> 31; #elif (MI_INTPTR_SIZE==4) // by Chris Wellons, see: x ^= x >> 16; x *= 0x7feb352dUL; x ^= x >> 15; x *= 0x846ca68bUL; x ^= x >> 16; #endif return x; } // ------------------------------------------------------------------- // Optimize numa node access for the common case (= one node) // ------------------------------------------------------------------- int _mi_os_numa_node_get(mi_os_tld_t* tld); size_t _mi_os_numa_node_count_get(void); extern _Atomic(size_t) _mi_numa_node_count; static inline int _mi_os_numa_node(mi_os_tld_t* tld) { if mi_likely(mi_atomic_load_relaxed(&_mi_numa_node_count) == 1) { return 0; } else return _mi_os_numa_node_get(tld); } static inline size_t _mi_os_numa_node_count(void) { const size_t count = mi_atomic_load_relaxed(&_mi_numa_node_count); if mi_likely(count > 0) { return count; } else return _mi_os_numa_node_count_get(); } // ----------------------------------------------------------------------- // Count bits: trailing or leading zeros (with MI_INTPTR_BITS on all zero) // ----------------------------------------------------------------------- #if defined(__GNUC__) #include // LONG_MAX #define 
MI_HAVE_FAST_BITSCAN static inline size_t mi_clz(uintptr_t x) { if (x==0) return MI_INTPTR_BITS; #if (INTPTR_MAX == LONG_MAX) return __builtin_clzl(x); #else return __builtin_clzll(x); #endif } static inline size_t mi_ctz(uintptr_t x) { if (x==0) return MI_INTPTR_BITS; #if (INTPTR_MAX == LONG_MAX) return __builtin_ctzl(x); #else return __builtin_ctzll(x); #endif } #elif defined(_MSC_VER) #include // LONG_MAX #include // BitScanReverse64 #define MI_HAVE_FAST_BITSCAN static inline size_t mi_clz(uintptr_t x) { if (x==0) return MI_INTPTR_BITS; unsigned long idx; #if (INTPTR_MAX == LONG_MAX) _BitScanReverse(&idx, x); #else _BitScanReverse64(&idx, x); #endif return ((MI_INTPTR_BITS - 1) - idx); } static inline size_t mi_ctz(uintptr_t x) { if (x==0) return MI_INTPTR_BITS; unsigned long idx; #if (INTPTR_MAX == LONG_MAX) _BitScanForward(&idx, x); #else _BitScanForward64(&idx, x); #endif return idx; } #else static inline size_t mi_ctz32(uint32_t x) { // de Bruijn multiplication, see static const unsigned char debruijn[32] = { 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8, 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9 }; if (x==0) return 32; return debruijn[((x & -(int32_t)x) * 0x077CB531UL) >> 27]; } static inline size_t mi_clz32(uint32_t x) { // de Bruijn multiplication, see static const uint8_t debruijn[32] = { 31, 22, 30, 21, 18, 10, 29, 2, 20, 17, 15, 13, 9, 6, 28, 1, 23, 19, 11, 3, 16, 14, 7, 24, 12, 4, 8, 25, 5, 26, 27, 0 }; if (x==0) return 32; x |= x >> 1; x |= x >> 2; x |= x >> 4; x |= x >> 8; x |= x >> 16; return debruijn[(uint32_t)(x * 0x07C4ACDDUL) >> 27]; } static inline size_t mi_clz(uintptr_t x) { if (x==0) return MI_INTPTR_BITS; #if (MI_INTPTR_BITS <= 32) return mi_clz32((uint32_t)x); #else size_t count = mi_clz32((uint32_t)(x >> 32)); if (count < 32) return count; return (32 + mi_clz32((uint32_t)x)); #endif } static inline size_t mi_ctz(uintptr_t x) { if (x==0) return MI_INTPTR_BITS; #if (MI_INTPTR_BITS <= 32) return 
mi_ctz32((uint32_t)x); #else size_t count = mi_ctz32((uint32_t)x); if (count < 32) return count; return (32 + mi_ctz32((uint32_t)(x>>32))); #endif } #endif // "bit scan reverse": Return index of the highest bit (or MI_INTPTR_BITS if `x` is zero) static inline size_t mi_bsr(uintptr_t x) { return (x==0 ? MI_INTPTR_BITS : MI_INTPTR_BITS - 1 - mi_clz(x)); } // --------------------------------------------------------------------------------- // Provide our own `_mi_memcpy` for potential performance optimizations. // // For now, only on Windows with msvc/clang-cl we optimize to `rep movsb` if // we happen to run on x86/x64 cpu's that have "fast short rep movsb" (FSRM) support // (AMD Zen3+ (~2020) or Intel Ice Lake+ (~2017). See also issue #201 and pr #253. // --------------------------------------------------------------------------------- #if !MI_TRACK_ENABLED && defined(_WIN32) && (defined(_M_IX86) || defined(_M_X64)) #include extern bool _mi_cpu_has_fsrm; static inline void _mi_memcpy(void* dst, const void* src, size_t n) { if (_mi_cpu_has_fsrm) { __movsb((unsigned char*)dst, (const unsigned char*)src, n); } else { memcpy(dst, src, n); } } static inline void _mi_memzero(void* dst, size_t n) { if (_mi_cpu_has_fsrm) { __stosb((unsigned char*)dst, 0, n); } else { memset(dst, 0, n); } } #else static inline void _mi_memcpy(void* dst, const void* src, size_t n) { memcpy(dst, src, n); } static inline void _mi_memzero(void* dst, size_t n) { memset(dst, 0, n); } #endif // ------------------------------------------------------------------------------- // The `_mi_memcpy_aligned` can be used if the pointers are machine-word aligned // This is used for example in `mi_realloc`. // ------------------------------------------------------------------------------- #if (defined(__GNUC__) && (__GNUC__ >= 4)) || defined(__clang__) // On GCC/CLang we provide a hint that the pointers are word aligned. 
static inline void _mi_memcpy_aligned(void* dst, const void* src, size_t n) { mi_assert_internal(((uintptr_t)dst % MI_INTPTR_SIZE == 0) && ((uintptr_t)src % MI_INTPTR_SIZE == 0)); void* adst = __builtin_assume_aligned(dst, MI_INTPTR_SIZE); const void* asrc = __builtin_assume_aligned(src, MI_INTPTR_SIZE); _mi_memcpy(adst, asrc, n); } static inline void _mi_memzero_aligned(void* dst, size_t n) { mi_assert_internal((uintptr_t)dst % MI_INTPTR_SIZE == 0); void* adst = __builtin_assume_aligned(dst, MI_INTPTR_SIZE); _mi_memzero(adst, n); } #else // Default fallback on `_mi_memcpy` static inline void _mi_memcpy_aligned(void* dst, const void* src, size_t n) { mi_assert_internal(((uintptr_t)dst % MI_INTPTR_SIZE == 0) && ((uintptr_t)src % MI_INTPTR_SIZE == 0)); _mi_memcpy(dst, src, n); } static inline void _mi_memzero_aligned(void* dst, size_t n) { mi_assert_internal((uintptr_t)dst % MI_INTPTR_SIZE == 0); _mi_memzero(dst, n); } #endif #endif luametatex-2.10.08/source/libraries/mimalloc/include/mimalloc/prim.h000066400000000000000000000336771442250314700254660ustar00rootroot00000000000000/* ---------------------------------------------------------------------------- Copyright (c) 2018-2023, Microsoft Research, Daan Leijen This is free software; you can redistribute it and/or modify it under the terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. -----------------------------------------------------------------------------*/ #pragma once #ifndef MIMALLOC_PRIM_H #define MIMALLOC_PRIM_H // -------------------------------------------------------------------------- // This file specifies the primitive portability API. // Each OS/host needs to implement these primitives, see `src/prim` // for implementations on Window, macOS, WASI, and Linux/Unix. 
// // note: on all primitive functions, we always have result parameters != NUL, and: // addr != NULL and page aligned // size > 0 and page aligned // return value is an error code an int where 0 is success. // -------------------------------------------------------------------------- // OS memory configuration typedef struct mi_os_mem_config_s { size_t page_size; // 4KiB size_t large_page_size; // 2MiB size_t alloc_granularity; // smallest allocation size (on Windows 64KiB) bool has_overcommit; // can we reserve more memory than can be actually committed? bool must_free_whole; // must allocated blocks be freed as a whole (false for mmap, true for VirtualAlloc) bool has_virtual_reserve; // supports virtual address space reservation? (if true we can reserve virtual address space without using commit or physical memory) } mi_os_mem_config_t; // Initialize void _mi_prim_mem_init( mi_os_mem_config_t* config ); // Free OS memory int _mi_prim_free(void* addr, size_t size ); // Allocate OS memory. Return NULL on error. // The `try_alignment` is just a hint and the returned pointer does not have to be aligned. // If `commit` is false, the virtual memory range only needs to be reserved (with no access) // which will later be committed explicitly using `_mi_prim_commit`. // `is_zero` is set to true if the memory was zero initialized (as on most OS's) // pre: !commit => !allow_large // try_alignment >= _mi_os_page_size() and a power of 2 int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, void** addr); // Commit memory. Returns error code or 0 on success. // For example, on Linux this would make the memory PROT_READ|PROT_WRITE. // `is_zero` is set to true if the memory was zero initialized (e.g. on Windows) int _mi_prim_commit(void* addr, size_t size, bool* is_zero); // Decommit memory. Returns error code or 0 on success. The `needs_recommit` result is true // if the memory would need to be re-committed. 
For example, on Windows this is always true, // but on Linux we could use MADV_DONTNEED to decommit which does not need a recommit. // pre: needs_recommit != NULL int _mi_prim_decommit(void* addr, size_t size, bool* needs_recommit); // Reset memory. The range keeps being accessible but the content might be reset. // Returns error code or 0 on success. int _mi_prim_reset(void* addr, size_t size); // Protect memory. Returns error code or 0 on success. int _mi_prim_protect(void* addr, size_t size, bool protect); // Allocate huge (1GiB) pages possibly associated with a NUMA node. // `is_zero` is set to true if the memory was zero initialized (as on most OS's) // pre: size > 0 and a multiple of 1GiB. // numa_node is either negative (don't care), or a numa node number. int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, bool* is_zero, void** addr); // Return the current NUMA node size_t _mi_prim_numa_node(void); // Return the number of logical NUMA nodes size_t _mi_prim_numa_node_count(void); // Clock ticks mi_msecs_t _mi_prim_clock_now(void); // Return process information (only for statistics) typedef struct mi_process_info_s { mi_msecs_t elapsed; mi_msecs_t utime; mi_msecs_t stime; size_t current_rss; size_t peak_rss; size_t current_commit; size_t peak_commit; size_t page_faults; } mi_process_info_t; void _mi_prim_process_info(mi_process_info_t* pinfo); // Default stderr output. (only for warnings etc. with verbose enabled) // msg != NULL && _mi_strlen(msg) > 0 void _mi_prim_out_stderr( const char* msg ); // Get an environment variable. (only for options) // name != NULL, result != NULL, result_size >= 64 bool _mi_prim_getenv(const char* name, char* result, size_t result_size); // Fill a buffer with strong randomness; return `false` on error or if // there is no strong randomization available. 
bool _mi_prim_random_buf(void* buf, size_t buf_len); // Called on the first thread start, and should ensure `_mi_thread_done` is called on thread termination. void _mi_prim_thread_init_auto_done(void); // Called on process exit and may take action to clean up resources associated with the thread auto done. void _mi_prim_thread_done_auto_done(void); // Called when the default heap for a thread changes void _mi_prim_thread_associate_default_heap(mi_heap_t* heap); //------------------------------------------------------------------- // Thread id: `_mi_prim_thread_id()` // // Getting the thread id should be performant as it is called in the // fast path of `_mi_free` and we specialize for various platforms as // inlined definitions. Regular code should call `init.c:_mi_thread_id()`. // We only require _mi_prim_thread_id() to return a unique id // for each thread (unequal to zero). //------------------------------------------------------------------- // defined in `init.c`; do not use these directly extern mi_decl_thread mi_heap_t* _mi_heap_default; // default heap to allocate from extern bool _mi_process_is_initialized; // has mi_process_init been called? static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept; #if defined(_WIN32) #define WIN32_LEAN_AND_MEAN #include static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept { // Windows: works on Intel and ARM in both 32- and 64-bit return (uintptr_t)NtCurrentTeb(); } // We use assembly for a fast thread id on the main platforms. The TLS layout depends on // both the OS and libc implementation so we use specific tests for each main platform. // If you test on another platform and it works please send a PR :-) // see also https://akkadia.org/drepper/tls.pdf for more info on the TLS register. 
#elif defined(__GNUC__) && ( \ (defined(__GLIBC__) && (defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__aarch64__))) \ || (defined(__APPLE__) && (defined(__x86_64__) || defined(__aarch64__))) \ || (defined(__BIONIC__) && (defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__aarch64__))) \ || (defined(__FreeBSD__) && (defined(__x86_64__) || defined(__i386__) || defined(__aarch64__))) \ || (defined(__OpenBSD__) && (defined(__x86_64__) || defined(__i386__) || defined(__aarch64__))) \ ) static inline void* mi_prim_tls_slot(size_t slot) mi_attr_noexcept { void* res; const size_t ofs = (slot*sizeof(void*)); #if defined(__i386__) __asm__("movl %%gs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : ); // x86 32-bit always uses GS #elif defined(__APPLE__) && defined(__x86_64__) __asm__("movq %%gs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : ); // x86_64 macOSX uses GS #elif defined(__x86_64__) && (MI_INTPTR_SIZE==4) __asm__("movl %%fs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : ); // x32 ABI #elif defined(__x86_64__) __asm__("movq %%fs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : ); // x86_64 Linux, BSD uses FS #elif defined(__arm__) void** tcb; MI_UNUSED(ofs); __asm__ volatile ("mrc p15, 0, %0, c13, c0, 3\nbic %0, %0, #3" : "=r" (tcb)); res = tcb[slot]; #elif defined(__aarch64__) void** tcb; MI_UNUSED(ofs); #if defined(__APPLE__) // M1, issue #343 __asm__ volatile ("mrs %0, tpidrro_el0\nbic %0, %0, #7" : "=r" (tcb)); #else __asm__ volatile ("mrs %0, tpidr_el0" : "=r" (tcb)); #endif res = tcb[slot]; #endif return res; } // setting a tls slot is only used on macOS for now static inline void mi_prim_tls_slot_set(size_t slot, void* value) mi_attr_noexcept { const size_t ofs = (slot*sizeof(void*)); #if defined(__i386__) __asm__("movl %1,%%gs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : ); // 32-bit always uses GS #elif defined(__APPLE__) && defined(__x86_64__) __asm__("movq %1,%%gs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : ); // 
x86_64 macOS uses GS #elif defined(__x86_64__) && (MI_INTPTR_SIZE==4) __asm__("movl %1,%%fs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : ); // x32 ABI #elif defined(__x86_64__) __asm__("movq %1,%%fs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : ); // x86_64 Linux, BSD uses FS #elif defined(__arm__) void** tcb; MI_UNUSED(ofs); __asm__ volatile ("mrc p15, 0, %0, c13, c0, 3\nbic %0, %0, #3" : "=r" (tcb)); tcb[slot] = value; #elif defined(__aarch64__) void** tcb; MI_UNUSED(ofs); #if defined(__APPLE__) // M1, issue #343 __asm__ volatile ("mrs %0, tpidrro_el0\nbic %0, %0, #7" : "=r" (tcb)); #else __asm__ volatile ("mrs %0, tpidr_el0" : "=r" (tcb)); #endif tcb[slot] = value; #endif } static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept { #if defined(__BIONIC__) // issue #384, #495: on the Bionic libc (Android), slot 1 is the thread id // see: https://github.com/aosp-mirror/platform_bionic/blob/c44b1d0676ded732df4b3b21c5f798eacae93228/libc/platform/bionic/tls_defines.h#L86 return (uintptr_t)mi_prim_tls_slot(1); #else // in all our other targets, slot 0 is the thread id // glibc: https://sourceware.org/git/?p=glibc.git;a=blob_plain;f=sysdeps/x86_64/nptl/tls.h // apple: https://github.com/apple/darwin-xnu/blob/main/libsyscall/os/tsd.h#L36 return (uintptr_t)mi_prim_tls_slot(0); #endif } #else // otherwise use portable C, taking the address of a thread local variable (this is still very fast on most platforms). static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept { return (uintptr_t)&_mi_heap_default; } #endif /* ---------------------------------------------------------------------------------------- The thread local default heap: `_mi_prim_get_default_heap()` This is inlined here as it is on the fast path for allocation functions. On most platforms (Windows, Linux, FreeBSD, NetBSD, etc), this just returns a __thread local variable (`_mi_heap_default`). 
With the initial-exec TLS model this ensures that the storage will always be available (allocated on the thread stacks). On some platforms though we cannot use that when overriding `malloc` since the underlying TLS implementation (or the loader) will call itself `malloc` on a first access and recurse. We try to circumvent this in an efficient way: - macOSX : we use an unused TLS slot from the OS allocated slots (MI_TLS_SLOT). On OSX, the loader itself calls `malloc` even before the modules are initialized. - OpenBSD: we use an unused slot from the pthread block (MI_TLS_PTHREAD_SLOT_OFS). - DragonFly: defaults are working but seem slow compared to freeBSD (see PR #323) ------------------------------------------------------------------------------------------- */ static inline mi_heap_t* mi_prim_get_default_heap(void); #if defined(MI_MALLOC_OVERRIDE) #if defined(__APPLE__) // macOS #define MI_TLS_SLOT 89 // seems unused? // #define MI_TLS_RECURSE_GUARD 1 // other possible unused ones are 9, 29, __PTK_FRAMEWORK_JAVASCRIPTCORE_KEY4 (94), __PTK_FRAMEWORK_GC_KEY9 (112) and __PTK_FRAMEWORK_OLDGC_KEY9 (89) // see #elif defined(__OpenBSD__) // use end bytes of a name; goes wrong if anyone uses names > 23 characters (ptrhread specifies 16) // see #define MI_TLS_PTHREAD_SLOT_OFS (6*sizeof(int) + 4*sizeof(void*) + 24) // #elif defined(__DragonFly__) // #warning "mimalloc is not working correctly on DragonFly yet." // #define MI_TLS_PTHREAD_SLOT_OFS (4 + 1*sizeof(void*)) // offset `uniqueid` (also used by gdb?) 
#elif defined(__ANDROID__) // See issue #381 #define MI_TLS_PTHREAD #endif #endif #if defined(MI_TLS_SLOT) static inline mi_heap_t* mi_prim_get_default_heap(void) { mi_heap_t* heap = (mi_heap_t*)mi_prim_tls_slot(MI_TLS_SLOT); if mi_unlikely(heap == NULL) { #ifdef __GNUC__ __asm(""); // prevent conditional load of the address of _mi_heap_empty #endif heap = (mi_heap_t*)&_mi_heap_empty; } return heap; } #elif defined(MI_TLS_PTHREAD_SLOT_OFS) static inline mi_heap_t** mi_prim_tls_pthread_heap_slot(void) { pthread_t self = pthread_self(); #if defined(__DragonFly__) if (self==NULL) return NULL; #endif return (mi_heap_t**)((uint8_t*)self + MI_TLS_PTHREAD_SLOT_OFS); } static inline mi_heap_t* mi_prim_get_default_heap(void) { mi_heap_t** pheap = mi_prim_tls_pthread_heap_slot(); if mi_unlikely(pheap == NULL) return _mi_heap_main_get(); mi_heap_t* heap = *pheap; if mi_unlikely(heap == NULL) return (mi_heap_t*)&_mi_heap_empty; return heap; } #elif defined(MI_TLS_PTHREAD) extern pthread_key_t _mi_heap_default_key; static inline mi_heap_t* mi_prim_get_default_heap(void) { mi_heap_t* heap = (mi_unlikely(_mi_heap_default_key == (pthread_key_t)(-1)) ? _mi_heap_main_get() : (mi_heap_t*)pthread_getspecific(_mi_heap_default_key)); return (mi_unlikely(heap == NULL) ? (mi_heap_t*)&_mi_heap_empty : heap); } #else // default using a thread local variable; used on most platforms. 
static inline mi_heap_t* mi_prim_get_default_heap(void) { #if defined(MI_TLS_RECURSE_GUARD) if (mi_unlikely(!_mi_process_is_initialized)) return _mi_heap_main_get(); #endif return _mi_heap_default; } #endif // mi_prim_get_default_heap() #endif // MIMALLOC_PRIM_H luametatex-2.10.08/source/libraries/mimalloc/include/mimalloc/track.h000066400000000000000000000125171442250314700256110ustar00rootroot00000000000000/* ---------------------------------------------------------------------------- Copyright (c) 2018-2023, Microsoft Research, Daan Leijen This is free software; you can redistribute it and/or modify it under the terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. -----------------------------------------------------------------------------*/ #pragma once #ifndef MIMALLOC_TRACK_H #define MIMALLOC_TRACK_H /* ------------------------------------------------------------------------------------------------------ Track memory ranges with macros for tools like Valgrind address sanitizer, or other memory checkers. These can be defined for tracking allocation: #define mi_track_malloc_size(p,reqsize,size,zero) #define mi_track_free_size(p,_size) The macros are set up such that the size passed to `mi_track_free_size` always matches the size of `mi_track_malloc_size`. (currently, `size == mi_usable_size(p)`). The `reqsize` is what the user requested, and `size >= reqsize`. The `size` is either byte precise (and `size==reqsize`) if `MI_PADDING` is enabled, or otherwise it is the usable block size which may be larger than the original request. Use `_mi_block_size_of(void* p)` to get the full block size that was allocated (including padding etc). The `zero` parameter is `true` if the allocated block is zero initialized. 
Optional: #define mi_track_align(p,alignedp,offset,size) #define mi_track_resize(p,oldsize,newsize) #define mi_track_init() The `mi_track_align` is called right after a `mi_track_malloc` for aligned pointers in a block. The corresponding `mi_track_free` still uses the block start pointer and original size (corresponding to the `mi_track_malloc`). The `mi_track_resize` is currently unused but could be called on reallocations within a block. `mi_track_init` is called at program start. The following macros are for tools like asan and valgrind to track whether memory is defined, undefined, or not accessible at all: #define mi_track_mem_defined(p,size) #define mi_track_mem_undefined(p,size) #define mi_track_mem_noaccess(p,size) -------------------------------------------------------------------------------------------------------*/ #if MI_TRACK_VALGRIND // valgrind tool #define MI_TRACK_ENABLED 1 #define MI_TRACK_HEAP_DESTROY 1 // track free of individual blocks on heap_destroy #define MI_TRACK_TOOL "valgrind" #include #include #define mi_track_malloc_size(p,reqsize,size,zero) VALGRIND_MALLOCLIKE_BLOCK(p,size,MI_PADDING_SIZE /*red zone*/,zero) #define mi_track_free_size(p,_size) VALGRIND_FREELIKE_BLOCK(p,MI_PADDING_SIZE /*red zone*/) #define mi_track_resize(p,oldsize,newsize) VALGRIND_RESIZEINPLACE_BLOCK(p,oldsize,newsize,MI_PADDING_SIZE /*red zone*/) #define mi_track_mem_defined(p,size) VALGRIND_MAKE_MEM_DEFINED(p,size) #define mi_track_mem_undefined(p,size) VALGRIND_MAKE_MEM_UNDEFINED(p,size) #define mi_track_mem_noaccess(p,size) VALGRIND_MAKE_MEM_NOACCESS(p,size) #elif MI_TRACK_ASAN // address sanitizer #define MI_TRACK_ENABLED 1 #define MI_TRACK_HEAP_DESTROY 0 #define MI_TRACK_TOOL "asan" #include #define mi_track_malloc_size(p,reqsize,size,zero) ASAN_UNPOISON_MEMORY_REGION(p,size) #define mi_track_free_size(p,size) ASAN_POISON_MEMORY_REGION(p,size) #define mi_track_mem_defined(p,size) ASAN_UNPOISON_MEMORY_REGION(p,size) #define mi_track_mem_undefined(p,size) 
ASAN_UNPOISON_MEMORY_REGION(p,size) #define mi_track_mem_noaccess(p,size) ASAN_POISON_MEMORY_REGION(p,size) #elif MI_TRACK_ETW // windows event tracing #define MI_TRACK_ENABLED 1 #define MI_TRACK_HEAP_DESTROY 1 #define MI_TRACK_TOOL "ETW" #define WIN32_LEAN_AND_MEAN #include #include "../src/prim/windows/etw.h" #define mi_track_init() EventRegistermicrosoft_windows_mimalloc(); #define mi_track_malloc_size(p,reqsize,size,zero) EventWriteETW_MI_ALLOC((UINT64)(p), size) #define mi_track_free_size(p,size) EventWriteETW_MI_FREE((UINT64)(p), size) #else // no tracking #define MI_TRACK_ENABLED 0 #define MI_TRACK_HEAP_DESTROY 0 #define MI_TRACK_TOOL "none" #define mi_track_malloc_size(p,reqsize,size,zero) #define mi_track_free_size(p,_size) #endif // ------------------- // Utility definitions #ifndef mi_track_resize #define mi_track_resize(p,oldsize,newsize) mi_track_free_size(p,oldsize); mi_track_malloc(p,newsize,false) #endif #ifndef mi_track_align #define mi_track_align(p,alignedp,offset,size) mi_track_mem_noaccess(p,offset) #endif #ifndef mi_track_init #define mi_track_init() #endif #ifndef mi_track_mem_defined #define mi_track_mem_defined(p,size) #endif #ifndef mi_track_mem_undefined #define mi_track_mem_undefined(p,size) #endif #ifndef mi_track_mem_noaccess #define mi_track_mem_noaccess(p,size) #endif #if MI_PADDING #define mi_track_malloc(p,reqsize,zero) \ if ((p)!=NULL) { \ mi_assert_internal(mi_usable_size(p)==(reqsize)); \ mi_track_malloc_size(p,reqsize,reqsize,zero); \ } #else #define mi_track_malloc(p,reqsize,zero) \ if ((p)!=NULL) { \ mi_assert_internal(mi_usable_size(p)>=(reqsize)); \ mi_track_malloc_size(p,reqsize,mi_usable_size(p),zero); \ } #endif #endif luametatex-2.10.08/source/libraries/mimalloc/include/mimalloc/types.h000066400000000000000000000652121442250314700256510ustar00rootroot00000000000000/* ---------------------------------------------------------------------------- Copyright (c) 2018-2023, Microsoft Research, Daan Leijen This is free 
software; you can redistribute it and/or modify it under the terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. -----------------------------------------------------------------------------*/ #pragma once #ifndef MIMALLOC_TYPES_H #define MIMALLOC_TYPES_H // -------------------------------------------------------------------------- // This file contains the main type definitions for mimalloc: // mi_heap_t : all data for a thread-local heap, contains // lists of all managed heap pages. // mi_segment_t : a larger chunk of memory (32GiB) from where pages // are allocated. // mi_page_t : a mimalloc page (usually 64KiB or 512KiB) from // where objects are allocated. // -------------------------------------------------------------------------- #include // ptrdiff_t #include // uintptr_t, uint16_t, etc #include "mimalloc/atomic.h" // _Atomic #ifdef _MSC_VER #pragma warning(disable:4214) // bitfield is not int #endif // Minimal alignment necessary. On most platforms 16 bytes are needed // due to SSE registers for example. This must be at least `sizeof(void*)` #ifndef MI_MAX_ALIGN_SIZE #define MI_MAX_ALIGN_SIZE 16 // sizeof(max_align_t) #endif // ------------------------------------------------------ // Variants // ------------------------------------------------------ // Define NDEBUG in the release version to disable assertions. // #define NDEBUG // Define MI_TRACK_ to enable tracking support // #define MI_TRACK_VALGRIND 1 // #define MI_TRACK_ASAN 1 // #define MI_TRACK_ETW 1 // Define MI_STAT as 1 to maintain statistics; set it to 2 to have detailed statistics (but costs some performance). 
// #define MI_STAT 1 // Define MI_SECURE to enable security mitigations // #define MI_SECURE 1 // guard page around metadata // #define MI_SECURE 2 // guard page around each mimalloc page // #define MI_SECURE 3 // encode free lists (detect corrupted free list (buffer overflow), and invalid pointer free) // #define MI_SECURE 4 // checks for double free. (may be more expensive) #if !defined(MI_SECURE) #define MI_SECURE 0 #endif // Define MI_DEBUG for debug mode // #define MI_DEBUG 1 // basic assertion checks and statistics, check double free, corrupted free list, and invalid pointer free. // #define MI_DEBUG 2 // + internal assertion checks // #define MI_DEBUG 3 // + extensive internal invariant checking (cmake -DMI_DEBUG_FULL=ON) #if !defined(MI_DEBUG) #if !defined(NDEBUG) || defined(_DEBUG) #define MI_DEBUG 2 #else #define MI_DEBUG 0 #endif #endif // Reserve extra padding at the end of each block to be more resilient against heap block overflows. // The padding can detect buffer overflow on free. #if !defined(MI_PADDING) && (MI_SECURE>=3 || MI_DEBUG>=1 || (MI_TRACK_VALGRIND || MI_TRACK_ASAN || MI_TRACK_ETW)) #define MI_PADDING 1 #endif // Check padding bytes; allows byte-precise buffer overflow detection #if !defined(MI_PADDING_CHECK) && MI_PADDING && (MI_SECURE>=3 || MI_DEBUG>=1) #define MI_PADDING_CHECK 1 #endif // Encoded free lists allow detection of corrupted free lists // and can detect buffer overflows, modify after free, and double `free`s. #if (MI_SECURE>=3 || MI_DEBUG>=1) #define MI_ENCODE_FREELIST 1 #endif // We used to abandon huge pages but to eagerly deallocate if freed from another thread, // but that makes it not possible to visit them during a heap walk or include them in a // `mi_heap_destroy`. We therefore instead reset/decommit the huge blocks if freed from // another thread so most memory is available until it gets properly freed by the owning thread. 
// #define MI_HUGE_PAGE_ABANDON 1 // ------------------------------------------------------ // Platform specific values // ------------------------------------------------------ // ------------------------------------------------------ // Size of a pointer. // We assume that `sizeof(void*)==sizeof(intptr_t)` // and it holds for all platforms we know of. // // However, the C standard only requires that: // p == (void*)((intptr_t)p)) // but we also need: // i == (intptr_t)((void*)i) // or otherwise one might define an intptr_t type that is larger than a pointer... // ------------------------------------------------------ #if INTPTR_MAX > INT64_MAX # define MI_INTPTR_SHIFT (4) // assume 128-bit (as on arm CHERI for example) #elif INTPTR_MAX == INT64_MAX # define MI_INTPTR_SHIFT (3) #elif INTPTR_MAX == INT32_MAX # define MI_INTPTR_SHIFT (2) #else #error platform pointers must be 32, 64, or 128 bits #endif #if SIZE_MAX == UINT64_MAX # define MI_SIZE_SHIFT (3) typedef int64_t mi_ssize_t; #elif SIZE_MAX == UINT32_MAX # define MI_SIZE_SHIFT (2) typedef int32_t mi_ssize_t; #else #error platform objects must be 32 or 64 bits #endif #if (SIZE_MAX/2) > LONG_MAX # define MI_ZU(x) x##ULL # define MI_ZI(x) x##LL #else # define MI_ZU(x) x##UL # define MI_ZI(x) x##L #endif #define MI_INTPTR_SIZE (1< 4 #define MI_SEGMENT_SHIFT ( 9 + MI_SEGMENT_SLICE_SHIFT) // 32MiB #else #define MI_SEGMENT_SHIFT ( 7 + MI_SEGMENT_SLICE_SHIFT) // 4MiB on 32-bit #endif #define MI_SMALL_PAGE_SHIFT (MI_SEGMENT_SLICE_SHIFT) // 64KiB #define MI_MEDIUM_PAGE_SHIFT ( 3 + MI_SMALL_PAGE_SHIFT) // 512KiB // Derived constants #define MI_SEGMENT_SIZE (MI_ZU(1)<= 655360) #error "mimalloc internal: define more bins" #endif // Maximum slice offset (15) #define MI_MAX_SLICE_OFFSET ((MI_ALIGNMENT_MAX / MI_SEGMENT_SLICE_SIZE) - 1) // Used as a special value to encode block sizes in 32 bits. 
#define MI_HUGE_BLOCK_SIZE ((uint32_t)(2*MI_GiB)) // blocks up to this size are always allocated aligned #define MI_MAX_ALIGN_GUARANTEE (8*MI_MAX_ALIGN_SIZE) // Alignments over MI_ALIGNMENT_MAX are allocated in dedicated huge page segments #define MI_ALIGNMENT_MAX (MI_SEGMENT_SIZE >> 1) // ------------------------------------------------------ // Mimalloc pages contain allocated blocks // ------------------------------------------------------ // The free lists use encoded next fields // (Only actually encodes when MI_ENCODED_FREELIST is defined.) typedef uintptr_t mi_encoded_t; // thread id's typedef size_t mi_threadid_t; // free lists contain blocks typedef struct mi_block_s { mi_encoded_t next; } mi_block_t; // The delayed flags are used for efficient multi-threaded free-ing typedef enum mi_delayed_e { MI_USE_DELAYED_FREE = 0, // push on the owning heap thread delayed list MI_DELAYED_FREEING = 1, // temporary: another thread is accessing the owning heap MI_NO_DELAYED_FREE = 2, // optimize: push on page local thread free queue if another block is already in the heap thread delayed free list MI_NEVER_DELAYED_FREE = 3 // sticky, only resets on page reclaim } mi_delayed_t; // The `in_full` and `has_aligned` page flags are put in a union to efficiently // test if both are false (`full_aligned == 0`) in the `mi_free` routine. #if !MI_TSAN typedef union mi_page_flags_s { uint8_t full_aligned; struct { uint8_t in_full : 1; uint8_t has_aligned : 1; } x; } mi_page_flags_t; #else // under thread sanitizer, use a byte for each flag to suppress warning, issue #130 typedef union mi_page_flags_s { uint16_t full_aligned; struct { uint8_t in_full; uint8_t has_aligned; } x; } mi_page_flags_t; #endif // Thread free list. // We use the bottom 2 bits of the pointer for mi_delayed_t flags typedef uintptr_t mi_thread_free_t; // A page contains blocks of one specific size (`block_size`). 
// Each page has three list of free blocks: // `free` for blocks that can be allocated, // `local_free` for freed blocks that are not yet available to `mi_malloc` // `thread_free` for freed blocks by other threads // The `local_free` and `thread_free` lists are migrated to the `free` list // when it is exhausted. The separate `local_free` list is necessary to // implement a monotonic heartbeat. The `thread_free` list is needed for // avoiding atomic operations in the common case. // // // `used - |thread_free|` == actual blocks that are in use (alive) // `used - |thread_free| + |free| + |local_free| == capacity` // // We don't count `freed` (as |free|) but use `used` to reduce // the number of memory accesses in the `mi_page_all_free` function(s). // // Notes: // - Access is optimized for `mi_free` and `mi_page_alloc` (in `alloc.c`) // - Using `uint16_t` does not seem to slow things down // - The size is 8 words on 64-bit which helps the page index calculations // (and 10 words on 32-bit, and encoded free lists add 2 words. Sizes 10 // and 12 are still good for address calculation) // - To limit the structure size, the `xblock_size` is 32-bits only; for // blocks > MI_HUGE_BLOCK_SIZE the size is determined from the segment page size // - `thread_free` uses the bottom bits as a delayed-free flags to optimize // concurrent frees where only the first concurrent free adds to the owning // heap `thread_delayed_free` list (see `alloc.c:mi_free_block_mt`). // The invariant is that no-delayed-free is only set if there is // at least one block that will be added, or as already been added, to // the owning heap `thread_delayed_free` list. This guarantees that pages // will be freed correctly even if only other threads free blocks. 
typedef struct mi_page_s { // "owned" by the segment uint32_t slice_count; // slices in this page (0 if not a page) uint32_t slice_offset; // distance from the actual page data slice (0 if a page) uint8_t is_committed : 1; // `true` if the page virtual memory is committed uint8_t is_zero_init : 1; // `true` if the page was initially zero initialized // layout like this to optimize access in `mi_malloc` and `mi_free` uint16_t capacity; // number of blocks committed, must be the first field, see `segment.c:page_clear` uint16_t reserved; // number of blocks reserved in memory mi_page_flags_t flags; // `in_full` and `has_aligned` flags (8 bits) uint8_t free_is_zero : 1; // `true` if the blocks in the free list are zero initialized uint8_t retire_expire : 7; // expiration count for retired blocks mi_block_t* free; // list of available free blocks (`malloc` allocates from this list) uint32_t used; // number of blocks in use (including blocks in `local_free` and `thread_free`) uint32_t xblock_size; // size available in each block (always `>0`) mi_block_t* local_free; // list of deferred free blocks by this thread (migrates to `free`) #if (MI_ENCODE_FREELIST || MI_PADDING) uintptr_t keys[2]; // two random keys to encode the free lists (see `_mi_block_next`) or padding canary #endif _Atomic(mi_thread_free_t) xthread_free; // list of deferred free blocks freed by other threads _Atomic(uintptr_t) xheap; struct mi_page_s* next; // next page owned by this thread with the same `block_size` struct mi_page_s* prev; // previous page owned by this thread with the same `block_size` // 64-bit 9 words, 32-bit 12 words, (+2 for secure) #if MI_INTPTR_SIZE==8 uintptr_t padding[1]; #endif } mi_page_t; // ------------------------------------------------------ // Mimalloc segments contain mimalloc pages // ------------------------------------------------------ typedef enum mi_page_kind_e { MI_PAGE_SMALL, // small blocks go into 64KiB pages inside a segment MI_PAGE_MEDIUM, // medium blocks go 
into medium pages inside a segment MI_PAGE_LARGE, // larger blocks go into a page of just one block MI_PAGE_HUGE, // huge blocks (> 16 MiB) are put into a single page in a single segment. } mi_page_kind_t; typedef enum mi_segment_kind_e { MI_SEGMENT_NORMAL, // MI_SEGMENT_SIZE size with pages inside. MI_SEGMENT_HUGE, // > MI_LARGE_SIZE_MAX segment with just one huge page inside. } mi_segment_kind_t; // ------------------------------------------------------ // A segment holds a commit mask where a bit is set if // the corresponding MI_COMMIT_SIZE area is committed. // The MI_COMMIT_SIZE must be a multiple of the slice // size. If it is equal we have the most fine grained // decommit (but setting it higher can be more efficient). // The MI_MINIMAL_COMMIT_SIZE is the minimal amount that will // be committed in one go which can be set higher than // MI_COMMIT_SIZE for efficiency (while the decommit mask // is still tracked in fine-grained MI_COMMIT_SIZE chunks) // ------------------------------------------------------ #define MI_MINIMAL_COMMIT_SIZE (1*MI_SEGMENT_SLICE_SIZE) #define MI_COMMIT_SIZE (MI_SEGMENT_SLICE_SIZE) // 64KiB #define MI_COMMIT_MASK_BITS (MI_SEGMENT_SIZE / MI_COMMIT_SIZE) #define MI_COMMIT_MASK_FIELD_BITS MI_SIZE_BITS #define MI_COMMIT_MASK_FIELD_COUNT (MI_COMMIT_MASK_BITS / MI_COMMIT_MASK_FIELD_BITS) #if (MI_COMMIT_MASK_BITS != (MI_COMMIT_MASK_FIELD_COUNT * MI_COMMIT_MASK_FIELD_BITS)) #error "the segment size must be exactly divisible by the (commit size * size_t bits)" #endif typedef struct mi_commit_mask_s { size_t mask[MI_COMMIT_MASK_FIELD_COUNT]; } mi_commit_mask_t; typedef mi_page_t mi_slice_t; typedef int64_t mi_msecs_t; // Memory can reside in arena's, direct OS allocated, or statically allocated. The memid keeps track of this. 
typedef enum mi_memkind_e { MI_MEM_NONE, // not allocated MI_MEM_EXTERNAL, // not owned by mimalloc but provided externally (via `mi_manage_os_memory` for example) MI_MEM_STATIC, // allocated in a static area and should not be freed (for arena meta data for example) MI_MEM_OS, // allocated from the OS MI_MEM_OS_HUGE, // allocated as huge os pages MI_MEM_OS_REMAP, // allocated in a remapable area (i.e. using `mremap`) MI_MEM_ARENA // allocated from an arena (the usual case) } mi_memkind_t; static inline bool mi_memkind_is_os(mi_memkind_t memkind) { return (memkind >= MI_MEM_OS && memkind <= MI_MEM_OS_REMAP); } typedef struct mi_memid_os_info { void* base; // actual base address of the block (used for offset aligned allocations) size_t alignment; // alignment at allocation } mi_memid_os_info_t; typedef struct mi_memid_arena_info { size_t block_index; // index in the arena mi_arena_id_t id; // arena id (>= 1) bool is_exclusive; // the arena can only be used for specific arena allocations } mi_memid_arena_info_t; typedef struct mi_memid_s { union { mi_memid_os_info_t os; // only used for MI_MEM_OS mi_memid_arena_info_t arena; // only used for MI_MEM_ARENA } mem; bool is_pinned; // `true` if we cannot decommit/reset/protect in this memory (e.g. when allocated using large OS pages) bool initially_committed;// `true` if the memory was originally allocated as committed bool initially_zero; // `true` if the memory was originally zero initialized mi_memkind_t memkind; } mi_memid_t; // Segments are large allocated memory blocks (8mb on 64 bit) from // the OS. Inside segments we allocated fixed size _pages_ that // contain blocks. 
typedef struct mi_segment_s { // constant fields mi_memid_t memid; // memory id for arena allocation bool allow_decommit; bool allow_purge; size_t segment_size; // segment fields mi_msecs_t purge_expire; mi_commit_mask_t purge_mask; mi_commit_mask_t commit_mask; _Atomic(struct mi_segment_s*) abandoned_next; // from here is zero initialized struct mi_segment_s* next; // the list of freed segments in the cache (must be first field, see `segment.c:mi_segment_init`) size_t abandoned; // abandoned pages (i.e. the original owning thread stopped) (`abandoned <= used`) size_t abandoned_visits; // count how often this segment is visited in the abandoned list (to force reclaim it it is too long) size_t used; // count of pages in use uintptr_t cookie; // verify addresses in debug mode: `mi_ptr_cookie(segment) == segment->cookie` size_t segment_slices; // for huge segments this may be different from `MI_SLICES_PER_SEGMENT` size_t segment_info_slices; // initial slices we are using segment info and possible guard pages. // layout like this to optimize access in `mi_free` mi_segment_kind_t kind; size_t slice_entries; // entries in the `slices` array, at most `MI_SLICES_PER_SEGMENT` _Atomic(mi_threadid_t) thread_id; // unique id of the thread owning this segment mi_slice_t slices[MI_SLICES_PER_SEGMENT+1]; // one more for huge blocks with large alignment } mi_segment_t; // ------------------------------------------------------ // Heaps // Provide first-class heaps to allocate from. // A heap just owns a set of pages for allocation and // can only be allocate/reallocate from the thread that created it. // Freeing blocks can be done from any thread though. // Per thread, the segments are shared among its heaps. // Per thread, there is always a default heap that is // used for allocation; it is initialized to statically // point to an empty heap to avoid initialization checks // in the fast path. 
// ------------------------------------------------------

// Thread local data
typedef struct mi_tld_s mi_tld_t;

// Pages of a certain block size are held in a queue.
typedef struct mi_page_queue_s {
    mi_page_t* first;
    mi_page_t* last;
    size_t     block_size;
} mi_page_queue_t;

// One extra bin past the largest size class, used to queue "full" pages.
#define MI_BIN_FULL  (MI_BIN_HUGE+1)

// Random context (state for the secure-mode random number generator).
typedef struct mi_random_cxt_s {
    uint32_t input[16];
    uint32_t output[16];
    int      output_available;
    bool     weak;
} mi_random_ctx_t;


// In debug mode there is a padding structure at the end of the blocks to check for buffer overflows
#if (MI_PADDING)
typedef struct mi_padding_s {
    uint32_t canary; // encoded block value to check validity of the padding (in case of overflow)
    uint32_t delta;  // padding bytes before the block. (mi_usable_size(p) - delta == exact allocated bytes)
} mi_padding_t;
#define MI_PADDING_SIZE   (sizeof(mi_padding_t))
#define MI_PADDING_WSIZE  ((MI_PADDING_SIZE + MI_INTPTR_SIZE - 1) / MI_INTPTR_SIZE)
#else
#define MI_PADDING_SIZE   0
#define MI_PADDING_WSIZE  0
#endif

// Size of the `pages_free_direct` fast-path lookup table below.
#define MI_PAGES_DIRECT   (MI_SMALL_WSIZE_MAX + MI_PADDING_WSIZE + 1)


// A heap owns a set of pages.
struct mi_heap_s {
    mi_tld_t*            tld;
    mi_page_t*           pages_free_direct[MI_PAGES_DIRECT];  // optimize: array where every entry points a page with possibly free blocks in the corresponding queue for that size.
    mi_page_queue_t      pages[MI_BIN_FULL + 1];              // queue of pages for each size class (or "bin")
    _Atomic(mi_block_t*) thread_delayed_free;
    mi_threadid_t        thread_id;                           // thread this heap belongs too
    mi_arena_id_t        arena_id;                            // arena id if the heap belongs to a specific arena (or 0)
    uintptr_t            cookie;                              // random cookie to verify pointers (see `_mi_ptr_cookie`)
    uintptr_t            keys[2];                             // two random keys used to encode the `thread_delayed_free` list
    mi_random_ctx_t      random;                              // random number context used for secure allocation
    size_t               page_count;                          // total number of pages in the `pages` queues.
    size_t               page_retired_min;                    // smallest retired index (retired pages are fully free, but still in the page queues)
    size_t               page_retired_max;                    // largest retired index into the `pages` array.
    mi_heap_t*           next;                                // list of heaps per thread
    bool                 no_reclaim;                          // `true` if this heap should not reclaim abandoned pages
};


// ------------------------------------------------------
// Debug
// ------------------------------------------------------

// Fill patterns used in debug mode to detect use of uninitialized,
// freed, or padding memory; each may be overridden at compile time.
#if !defined(MI_DEBUG_UNINIT)
#define MI_DEBUG_UNINIT   (0xD0)
#endif
#if !defined(MI_DEBUG_FREED)
#define MI_DEBUG_FREED    (0xDF)
#endif
#if !defined(MI_DEBUG_PADDING)
#define MI_DEBUG_PADDING  (0xDE)
#endif

#if (MI_DEBUG)
// use our own assertion to print without memory allocation
void _mi_assert_fail(const char* assertion, const char* fname, unsigned int line, const char* func );
#define mi_assert(expr)  ((expr) ? (void)0 : _mi_assert_fail(#expr,__FILE__,__LINE__,__func__))
#else
#define mi_assert(x)
#endif

#if (MI_DEBUG>1)
#define mi_assert_internal    mi_assert
#else
#define mi_assert_internal(x)
#endif

#if (MI_DEBUG>2)
#define mi_assert_expensive   mi_assert
#else
#define mi_assert_expensive(x)
#endif


// ------------------------------------------------------
// Statistics
// ------------------------------------------------------

// MI_STAT: 0 = off, >0 = aggregate stats; defaults to detailed (2) in debug builds.
#ifndef MI_STAT
#if (MI_DEBUG>0)
#define MI_STAT 2
#else
#define MI_STAT 0
#endif
#endif

// Running counts with peak tracking (allocated/freed pairs).
typedef struct mi_stat_count_s {
    int64_t allocated;
    int64_t freed;
    int64_t peak;
    int64_t current;
} mi_stat_count_t;

// Simple monotonically increasing event counters.
typedef struct mi_stat_counter_s {
    int64_t total;
    int64_t count;
} mi_stat_counter_t;

typedef struct mi_stats_s {
    mi_stat_count_t segments;
    mi_stat_count_t pages;
    mi_stat_count_t reserved;
    mi_stat_count_t committed;
    mi_stat_count_t reset;
    mi_stat_count_t purged;
    mi_stat_count_t page_committed;
    mi_stat_count_t segments_abandoned;
    mi_stat_count_t pages_abandoned;
    mi_stat_count_t threads;
    mi_stat_count_t normal;
    mi_stat_count_t huge;
    mi_stat_count_t large;
    mi_stat_count_t malloc;
    mi_stat_count_t segments_cache;
    mi_stat_counter_t pages_extended;
    mi_stat_counter_t mmap_calls;
    mi_stat_counter_t commit_calls;
    mi_stat_counter_t reset_calls;
    mi_stat_counter_t purge_calls;
    mi_stat_counter_t page_no_retire;
    mi_stat_counter_t searches;
    mi_stat_counter_t normal_count;
    mi_stat_counter_t huge_count;
    mi_stat_counter_t large_count;
#if MI_STAT>1
    mi_stat_count_t normal_bins[MI_BIN_HUGE+1];  // per-size-class counts, only in detailed mode
#endif
} mi_stats_t;


void _mi_stat_increase(mi_stat_count_t* stat, size_t amount);
void _mi_stat_decrease(mi_stat_count_t* stat, size_t amount);
void _mi_stat_counter_increase(mi_stat_counter_t* stat, size_t amount);

// The mi_stat_* macros compile to no-ops when statistics are disabled.
#if (MI_STAT)
#define mi_stat_increase(stat,amount)         _mi_stat_increase( &(stat), amount)
#define mi_stat_decrease(stat,amount)         _mi_stat_decrease( &(stat), amount)
#define mi_stat_counter_increase(stat,amount) _mi_stat_counter_increase( &(stat), amount)
#else
#define mi_stat_increase(stat,amount)         (void)0
#define mi_stat_decrease(stat,amount)         (void)0
#define mi_stat_counter_increase(stat,amount) (void)0
#endif

#define mi_heap_stat_counter_increase(heap,stat,amount)  mi_stat_counter_increase( (heap)->tld->stats.stat, amount)
#define mi_heap_stat_increase(heap,stat,amount)          mi_stat_increase( (heap)->tld->stats.stat, amount)
#define mi_heap_stat_decrease(heap,stat,amount)          mi_stat_decrease( (heap)->tld->stats.stat, amount)

// ------------------------------------------------------
// Thread Local data
// ------------------------------------------------------

// A "span" is an available range of slices. The span queues keep
// track of slice spans of at most the given `slice_count` (but more than the previous size class).
// Queue of free slice spans of (at most) a given size class.
typedef struct mi_span_queue_s {
    mi_slice_t* first;
    mi_slice_t* last;
    size_t      slice_count;
} mi_span_queue_t;

#define MI_SEGMENT_BIN_MAX (35)     // 35 == mi_segment_bin(MI_SLICES_PER_SEGMENT)

// OS thread local data
typedef struct mi_os_tld_s {
    size_t      region_idx;   // start point for next allocation
    mi_stats_t* stats;        // points to tld stats
} mi_os_tld_t;


// Segments thread local data
typedef struct mi_segments_tld_s {
    mi_span_queue_t spans[MI_SEGMENT_BIN_MAX+1];  // free slice spans inside segments
    size_t          count;        // current number of segments;
    size_t          peak_count;   // peak number of segments
    size_t          current_size; // current size of all segments
    size_t          peak_size;    // peak size of all segments
    mi_stats_t*     stats;        // points to tld stats
    mi_os_tld_t*    os;           // points to os stats
} mi_segments_tld_t;

// Thread local data
struct mi_tld_s {
    unsigned long long heartbeat;     // monotonic heartbeat count
    bool               recurse;       // true if deferred was called; used to prevent infinite recursion.
    mi_heap_t*         heap_backing;  // backing heap of this thread (cannot be deleted)
    mi_heap_t*         heaps;         // list of heaps in this thread (so we can abandon all when the thread terminates)
    mi_segments_tld_t  segments;      // segment tld
    mi_os_tld_t        os;            // os tld
    mi_stats_t         stats;         // statistics
};

#endif
luametatex-2.10.08/source/libraries/mimalloc/readme.md000066400000000000000000001307101442250314700226670ustar00rootroot00000000000000
[](https://dev.azure.com/Daan0324/mimalloc/_build?definitionId=1&_a=summary)

# mimalloc

&nbsp;

mimalloc (pronounced "me-malloc")
is a general purpose allocator with excellent [performance](#performance) characteristics.
Initially developed by Daan Leijen for the runtime systems of the
[Koka](https://koka-lang.github.io) and [Lean](https://github.com/leanprover/lean) languages.

Latest release tag: `v2.1.2` (2023-04-24).
Latest stable tag: `v1.8.2` (2023-04-24).
mimalloc is a drop-in replacement for `malloc` and can be used in other programs without code changes, for example, on dynamically linked ELF-based systems (Linux, BSD, etc.) you can use it as: ``` > LD_PRELOAD=/usr/lib/libmimalloc.so myprogram ``` It also includes a robust way to override the default allocator in [Windows](#override_on_windows). Notable aspects of the design include: - __small and consistent__: the library is about 8k LOC using simple and consistent data structures. This makes it very suitable to integrate and adapt in other projects. For runtime systems it provides hooks for a monotonic _heartbeat_ and deferred freeing (for bounded worst-case times with reference counting). Partly due to its simplicity, mimalloc has been ported to many systems (Windows, macOS, Linux, WASM, various BSD's, Haiku, MUSL, etc) and has excellent support for dynamic overriding. - __free list sharding__: instead of one big free list (per size class) we have many smaller lists per "mimalloc page" which reduces fragmentation and increases locality -- things that are allocated close in time get allocated close in memory. (A mimalloc page contains blocks of one size class and is usually 64KiB on a 64-bit system). - __free list multi-sharding__: the big idea! Not only do we shard the free list per mimalloc page, but for each page we have multiple free lists. In particular, there is one list for thread-local `free` operations, and another one for concurrent `free` operations. Free-ing from another thread can now be a single CAS without needing sophisticated coordination between threads. Since there will be thousands of separate free lists, contention is naturally distributed over the heap, and the chance of contending on a single location will be low -- this is quite similar to randomized algorithms like skip lists where adding a random oracle removes the need for a more complex algorithm. 
- __eager page purging__: when a "page" becomes empty (with increased chance due to free list sharding) the memory is marked to the OS as unused (reset or decommitted) reducing (real) memory pressure and fragmentation, especially in long running programs. - __secure__: _mimalloc_ can be built in secure mode, adding guard pages, randomized allocation, encrypted free lists, etc. to protect against various heap vulnerabilities. The performance penalty is usually around 10% on average over our benchmarks. - __first-class heaps__: efficiently create and use multiple heaps to allocate across different regions. A heap can be destroyed at once instead of deallocating each object separately. - __bounded__: it does not suffer from _blowup_ \[1\], has bounded worst-case allocation times (_wcat_) (upto OS primitives), bounded space overhead (~0.2% meta-data, with low internal fragmentation), and has no internal points of contention using only atomic operations. - __fast__: In our benchmarks (see [below](#performance)), _mimalloc_ outperforms other leading allocators (_jemalloc_, _tcmalloc_, _Hoard_, etc), and often uses less memory. A nice property is that it does consistently well over a wide range of benchmarks. There is also good huge OS page support for larger server programs. The [documentation](https://microsoft.github.io/mimalloc) gives a full overview of the API. You can read more on the design of _mimalloc_ in the [technical report](https://www.microsoft.com/en-us/research/publication/mimalloc-free-list-sharding-in-action) which also has detailed benchmark results. Enjoy! ### Branches * `master`: latest stable release (based on `dev-slice`). * `dev`: development branch for mimalloc v1. Use this branch for submitting PR's. * `dev-slice`: development branch for mimalloc v2. This branch is downstream of `dev`. 
### Releases Note: the `v2.x` version has a new algorithm for managing internal mimalloc pages that tends to reduce memory usage and fragmentation compared to mimalloc `v1.x` (especially for large workloads). Should otherwise have similar performance (see [below](#performance)); please report if you observe any significant performance regression. * 2023-04-24, `v1.8.2`, `v2.1.2`: Fixes build issues on freeBSD, musl, and C17 (UE 5.1.1). Reduce code size/complexity by removing regions and segment-cache's and only use arenas with improved memory purging -- this may improve memory usage as well for larger services. Renamed options for consistency. Improved Valgrind and ASAN checking. * 2023-04-03, `v1.8.1`, `v2.1.1`: Fixes build issues on some platforms. * 2023-03-29, `v1.8.0`, `v2.1.0`: Improved support for dynamic overriding on Windows 11. Improved tracing precision with [asan](#asan) and [Valgrind](#valgrind), and added Windows event tracing [ETW](#ETW) (contributed by Xinglong He). Created an OS abstraction layer to make it easier to port and separate platform dependent code (in `src/prim`). Fixed C++ STL compilation on older Microsoft C++ compilers, and various small bug fixes. * 2022-12-23, `v1.7.9`, `v2.0.9`: Supports building with [asan](#asan) and improved [Valgrind](#valgrind) support. Support arbitrary large alignments (in particular for `std::pmr` pools). Added C++ STL allocators attached to a specific heap (thanks @vmarkovtsev). Heap walks now visit all objects (including huge objects). Support Windows nano server containers (by Johannes Schindelin,@dscho). Various small bug fixes. * 2022-11-03, `v1.7.7`, `v2.0.7`: Initial support for [Valgrind](#valgrind) for leak testing and heap block overflow detection. Initial support for attaching heaps to a specific memory area (only in v2). 
Fix `realloc` behavior for zero size blocks, remove restriction to integral multiple of the alignment in `alloc_align`, improved aligned allocation performance, reduced contention with many threads on few processors (thank you @dposluns!), vs2022 support, support `pkg-config`, . * 2022-04-14, `v1.7.6`, `v2.0.6`: fix fallback path for aligned OS allocation on Windows, improve Windows aligned allocation even when compiling with older SDK's, fix dynamic overriding on macOS Monterey, fix MSVC C++ dynamic overriding, fix warnings under Clang 14, improve performance if many OS threads are created and destroyed, fix statistics for large object allocations, using MIMALLOC_VERBOSE=1 has no maximum on the number of error messages, various small fixes. * 2022-02-14, `v1.7.5`, `v2.0.5` (alpha): fix malloc override on Windows 11, fix compilation with musl, potentially reduced committed memory, add `bin/minject` for Windows, improved wasm support, faster aligned allocation, various small fixes. * [Older release notes](#older-release-notes) Special thanks to: * [David Carlier](https://devnexen.blogspot.com/) (@devnexen) for his many contributions, and making mimalloc work better on many less common operating systems, like Haiku, Dragonfly, etc. * Mary Feofanova (@mary3000), Evgeniy Moiseenko, and Manuel Pöter (@mpoeter) for making mimalloc TSAN checkable, and finding memory model bugs using the [genMC] model checker. * Weipeng Liu (@pongba), Zhuowei Li, Junhua Wang, and Jakub Szymanski, for their early support of mimalloc and deployment at large scale services, leading to many improvements in the mimalloc algorithms for large workloads. * Jason Gibson (@jasongibson) for exhaustive testing on large scale workloads and server environments, and finding complex bugs in (early versions of) `mimalloc`. * Manuel Pöter (@mpoeter) and Sam Gross(@colesbury) for finding an ABA concurrency issue in abandoned segment reclamation. 
Sam also created the [no GIL](https://github.com/colesbury/nogil) Python fork which uses mimalloc internally. [genMC]: https://plv.mpi-sws.org/genmc/ ### Usage mimalloc is used in various large scale low-latency services and programs, for example: # Building ## Windows Open `ide/vs2019/mimalloc.sln` in Visual Studio 2019 and build. The `mimalloc` project builds a static library (in `out/msvc-x64`), while the `mimalloc-override` project builds a DLL for overriding malloc in the entire program. ## macOS, Linux, BSD, etc. We use [`cmake`](https://cmake.org)1 as the build system: ``` > mkdir -p out/release > cd out/release > cmake ../.. > make ``` This builds the library as a shared (dynamic) library (`.so` or `.dylib`), a static library (`.a`), and as a single object file (`.o`). `> sudo make install` (install the library and header files in `/usr/local/lib` and `/usr/local/include`) You can build the debug version which does many internal checks and maintains detailed statistics as: ``` > mkdir -p out/debug > cd out/debug > cmake -DCMAKE_BUILD_TYPE=Debug ../.. > make ``` This will name the shared library as `libmimalloc-debug.so`. Finally, you can build a _secure_ version that uses guard pages, encrypted free lists, etc., as: ``` > mkdir -p out/secure > cd out/secure > cmake -DMI_SECURE=ON ../.. > make ``` This will name the shared library as `libmimalloc-secure.so`. Use `ccmake`2 instead of `cmake` to see and customize all the available build options. Notes: 1. Install CMake: `sudo apt-get install cmake` 2. Install CCMake: `sudo apt-get install cmake-curses-gui` ## Single source You can also directly build the single `src/static.c` file as part of your project without needing `cmake` at all. Make sure to also add the mimalloc `include` directory to the include path. # Using the library The preferred usage is including ``, linking with the shared- or static library, and using the `mi_malloc` API exclusively for allocation. 
For example, ``` > gcc -o myprogram -lmimalloc myfile.c ``` mimalloc uses only safe OS calls (`mmap` and `VirtualAlloc`) and can co-exist with other allocators linked to the same program. If you use `cmake`, you can simply use: ``` find_package(mimalloc 1.4 REQUIRED) ``` in your `CMakeLists.txt` to find a locally installed mimalloc. Then use either: ``` target_link_libraries(myapp PUBLIC mimalloc) ``` to link with the shared (dynamic) library, or: ``` target_link_libraries(myapp PUBLIC mimalloc-static) ``` to link with the static library. See `test\CMakeLists.txt` for an example. For best performance in C++ programs, it is also recommended to override the global `new` and `delete` operators. For convenience, mimalloc provides [`mimalloc-new-delete.h`](https://github.com/microsoft/mimalloc/blob/master/include/mimalloc-new-delete.h) which does this for you -- just include it in a single(!) source file in your project. In C++, mimalloc also provides the `mi_stl_allocator` struct which implements the `std::allocator` interface. 
You can pass environment variables to print verbose messages (`MIMALLOC_VERBOSE=1`) and statistics (`MIMALLOC_SHOW_STATS=1`) (in the debug version): ``` > env MIMALLOC_SHOW_STATS=1 ./cfrac 175451865205073170563711388363 175451865205073170563711388363 = 374456281610909315237213 * 468551 heap stats: peak total freed unit normal 2: 16.4 kb 17.5 mb 17.5 mb 16 b ok normal 3: 16.3 kb 15.2 mb 15.2 mb 24 b ok normal 4: 64 b 4.6 kb 4.6 kb 32 b ok normal 5: 80 b 118.4 kb 118.4 kb 40 b ok normal 6: 48 b 48 b 48 b 48 b ok normal 17: 960 b 960 b 960 b 320 b ok heap stats: peak total freed unit normal: 33.9 kb 32.8 mb 32.8 mb 1 b ok huge: 0 b 0 b 0 b 1 b ok total: 33.9 kb 32.8 mb 32.8 mb 1 b ok malloc requested: 32.8 mb committed: 58.2 kb 58.2 kb 58.2 kb 1 b ok reserved: 2.0 mb 2.0 mb 2.0 mb 1 b ok reset: 0 b 0 b 0 b 1 b ok segments: 1 1 1 -abandoned: 0 pages: 6 6 6 -abandoned: 0 mmaps: 3 mmap fast: 0 mmap slow: 1 threads: 0 elapsed: 2.022s process: user: 1.781s, system: 0.016s, faults: 756, reclaims: 0, rss: 2.7 mb ``` The above model of using the `mi_` prefixed API is not always possible though in existing programs that already use the standard malloc interface, and another option is to override the standard malloc interface completely and redirect all calls to the _mimalloc_ library instead . ## Environment Options You can set further options either programmatically (using [`mi_option_set`](https://microsoft.github.io/mimalloc/group__options.html)), or via environment variables: - `MIMALLOC_SHOW_STATS=1`: show statistics when the program terminates. - `MIMALLOC_VERBOSE=1`: show verbose messages. - `MIMALLOC_SHOW_ERRORS=1`: show error and warning messages. Advanced options: - `MIMALLOC_PURGE_DELAY=N`: the delay in `N` milli-seconds (by default `10`) after which mimalloc will purge OS pages that are not in use. This signals to the OS that the underlying physical memory can be reused which can reduce memory fragmentation especially in long running (server) programs. 
Setting `N` to `0` purges immediately when a page becomes unused which can improve memory usage but also decreases performance. Setting `N` to a higher value like `100` can improve performance (sometimes by a lot) at the cost of potentially using more memory at times. Setting it to `-1` disables purging completely. - `MIMALLOC_ARENA_EAGER_COMMIT=1`: turns on eager commit for the large arenas (usually 1GiB) from which mimalloc allocates segments and pages. This is by default only enabled on overcommit systems (e.g. Linux) but enabling it explicitly on other systems (like Windows or macOS) may improve performance. Note that eager commit only increases the commit but not the actual the peak resident set (rss) so it is generally ok to enable this. Further options for large workloads and services: - `MIMALLOC_USE_NUMA_NODES=N`: pretend there are at most `N` NUMA nodes. If not set, the actual NUMA nodes are detected at runtime. Setting `N` to 1 may avoid problems in some virtual environments. Also, setting it to a lower number than the actual NUMA nodes is fine and will only cause threads to potentially allocate more memory across actual NUMA nodes (but this can happen in any case as NUMA local allocation is always a best effort but not guaranteed). - `MIMALLOC_ALLOW_LARGE_OS_PAGES=1`: use large OS pages (2MiB) when available; for some workloads this can significantly improve performance. Use `MIMALLOC_VERBOSE` to check if the large OS pages are enabled -- usually one needs to explicitly allow large OS pages (as on [Windows][windows-huge] and [Linux][linux-huge]). However, sometimes the OS is very slow to reserve contiguous physical memory for large OS pages so use with care on systems that can have fragmented memory (for that reason, we generally recommend to use `MIMALLOC_RESERVE_HUGE_OS_PAGES` instead whenever possible). - `MIMALLOC_RESERVE_HUGE_OS_PAGES=N`: where `N` is the number of 1GiB _huge_ OS pages. 
This reserves the huge pages at startup and sometimes this can give a large (latency) performance improvement on big workloads. Usually it is better to not use `MIMALLOC_ALLOW_LARGE_OS_PAGES=1` in combination with this setting. Just like large OS pages, use with care as reserving contiguous physical memory can take a long time when memory is fragmented (but reserving the huge pages is done at startup only once). Note that we usually need to explicitly enable huge OS pages (as on [Windows][windows-huge] and [Linux][linux-huge])). With huge OS pages, it may be beneficial to set the setting `MIMALLOC_EAGER_COMMIT_DELAY=N` (`N` is 1 by default) to delay the initial `N` segments (of 4MiB) of a thread to not allocate in the huge OS pages; this prevents threads that are short lived and allocate just a little to take up space in the huge OS page area (which cannot be purged). The huge pages are usually allocated evenly among NUMA nodes. We can use `MIMALLOC_RESERVE_HUGE_OS_PAGES_AT=N` where `N` is the numa node (starting at 0) to allocate all the huge pages at a specific numa node instead. Use caution when using `fork` in combination with either large or huge OS pages: on a fork, the OS uses copy-on-write for all pages in the original process including the huge OS pages. When any memory is now written in that area, the OS will copy the entire 1GiB huge page (or 2MiB large page) which can cause the memory usage to grow in large increments. 
[linux-huge]: https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/5/html/tuning_and_optimizing_red_hat_enterprise_linux_for_oracle_9i_and_10g_databases/sect-oracle_9i_and_10g_tuning_guide-large_memory_optimization_big_pages_and_huge_pages-configuring_huge_pages_in_red_hat_enterprise_linux_4_or_5 [windows-huge]: https://docs.microsoft.com/en-us/sql/database-engine/configure-windows/enable-the-lock-pages-in-memory-option-windows?view=sql-server-2017 ## Secure Mode _mimalloc_ can be built in secure mode by using the `-DMI_SECURE=ON` flags in `cmake`. This build enables various mitigations to make mimalloc more robust against exploits. In particular: - All internal mimalloc pages are surrounded by guard pages and the heap metadata is behind a guard page as well (so a buffer overflow exploit cannot reach into the metadata). - All free list pointers are [encoded](https://github.com/microsoft/mimalloc/blob/783e3377f79ee82af43a0793910a9f2d01ac7863/include/mimalloc-internal.h#L396) with per-page keys which is used both to prevent overwrites with a known pointer, as well as to detect heap corruption. - Double free's are detected (and ignored). - The free lists are initialized in a random order and allocation randomly chooses between extension and reuse within a page to mitigate against attacks that rely on a predictable allocation order. Similarly, the larger heap blocks allocated by mimalloc from the OS are also address randomized. As always, evaluate with care as part of an overall security strategy as all of the above are mitigations but not guarantees. ## Debug Mode When _mimalloc_ is built using debug mode, various checks are done at runtime to catch development errors. - Statistics are maintained in detail for each object size. They can be shown using `MIMALLOC_SHOW_STATS=1` at runtime. - All objects have padding at the end to detect (byte precise) heap block overflows. - Double free's, and freeing invalid heap pointers are detected. 
- Corrupted free-lists and some forms of use-after-free are detected. # Overriding Standard Malloc Overriding the standard `malloc` (and `new`) can be done either _dynamically_ or _statically_. ## Dynamic override This is the recommended way to override the standard malloc interface. ### Dynamic Override on Linux, BSD On these ELF-based systems we preload the mimalloc shared library so all calls to the standard `malloc` interface are resolved to the _mimalloc_ library. ``` > env LD_PRELOAD=/usr/lib/libmimalloc.so myprogram ``` You can set extra environment variables to check that mimalloc is running, like: ``` > env MIMALLOC_VERBOSE=1 LD_PRELOAD=/usr/lib/libmimalloc.so myprogram ``` or run with the debug version to get detailed statistics: ``` > env MIMALLOC_SHOW_STATS=1 LD_PRELOAD=/usr/lib/libmimalloc-debug.so myprogram ``` ### Dynamic Override on MacOS On macOS we can also preload the mimalloc shared library so all calls to the standard `malloc` interface are resolved to the _mimalloc_ library. ``` > env DYLD_INSERT_LIBRARIES=/usr/lib/libmimalloc.dylib myprogram ``` Note that certain security restrictions may apply when doing this from the [shell](https://stackoverflow.com/questions/43941322/dyld-insert-libraries-ignored-when-calling-application-through-bash). ### Dynamic Override on Windows Overriding on Windows is robust and has the particular advantage to be able to redirect all malloc/free calls that go through the (dynamic) C runtime allocator, including those from other DLL's or libraries. The overriding on Windows requires that you link your program explicitly with the mimalloc DLL and use the C-runtime library as a DLL (using the `/MD` or `/MDd` switch). Also, the `mimalloc-redirect.dll` (or `mimalloc-redirect32.dll`) must be put in the same folder as the main `mimalloc-override.dll` at runtime (as it is a dependency). The redirection DLL ensures that all calls to the C runtime malloc API get redirected to mimalloc (in `mimalloc-override.dll`). 
To ensure the mimalloc DLL is loaded at run-time it is easiest to insert some call to the mimalloc API in the `main` function, like `mi_version()` (or use the `/INCLUDE:mi_version` switch on the linker). See the `mimalloc-override-test` project for an example on how to use this. For best performance on Windows with C++, it is also recommended to also override the `new`/`delete` operations (by including [`mimalloc-new-delete.h`](https://github.com/microsoft/mimalloc/blob/master/include/mimalloc-new-delete.h) a single(!) source file in your project). The environment variable `MIMALLOC_DISABLE_REDIRECT=1` can be used to disable dynamic overriding at run-time. Use `MIMALLOC_VERBOSE=1` to check if mimalloc was successfully redirected. (Note: in principle, it is possible to even patch existing executables without any recompilation if they are linked with the dynamic C runtime (`ucrtbase.dll`) -- just put the `mimalloc-override.dll` into the import table (and put `mimalloc-redirect.dll` in the same folder) Such patching can be done for example with [CFF Explorer](https://ntcore.com/?page_id=388)). ## Static override On Unix-like systems, you can also statically link with _mimalloc_ to override the standard malloc interface. The recommended way is to link the final program with the _mimalloc_ single object file (`mimalloc.o`). We use an object file instead of a library file as linkers give preference to that over archives to resolve symbols. To ensure that the standard malloc interface resolves to the _mimalloc_ library, link it as the first object file. For example: ``` > gcc -o myprogram mimalloc.o myfile1.c ... ``` Another way to override statically that works on all platforms, is to link statically to mimalloc (as shown in the introduction) and include a header file in each source file that re-defines `malloc` etc. to `mi_malloc`. This is provided by [`mimalloc-override.h`](https://github.com/microsoft/mimalloc/blob/master/include/mimalloc-override.h). 
This only works reliably though if all sources are under your control or otherwise mixing of pointers from different heaps may occur! ## Tools Generally, we recommend using the standard allocator with memory tracking tools, but mimalloc can also be build to support the [address sanitizer][asan] or the excellent [Valgrind] tool. Moreover, it can be build to support Windows event tracing ([ETW]). This has a small performance overhead but does allow detecting memory leaks and byte-precise buffer overflows directly on final executables. See also the `test/test-wrong.c` file to test with various tools. ### Valgrind To build with [valgrind] support, use the `MI_TRACK_VALGRIND=ON` cmake option: ``` > cmake ../.. -DMI_TRACK_VALGRIND=ON ``` This can also be combined with secure mode or debug mode. You can then run your programs directly under valgrind: ``` > valgrind ``` If you rely on overriding `malloc`/`free` by mimalloc (instead of using the `mi_malloc`/`mi_free` API directly), you also need to tell `valgrind` to not intercept those calls itself, and use: ``` > MIMALLOC_SHOW_STATS=1 valgrind --soname-synonyms=somalloc=*mimalloc* -- ``` By setting the `MIMALLOC_SHOW_STATS` environment variable you can check that mimalloc is indeed used and not the standard allocator. Even though the [Valgrind option][valgrind-soname] is called `--soname-synonyms`, this also works when overriding with a static library or object file. Unfortunately, it is not possible to dynamically override mimalloc using `LD_PRELOAD` together with `valgrind`. See also the `test/test-wrong.c` file to test with `valgrind`. Valgrind support is in its initial development -- please report any issues. [Valgrind]: https://valgrind.org/ [valgrind-soname]: https://valgrind.org/docs/manual/manual-core.html#opt.soname-synonyms ### ASAN To build with the address sanitizer, use the `-DMI_TRACK_ASAN=ON` cmake option: ``` > cmake ../.. -DMI_TRACK_ASAN=ON ``` This can also be combined with secure mode or debug mode. 
You can then run your programs as: ``` > ASAN_OPTIONS=verbosity=1 ``` When you link a program with an address sanitizer build of mimalloc, you should generally compile that program too with the address sanitizer enabled. For example, assuming you build mimalloc in `out/debug`: ``` clang -g -o test-wrong -Iinclude test/test-wrong.c out/debug/libmimalloc-asan-debug.a -lpthread -fsanitize=address -fsanitize-recover=address ``` Since the address sanitizer redirects the standard allocation functions, on some platforms (macOSX for example) it is required to compile mimalloc with `-DMI_OVERRIDE=OFF`. Address sanitizer support is in its initial development -- please report any issues. [asan]: https://github.com/google/sanitizers/wiki/AddressSanitizer ### ETW Event tracing for Windows ([ETW]) provides a high performance way to capture all allocations through mimalloc and analyze them later. To build with ETW support, use the `-DMI_TRACK_ETW=ON` cmake option. You can then capture an allocation trace using the Windows performance recorder (WPR), using the `src/prim/windows/etw-mimalloc.wprp` profile. In an admin prompt, you can use: ``` > wpr -start src\prim\windows\etw-mimalloc.wprp -filemode > > wpr -stop .etl ``` and then open `.etl` in the Windows Performance Analyzer (WPA), or use a tool like [TraceControl] that is specialized for analyzing mimalloc traces. [ETW]: https://learn.microsoft.com/en-us/windows-hardware/test/wpt/event-tracing-for-windows [TraceControl]: https://github.com/xinglonghe/TraceControl # Performance Last update: 2021-01-30 We tested _mimalloc_ against many other top allocators over a wide range of benchmarks, ranging from various real world programs to synthetic benchmarks that see how the allocator behaves under more extreme circumstances. In our benchmark suite, _mimalloc_ outperforms other leading allocators (_jemalloc_, _tcmalloc_, _Hoard_, etc), and has a similar memory footprint. 
A nice property is that it does consistently well over the wide range of benchmarks. General memory allocators are interesting as there exists no algorithm that is optimal -- for a given allocator one can usually construct a workload where it does not do so well. The goal is thus to find an allocation strategy that performs well over a wide range of benchmarks without suffering from (too much) underperformance in less common situations. As always, interpret these results with care since some benchmarks test synthetic or uncommon situations that may never apply to your workloads. For example, most allocators do not do well on `xmalloc-testN` but that includes even the best industrial allocators like _jemalloc_ and _tcmalloc_ that are used in some of the world's largest systems (like Chrome or FreeBSD). Also, the benchmarks here do not measure the behaviour on very large and long-running server workloads, or worst-case latencies of allocation. Much work has gone into `mimalloc` to work well on such workloads (for example, to reduce virtual memory fragmentation on long-running services) but such optimizations are not always reflected in the current benchmark suite. We show here only an overview -- for more specific details and further benchmarks we refer to the [technical report](https://www.microsoft.com/en-us/research/publication/mimalloc-free-list-sharding-in-action). The benchmark suite is automated and available separately as [mimalloc-bench](https://github.com/daanx/mimalloc-bench). ## Benchmark Results on a 16-core AMD 5950x (Zen3) Testing on the 16-core AMD 5950x processor at 3.4Ghz (4.9Ghz boost), with with 32GiB memory at 3600Mhz, running Ubuntu 20.04 with glibc 2.31 and GCC 9.3.0. We measure three versions of _mimalloc_: the main version `mi` (tag:v1.7.0), the new v2.0 beta version as `xmi` (tag:v2.0.0), and the main version in secure mode as `smi` (tag:v1.7.0). 
The other allocators are Google's [_tcmalloc_](https://github.com/gperftools/gperftools) (`tc`, tag:gperftools-2.8.1) used in Chrome, Facebook's [_jemalloc_](https://github.com/jemalloc/jemalloc) (`je`, tag:5.2.1) by Jason Evans used in Firefox and FreeBSD, the Intel thread building blocks [allocator](https://github.com/intel/tbb) (`tbb`, tag:v2020.3), [rpmalloc](https://github.com/mjansson/rpmalloc) (`rp`,tag:1.4.1) by Mattias Jansson, the original scalable [_Hoard_](https://github.com/emeryberger/Hoard) (git:d880f72) allocator by Emery Berger \[1], the memory compacting [_Mesh_](https://github.com/plasma-umass/Mesh) (git:67ff31a) allocator by Bobby Powers _et al_ \[8], and finally the default system allocator (`glibc`, 2.31) (based on _PtMalloc2_). Any benchmarks ending in `N` run on all 32 logical cores in parallel. Results are averaged over 10 runs and reported relative to mimalloc (where 1.2 means it took 1.2× longer to run). The legend also contains the _overall relative score_ between the allocators where 100 points is the maximum if an allocator is fastest on all benchmarks. The single threaded _cfrac_ benchmark by Dave Barrett is an implementation of continued fraction factorization which uses many small short-lived allocations. All allocators do well on such common usage, where _mimalloc_ is just a tad faster than _tcmalloc_ and _jemalloc_. The _leanN_ program is interesting as a large realistic and concurrent workload of the [Lean](https://github.com/leanprover/lean) theorem prover compiling its own standard library, and there is a 13% speedup over _tcmalloc_. This is quite significant: if Lean spends 20% of its time in the allocator that means that _mimalloc_ is 1.6× faster than _tcmalloc_ here. (This is surprising as that is not measured in a pure allocation benchmark like _alloc-test_. 
We conjecture that we see this outsized improvement here because _mimalloc_ has better locality in the allocation which improves performance for the *other* computations in a program as well). The single threaded _redis_ benchmark again show that most allocators do well on such workloads. The _larsonN_ server benchmark by Larson and Krishnan \[2] allocates and frees between threads. They observed this behavior (which they call _bleeding_) in actual server applications, and the benchmark simulates this. Here, _mimalloc_ is quite a bit faster than _tcmalloc_ and _jemalloc_ probably due to the object migration between different threads. The _mstressN_ workload performs many allocations and re-allocations, and migrates objects between threads (as in _larsonN_). However, it also creates and destroys the _N_ worker threads a few times keeping some objects alive beyond the life time of the allocating thread. We observed this behavior in many larger server applications. The [_rptestN_](https://github.com/mjansson/rpmalloc-benchmark) benchmark by Mattias Jansson is a allocator test originally designed for _rpmalloc_, and tries to simulate realistic allocation patterns over multiple threads. Here the differences between allocators become more apparent. The second benchmark set tests specific aspects of the allocators and shows even more extreme differences between them. The _alloc-test_, by [OLogN Technologies AG](http://ithare.com/testing-memory-allocators-ptmalloc2-tcmalloc-hoard-jemalloc-while-trying-to-simulate-real-world-loads/), is a very allocation intensive benchmark doing millions of allocations in various size classes. The test is scaled such that when an allocator performs almost identically on _alloc-test1_ as _alloc-testN_ it means that it scales linearly. The _sh6bench_ and _sh8bench_ benchmarks are developed by [MicroQuill](http://www.microquill.com/) as part of SmartHeap. 
In _sh6bench_ _mimalloc_ does much better than the others (more than 2.5× faster than _jemalloc_). We cannot explain this well but believe it is caused in part by the "reverse" free-ing pattern in _sh6bench_. The _sh8bench_ is a variation with object migration between threads; whereas _tcmalloc_ did well on _sh6bench_, the addition of object migration causes it to be 10× slower than before. The _xmalloc-testN_ benchmark by Lever and Boreham \[5] and Christian Eder, simulates an asymmetric workload where some threads only allocate, and others only free -- they observed this pattern in larger server applications. Here we see that the _mimalloc_ technique of having non-contended sharded thread free lists pays off as it outperforms others by a very large margin. Only _rpmalloc_, _tbb_, and _glibc_ also scale well on this benchmark. The _cache-scratch_ benchmark by Emery Berger \[1], and introduced with the Hoard allocator to test for _passive-false_ sharing of cache lines. With a single thread they all perform the same, but when running with multiple threads the potential allocator induced false sharing of the cache lines can cause large run-time differences. Crundal \[6] describes in detail why the false cache line sharing occurs in the _tcmalloc_ design, and also discusses how this can be avoided with some small implementation changes. Only the _tbb_, _rpmalloc_ and _mesh_ allocators also avoid the cache line sharing completely, while _Hoard_ and _glibc_ seem to mitigate the effects. Kukanov and Voss \[7] describe in detail how the design of _tbb_ avoids the false cache line sharing. ## On a 36-core Intel Xeon For completeness, here are the results on a big Amazon [c5.18xlarge](https://aws.amazon.com/ec2/instance-types/#Compute_Optimized) instance consisting of a 2×18-core Intel Xeon (Cascade Lake) at 3.4GHz (boost 3.5GHz) with 144GiB ECC memory, running Ubuntu 20.04 with glibc 2.31, GCC 9.3.0, and Clang 10.0.0. 
This time, the mimalloc allocators (mi, xmi, and smi) were compiled with the Clang compiler instead of GCC. The results are similar to the AMD results but it is interesting to see the differences in the _larsonN_, _mstressN_, and _xmalloc-testN_ benchmarks. ## Peak Working Set The following figure shows the peak working set (rss) of the allocators on the benchmarks (on the c5.18xlarge instance). Note that the _xmalloc-testN_ memory usage should be disregarded as it allocates more the faster the program runs. Similarly, memory usage of _larsonN_, _mstressN_, _rptestN_ and _sh8bench_ can vary depending on scheduling and speed. Nevertheless, we hope to improve the memory usage on _mstressN_ and _rptestN_ (just as _cfrac_, _larsonN_ and _sh8bench_ have a small working set which skews the results). # References - \[1] Emery D. Berger, Kathryn S. McKinley, Robert D. Blumofe, and Paul R. Wilson. _Hoard: A Scalable Memory Allocator for Multithreaded Applications_ the Ninth International Conference on Architectural Support for Programming Languages and Operating Systems (ASPLOS-IX). Cambridge, MA, November 2000. [pdf](http://www.cs.utexas.edu/users/mckinley/papers/asplos-2000.pdf) - \[2] P. Larson and M. Krishnan. _Memory allocation for long-running server applications_. In ISMM, Vancouver, B.C., Canada, 1998. [pdf](http://citeseer.ist.psu.edu/viewdoc/download?doi=10.1.1.45.1947&rep=rep1&type=pdf) - \[3] D. Grunwald, B. Zorn, and R. Henderson. _Improving the cache locality of memory allocation_. In R. Cartwright, editor, Proceedings of the Conference on Programming Language Design and Implementation, pages 177–186, New York, NY, USA, June 1993. [pdf](http://citeseer.ist.psu.edu/viewdoc/download?doi=10.1.1.43.6621&rep=rep1&type=pdf) - \[4] J. Barnes and P. Hut. _A hierarchical O(n*log(n)) force-calculation algorithm_. Nature, 324:446-449, 1986. - \[5] C. Lever, and D. Boreham. 
_Malloc() Performance in a Multithreaded Linux Environment._ In USENIX Annual Technical Conference, Freenix Session. San Diego, CA. Jun. 2000. Available at - \[6] Timothy Crundal. _Reducing Active-False Sharing in TCMalloc_. 2016. CS16S1 project at the Australian National University. [pdf](http://courses.cecs.anu.edu.au/courses/CSPROJECTS/16S1/Reports/Timothy_Crundal_Report.pdf) - \[7] Alexey Kukanov, and Michael J Voss. _The Foundations for Scalable Multi-Core Software in Intel Threading Building Blocks._ Intel Technology Journal 11 (4). 2007 - \[8] Bobby Powers, David Tench, Emery D. Berger, and Andrew McGregor. _Mesh: Compacting Memory Management for C/C++_ In Proceedings of the 40th ACM SIGPLAN Conference on Programming Language Design and Implementation (PLDI'19), June 2019, pages 333-–346. # Contributing This project welcomes contributions and suggestions. Most contributions require you to agree to a Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us the rights to use your contribution. For details, visit https://cla.microsoft.com. When you submit a pull request, a CLA-bot will automatically determine whether you need to provide a CLA and decorate the PR appropriately (e.g., label, comment). Simply follow the instructions provided by the bot. You will only need to do this once across all repos using our CLA. # Older Release Notes * 2021-11-14, `v1.7.3`, `v2.0.3` (beta): improved WASM support, improved macOS support and performance (including M1), improved performance for v2 for large objects, Python integration improvements, more standard installation directories, various small fixes. * 2021-06-17, `v1.7.2`, `v2.0.2` (beta): support M1, better installation layout on Linux, fix thread_id on Android, prefer 2-6TiB area for aligned allocation to work better on pre-windows 8, various small fixes. 
* 2021-04-06, `v1.7.1`, `v2.0.1` (beta): fix bug in arena allocation for huge pages, improved aslr on large allocations, initial M1 support (still experimental). * 2021-01-31, `v2.0.0`: beta release 2.0: new slice algorithm for managing internal mimalloc pages. * 2021-01-31, `v1.7.0`: stable release 1.7: support explicit user provided memory regions, more precise statistics, improve macOS overriding, initial support for Apple M1, improved DragonFly support, faster memcpy on Windows, various small fixes. * 2020-09-24, `v1.6.7`: stable release 1.6: using standard C atomics, passing tsan testing, improved handling of failing to commit on Windows, add [`mi_process_info`](https://github.com/microsoft/mimalloc/blob/master/include/mimalloc.h#L156) api call. * 2020-08-06, `v1.6.4`: stable release 1.6: improved error recovery in low-memory situations, support for IllumOS and Haiku, NUMA support for Vista/XP, improved NUMA detection for AMD Ryzen, ubsan support. * 2020-05-05, `v1.6.3`: stable release 1.6: improved behavior in out-of-memory situations, improved malloc zones on macOS, build PIC static libraries by default, add option to abort on out-of-memory, line buffered statistics. * 2020-04-20, `v1.6.2`: stable release 1.6: fix compilation on Android, MingW, Raspberry, and Conda, stability fix for Windows 7, fix multiple mimalloc instances in one executable, fix `strnlen` overload, fix aligned debug padding. * 2020-02-17, `v1.6.1`: stable release 1.6: minor updates (build with clang-cl, fix alignment issue for small objects). * 2020-02-09, `v1.6.0`: stable release 1.6: fixed potential memory leak, improved overriding and thread local support on FreeBSD, NetBSD, DragonFly, and macOSX. New byte-precise heap block overflow detection in debug mode (besides the double-free detection and free-list corruption detection). Add `nodiscard` attribute to most allocation functions. Enable `MIMALLOC_PAGE_RESET` by default. 
New reclamation strategy for abandoned heap pages for better memory footprint. * 2020-02-09, `v1.5.0`: stable release 1.5: improved free performance, small bug fixes. * 2020-01-22, `v1.4.0`: stable release 1.4: improved performance for delayed OS page reset, more eager concurrent free, addition of STL allocator, fixed potential memory leak. * 2020-01-15, `v1.3.0`: stable release 1.3: bug fixes, improved randomness and [stronger free list encoding](https://github.com/microsoft/mimalloc/blob/783e3377f79ee82af43a0793910a9f2d01ac7863/include/mimalloc-internal.h#L396) in secure mode. * 2019-12-22, `v1.2.2`: stable release 1.2: minor updates. * 2019-11-22, `v1.2.0`: stable release 1.2: bug fixes, improved secure mode (free list corruption checks, double free mitigation). Improved dynamic overriding on Windows. * 2019-10-07, `v1.1.0`: stable release 1.1. * 2019-09-01, `v1.0.8`: pre-release 8: more robust windows dynamic overriding, initial huge page support. * 2019-08-10, `v1.0.6`: pre-release 6: various performance improvements. luametatex-2.10.08/source/libraries/mimalloc/src/000077500000000000000000000000001442250314700216755ustar00rootroot00000000000000luametatex-2.10.08/source/libraries/mimalloc/src/alloc-aligned.c000066400000000000000000000344201442250314700245370ustar00rootroot00000000000000/* ---------------------------------------------------------------------------- Copyright (c) 2018-2021, Microsoft Research, Daan Leijen This is free software; you can redistribute it and/or modify it under the terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. 
-----------------------------------------------------------------------------*/ #include "mimalloc.h" #include "mimalloc/internal.h" #include "mimalloc/prim.h" // mi_prim_get_default_heap #include // memset // ------------------------------------------------------ // Aligned Allocation // ------------------------------------------------------ // Fallback primitive aligned allocation -- split out for better codegen static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_fallback(mi_heap_t* const heap, const size_t size, const size_t alignment, const size_t offset, const bool zero) mi_attr_noexcept { mi_assert_internal(size <= PTRDIFF_MAX); mi_assert_internal(alignment != 0 && _mi_is_power_of_two(alignment)); const uintptr_t align_mask = alignment - 1; // for any x, `(x & align_mask) == (x % alignment)` const size_t padsize = size + MI_PADDING_SIZE; // use regular allocation if it is guaranteed to fit the alignment constraints if (offset==0 && alignment<=padsize && padsize<=MI_MAX_ALIGN_GUARANTEE && (padsize&align_mask)==0) { void* p = _mi_heap_malloc_zero(heap, size, zero); mi_assert_internal(p == NULL || ((uintptr_t)p % alignment) == 0); return p; } void* p; size_t oversize; if mi_unlikely(alignment > MI_ALIGNMENT_MAX) { // use OS allocation for very large alignment and allocate inside a huge page (dedicated segment with 1 page) // This can support alignments >= MI_SEGMENT_SIZE by ensuring the object can be aligned at a point in the // first (and single) page such that the segment info is `MI_SEGMENT_SIZE` bytes before it (so it can be found by aligning the pointer down) if mi_unlikely(offset != 0) { // todo: cannot support offset alignment for very large alignments yet #if MI_DEBUG > 0 _mi_error_message(EOVERFLOW, "aligned allocation with a very large alignment cannot be used with an alignment offset (size %zu, alignment %zu, offset %zu)\n", size, alignment, offset); #endif return NULL; } oversize = (size <= MI_SMALL_SIZE_MAX ? 
MI_SMALL_SIZE_MAX + 1 /* ensure we use generic malloc path */ : size); p = _mi_heap_malloc_zero_ex(heap, oversize, false, alignment); // the page block size should be large enough to align in the single huge page block // zero afterwards as only the area from the aligned_p may be committed! if (p == NULL) return NULL; } else { // otherwise over-allocate oversize = size + alignment - 1; p = _mi_heap_malloc_zero(heap, oversize, zero); if (p == NULL) return NULL; } // .. and align within the allocation const uintptr_t poffset = ((uintptr_t)p + offset) & align_mask; const uintptr_t adjust = (poffset == 0 ? 0 : alignment - poffset); mi_assert_internal(adjust < alignment); void* aligned_p = (void*)((uintptr_t)p + adjust); if (aligned_p != p) { mi_page_t* page = _mi_ptr_page(p); mi_page_set_has_aligned(page, true); _mi_padding_shrink(page, (mi_block_t*)p, adjust + size); } // todo: expand padding if overallocated ? mi_assert_internal(mi_page_usable_block_size(_mi_ptr_page(p)) >= adjust + size); mi_assert_internal(p == _mi_page_ptr_unalign(_mi_ptr_segment(aligned_p), _mi_ptr_page(aligned_p), aligned_p)); mi_assert_internal(((uintptr_t)aligned_p + offset) % alignment == 0); mi_assert_internal(mi_usable_size(aligned_p)>=size); mi_assert_internal(mi_usable_size(p) == mi_usable_size(aligned_p)+adjust); // now zero the block if needed if (alignment > MI_ALIGNMENT_MAX) { // for the tracker, on huge aligned allocations only from the start of the large block is defined mi_track_mem_undefined(aligned_p, size); if (zero) { _mi_memzero_aligned(aligned_p, mi_usable_size(aligned_p)); } } if (p != aligned_p) { mi_track_align(p,aligned_p,adjust,mi_usable_size(aligned_p)); } return aligned_p; } // Primitive aligned allocation static void* mi_heap_malloc_zero_aligned_at(mi_heap_t* const heap, const size_t size, const size_t alignment, const size_t offset, const bool zero) mi_attr_noexcept { // note: we don't require `size > offset`, we just guarantee that the address at offset is aligned 
regardless of the allocated size. if mi_unlikely(alignment == 0 || !_mi_is_power_of_two(alignment)) { // require power-of-two (see ) #if MI_DEBUG > 0 _mi_error_message(EOVERFLOW, "aligned allocation requires the alignment to be a power-of-two (size %zu, alignment %zu)\n", size, alignment); #endif return NULL; } if mi_unlikely(size > PTRDIFF_MAX) { // we don't allocate more than PTRDIFF_MAX (see ) #if MI_DEBUG > 0 _mi_error_message(EOVERFLOW, "aligned allocation request is too large (size %zu, alignment %zu)\n", size, alignment); #endif return NULL; } const uintptr_t align_mask = alignment-1; // for any x, `(x & align_mask) == (x % alignment)` const size_t padsize = size + MI_PADDING_SIZE; // note: cannot overflow due to earlier size > PTRDIFF_MAX check // try first if there happens to be a small block available with just the right alignment if mi_likely(padsize <= MI_SMALL_SIZE_MAX && alignment <= padsize) { mi_page_t* page = _mi_heap_get_free_small_page(heap, padsize); const bool is_aligned = (((uintptr_t)page->free+offset) & align_mask)==0; if mi_likely(page->free != NULL && is_aligned) { #if MI_STAT>1 mi_heap_stat_increase(heap, malloc, size); #endif void* p = _mi_page_malloc(heap, page, padsize, zero); // TODO: inline _mi_page_malloc mi_assert_internal(p != NULL); mi_assert_internal(((uintptr_t)p + offset) % alignment == 0); mi_track_malloc(p,size,zero); return p; } } // fallback return mi_heap_malloc_zero_aligned_at_fallback(heap, size, alignment, offset, zero); } // ------------------------------------------------------ // Optimized mi_heap_malloc_aligned / mi_malloc_aligned // ------------------------------------------------------ mi_decl_nodiscard mi_decl_restrict void* mi_heap_malloc_aligned_at(mi_heap_t* heap, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { return mi_heap_malloc_zero_aligned_at(heap, size, alignment, offset, false); } mi_decl_nodiscard mi_decl_restrict void* mi_heap_malloc_aligned(mi_heap_t* heap, size_t size, size_t 
alignment) mi_attr_noexcept { if mi_unlikely(alignment == 0 || !_mi_is_power_of_two(alignment)) return NULL; #if !MI_PADDING // without padding, any small sized allocation is naturally aligned (see also `_mi_segment_page_start`) if mi_likely(_mi_is_power_of_two(size) && size >= alignment && size <= MI_SMALL_SIZE_MAX) #else // with padding, we can only guarantee this for fixed alignments if mi_likely((alignment == sizeof(void*) || (alignment == MI_MAX_ALIGN_SIZE && size > (MI_MAX_ALIGN_SIZE/2))) && size <= MI_SMALL_SIZE_MAX) #endif { // fast path for common alignment and size return mi_heap_malloc_small(heap, size); } else { return mi_heap_malloc_aligned_at(heap, size, alignment, 0); } } // ensure a definition is emitted #if defined(__cplusplus) static void* _mi_heap_malloc_aligned = (void*)&mi_heap_malloc_aligned; #endif // ------------------------------------------------------ // Aligned Allocation // ------------------------------------------------------ mi_decl_nodiscard mi_decl_restrict void* mi_heap_zalloc_aligned_at(mi_heap_t* heap, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { return mi_heap_malloc_zero_aligned_at(heap, size, alignment, offset, true); } mi_decl_nodiscard mi_decl_restrict void* mi_heap_zalloc_aligned(mi_heap_t* heap, size_t size, size_t alignment) mi_attr_noexcept { return mi_heap_zalloc_aligned_at(heap, size, alignment, 0); } mi_decl_nodiscard mi_decl_restrict void* mi_heap_calloc_aligned_at(mi_heap_t* heap, size_t count, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { size_t total; if (mi_count_size_overflow(count, size, &total)) return NULL; return mi_heap_zalloc_aligned_at(heap, total, alignment, offset); } mi_decl_nodiscard mi_decl_restrict void* mi_heap_calloc_aligned(mi_heap_t* heap, size_t count, size_t size, size_t alignment) mi_attr_noexcept { return mi_heap_calloc_aligned_at(heap,count,size,alignment,0); } mi_decl_nodiscard mi_decl_restrict void* mi_malloc_aligned_at(size_t size, size_t 
alignment, size_t offset) mi_attr_noexcept { return mi_heap_malloc_aligned_at(mi_prim_get_default_heap(), size, alignment, offset); } mi_decl_nodiscard mi_decl_restrict void* mi_malloc_aligned(size_t size, size_t alignment) mi_attr_noexcept { return mi_heap_malloc_aligned(mi_prim_get_default_heap(), size, alignment); } mi_decl_nodiscard mi_decl_restrict void* mi_zalloc_aligned_at(size_t size, size_t alignment, size_t offset) mi_attr_noexcept { return mi_heap_zalloc_aligned_at(mi_prim_get_default_heap(), size, alignment, offset); } mi_decl_nodiscard mi_decl_restrict void* mi_zalloc_aligned(size_t size, size_t alignment) mi_attr_noexcept { return mi_heap_zalloc_aligned(mi_prim_get_default_heap(), size, alignment); } mi_decl_nodiscard mi_decl_restrict void* mi_calloc_aligned_at(size_t count, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { return mi_heap_calloc_aligned_at(mi_prim_get_default_heap(), count, size, alignment, offset); } mi_decl_nodiscard mi_decl_restrict void* mi_calloc_aligned(size_t count, size_t size, size_t alignment) mi_attr_noexcept { return mi_heap_calloc_aligned(mi_prim_get_default_heap(), count, size, alignment); } // ------------------------------------------------------ // Aligned re-allocation // ------------------------------------------------------ static void* mi_heap_realloc_zero_aligned_at(mi_heap_t* heap, void* p, size_t newsize, size_t alignment, size_t offset, bool zero) mi_attr_noexcept { mi_assert(alignment > 0); if (alignment <= sizeof(uintptr_t)) return _mi_heap_realloc_zero(heap,p,newsize,zero); if (p == NULL) return mi_heap_malloc_zero_aligned_at(heap,newsize,alignment,offset,zero); size_t size = mi_usable_size(p); if (newsize <= size && newsize >= (size - (size / 2)) && (((uintptr_t)p + offset) % alignment) == 0) { return p; // reallocation still fits, is aligned and not more than 50% waste } else { // note: we don't zero allocate upfront so we only zero initialize the expanded part void* newp = 
mi_heap_malloc_aligned_at(heap,newsize,alignment,offset); if (newp != NULL) { if (zero && newsize > size) { // also set last word in the previous allocation to zero to ensure any padding is zero-initialized size_t start = (size >= sizeof(intptr_t) ? size - sizeof(intptr_t) : 0); _mi_memzero((uint8_t*)newp + start, newsize - start); } _mi_memcpy_aligned(newp, p, (newsize > size ? size : newsize)); mi_free(p); // only free if successful } return newp; } } static void* mi_heap_realloc_zero_aligned(mi_heap_t* heap, void* p, size_t newsize, size_t alignment, bool zero) mi_attr_noexcept { mi_assert(alignment > 0); if (alignment <= sizeof(uintptr_t)) return _mi_heap_realloc_zero(heap,p,newsize,zero); size_t offset = ((uintptr_t)p % alignment); // use offset of previous allocation (p can be NULL) return mi_heap_realloc_zero_aligned_at(heap,p,newsize,alignment,offset,zero); } mi_decl_nodiscard void* mi_heap_realloc_aligned_at(mi_heap_t* heap, void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept { return mi_heap_realloc_zero_aligned_at(heap,p,newsize,alignment,offset,false); } mi_decl_nodiscard void* mi_heap_realloc_aligned(mi_heap_t* heap, void* p, size_t newsize, size_t alignment) mi_attr_noexcept { return mi_heap_realloc_zero_aligned(heap,p,newsize,alignment,false); } mi_decl_nodiscard void* mi_heap_rezalloc_aligned_at(mi_heap_t* heap, void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept { return mi_heap_realloc_zero_aligned_at(heap, p, newsize, alignment, offset, true); } mi_decl_nodiscard void* mi_heap_rezalloc_aligned(mi_heap_t* heap, void* p, size_t newsize, size_t alignment) mi_attr_noexcept { return mi_heap_realloc_zero_aligned(heap, p, newsize, alignment, true); } mi_decl_nodiscard void* mi_heap_recalloc_aligned_at(mi_heap_t* heap, void* p, size_t newcount, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { size_t total; if (mi_count_size_overflow(newcount, size, &total)) return NULL; return 
mi_heap_rezalloc_aligned_at(heap, p, total, alignment, offset); } mi_decl_nodiscard void* mi_heap_recalloc_aligned(mi_heap_t* heap, void* p, size_t newcount, size_t size, size_t alignment) mi_attr_noexcept { size_t total; if (mi_count_size_overflow(newcount, size, &total)) return NULL; return mi_heap_rezalloc_aligned(heap, p, total, alignment); } mi_decl_nodiscard void* mi_realloc_aligned_at(void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept { return mi_heap_realloc_aligned_at(mi_prim_get_default_heap(), p, newsize, alignment, offset); } mi_decl_nodiscard void* mi_realloc_aligned(void* p, size_t newsize, size_t alignment) mi_attr_noexcept { return mi_heap_realloc_aligned(mi_prim_get_default_heap(), p, newsize, alignment); } mi_decl_nodiscard void* mi_rezalloc_aligned_at(void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept { return mi_heap_rezalloc_aligned_at(mi_prim_get_default_heap(), p, newsize, alignment, offset); } mi_decl_nodiscard void* mi_rezalloc_aligned(void* p, size_t newsize, size_t alignment) mi_attr_noexcept { return mi_heap_rezalloc_aligned(mi_prim_get_default_heap(), p, newsize, alignment); } mi_decl_nodiscard void* mi_recalloc_aligned_at(void* p, size_t newcount, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { return mi_heap_recalloc_aligned_at(mi_prim_get_default_heap(), p, newcount, size, alignment, offset); } mi_decl_nodiscard void* mi_recalloc_aligned(void* p, size_t newcount, size_t size, size_t alignment) mi_attr_noexcept { return mi_heap_recalloc_aligned(mi_prim_get_default_heap(), p, newcount, size, alignment); } luametatex-2.10.08/source/libraries/mimalloc/src/alloc-override-osx.c000066400000000000000000000322651442250314700255670ustar00rootroot00000000000000/* ---------------------------------------------------------------------------- Copyright (c) 2018-2022, Microsoft Research, Daan Leijen This is free software; you can redistribute it and/or modify it under the terms of the 
MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. -----------------------------------------------------------------------------*/ #include "mimalloc.h" #include "mimalloc-internal.h" #if defined(MI_MALLOC_OVERRIDE) #if !defined(__APPLE__) #error "this file should only be included on macOS" #endif /* ------------------------------------------------------ Override system malloc on macOS This is done through the malloc zone interface. It seems to be most robust in combination with interposing though or otherwise we may get zone errors as there are could be allocations done by the time we take over the zone. ------------------------------------------------------ */ #include #include #include // memset #include #ifdef __cplusplus extern "C" { #endif #if defined(MAC_OS_X_VERSION_10_6) && (MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_6) // only available from OSX 10.6 extern malloc_zone_t* malloc_default_purgeable_zone(void) __attribute__((weak_import)); #endif /* ------------------------------------------------------ malloc zone members ------------------------------------------------------ */ static size_t zone_size(malloc_zone_t* zone, const void* p) { MI_UNUSED(zone); if (!mi_is_in_heap_region(p)){ return 0; } // not our pointer, bail out return mi_usable_size(p); } static void* zone_malloc(malloc_zone_t* zone, size_t size) { MI_UNUSED(zone); return mi_malloc(size); } static void* zone_calloc(malloc_zone_t* zone, size_t count, size_t size) { MI_UNUSED(zone); return mi_calloc(count, size); } static void* zone_valloc(malloc_zone_t* zone, size_t size) { MI_UNUSED(zone); return mi_malloc_aligned(size, _mi_os_page_size()); } static void zone_free(malloc_zone_t* zone, void* p) { MI_UNUSED(zone); mi_cfree(p); } static void* zone_realloc(malloc_zone_t* zone, void* p, size_t newsize) { MI_UNUSED(zone); return mi_realloc(p, newsize); } static void* zone_memalign(malloc_zone_t* zone, size_t alignment, size_t size) { 
MI_UNUSED(zone); return mi_malloc_aligned(size,alignment); } static void zone_destroy(malloc_zone_t* zone) { MI_UNUSED(zone); // todo: ignore for now? } static unsigned zone_batch_malloc(malloc_zone_t* zone, size_t size, void** ps, unsigned count) { size_t i; for (i = 0; i < count; i++) { ps[i] = zone_malloc(zone, size); if (ps[i] == NULL) break; } return i; } static void zone_batch_free(malloc_zone_t* zone, void** ps, unsigned count) { for(size_t i = 0; i < count; i++) { zone_free(zone, ps[i]); ps[i] = NULL; } } static size_t zone_pressure_relief(malloc_zone_t* zone, size_t size) { MI_UNUSED(zone); MI_UNUSED(size); mi_collect(false); return 0; } static void zone_free_definite_size(malloc_zone_t* zone, void* p, size_t size) { MI_UNUSED(size); zone_free(zone,p); } static boolean_t zone_claimed_address(malloc_zone_t* zone, void* p) { MI_UNUSED(zone); return mi_is_in_heap_region(p); } /* ------------------------------------------------------ Introspection members ------------------------------------------------------ */ static kern_return_t intro_enumerator(task_t task, void* p, unsigned type_mask, vm_address_t zone_address, memory_reader_t reader, vm_range_recorder_t recorder) { // todo: enumerate all memory MI_UNUSED(task); MI_UNUSED(p); MI_UNUSED(type_mask); MI_UNUSED(zone_address); MI_UNUSED(reader); MI_UNUSED(recorder); return KERN_SUCCESS; } static size_t intro_good_size(malloc_zone_t* zone, size_t size) { MI_UNUSED(zone); return mi_good_size(size); } static boolean_t intro_check(malloc_zone_t* zone) { MI_UNUSED(zone); return true; } static void intro_print(malloc_zone_t* zone, boolean_t verbose) { MI_UNUSED(zone); MI_UNUSED(verbose); mi_stats_print(NULL); } static void intro_log(malloc_zone_t* zone, void* p) { MI_UNUSED(zone); MI_UNUSED(p); // todo? } static void intro_force_lock(malloc_zone_t* zone) { MI_UNUSED(zone); // todo? } static void intro_force_unlock(malloc_zone_t* zone) { MI_UNUSED(zone); // todo? 
} static void intro_statistics(malloc_zone_t* zone, malloc_statistics_t* stats) { MI_UNUSED(zone); // todo... stats->blocks_in_use = 0; stats->size_in_use = 0; stats->max_size_in_use = 0; stats->size_allocated = 0; } static boolean_t intro_zone_locked(malloc_zone_t* zone) { MI_UNUSED(zone); return false; } /* ------------------------------------------------------ At process start, override the default allocator ------------------------------------------------------ */ #if defined(__GNUC__) && !defined(__clang__) #pragma GCC diagnostic ignored "-Wmissing-field-initializers" #endif #if defined(__clang__) #pragma clang diagnostic ignored "-Wc99-extensions" #endif static malloc_introspection_t mi_introspect = { .enumerator = &intro_enumerator, .good_size = &intro_good_size, .check = &intro_check, .print = &intro_print, .log = &intro_log, .force_lock = &intro_force_lock, .force_unlock = &intro_force_unlock, #if defined(MAC_OS_X_VERSION_10_6) && (MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_6) .statistics = &intro_statistics, .zone_locked = &intro_zone_locked, #endif }; static malloc_zone_t mi_malloc_zone = { // note: even with designators, the order is important for C++ compilation //.reserved1 = NULL, //.reserved2 = NULL, .size = &zone_size, .malloc = &zone_malloc, .calloc = &zone_calloc, .valloc = &zone_valloc, .free = &zone_free, .realloc = &zone_realloc, .destroy = &zone_destroy, .zone_name = "mimalloc", .batch_malloc = &zone_batch_malloc, .batch_free = &zone_batch_free, .introspect = &mi_introspect, #if defined(MAC_OS_X_VERSION_10_6) && (MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_6) #if defined(MAC_OS_X_VERSION_10_14) && (MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_14) .version = 10, #else .version = 9, #endif // switch to version 9+ on OSX 10.6 to support memalign. 
.memalign = &zone_memalign, .free_definite_size = &zone_free_definite_size, .pressure_relief = &zone_pressure_relief, #if defined(MAC_OS_X_VERSION_10_14) && (MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_14) .claimed_address = &zone_claimed_address, #endif #else .version = 4, #endif }; #ifdef __cplusplus } #endif #if defined(MI_OSX_INTERPOSE) && defined(MI_SHARED_LIB_EXPORT) // ------------------------------------------------------ // Override malloc_xxx and malloc_zone_xxx api's to use only // our mimalloc zone. Since even the loader uses malloc // on macOS, this ensures that all allocations go through // mimalloc (as all calls are interposed). // The main `malloc`, `free`, etc calls are interposed in `alloc-override.c`, // Here, we also override macOS specific API's like // `malloc_zone_calloc` etc. see // ------------------------------------------------------ static inline malloc_zone_t* mi_get_default_zone(void) { static bool init; if mi_unlikely(!init) { init = true; malloc_zone_register(&mi_malloc_zone); // by calling register we avoid a zone error on free (see ) } return &mi_malloc_zone; } mi_decl_externc int malloc_jumpstart(uintptr_t cookie); mi_decl_externc void _malloc_fork_prepare(void); mi_decl_externc void _malloc_fork_parent(void); mi_decl_externc void _malloc_fork_child(void); static malloc_zone_t* mi_malloc_create_zone(vm_size_t size, unsigned flags) { MI_UNUSED(size); MI_UNUSED(flags); return mi_get_default_zone(); } static malloc_zone_t* mi_malloc_default_zone (void) { return mi_get_default_zone(); } static malloc_zone_t* mi_malloc_default_purgeable_zone(void) { return mi_get_default_zone(); } static void mi_malloc_destroy_zone(malloc_zone_t* zone) { MI_UNUSED(zone); // nothing. 
} static kern_return_t mi_malloc_get_all_zones (task_t task, memory_reader_t mr, vm_address_t** addresses, unsigned* count) { MI_UNUSED(task); MI_UNUSED(mr); if (addresses != NULL) *addresses = NULL; if (count != NULL) *count = 0; return KERN_SUCCESS; } static const char* mi_malloc_get_zone_name(malloc_zone_t* zone) { return (zone == NULL ? mi_malloc_zone.zone_name : zone->zone_name); } static void mi_malloc_set_zone_name(malloc_zone_t* zone, const char* name) { MI_UNUSED(zone); MI_UNUSED(name); } static int mi_malloc_jumpstart(uintptr_t cookie) { MI_UNUSED(cookie); return 1; // or 0 for no error? } static void mi__malloc_fork_prepare(void) { // nothing } static void mi__malloc_fork_parent(void) { // nothing } static void mi__malloc_fork_child(void) { // nothing } static void mi_malloc_printf(const char* fmt, ...) { MI_UNUSED(fmt); } static bool zone_check(malloc_zone_t* zone) { MI_UNUSED(zone); return true; } static malloc_zone_t* zone_from_ptr(const void* p) { MI_UNUSED(p); return mi_get_default_zone(); } static void zone_log(malloc_zone_t* zone, void* p) { MI_UNUSED(zone); MI_UNUSED(p); } static void zone_print(malloc_zone_t* zone, bool b) { MI_UNUSED(zone); MI_UNUSED(b); } static void zone_print_ptr_info(void* p) { MI_UNUSED(p); } static void zone_register(malloc_zone_t* zone) { MI_UNUSED(zone); } static void zone_unregister(malloc_zone_t* zone) { MI_UNUSED(zone); } // use interposing so `DYLD_INSERT_LIBRARIES` works without `DYLD_FORCE_FLAT_NAMESPACE=1` // See: struct mi_interpose_s { const void* replacement; const void* target; }; #define MI_INTERPOSE_FUN(oldfun,newfun) { (const void*)&newfun, (const void*)&oldfun } #define MI_INTERPOSE_MI(fun) MI_INTERPOSE_FUN(fun,mi_##fun) #define MI_INTERPOSE_ZONE(fun) MI_INTERPOSE_FUN(malloc_##fun,fun) __attribute__((used)) static const struct mi_interpose_s _mi_zone_interposes[] __attribute__((section("__DATA, __interpose"))) = { MI_INTERPOSE_MI(malloc_create_zone), MI_INTERPOSE_MI(malloc_default_purgeable_zone), 
MI_INTERPOSE_MI(malloc_default_zone), MI_INTERPOSE_MI(malloc_destroy_zone), MI_INTERPOSE_MI(malloc_get_all_zones), MI_INTERPOSE_MI(malloc_get_zone_name), MI_INTERPOSE_MI(malloc_jumpstart), MI_INTERPOSE_MI(malloc_printf), MI_INTERPOSE_MI(malloc_set_zone_name), MI_INTERPOSE_MI(_malloc_fork_child), MI_INTERPOSE_MI(_malloc_fork_parent), MI_INTERPOSE_MI(_malloc_fork_prepare), MI_INTERPOSE_ZONE(zone_batch_free), MI_INTERPOSE_ZONE(zone_batch_malloc), MI_INTERPOSE_ZONE(zone_calloc), MI_INTERPOSE_ZONE(zone_check), MI_INTERPOSE_ZONE(zone_free), MI_INTERPOSE_ZONE(zone_from_ptr), MI_INTERPOSE_ZONE(zone_log), MI_INTERPOSE_ZONE(zone_malloc), MI_INTERPOSE_ZONE(zone_memalign), MI_INTERPOSE_ZONE(zone_print), MI_INTERPOSE_ZONE(zone_print_ptr_info), MI_INTERPOSE_ZONE(zone_realloc), MI_INTERPOSE_ZONE(zone_register), MI_INTERPOSE_ZONE(zone_unregister), MI_INTERPOSE_ZONE(zone_valloc) }; #else // ------------------------------------------------------ // hook into the zone api's without interposing // This is the official way of adding an allocator but // it seems less robust than using interpose. // ------------------------------------------------------ static inline malloc_zone_t* mi_get_default_zone(void) { // The first returned zone is the real default malloc_zone_t** zones = NULL; unsigned count = 0; kern_return_t ret = malloc_get_all_zones(0, NULL, (vm_address_t**)&zones, &count); if (ret == KERN_SUCCESS && count > 0) { return zones[0]; } else { // fallback return malloc_default_zone(); } } #if defined(__clang__) __attribute__((constructor(0))) #else __attribute__((constructor)) // seems not supported by g++-11 on the M1 #endif static void _mi_macos_override_malloc() { malloc_zone_t* purgeable_zone = NULL; #if defined(MAC_OS_X_VERSION_10_6) && (MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_6) // force the purgeable zone to exist to avoid strange bugs if (malloc_default_purgeable_zone) { purgeable_zone = malloc_default_purgeable_zone(); } #endif // Register our zone. 
// thomcc: I think this is still needed to put us in the zone list. malloc_zone_register(&mi_malloc_zone); // Unregister the default zone, this makes our zone the new default // as that was the last registered. malloc_zone_t *default_zone = mi_get_default_zone(); // thomcc: Unsure if the next test is *always* false or just false in the // cases I've tried. I'm also unsure if the code inside is needed. at all if (default_zone != &mi_malloc_zone) { malloc_zone_unregister(default_zone); // Reregister the default zone so free and realloc in that zone keep working. malloc_zone_register(default_zone); } // Unregister, and re-register the purgeable_zone to avoid bugs if it occurs // earlier than the default zone. if (purgeable_zone != NULL) { malloc_zone_unregister(purgeable_zone); malloc_zone_register(purgeable_zone); } } #endif // MI_OSX_INTERPOSE #endif // MI_MALLOC_OVERRIDE luametatex-2.10.08/source/libraries/mimalloc/src/alloc-override.c000066400000000000000000000365371442250314700247660ustar00rootroot00000000000000/* ---------------------------------------------------------------------------- Copyright (c) 2018-2021, Microsoft Research, Daan Leijen This is free software; you can redistribute it and/or modify it under the terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. 
-----------------------------------------------------------------------------*/ #if !defined(MI_IN_ALLOC_C) #error "this file should be included from 'alloc.c' (so aliases can work)" #endif #if defined(MI_MALLOC_OVERRIDE) && defined(_WIN32) && !(defined(MI_SHARED_LIB) && defined(_DLL)) #error "It is only possible to override "malloc" on Windows when building as a DLL (and linking the C runtime as a DLL)" #endif #if defined(MI_MALLOC_OVERRIDE) && !(defined(_WIN32)) #if defined(__APPLE__) #include mi_decl_externc void vfree(void* p); mi_decl_externc size_t malloc_size(const void* p); mi_decl_externc size_t malloc_good_size(size_t size); #endif // helper definition for C override of C++ new typedef struct mi_nothrow_s { int _tag; } mi_nothrow_t; // ------------------------------------------------------ // Override system malloc // ------------------------------------------------------ #if (defined(__GNUC__) || defined(__clang__)) && !defined(__APPLE__) && !MI_TRACK_ENABLED // gcc, clang: use aliasing to alias the exported function to one of our `mi_` functions #if (defined(__GNUC__) && __GNUC__ >= 9) #pragma GCC diagnostic ignored "-Wattributes" // or we get warnings that nodiscard is ignored on a forward #define MI_FORWARD(fun) __attribute__((alias(#fun), used, visibility("default"), copy(fun))); #else #define MI_FORWARD(fun) __attribute__((alias(#fun), used, visibility("default"))); #endif #define MI_FORWARD1(fun,x) MI_FORWARD(fun) #define MI_FORWARD2(fun,x,y) MI_FORWARD(fun) #define MI_FORWARD3(fun,x,y,z) MI_FORWARD(fun) #define MI_FORWARD0(fun,x) MI_FORWARD(fun) #define MI_FORWARD02(fun,x,y) MI_FORWARD(fun) #else // otherwise use forwarding by calling our `mi_` function #define MI_FORWARD1(fun,x) { return fun(x); } #define MI_FORWARD2(fun,x,y) { return fun(x,y); } #define MI_FORWARD3(fun,x,y,z) { return fun(x,y,z); } #define MI_FORWARD0(fun,x) { fun(x); } #define MI_FORWARD02(fun,x,y) { fun(x,y); } #endif #if defined(__APPLE__) && defined(MI_SHARED_LIB_EXPORT) && 
defined(MI_OSX_INTERPOSE) // define MI_OSX_IS_INTERPOSED as we should not provide forwarding definitions for // functions that are interposed (or the interposing does not work) #define MI_OSX_IS_INTERPOSED mi_decl_externc size_t mi_malloc_size_checked(void *p) { if (!mi_is_in_heap_region(p)) return 0; return mi_usable_size(p); } // use interposing so `DYLD_INSERT_LIBRARIES` works without `DYLD_FORCE_FLAT_NAMESPACE=1` // See: struct mi_interpose_s { const void* replacement; const void* target; }; #define MI_INTERPOSE_FUN(oldfun,newfun) { (const void*)&newfun, (const void*)&oldfun } #define MI_INTERPOSE_MI(fun) MI_INTERPOSE_FUN(fun,mi_##fun) __attribute__((used)) static struct mi_interpose_s _mi_interposes[] __attribute__((section("__DATA, __interpose"))) = { MI_INTERPOSE_MI(malloc), MI_INTERPOSE_MI(calloc), MI_INTERPOSE_MI(realloc), MI_INTERPOSE_MI(strdup), MI_INTERPOSE_MI(strndup), MI_INTERPOSE_MI(realpath), MI_INTERPOSE_MI(posix_memalign), MI_INTERPOSE_MI(reallocf), MI_INTERPOSE_MI(valloc), MI_INTERPOSE_FUN(malloc_size,mi_malloc_size_checked), MI_INTERPOSE_MI(malloc_good_size), #if defined(MAC_OS_X_VERSION_10_15) && MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_15 MI_INTERPOSE_MI(aligned_alloc), #endif #ifdef MI_OSX_ZONE // we interpose malloc_default_zone in alloc-override-osx.c so we can use mi_free safely MI_INTERPOSE_MI(free), MI_INTERPOSE_FUN(vfree,mi_free), #else // sometimes code allocates from default zone but deallocates using plain free :-( (like NxHashResizeToCapacity ) MI_INTERPOSE_FUN(free,mi_cfree), // use safe free that checks if pointers are from us MI_INTERPOSE_FUN(vfree,mi_cfree), #endif }; #ifdef __cplusplus extern "C" { #endif void _ZdlPv(void* p); // delete void _ZdaPv(void* p); // delete[] void _ZdlPvm(void* p, size_t n); // delete void _ZdaPvm(void* p, size_t n); // delete[] void* _Znwm(size_t n); // new void* _Znam(size_t n); // new[] void* _ZnwmRKSt9nothrow_t(size_t n, mi_nothrow_t tag); // new nothrow void* 
_ZnamRKSt9nothrow_t(size_t n, mi_nothrow_t tag); // new[] nothrow #ifdef __cplusplus } #endif __attribute__((used)) static struct mi_interpose_s _mi_cxx_interposes[] __attribute__((section("__DATA, __interpose"))) = { MI_INTERPOSE_FUN(_ZdlPv,mi_free), MI_INTERPOSE_FUN(_ZdaPv,mi_free), MI_INTERPOSE_FUN(_ZdlPvm,mi_free_size), MI_INTERPOSE_FUN(_ZdaPvm,mi_free_size), MI_INTERPOSE_FUN(_Znwm,mi_new), MI_INTERPOSE_FUN(_Znam,mi_new), MI_INTERPOSE_FUN(_ZnwmRKSt9nothrow_t,mi_new_nothrow), MI_INTERPOSE_FUN(_ZnamRKSt9nothrow_t,mi_new_nothrow), }; #elif defined(_MSC_VER) // cannot override malloc unless using a dll. // we just override new/delete which does work in a static library. #else // On all other systems forward to our API mi_decl_export void* malloc(size_t size) MI_FORWARD1(mi_malloc, size) mi_decl_export void* calloc(size_t size, size_t n) MI_FORWARD2(mi_calloc, size, n) mi_decl_export void* realloc(void* p, size_t newsize) MI_FORWARD2(mi_realloc, p, newsize) mi_decl_export void free(void* p) MI_FORWARD0(mi_free, p) #endif #if (defined(__GNUC__) || defined(__clang__)) && !defined(__APPLE__) #pragma GCC visibility push(default) #endif // ------------------------------------------------------ // Override new/delete // This is not really necessary as they usually call // malloc/free anyway, but it improves performance. // ------------------------------------------------------ #ifdef __cplusplus // ------------------------------------------------------ // With a C++ compiler we override the new/delete operators. 
// see // ------------------------------------------------------ #include #ifndef MI_OSX_IS_INTERPOSED void operator delete(void* p) noexcept MI_FORWARD0(mi_free,p) void operator delete[](void* p) noexcept MI_FORWARD0(mi_free,p) void* operator new(std::size_t n) noexcept(false) MI_FORWARD1(mi_new,n) void* operator new[](std::size_t n) noexcept(false) MI_FORWARD1(mi_new,n) void* operator new (std::size_t n, const std::nothrow_t& tag) noexcept { MI_UNUSED(tag); return mi_new_nothrow(n); } void* operator new[](std::size_t n, const std::nothrow_t& tag) noexcept { MI_UNUSED(tag); return mi_new_nothrow(n); } #if (__cplusplus >= 201402L || _MSC_VER >= 1916) void operator delete (void* p, std::size_t n) noexcept MI_FORWARD02(mi_free_size,p,n) void operator delete[](void* p, std::size_t n) noexcept MI_FORWARD02(mi_free_size,p,n) #endif #endif #if (__cplusplus > 201402L && defined(__cpp_aligned_new)) && (!defined(__GNUC__) || (__GNUC__ > 5)) void operator delete (void* p, std::align_val_t al) noexcept { mi_free_aligned(p, static_cast(al)); } void operator delete[](void* p, std::align_val_t al) noexcept { mi_free_aligned(p, static_cast(al)); } void operator delete (void* p, std::size_t n, std::align_val_t al) noexcept { mi_free_size_aligned(p, n, static_cast(al)); }; void operator delete[](void* p, std::size_t n, std::align_val_t al) noexcept { mi_free_size_aligned(p, n, static_cast(al)); }; void operator delete (void* p, std::align_val_t al, const std::nothrow_t&) noexcept { mi_free_aligned(p, static_cast(al)); } void operator delete[](void* p, std::align_val_t al, const std::nothrow_t&) noexcept { mi_free_aligned(p, static_cast(al)); } void* operator new( std::size_t n, std::align_val_t al) noexcept(false) { return mi_new_aligned(n, static_cast(al)); } void* operator new[]( std::size_t n, std::align_val_t al) noexcept(false) { return mi_new_aligned(n, static_cast(al)); } void* operator new (std::size_t n, std::align_val_t al, const std::nothrow_t&) noexcept { return 
mi_new_aligned_nothrow(n, static_cast(al)); } void* operator new[](std::size_t n, std::align_val_t al, const std::nothrow_t&) noexcept { return mi_new_aligned_nothrow(n, static_cast(al)); } #endif #elif (defined(__GNUC__) || defined(__clang__)) // ------------------------------------------------------ // Override by defining the mangled C++ names of the operators (as // used by GCC and CLang). // See // ------------------------------------------------------ void _ZdlPv(void* p) MI_FORWARD0(mi_free,p) // delete void _ZdaPv(void* p) MI_FORWARD0(mi_free,p) // delete[] void _ZdlPvm(void* p, size_t n) MI_FORWARD02(mi_free_size,p,n) void _ZdaPvm(void* p, size_t n) MI_FORWARD02(mi_free_size,p,n) void _ZdlPvSt11align_val_t(void* p, size_t al) { mi_free_aligned(p,al); } void _ZdaPvSt11align_val_t(void* p, size_t al) { mi_free_aligned(p,al); } void _ZdlPvmSt11align_val_t(void* p, size_t n, size_t al) { mi_free_size_aligned(p,n,al); } void _ZdaPvmSt11align_val_t(void* p, size_t n, size_t al) { mi_free_size_aligned(p,n,al); } #if (MI_INTPTR_SIZE==8) void* _Znwm(size_t n) MI_FORWARD1(mi_new,n) // new 64-bit void* _Znam(size_t n) MI_FORWARD1(mi_new,n) // new[] 64-bit void* _ZnwmRKSt9nothrow_t(size_t n, mi_nothrow_t tag) { MI_UNUSED(tag); return mi_new_nothrow(n); } void* _ZnamRKSt9nothrow_t(size_t n, mi_nothrow_t tag) { MI_UNUSED(tag); return mi_new_nothrow(n); } void* _ZnwmSt11align_val_t(size_t n, size_t al) MI_FORWARD2(mi_new_aligned, n, al) void* _ZnamSt11align_val_t(size_t n, size_t al) MI_FORWARD2(mi_new_aligned, n, al) void* _ZnwmSt11align_val_tRKSt9nothrow_t(size_t n, size_t al, mi_nothrow_t tag) { MI_UNUSED(tag); return mi_new_aligned_nothrow(n,al); } void* _ZnamSt11align_val_tRKSt9nothrow_t(size_t n, size_t al, mi_nothrow_t tag) { MI_UNUSED(tag); return mi_new_aligned_nothrow(n,al); } #elif (MI_INTPTR_SIZE==4) void* _Znwj(size_t n) MI_FORWARD1(mi_new,n) // new 64-bit void* _Znaj(size_t n) MI_FORWARD1(mi_new,n) // new[] 64-bit void* _ZnwjRKSt9nothrow_t(size_t n, 
mi_nothrow_t tag) { MI_UNUSED(tag); return mi_new_nothrow(n); } void* _ZnajRKSt9nothrow_t(size_t n, mi_nothrow_t tag) { MI_UNUSED(tag); return mi_new_nothrow(n); } void* _ZnwjSt11align_val_t(size_t n, size_t al) MI_FORWARD2(mi_new_aligned, n, al) void* _ZnajSt11align_val_t(size_t n, size_t al) MI_FORWARD2(mi_new_aligned, n, al) void* _ZnwjSt11align_val_tRKSt9nothrow_t(size_t n, size_t al, mi_nothrow_t tag) { MI_UNUSED(tag); return mi_new_aligned_nothrow(n,al); } void* _ZnajSt11align_val_tRKSt9nothrow_t(size_t n, size_t al, mi_nothrow_t tag) { MI_UNUSED(tag); return mi_new_aligned_nothrow(n,al); } #else #error "define overloads for new/delete for this platform (just for performance, can be skipped)" #endif #endif // __cplusplus // ------------------------------------------------------ // Further Posix & Unix functions definitions // ------------------------------------------------------ #ifdef __cplusplus extern "C" { #endif #ifndef MI_OSX_IS_INTERPOSED // Forward Posix/Unix calls as well void* reallocf(void* p, size_t newsize) MI_FORWARD2(mi_reallocf,p,newsize) size_t malloc_size(const void* p) MI_FORWARD1(mi_usable_size,p) #if !defined(__ANDROID__) && !defined(__FreeBSD__) size_t malloc_usable_size(void *p) MI_FORWARD1(mi_usable_size,p) #else size_t malloc_usable_size(const void *p) MI_FORWARD1(mi_usable_size,p) #endif // No forwarding here due to aliasing/name mangling issues void* valloc(size_t size) { return mi_valloc(size); } void vfree(void* p) { mi_free(p); } size_t malloc_good_size(size_t size) { return mi_malloc_good_size(size); } int posix_memalign(void** p, size_t alignment, size_t size) { return mi_posix_memalign(p, alignment, size); } // `aligned_alloc` is only available when __USE_ISOC11 is defined. // Note: it seems __USE_ISOC11 is not defined in musl (and perhaps other libc's) so we only check // for it if using glibc. 
// Note: Conda has a custom glibc where `aligned_alloc` is declared `static inline` and we cannot // override it, but both _ISOC11_SOURCE and __USE_ISOC11 are undefined in Conda GCC7 or GCC9. // Fortunately, in the case where `aligned_alloc` is declared as `static inline` it // uses internally `memalign`, `posix_memalign`, or `_aligned_malloc` so we can avoid overriding it ourselves. #if !defined(__GLIBC__) || __USE_ISOC11 void* aligned_alloc(size_t alignment, size_t size) { return mi_aligned_alloc(alignment, size); } #endif #endif // no forwarding here due to aliasing/name mangling issues void cfree(void* p) { mi_free(p); } void* pvalloc(size_t size) { return mi_pvalloc(size); } void* reallocarray(void* p, size_t count, size_t size) { return mi_reallocarray(p, count, size); } int reallocarr(void* p, size_t count, size_t size) { return mi_reallocarr(p, count, size); } void* memalign(size_t alignment, size_t size) { return mi_memalign(alignment, size); } void* _aligned_malloc(size_t alignment, size_t size) { return mi_aligned_alloc(alignment, size); } #if defined(__wasi__) // forward __libc interface (see PR #667) void* __libc_malloc(size_t size) MI_FORWARD1(mi_malloc, size) void* __libc_calloc(size_t count, size_t size) MI_FORWARD2(mi_calloc, count, size) void* __libc_realloc(void* p, size_t size) MI_FORWARD2(mi_realloc, p, size) void __libc_free(void* p) MI_FORWARD0(mi_free, p) void* __libc_memalign(size_t alignment, size_t size) { return mi_memalign(alignment, size); } #elif defined(__GLIBC__) && defined(__linux__) // forward __libc interface (needed for glibc-based Linux distributions) void* __libc_malloc(size_t size) MI_FORWARD1(mi_malloc,size) void* __libc_calloc(size_t count, size_t size) MI_FORWARD2(mi_calloc,count,size) void* __libc_realloc(void* p, size_t size) MI_FORWARD2(mi_realloc,p,size) void __libc_free(void* p) MI_FORWARD0(mi_free,p) void __libc_cfree(void* p) MI_FORWARD0(mi_free,p) void* __libc_valloc(size_t size) { return mi_valloc(size); } void* 
__libc_pvalloc(size_t size) { return mi_pvalloc(size); } void* __libc_memalign(size_t alignment, size_t size) { return mi_memalign(alignment,size); } int __posix_memalign(void** p, size_t alignment, size_t size) { return mi_posix_memalign(p,alignment,size); } #endif #ifdef __cplusplus } #endif #if (defined(__GNUC__) || defined(__clang__)) && !defined(__APPLE__) #pragma GCC visibility pop #endif #endif // MI_MALLOC_OVERRIDE && !_WIN32 luametatex-2.10.08/source/libraries/mimalloc/src/alloc-posix.c000066400000000000000000000147321442250314700243020ustar00rootroot00000000000000/* ---------------------------------------------------------------------------- Copyright (c) 2018-2021, Microsoft Research, Daan Leijen This is free software; you can redistribute it and/or modify it under the terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. -----------------------------------------------------------------------------*/ // ------------------------------------------------------------------------ // mi prefixed publi definitions of various Posix, Unix, and C++ functions // for convenience and used when overriding these functions. 
// ------------------------------------------------------------------------ #include "mimalloc.h" #include "mimalloc/internal.h" // ------------------------------------------------------ // Posix & Unix functions definitions // ------------------------------------------------------ #include #include // memset #include // getenv #ifdef _MSC_VER #pragma warning(disable:4996) // getenv _wgetenv #endif #ifndef EINVAL #define EINVAL 22 #endif #ifndef ENOMEM #define ENOMEM 12 #endif mi_decl_nodiscard size_t mi_malloc_size(const void* p) mi_attr_noexcept { // if (!mi_is_in_heap_region(p)) return 0; return mi_usable_size(p); } mi_decl_nodiscard size_t mi_malloc_usable_size(const void *p) mi_attr_noexcept { // if (!mi_is_in_heap_region(p)) return 0; return mi_usable_size(p); } mi_decl_nodiscard size_t mi_malloc_good_size(size_t size) mi_attr_noexcept { return mi_good_size(size); } void mi_cfree(void* p) mi_attr_noexcept { if (mi_is_in_heap_region(p)) { mi_free(p); } } int mi_posix_memalign(void** p, size_t alignment, size_t size) mi_attr_noexcept { // Note: The spec dictates we should not modify `*p` on an error. 
(issue#27) // if (p == NULL) return EINVAL; if ((alignment % sizeof(void*)) != 0) return EINVAL; // natural alignment // it is also required that alignment is a power of 2 and > 0; this is checked in `mi_malloc_aligned` if (alignment==0 || !_mi_is_power_of_two(alignment)) return EINVAL; // not a power of 2 void* q = mi_malloc_aligned(size, alignment); if (q==NULL && size != 0) return ENOMEM; mi_assert_internal(((uintptr_t)q % alignment) == 0); *p = q; return 0; } mi_decl_nodiscard mi_decl_restrict void* mi_memalign(size_t alignment, size_t size) mi_attr_noexcept { void* p = mi_malloc_aligned(size, alignment); mi_assert_internal(((uintptr_t)p % alignment) == 0); return p; } mi_decl_nodiscard mi_decl_restrict void* mi_valloc(size_t size) mi_attr_noexcept { return mi_memalign( _mi_os_page_size(), size ); } mi_decl_nodiscard mi_decl_restrict void* mi_pvalloc(size_t size) mi_attr_noexcept { size_t psize = _mi_os_page_size(); if (size >= SIZE_MAX - psize) return NULL; // overflow size_t asize = _mi_align_up(size, psize); return mi_malloc_aligned(asize, psize); } mi_decl_nodiscard mi_decl_restrict void* mi_aligned_alloc(size_t alignment, size_t size) mi_attr_noexcept { // C11 requires the size to be an integral multiple of the alignment, see . // unfortunately, it turns out quite some programs pass a size that is not an integral multiple so skip this check.. 
/* if mi_unlikely((size & (alignment - 1)) != 0) { // C11 requires alignment>0 && integral multiple, see #if MI_DEBUG > 0 _mi_error_message(EOVERFLOW, "(mi_)aligned_alloc requires the size to be an integral multiple of the alignment (size %zu, alignment %zu)\n", size, alignment); #endif return NULL; } */ // C11 also requires alignment to be a power-of-two (and > 0) which is checked in mi_malloc_aligned void* p = mi_malloc_aligned(size, alignment); mi_assert_internal(((uintptr_t)p % alignment) == 0); return p; } mi_decl_nodiscard void* mi_reallocarray( void* p, size_t count, size_t size ) mi_attr_noexcept { // BSD void* newp = mi_reallocn(p,count,size); if (newp==NULL) { errno = ENOMEM; } return newp; } mi_decl_nodiscard int mi_reallocarr( void* p, size_t count, size_t size ) mi_attr_noexcept { // NetBSD mi_assert(p != NULL); if (p == NULL) { errno = EINVAL; return EINVAL; } void** op = (void**)p; void* newp = mi_reallocarray(*op, count, size); if mi_unlikely(newp == NULL) { return errno; } *op = newp; return 0; } void* mi__expand(void* p, size_t newsize) mi_attr_noexcept { // Microsoft void* res = mi_expand(p, newsize); if (res == NULL) { errno = ENOMEM; } return res; } mi_decl_nodiscard mi_decl_restrict unsigned short* mi_wcsdup(const unsigned short* s) mi_attr_noexcept { if (s==NULL) return NULL; size_t len; for(len = 0; s[len] != 0; len++) { } size_t size = (len+1)*sizeof(unsigned short); unsigned short* p = (unsigned short*)mi_malloc(size); if (p != NULL) { _mi_memcpy(p,s,size); } return p; } mi_decl_nodiscard mi_decl_restrict unsigned char* mi_mbsdup(const unsigned char* s) mi_attr_noexcept { return (unsigned char*)mi_strdup((const char*)s); } int mi_dupenv_s(char** buf, size_t* size, const char* name) mi_attr_noexcept { if (buf==NULL || name==NULL) return EINVAL; if (size != NULL) *size = 0; char* p = getenv(name); // mscver warning 4996 if (p==NULL) { *buf = NULL; } else { *buf = mi_strdup(p); if (*buf==NULL) return ENOMEM; if (size != NULL) *size = 
_mi_strlen(p); } return 0; } int mi_wdupenv_s(unsigned short** buf, size_t* size, const unsigned short* name) mi_attr_noexcept { if (buf==NULL || name==NULL) return EINVAL; if (size != NULL) *size = 0; #if !defined(_WIN32) || (defined(WINAPI_FAMILY) && (WINAPI_FAMILY != WINAPI_FAMILY_DESKTOP_APP)) // not supported *buf = NULL; return EINVAL; #else unsigned short* p = (unsigned short*)_wgetenv((const wchar_t*)name); // msvc warning 4996 if (p==NULL) { *buf = NULL; } else { *buf = mi_wcsdup(p); if (*buf==NULL) return ENOMEM; if (size != NULL) *size = wcslen((const wchar_t*)p); } return 0; #endif } mi_decl_nodiscard void* mi_aligned_offset_recalloc(void* p, size_t newcount, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { // Microsoft return mi_recalloc_aligned_at(p, newcount, size, alignment, offset); } mi_decl_nodiscard void* mi_aligned_recalloc(void* p, size_t newcount, size_t size, size_t alignment) mi_attr_noexcept { // Microsoft return mi_recalloc_aligned(p, newcount, size, alignment); } luametatex-2.10.08/source/libraries/mimalloc/src/alloc.c000066400000000000000000001137261442250314700231450ustar00rootroot00000000000000/* ---------------------------------------------------------------------------- Copyright (c) 2018-2022, Microsoft Research, Daan Leijen This is free software; you can redistribute it and/or modify it under the terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. 
-----------------------------------------------------------------------------*/ #ifndef _DEFAULT_SOURCE #define _DEFAULT_SOURCE // for realpath() on Linux #endif #include "mimalloc.h" #include "mimalloc/internal.h" #include "mimalloc/atomic.h" #include "mimalloc/prim.h" // _mi_prim_thread_id() #include // memset, strlen (for mi_strdup) #include // malloc, abort #define MI_IN_ALLOC_C #include "alloc-override.c" #undef MI_IN_ALLOC_C // ------------------------------------------------------ // Allocation // ------------------------------------------------------ // Fast allocation in a page: just pop from the free list. // Fall back to generic allocation only if the list is empty. extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size, bool zero) mi_attr_noexcept { mi_assert_internal(page->xblock_size==0||mi_page_block_size(page) >= size); mi_block_t* const block = page->free; if mi_unlikely(block == NULL) { return _mi_malloc_generic(heap, size, zero, 0); } mi_assert_internal(block != NULL && _mi_ptr_page(block) == page); // pop from the free list page->used++; page->free = mi_block_next(page, block); mi_assert_internal(page->free == NULL || _mi_ptr_page(page->free) == page); #if MI_DEBUG>3 if (page->free_is_zero) { mi_assert_expensive(mi_mem_is_zero(block+1,size - sizeof(*block))); } #endif // allow use of the block internally // note: when tracking we need to avoid ever touching the MI_PADDING since // that is tracked by valgrind etc. as non-accessible (through the red-zone, see `mimalloc/track.h`) mi_track_mem_undefined(block, mi_page_usable_block_size(page)); // zero the block? 
note: we need to zero the full block size (issue #63) if mi_unlikely(zero) { mi_assert_internal(page->xblock_size != 0); // do not call with zero'ing for huge blocks (see _mi_malloc_generic) mi_assert_internal(page->xblock_size >= MI_PADDING_SIZE); if (page->free_is_zero) { block->next = 0; mi_track_mem_defined(block, page->xblock_size - MI_PADDING_SIZE); } else { _mi_memzero_aligned(block, page->xblock_size - MI_PADDING_SIZE); } } #if (MI_DEBUG>0) && !MI_TRACK_ENABLED && !MI_TSAN if (!zero && !mi_page_is_huge(page)) { memset(block, MI_DEBUG_UNINIT, mi_page_usable_block_size(page)); } #elif (MI_SECURE!=0) if (!zero) { block->next = 0; } // don't leak internal data #endif #if (MI_STAT>0) const size_t bsize = mi_page_usable_block_size(page); if (bsize <= MI_MEDIUM_OBJ_SIZE_MAX) { mi_heap_stat_increase(heap, normal, bsize); mi_heap_stat_counter_increase(heap, normal_count, 1); #if (MI_STAT>1) const size_t bin = _mi_bin(bsize); mi_heap_stat_increase(heap, normal_bins[bin], 1); #endif } #endif #if MI_PADDING // && !MI_TRACK_ENABLED mi_padding_t* const padding = (mi_padding_t*)((uint8_t*)block + mi_page_usable_block_size(page)); ptrdiff_t delta = ((uint8_t*)padding - (uint8_t*)block - (size - MI_PADDING_SIZE)); #if (MI_DEBUG>=2) mi_assert_internal(delta >= 0 && mi_page_usable_block_size(page) >= (size - MI_PADDING_SIZE + delta)); #endif mi_track_mem_defined(padding,sizeof(mi_padding_t)); // note: re-enable since mi_page_usable_block_size may set noaccess padding->canary = (uint32_t)(mi_ptr_encode(page,block,page->keys)); padding->delta = (uint32_t)(delta); #if MI_PADDING_CHECK if (!mi_page_is_huge(page)) { uint8_t* fill = (uint8_t*)padding - delta; const size_t maxpad = (delta > MI_MAX_ALIGN_SIZE ? 
MI_MAX_ALIGN_SIZE : delta); // set at most N initial padding bytes for (size_t i = 0; i < maxpad; i++) { fill[i] = MI_DEBUG_PADDING; } } #endif #endif return block; } static inline mi_decl_restrict void* mi_heap_malloc_small_zero(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept { mi_assert(heap != NULL); #if MI_DEBUG const uintptr_t tid = _mi_thread_id(); mi_assert(heap->thread_id == 0 || heap->thread_id == tid); // heaps are thread local #endif mi_assert(size <= MI_SMALL_SIZE_MAX); #if (MI_PADDING) if (size == 0) { size = sizeof(void*); } #endif mi_page_t* page = _mi_heap_get_free_small_page(heap, size + MI_PADDING_SIZE); void* const p = _mi_page_malloc(heap, page, size + MI_PADDING_SIZE, zero); mi_track_malloc(p,size,zero); #if MI_STAT>1 if (p != NULL) { if (!mi_heap_is_initialized(heap)) { heap = mi_prim_get_default_heap(); } mi_heap_stat_increase(heap, malloc, mi_usable_size(p)); } #endif #if MI_DEBUG>3 if (p != NULL && zero) { mi_assert_expensive(mi_mem_is_zero(p, size)); } #endif return p; } // allocate a small block mi_decl_nodiscard extern inline mi_decl_restrict void* mi_heap_malloc_small(mi_heap_t* heap, size_t size) mi_attr_noexcept { return mi_heap_malloc_small_zero(heap, size, false); } mi_decl_nodiscard extern inline mi_decl_restrict void* mi_malloc_small(size_t size) mi_attr_noexcept { return mi_heap_malloc_small(mi_prim_get_default_heap(), size); } // The main allocation function extern inline void* _mi_heap_malloc_zero_ex(mi_heap_t* heap, size_t size, bool zero, size_t huge_alignment) mi_attr_noexcept { if mi_likely(size <= MI_SMALL_SIZE_MAX) { mi_assert_internal(huge_alignment == 0); return mi_heap_malloc_small_zero(heap, size, zero); } else { mi_assert(heap!=NULL); mi_assert(heap->thread_id == 0 || heap->thread_id == _mi_thread_id()); // heaps are thread local void* const p = _mi_malloc_generic(heap, size + MI_PADDING_SIZE, zero, huge_alignment); // note: size can overflow but it is detected in malloc_generic 
mi_track_malloc(p,size,zero); #if MI_STAT>1 if (p != NULL) { if (!mi_heap_is_initialized(heap)) { heap = mi_prim_get_default_heap(); } mi_heap_stat_increase(heap, malloc, mi_usable_size(p)); } #endif #if MI_DEBUG>3 if (p != NULL && zero) { mi_assert_expensive(mi_mem_is_zero(p, size)); } #endif return p; } } extern inline void* _mi_heap_malloc_zero(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept { return _mi_heap_malloc_zero_ex(heap, size, zero, 0); } mi_decl_nodiscard extern inline mi_decl_restrict void* mi_heap_malloc(mi_heap_t* heap, size_t size) mi_attr_noexcept { return _mi_heap_malloc_zero(heap, size, false); } mi_decl_nodiscard extern inline mi_decl_restrict void* mi_malloc(size_t size) mi_attr_noexcept { return mi_heap_malloc(mi_prim_get_default_heap(), size); } // zero initialized small block mi_decl_nodiscard mi_decl_restrict void* mi_zalloc_small(size_t size) mi_attr_noexcept { return mi_heap_malloc_small_zero(mi_prim_get_default_heap(), size, true); } mi_decl_nodiscard extern inline mi_decl_restrict void* mi_heap_zalloc(mi_heap_t* heap, size_t size) mi_attr_noexcept { return _mi_heap_malloc_zero(heap, size, true); } mi_decl_nodiscard mi_decl_restrict void* mi_zalloc(size_t size) mi_attr_noexcept { return mi_heap_zalloc(mi_prim_get_default_heap(),size); } // ------------------------------------------------------ // Check for double free in secure and debug mode // This is somewhat expensive so only enabled for secure mode 4 // ------------------------------------------------------ #if (MI_ENCODE_FREELIST && (MI_SECURE>=4 || MI_DEBUG!=0)) // linear check if the free list contains a specific element static bool mi_list_contains(const mi_page_t* page, const mi_block_t* list, const mi_block_t* elem) { while (list != NULL) { if (elem==list) return true; list = mi_block_next(page, list); } return false; } static mi_decl_noinline bool mi_check_is_double_freex(const mi_page_t* page, const mi_block_t* block) { // The decoded value is in the same page (or 
NULL). // Walk the free lists to verify positively if it is already freed if (mi_list_contains(page, page->free, block) || mi_list_contains(page, page->local_free, block) || mi_list_contains(page, mi_page_thread_free(page), block)) { _mi_error_message(EAGAIN, "double free detected of block %p with size %zu\n", block, mi_page_block_size(page)); return true; } return false; } #define mi_track_page(page,access) { size_t psize; void* pstart = _mi_page_start(_mi_page_segment(page),page,&psize); mi_track_mem_##access( pstart, psize); } static inline bool mi_check_is_double_free(const mi_page_t* page, const mi_block_t* block) { bool is_double_free = false; mi_block_t* n = mi_block_nextx(page, block, page->keys); // pretend it is freed, and get the decoded first field if (((uintptr_t)n & (MI_INTPTR_SIZE-1))==0 && // quick check: aligned pointer? (n==NULL || mi_is_in_same_page(block, n))) // quick check: in same page or NULL? { // Suspicous: decoded value a in block is in the same page (or NULL) -- maybe a double free? 
// (continue in separate function to improve code generation) is_double_free = mi_check_is_double_freex(page, block); } return is_double_free; } #else static inline bool mi_check_is_double_free(const mi_page_t* page, const mi_block_t* block) { MI_UNUSED(page); MI_UNUSED(block); return false; } #endif // --------------------------------------------------------------------------- // Check for heap block overflow by setting up padding at the end of the block // --------------------------------------------------------------------------- #if MI_PADDING // && !MI_TRACK_ENABLED static bool mi_page_decode_padding(const mi_page_t* page, const mi_block_t* block, size_t* delta, size_t* bsize) { *bsize = mi_page_usable_block_size(page); const mi_padding_t* const padding = (mi_padding_t*)((uint8_t*)block + *bsize); mi_track_mem_defined(padding,sizeof(mi_padding_t)); *delta = padding->delta; uint32_t canary = padding->canary; uintptr_t keys[2]; keys[0] = page->keys[0]; keys[1] = page->keys[1]; bool ok = ((uint32_t)mi_ptr_encode(page,block,keys) == canary && *delta <= *bsize); mi_track_mem_noaccess(padding,sizeof(mi_padding_t)); return ok; } // Return the exact usable size of a block. static size_t mi_page_usable_size_of(const mi_page_t* page, const mi_block_t* block) { size_t bsize; size_t delta; bool ok = mi_page_decode_padding(page, block, &delta, &bsize); mi_assert_internal(ok); mi_assert_internal(delta <= bsize); return (ok ? bsize - delta : 0); } // When a non-thread-local block is freed, it becomes part of the thread delayed free // list that is freed later by the owning heap. If the exact usable size is too small to // contain the pointer for the delayed list, then shrink the padding (by decreasing delta) // so it will later not trigger an overflow error in `mi_free_block`. 
void _mi_padding_shrink(const mi_page_t* page, const mi_block_t* block, const size_t min_size) { size_t bsize; size_t delta; bool ok = mi_page_decode_padding(page, block, &delta, &bsize); mi_assert_internal(ok); if (!ok || (bsize - delta) >= min_size) return; // usually already enough space mi_assert_internal(bsize >= min_size); if (bsize < min_size) return; // should never happen size_t new_delta = (bsize - min_size); mi_assert_internal(new_delta < bsize); mi_padding_t* padding = (mi_padding_t*)((uint8_t*)block + bsize); mi_track_mem_defined(padding,sizeof(mi_padding_t)); padding->delta = (uint32_t)new_delta; mi_track_mem_noaccess(padding,sizeof(mi_padding_t)); } #else static size_t mi_page_usable_size_of(const mi_page_t* page, const mi_block_t* block) { MI_UNUSED(block); return mi_page_usable_block_size(page); } void _mi_padding_shrink(const mi_page_t* page, const mi_block_t* block, const size_t min_size) { MI_UNUSED(page); MI_UNUSED(block); MI_UNUSED(min_size); } #endif #if MI_PADDING && MI_PADDING_CHECK static bool mi_verify_padding(const mi_page_t* page, const mi_block_t* block, size_t* size, size_t* wrong) { size_t bsize; size_t delta; bool ok = mi_page_decode_padding(page, block, &delta, &bsize); *size = *wrong = bsize; if (!ok) return false; mi_assert_internal(bsize >= delta); *size = bsize - delta; if (!mi_page_is_huge(page)) { uint8_t* fill = (uint8_t*)block + bsize - delta; const size_t maxpad = (delta > MI_MAX_ALIGN_SIZE ? 
MI_MAX_ALIGN_SIZE : delta); // check at most the first N padding bytes mi_track_mem_defined(fill, maxpad); for (size_t i = 0; i < maxpad; i++) { if (fill[i] != MI_DEBUG_PADDING) { *wrong = bsize - delta + i; ok = false; break; } } mi_track_mem_noaccess(fill, maxpad); } return ok; } static void mi_check_padding(const mi_page_t* page, const mi_block_t* block) { size_t size; size_t wrong; if (!mi_verify_padding(page,block,&size,&wrong)) { _mi_error_message(EFAULT, "buffer overflow in heap block %p of size %zu: write after %zu bytes\n", block, size, wrong ); } } #else static void mi_check_padding(const mi_page_t* page, const mi_block_t* block) { MI_UNUSED(page); MI_UNUSED(block); } #endif // only maintain stats for smaller objects if requested #if (MI_STAT>0) static void mi_stat_free(const mi_page_t* page, const mi_block_t* block) { #if (MI_STAT < 2) MI_UNUSED(block); #endif mi_heap_t* const heap = mi_heap_get_default(); const size_t bsize = mi_page_usable_block_size(page); #if (MI_STAT>1) const size_t usize = mi_page_usable_size_of(page, block); mi_heap_stat_decrease(heap, malloc, usize); #endif if (bsize <= MI_MEDIUM_OBJ_SIZE_MAX) { mi_heap_stat_decrease(heap, normal, bsize); #if (MI_STAT > 1) mi_heap_stat_decrease(heap, normal_bins[_mi_bin(bsize)], 1); #endif } else if (bsize <= MI_LARGE_OBJ_SIZE_MAX) { mi_heap_stat_decrease(heap, large, bsize); } else { mi_heap_stat_decrease(heap, huge, bsize); } } #else static void mi_stat_free(const mi_page_t* page, const mi_block_t* block) { MI_UNUSED(page); MI_UNUSED(block); } #endif #if MI_HUGE_PAGE_ABANDON #if (MI_STAT>0) // maintain stats for huge objects static void mi_stat_huge_free(const mi_page_t* page) { mi_heap_t* const heap = mi_heap_get_default(); const size_t bsize = mi_page_block_size(page); // to match stats in `page.c:mi_page_huge_alloc` if (bsize <= MI_LARGE_OBJ_SIZE_MAX) { mi_heap_stat_decrease(heap, large, bsize); } else { mi_heap_stat_decrease(heap, huge, bsize); } } #else static void mi_stat_huge_free(const 
mi_page_t* page) { MI_UNUSED(page); } #endif #endif // ------------------------------------------------------ // Free // ------------------------------------------------------ // multi-threaded free (or free in huge block if compiled with MI_HUGE_PAGE_ABANDON) static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* block) { // The padding check may access the non-thread-owned page for the key values. // that is safe as these are constant and the page won't be freed (as the block is not freed yet). mi_check_padding(page, block); _mi_padding_shrink(page, block, sizeof(mi_block_t)); // for small size, ensure we can fit the delayed thread pointers without triggering overflow detection // huge page segments are always abandoned and can be freed immediately mi_segment_t* segment = _mi_page_segment(page); if (segment->kind == MI_SEGMENT_HUGE) { #if MI_HUGE_PAGE_ABANDON // huge page segments are always abandoned and can be freed immediately mi_stat_huge_free(page); _mi_segment_huge_page_free(segment, page, block); return; #else // huge pages are special as they occupy the entire segment // as these are large we reset the memory occupied by the page so it is available to other threads // (as the owning thread needs to actually free the memory later). _mi_segment_huge_page_reset(segment, page, block); #endif } #if (MI_DEBUG>0) && !MI_TRACK_ENABLED && !MI_TSAN // note: when tracking, cannot use mi_usable_size with multi-threading if (segment->kind != MI_SEGMENT_HUGE) { // not for huge segments as we just reset the content memset(block, MI_DEBUG_FREED, mi_usable_size(block)); } #endif // Try to put the block on either the page-local thread free list, or the heap delayed free list. 
mi_thread_free_t tfreex; bool use_delayed; mi_thread_free_t tfree = mi_atomic_load_relaxed(&page->xthread_free); do { use_delayed = (mi_tf_delayed(tfree) == MI_USE_DELAYED_FREE); if mi_unlikely(use_delayed) { // unlikely: this only happens on the first concurrent free in a page that is in the full list tfreex = mi_tf_set_delayed(tfree,MI_DELAYED_FREEING); } else { // usual: directly add to page thread_free list mi_block_set_next(page, block, mi_tf_block(tfree)); tfreex = mi_tf_set_block(tfree,block); } } while (!mi_atomic_cas_weak_release(&page->xthread_free, &tfree, tfreex)); if mi_unlikely(use_delayed) { // racy read on `heap`, but ok because MI_DELAYED_FREEING is set (see `mi_heap_delete` and `mi_heap_collect_abandon`) mi_heap_t* const heap = (mi_heap_t*)(mi_atomic_load_acquire(&page->xheap)); //mi_page_heap(page); mi_assert_internal(heap != NULL); if (heap != NULL) { // add to the delayed free list of this heap. (do this atomically as the lock only protects heap memory validity) mi_block_t* dfree = mi_atomic_load_ptr_relaxed(mi_block_t, &heap->thread_delayed_free); do { mi_block_set_nextx(heap,block,dfree, heap->keys); } while (!mi_atomic_cas_ptr_weak_release(mi_block_t,&heap->thread_delayed_free, &dfree, block)); } // and reset the MI_DELAYED_FREEING flag tfree = mi_atomic_load_relaxed(&page->xthread_free); do { tfreex = tfree; mi_assert_internal(mi_tf_delayed(tfree) == MI_DELAYED_FREEING); tfreex = mi_tf_set_delayed(tfree,MI_NO_DELAYED_FREE); } while (!mi_atomic_cas_weak_release(&page->xthread_free, &tfree, tfreex)); } } // regular free static inline void _mi_free_block(mi_page_t* page, bool local, mi_block_t* block) { // and push it on the free list //const size_t bsize = mi_page_block_size(page); if mi_likely(local) { // owning thread can free a block directly if mi_unlikely(mi_check_is_double_free(page, block)) return; mi_check_padding(page, block); #if (MI_DEBUG>0) && !MI_TRACK_ENABLED && !MI_TSAN if (!mi_page_is_huge(page)) { // huge page content may be 
already decommitted memset(block, MI_DEBUG_FREED, mi_page_block_size(page)); } #endif mi_block_set_next(page, block, page->local_free); page->local_free = block; page->used--; if mi_unlikely(mi_page_all_free(page)) { _mi_page_retire(page); } else if mi_unlikely(mi_page_is_in_full(page)) { _mi_page_unfull(page); } } else { _mi_free_block_mt(page,block); } } // Adjust a block that was allocated aligned, to the actual start of the block in the page. mi_block_t* _mi_page_ptr_unalign(const mi_segment_t* segment, const mi_page_t* page, const void* p) { mi_assert_internal(page!=NULL && p!=NULL); const size_t diff = (uint8_t*)p - _mi_page_start(segment, page, NULL); const size_t adjust = (diff % mi_page_block_size(page)); return (mi_block_t*)((uintptr_t)p - adjust); } void mi_decl_noinline _mi_free_generic(const mi_segment_t* segment, mi_page_t* page, bool is_local, void* p) mi_attr_noexcept { mi_block_t* const block = (mi_page_has_aligned(page) ? _mi_page_ptr_unalign(segment, page, p) : (mi_block_t*)p); mi_stat_free(page, block); // stat_free may access the padding mi_track_free_size(block, mi_page_usable_size_of(page,block)); _mi_free_block(page, is_local, block); } // Get the segment data belonging to a pointer // This is just a single `and` in assembly but does further checks in debug mode // (and secure mode) if this was a valid pointer. 
static inline mi_segment_t* mi_checked_ptr_segment(const void* p, const char* msg) { MI_UNUSED(msg); mi_assert(p != NULL); #if (MI_DEBUG>0) if mi_unlikely(((uintptr_t)p & (MI_INTPTR_SIZE - 1)) != 0) { _mi_error_message(EINVAL, "%s: invalid (unaligned) pointer: %p\n", msg, p); return NULL; } #endif mi_segment_t* const segment = _mi_ptr_segment(p); mi_assert_internal(segment != NULL); #if (MI_DEBUG>0) if mi_unlikely(!mi_is_in_heap_region(p)) { #if (MI_INTPTR_SIZE == 8 && defined(__linux__)) if (((uintptr_t)p >> 40) != 0x7F) { // linux tends to align large blocks above 0x7F000000000 (issue #640) #else { #endif _mi_warning_message("%s: pointer might not point to a valid heap region: %p\n" "(this may still be a valid very large allocation (over 64MiB))\n", msg, p); if mi_likely(_mi_ptr_cookie(segment) == segment->cookie) { _mi_warning_message("(yes, the previous pointer %p was valid after all)\n", p); } } } #endif #if (MI_DEBUG>0 || MI_SECURE>=4) if mi_unlikely(_mi_ptr_cookie(segment) != segment->cookie) { _mi_error_message(EINVAL, "%s: pointer does not point to a valid heap space: %p\n", msg, p); return NULL; } #endif return segment; } // Free a block // fast path written carefully to prevent spilling on the stack void mi_free(void* p) mi_attr_noexcept { if mi_unlikely(p == NULL) return; mi_segment_t* const segment = mi_checked_ptr_segment(p,"mi_free"); const bool is_local= (_mi_prim_thread_id() == mi_atomic_load_relaxed(&segment->thread_id)); mi_page_t* const page = _mi_segment_page_of(segment, p); if mi_likely(is_local) { // thread-local free? 
if mi_likely(page->flags.full_aligned == 0) // and it is not a full page (full pages need to move from the full bin), nor has aligned blocks (aligned blocks need to be unaligned) { mi_block_t* const block = (mi_block_t*)p; if mi_unlikely(mi_check_is_double_free(page, block)) return; mi_check_padding(page, block); mi_stat_free(page, block); #if (MI_DEBUG>0) && !MI_TRACK_ENABLED && !MI_TSAN memset(block, MI_DEBUG_FREED, mi_page_block_size(page)); #endif mi_track_free_size(p, mi_page_usable_size_of(page,block)); // faster then mi_usable_size as we already know the page and that p is unaligned mi_block_set_next(page, block, page->local_free); page->local_free = block; if mi_unlikely(--page->used == 0) { // using this expression generates better code than: page->used--; if (mi_page_all_free(page)) _mi_page_retire(page); } } else { // page is full or contains (inner) aligned blocks; use generic path _mi_free_generic(segment, page, true, p); } } else { // not thread-local; use generic path _mi_free_generic(segment, page, false, p); } } // return true if successful bool _mi_free_delayed_block(mi_block_t* block) { // get segment and page const mi_segment_t* const segment = _mi_ptr_segment(block); mi_assert_internal(_mi_ptr_cookie(segment) == segment->cookie); mi_assert_internal(_mi_thread_id() == segment->thread_id); mi_page_t* const page = _mi_segment_page_of(segment, block); // Clear the no-delayed flag so delayed freeing is used again for this page. // This must be done before collecting the free lists on this page -- otherwise // some blocks may end up in the page `thread_free` list with no blocks in the // heap `thread_delayed_free` list which may cause the page to be never freed! 
// (it would only be freed if we happen to scan it in `mi_page_queue_find_free_ex`) if (!_mi_page_try_use_delayed_free(page, MI_USE_DELAYED_FREE, false /* dont overwrite never delayed */)) { return false; } // collect all other non-local frees to ensure up-to-date `used` count _mi_page_free_collect(page, false); // and free the block (possibly freeing the page as well since used is updated) _mi_free_block(page, true, block); return true; } // Bytes available in a block mi_decl_noinline static size_t mi_page_usable_aligned_size_of(const mi_segment_t* segment, const mi_page_t* page, const void* p) mi_attr_noexcept { const mi_block_t* block = _mi_page_ptr_unalign(segment, page, p); const size_t size = mi_page_usable_size_of(page, block); const ptrdiff_t adjust = (uint8_t*)p - (uint8_t*)block; mi_assert_internal(adjust >= 0 && (size_t)adjust <= size); return (size - adjust); } static inline size_t _mi_usable_size(const void* p, const char* msg) mi_attr_noexcept { if (p == NULL) return 0; const mi_segment_t* const segment = mi_checked_ptr_segment(p, msg); const mi_page_t* const page = _mi_segment_page_of(segment, p); if mi_likely(!mi_page_has_aligned(page)) { const mi_block_t* block = (const mi_block_t*)p; return mi_page_usable_size_of(page, block); } else { // split out to separate routine for improved code generation return mi_page_usable_aligned_size_of(segment, page, p); } } mi_decl_nodiscard size_t mi_usable_size(const void* p) mi_attr_noexcept { return _mi_usable_size(p, "mi_usable_size"); } // ------------------------------------------------------ // Allocation extensions // ------------------------------------------------------ void mi_free_size(void* p, size_t size) mi_attr_noexcept { MI_UNUSED_RELEASE(size); mi_assert(p == NULL || size <= _mi_usable_size(p,"mi_free_size")); mi_free(p); } void mi_free_size_aligned(void* p, size_t size, size_t alignment) mi_attr_noexcept { MI_UNUSED_RELEASE(alignment); mi_assert(((uintptr_t)p % alignment) == 0); 
mi_free_size(p,size); } void mi_free_aligned(void* p, size_t alignment) mi_attr_noexcept { MI_UNUSED_RELEASE(alignment); mi_assert(((uintptr_t)p % alignment) == 0); mi_free(p); } mi_decl_nodiscard extern inline mi_decl_restrict void* mi_heap_calloc(mi_heap_t* heap, size_t count, size_t size) mi_attr_noexcept { size_t total; if (mi_count_size_overflow(count,size,&total)) return NULL; return mi_heap_zalloc(heap,total); } mi_decl_nodiscard mi_decl_restrict void* mi_calloc(size_t count, size_t size) mi_attr_noexcept { return mi_heap_calloc(mi_prim_get_default_heap(),count,size); } // Uninitialized `calloc` mi_decl_nodiscard extern mi_decl_restrict void* mi_heap_mallocn(mi_heap_t* heap, size_t count, size_t size) mi_attr_noexcept { size_t total; if (mi_count_size_overflow(count, size, &total)) return NULL; return mi_heap_malloc(heap, total); } mi_decl_nodiscard mi_decl_restrict void* mi_mallocn(size_t count, size_t size) mi_attr_noexcept { return mi_heap_mallocn(mi_prim_get_default_heap(),count,size); } // Expand (or shrink) in place (or fail) void* mi_expand(void* p, size_t newsize) mi_attr_noexcept { #if MI_PADDING // we do not shrink/expand with padding enabled MI_UNUSED(p); MI_UNUSED(newsize); return NULL; #else if (p == NULL) return NULL; const size_t size = _mi_usable_size(p,"mi_expand"); if (newsize > size) return NULL; return p; // it fits #endif } void* _mi_heap_realloc_zero(mi_heap_t* heap, void* p, size_t newsize, bool zero) mi_attr_noexcept { // if p == NULL then behave as malloc. // else if size == 0 then reallocate to a zero-sized block (and don't return NULL, just as mi_malloc(0)). // (this means that returning NULL always indicates an error, and `p` will not have been freed in that case.) 
const size_t size = _mi_usable_size(p,"mi_realloc"); // also works if p == NULL (with size 0) if mi_unlikely(newsize <= size && newsize >= (size / 2) && newsize > 0) { // note: newsize must be > 0 or otherwise we return NULL for realloc(NULL,0) mi_assert_internal(p!=NULL); // todo: do not track as the usable size is still the same in the free; adjust potential padding? // mi_track_resize(p,size,newsize) // if (newsize < size) { mi_track_mem_noaccess((uint8_t*)p + newsize, size - newsize); } return p; // reallocation still fits and not more than 50% waste } void* newp = mi_heap_malloc(heap,newsize); if mi_likely(newp != NULL) { if (zero && newsize > size) { // also set last word in the previous allocation to zero to ensure any padding is zero-initialized const size_t start = (size >= sizeof(intptr_t) ? size - sizeof(intptr_t) : 0); _mi_memzero((uint8_t*)newp + start, newsize - start); } else if (newsize == 0) { ((uint8_t*)newp)[0] = 0; // work around for applications that expect zero-reallocation to be zero initialized (issue #725) } if mi_likely(p != NULL) { const size_t copysize = (newsize > size ? size : newsize); mi_track_mem_defined(p,copysize); // _mi_useable_size may be too large for byte precise memory tracking.. 
_mi_memcpy(newp, p, copysize); mi_free(p); // only free the original pointer if successful } } return newp; } mi_decl_nodiscard void* mi_heap_realloc(mi_heap_t* heap, void* p, size_t newsize) mi_attr_noexcept { return _mi_heap_realloc_zero(heap, p, newsize, false); } mi_decl_nodiscard void* mi_heap_reallocn(mi_heap_t* heap, void* p, size_t count, size_t size) mi_attr_noexcept { size_t total; if (mi_count_size_overflow(count, size, &total)) return NULL; return mi_heap_realloc(heap, p, total); } // Reallocate but free `p` on errors mi_decl_nodiscard void* mi_heap_reallocf(mi_heap_t* heap, void* p, size_t newsize) mi_attr_noexcept { void* newp = mi_heap_realloc(heap, p, newsize); if (newp==NULL && p!=NULL) mi_free(p); return newp; } mi_decl_nodiscard void* mi_heap_rezalloc(mi_heap_t* heap, void* p, size_t newsize) mi_attr_noexcept { return _mi_heap_realloc_zero(heap, p, newsize, true); } mi_decl_nodiscard void* mi_heap_recalloc(mi_heap_t* heap, void* p, size_t count, size_t size) mi_attr_noexcept { size_t total; if (mi_count_size_overflow(count, size, &total)) return NULL; return mi_heap_rezalloc(heap, p, total); } mi_decl_nodiscard void* mi_realloc(void* p, size_t newsize) mi_attr_noexcept { return mi_heap_realloc(mi_prim_get_default_heap(),p,newsize); } mi_decl_nodiscard void* mi_reallocn(void* p, size_t count, size_t size) mi_attr_noexcept { return mi_heap_reallocn(mi_prim_get_default_heap(),p,count,size); } // Reallocate but free `p` on errors mi_decl_nodiscard void* mi_reallocf(void* p, size_t newsize) mi_attr_noexcept { return mi_heap_reallocf(mi_prim_get_default_heap(),p,newsize); } mi_decl_nodiscard void* mi_rezalloc(void* p, size_t newsize) mi_attr_noexcept { return mi_heap_rezalloc(mi_prim_get_default_heap(), p, newsize); } mi_decl_nodiscard void* mi_recalloc(void* p, size_t count, size_t size) mi_attr_noexcept { return mi_heap_recalloc(mi_prim_get_default_heap(), p, count, size); } // ------------------------------------------------------ // strdup, 
strndup, and realpath // ------------------------------------------------------ // `strdup` using mi_malloc mi_decl_nodiscard mi_decl_restrict char* mi_heap_strdup(mi_heap_t* heap, const char* s) mi_attr_noexcept { if (s == NULL) return NULL; size_t n = strlen(s); char* t = (char*)mi_heap_malloc(heap,n+1); if (t == NULL) return NULL; _mi_memcpy(t, s, n); t[n] = 0; return t; } mi_decl_nodiscard mi_decl_restrict char* mi_strdup(const char* s) mi_attr_noexcept { return mi_heap_strdup(mi_prim_get_default_heap(), s); } // `strndup` using mi_malloc mi_decl_nodiscard mi_decl_restrict char* mi_heap_strndup(mi_heap_t* heap, const char* s, size_t n) mi_attr_noexcept { if (s == NULL) return NULL; const char* end = (const char*)memchr(s, 0, n); // find end of string in the first `n` characters (returns NULL if not found) const size_t m = (end != NULL ? (size_t)(end - s) : n); // `m` is the minimum of `n` or the end-of-string mi_assert_internal(m <= n); char* t = (char*)mi_heap_malloc(heap, m+1); if (t == NULL) return NULL; _mi_memcpy(t, s, m); t[m] = 0; return t; } mi_decl_nodiscard mi_decl_restrict char* mi_strndup(const char* s, size_t n) mi_attr_noexcept { return mi_heap_strndup(mi_prim_get_default_heap(),s,n); } #ifndef __wasi__ // `realpath` using mi_malloc #ifdef _WIN32 #ifndef PATH_MAX #define PATH_MAX MAX_PATH #endif #include mi_decl_nodiscard mi_decl_restrict char* mi_heap_realpath(mi_heap_t* heap, const char* fname, char* resolved_name) mi_attr_noexcept { // todo: use GetFullPathNameW to allow longer file names char buf[PATH_MAX]; DWORD res = GetFullPathNameA(fname, PATH_MAX, (resolved_name == NULL ? 
buf : resolved_name), NULL); if (res == 0) { errno = GetLastError(); return NULL; } else if (res > PATH_MAX) { errno = EINVAL; return NULL; } else if (resolved_name != NULL) { return resolved_name; } else { return mi_heap_strndup(heap, buf, PATH_MAX); } } #else /* #include // pathconf static size_t mi_path_max(void) { static size_t path_max = 0; if (path_max <= 0) { long m = pathconf("/",_PC_PATH_MAX); if (m <= 0) path_max = 4096; // guess else if (m < 256) path_max = 256; // at least 256 else path_max = m; } return path_max; } */ char* mi_heap_realpath(mi_heap_t* heap, const char* fname, char* resolved_name) mi_attr_noexcept { if (resolved_name != NULL) { return realpath(fname,resolved_name); } else { char* rname = realpath(fname, NULL); if (rname == NULL) return NULL; char* result = mi_heap_strdup(heap, rname); free(rname); // use regular free! (which may be redirected to our free but that's ok) return result; } /* const size_t n = mi_path_max(); char* buf = (char*)mi_malloc(n+1); if (buf == NULL) { errno = ENOMEM; return NULL; } char* rname = realpath(fname,buf); char* result = mi_heap_strndup(heap,rname,n); // ok if `rname==NULL` mi_free(buf); return result; } */ } #endif mi_decl_nodiscard mi_decl_restrict char* mi_realpath(const char* fname, char* resolved_name) mi_attr_noexcept { return mi_heap_realpath(mi_prim_get_default_heap(),fname,resolved_name); } #endif /*------------------------------------------------------- C++ new and new_aligned The standard requires calling into `get_new_handler` and throwing the bad_alloc exception on failure. If we compile with a C++ compiler we can implement this precisely. If we use a C compiler we cannot throw a `bad_alloc` exception but we call `exit` instead (i.e. not returning). 
-------------------------------------------------------*/ #ifdef __cplusplus #include static bool mi_try_new_handler(bool nothrow) { #if defined(_MSC_VER) || (__cplusplus >= 201103L) std::new_handler h = std::get_new_handler(); #else std::new_handler h = std::set_new_handler(); std::set_new_handler(h); #endif if (h==NULL) { _mi_error_message(ENOMEM, "out of memory in 'new'"); if (!nothrow) { throw std::bad_alloc(); } return false; } else { h(); return true; } } #else typedef void (*std_new_handler_t)(void); #if (defined(__GNUC__) || (defined(__clang__) && !defined(_MSC_VER))) // exclude clang-cl, see issue #631 std_new_handler_t __attribute__((weak)) _ZSt15get_new_handlerv(void) { return NULL; } static std_new_handler_t mi_get_new_handler(void) { return _ZSt15get_new_handlerv(); } #else // note: on windows we could dynamically link to `?get_new_handler@std@@YAP6AXXZXZ`. static std_new_handler_t mi_get_new_handler() { return NULL; } #endif static bool mi_try_new_handler(bool nothrow) { std_new_handler_t h = mi_get_new_handler(); if (h==NULL) { _mi_error_message(ENOMEM, "out of memory in 'new'"); if (!nothrow) { abort(); // cannot throw in plain C, use abort } return false; } else { h(); return true; } } #endif mi_decl_export mi_decl_noinline void* mi_heap_try_new(mi_heap_t* heap, size_t size, bool nothrow ) { void* p = NULL; while(p == NULL && mi_try_new_handler(nothrow)) { p = mi_heap_malloc(heap,size); } return p; } static mi_decl_noinline void* mi_try_new(size_t size, bool nothrow) { return mi_heap_try_new(mi_prim_get_default_heap(), size, nothrow); } mi_decl_nodiscard mi_decl_restrict void* mi_heap_alloc_new(mi_heap_t* heap, size_t size) { void* p = mi_heap_malloc(heap,size); if mi_unlikely(p == NULL) return mi_heap_try_new(heap, size, false); return p; } mi_decl_nodiscard mi_decl_restrict void* mi_new(size_t size) { return mi_heap_alloc_new(mi_prim_get_default_heap(), size); } mi_decl_nodiscard mi_decl_restrict void* mi_heap_alloc_new_n(mi_heap_t* heap, size_t 
count, size_t size) { size_t total; if mi_unlikely(mi_count_size_overflow(count, size, &total)) { mi_try_new_handler(false); // on overflow we invoke the try_new_handler once to potentially throw std::bad_alloc return NULL; } else { return mi_heap_alloc_new(heap,total); } } mi_decl_nodiscard mi_decl_restrict void* mi_new_n(size_t count, size_t size) { return mi_heap_alloc_new_n(mi_prim_get_default_heap(), size, count); } mi_decl_nodiscard mi_decl_restrict void* mi_new_nothrow(size_t size) mi_attr_noexcept { void* p = mi_malloc(size); if mi_unlikely(p == NULL) return mi_try_new(size, true); return p; } mi_decl_nodiscard mi_decl_restrict void* mi_new_aligned(size_t size, size_t alignment) { void* p; do { p = mi_malloc_aligned(size, alignment); } while(p == NULL && mi_try_new_handler(false)); return p; } mi_decl_nodiscard mi_decl_restrict void* mi_new_aligned_nothrow(size_t size, size_t alignment) mi_attr_noexcept { void* p; do { p = mi_malloc_aligned(size, alignment); } while(p == NULL && mi_try_new_handler(true)); return p; } mi_decl_nodiscard void* mi_new_realloc(void* p, size_t newsize) { void* q; do { q = mi_realloc(p, newsize); } while (q == NULL && mi_try_new_handler(false)); return q; } mi_decl_nodiscard void* mi_new_reallocn(void* p, size_t newcount, size_t size) { size_t total; if mi_unlikely(mi_count_size_overflow(newcount, size, &total)) { mi_try_new_handler(false); // on overflow we invoke the try_new_handler once to potentially throw std::bad_alloc return NULL; } else { return mi_new_realloc(p, total); } } // ------------------------------------------------------ // ensure explicit external inline definitions are emitted! 
// ------------------------------------------------------ #ifdef __cplusplus void* _mi_externs[] = { (void*)&_mi_page_malloc, (void*)&_mi_heap_malloc_zero, (void*)&_mi_heap_malloc_zero_ex, (void*)&mi_malloc, (void*)&mi_malloc_small, (void*)&mi_zalloc_small, (void*)&mi_heap_malloc, (void*)&mi_heap_zalloc, (void*)&mi_heap_malloc_small, // (void*)&mi_heap_alloc_new, // (void*)&mi_heap_alloc_new_n }; #endif luametatex-2.10.08/source/libraries/mimalloc/src/arena.c000066400000000000000000001161321442250314700231330ustar00rootroot00000000000000/* ---------------------------------------------------------------------------- Copyright (c) 2019-2023, Microsoft Research, Daan Leijen This is free software; you can redistribute it and/or modify it under the terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. -----------------------------------------------------------------------------*/ /* ---------------------------------------------------------------------------- "Arenas" are fixed area's of OS memory from which we can allocate large blocks (>= MI_ARENA_MIN_BLOCK_SIZE, 4MiB). In contrast to the rest of mimalloc, the arenas are shared between threads and need to be accessed using atomic operations. Arenas are used to for huge OS page (1GiB) reservations or for reserving OS memory upfront which can be improve performance or is sometimes needed on embedded devices. We can also employ this with WASI or `sbrk` systems to reserve large arenas upfront and be able to reuse the memory more effectively. The arena allocation needs to be thread safe and we use an atomic bitmap to allocate. 
-----------------------------------------------------------------------------*/ #include "mimalloc.h" #include "mimalloc/internal.h" #include "mimalloc/atomic.h" #include // memset #include // ENOMEM #include "bitmap.h" // atomic bitmap /* ----------------------------------------------------------- Arena allocation ----------------------------------------------------------- */ // Block info: bit 0 contains the `in_use` bit, the upper bits the // size in count of arena blocks. typedef uintptr_t mi_block_info_t; #define MI_ARENA_BLOCK_SIZE (MI_SEGMENT_SIZE) // 64MiB (must be at least MI_SEGMENT_ALIGN) #define MI_ARENA_MIN_OBJ_SIZE (MI_ARENA_BLOCK_SIZE/2) // 32MiB #define MI_MAX_ARENAS (112) // not more than 126 (since we use 7 bits in the memid and an arena index + 1) // A memory arena descriptor typedef struct mi_arena_s { mi_arena_id_t id; // arena id; 0 for non-specific mi_memid_t memid; // memid of the memory area _Atomic(uint8_t*) start; // the start of the memory area size_t block_count; // size of the area in arena blocks (of `MI_ARENA_BLOCK_SIZE`) size_t field_count; // number of bitmap fields (where `field_count * MI_BITMAP_FIELD_BITS >= block_count`) size_t meta_size; // size of the arena structure itself (including its bitmaps) mi_memid_t meta_memid; // memid of the arena structure itself (OS or static allocation) int numa_node; // associated NUMA node bool exclusive; // only allow allocations if specifically for this arena bool is_large; // memory area consists of large- or huge OS pages (always committed) _Atomic(size_t) search_idx; // optimization to start the search for free blocks _Atomic(mi_msecs_t) purge_expire; // expiration time when blocks should be decommitted from `blocks_decommit`. mi_bitmap_field_t* blocks_dirty; // are the blocks potentially non-zero? mi_bitmap_field_t* blocks_committed; // are the blocks committed? (can be NULL for memory that cannot be decommitted) mi_bitmap_field_t* blocks_purge; // blocks that can be (reset) decommitted. 
(can be NULL for memory that cannot be (reset) decommitted) mi_bitmap_field_t blocks_inuse[1]; // in-place bitmap of in-use blocks (of size `field_count`) } mi_arena_t; // The available arenas static mi_decl_cache_align _Atomic(mi_arena_t*) mi_arenas[MI_MAX_ARENAS]; static mi_decl_cache_align _Atomic(size_t) mi_arena_count; // = 0 //static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_large, int numa_node, bool exclusive, mi_memid_t memid, mi_arena_id_t* arena_id) mi_attr_noexcept; /* ----------------------------------------------------------- Arena id's id = arena_index + 1 ----------------------------------------------------------- */ static size_t mi_arena_id_index(mi_arena_id_t id) { return (size_t)(id <= 0 ? MI_MAX_ARENAS : id - 1); } static mi_arena_id_t mi_arena_id_create(size_t arena_index) { mi_assert_internal(arena_index < MI_MAX_ARENAS); return (int)arena_index + 1; } mi_arena_id_t _mi_arena_id_none(void) { return 0; } static bool mi_arena_id_is_suitable(mi_arena_id_t arena_id, bool arena_is_exclusive, mi_arena_id_t req_arena_id) { return ((!arena_is_exclusive && req_arena_id == _mi_arena_id_none()) || (arena_id == req_arena_id)); } bool _mi_arena_memid_is_suitable(mi_memid_t memid, mi_arena_id_t request_arena_id) { if (memid.memkind == MI_MEM_ARENA) { return mi_arena_id_is_suitable(memid.mem.arena.id, memid.mem.arena.is_exclusive, request_arena_id); } else { return mi_arena_id_is_suitable(0, false, request_arena_id); } } bool _mi_arena_memid_is_os_allocated(mi_memid_t memid) { return (memid.memkind == MI_MEM_OS); } /* ----------------------------------------------------------- Arena allocations get a (currently) 16-bit memory id where the lower 8 bits are the arena id, and the upper bits the block index. 
----------------------------------------------------------- */

// Number of arena blocks needed to hold `size` bytes (rounded up).
static size_t mi_block_count_of_size(size_t size) {
  return _mi_divide_up(size, MI_ARENA_BLOCK_SIZE);
}

// Byte size of `bcount` arena blocks.
static size_t mi_arena_block_size(size_t bcount) {
  return (bcount * MI_ARENA_BLOCK_SIZE);
}

// Total byte size of an arena's managed area.
static size_t mi_arena_size(mi_arena_t* arena) {
  return mi_arena_block_size(arena->block_count);
}

// Build a memid that records which arena and which block range an allocation came from.
static mi_memid_t mi_memid_create_arena(mi_arena_id_t id, bool is_exclusive, mi_bitmap_index_t bitmap_index) {
  mi_memid_t memid = _mi_memid_create(MI_MEM_ARENA);
  memid.mem.arena.id = id;
  memid.mem.arena.block_index = bitmap_index;
  memid.mem.arena.is_exclusive = is_exclusive;
  return memid;
}

// Decode a memid back into arena index and bitmap index; returns the exclusive flag.
static bool mi_arena_memid_indices(mi_memid_t memid, size_t* arena_index, mi_bitmap_index_t* bitmap_index) {
  mi_assert_internal(memid.memkind == MI_MEM_ARENA);
  *arena_index = mi_arena_id_index(memid.mem.arena.id);
  *bitmap_index = memid.mem.arena.block_index;
  return memid.mem.arena.is_exclusive;
}

/* -----------------------------------------------------------
  Special static area for mimalloc internal structures
  to avoid OS calls (for example, for the arena metadata)
----------------------------------------------------------- */

#define MI_ARENA_STATIC_MAX  (MI_INTPTR_SIZE*MI_KiB)  // 8 KiB on 64-bit

static uint8_t mi_arena_static[MI_ARENA_STATIC_MAX];
static _Atomic(size_t) mi_arena_static_top;

// Bump-allocate zeroed memory from the static area; returns NULL when it does
// not fit so the caller can fall back to the OS. Thread safe via the atomic top.
static void* mi_arena_static_zalloc(size_t size, size_t alignment, mi_memid_t* memid) {
  *memid = _mi_memid_none();
  if (size == 0 || size > MI_ARENA_STATIC_MAX) return NULL;
  if ((mi_atomic_load_relaxed(&mi_arena_static_top) + size) > MI_ARENA_STATIC_MAX) return NULL;

  // try to claim space
  if (alignment == 0) { alignment = 1; }
  const size_t oversize = size + alignment - 1;   // over-allocate so any alignment can be satisfied inside the claim
  if (oversize > MI_ARENA_STATIC_MAX) return NULL;
  const size_t oldtop = mi_atomic_add_acq_rel(&mi_arena_static_top, oversize);
  size_t top = oldtop + oversize;
  if (top > MI_ARENA_STATIC_MAX) {
    // try to roll back, ok if this fails
    mi_atomic_cas_strong_acq_rel(&mi_arena_static_top, &top, oldtop);
    return NULL;
  }

  // success
  *memid = _mi_memid_create(MI_MEM_STATIC);
  const size_t start = _mi_align_up(oldtop, alignment);
  uint8_t* const p = &mi_arena_static[start];
  _mi_memzero(p, size);
  return p;
}

// Allocate zeroed metadata: first from the static area, then from the OS.
static void* mi_arena_meta_zalloc(size_t size, mi_memid_t* memid, mi_stats_t* stats) {
  *memid = _mi_memid_none();

  // try static
  void* p = mi_arena_static_zalloc(size, MI_ALIGNMENT_MAX, memid);
  if (p != NULL) return p;

  // or fall back to the OS
  return _mi_os_alloc(size, memid, stats);
}

// Free metadata; static-area allocations are never reclaimed individually.
static void mi_arena_meta_free(void* p, mi_memid_t memid, size_t size, mi_stats_t* stats) {
  if (mi_memkind_is_os(memid.memkind)) {
    _mi_os_free(p, size, memid, stats);
  }
  else {
    mi_assert(memid.memkind == MI_MEM_STATIC);
  }
}

// Address of the block at bitmap index `bindex` within the arena.
static void* mi_arena_block_start(mi_arena_t* arena, mi_bitmap_index_t bindex) {
  return (arena->start + mi_arena_block_size(mi_bitmap_index_bit(bindex)));
}

/* -----------------------------------------------------------
  Thread safe allocation in an arena
----------------------------------------------------------- */

// claim the `blocks_inuse` bits
static bool mi_arena_try_claim(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* bitmap_idx)
{
  size_t idx = 0; // mi_atomic_load_relaxed(&arena->search_idx); // start from last search; ok to be relaxed as the exact start does not matter
  if (_mi_bitmap_try_find_from_claim_across(arena->blocks_inuse, arena->field_count, idx, blocks, bitmap_idx)) {
    mi_atomic_store_relaxed(&arena->search_idx, mi_bitmap_index_field(*bitmap_idx));  // start search from found location next time around
    return true;
  };
  return false;
}

/* -----------------------------------------------------------
  Arena Allocation
----------------------------------------------------------- */

// Claim `needed_bcount` blocks in `arena`, cancel any pending purge on them,
// and establish the commit state the caller requested. Returns the block
// address, or NULL when the claim fails.
static mi_decl_noinline void* mi_arena_try_alloc_at(mi_arena_t* arena, size_t arena_index, size_t needed_bcount, bool commit, mi_memid_t* memid, mi_os_tld_t* tld)
{
  MI_UNUSED(arena_index);
  mi_assert_internal(mi_arena_id_index(arena->id) == arena_index);

  mi_bitmap_index_t bitmap_index;
  if (!mi_arena_try_claim(arena, needed_bcount, &bitmap_index)) return NULL;

  // claimed it!
  void* p = mi_arena_block_start(arena, bitmap_index);
  *memid = mi_memid_create_arena(arena->id, arena->exclusive, bitmap_index);
  memid->is_pinned = arena->memid.is_pinned;

  // none of the claimed blocks should be scheduled for a decommit
  if (arena->blocks_purge != NULL) {
    // this is thread safe as a potential purge only decommits parts that are not yet claimed as used (in `blocks_inuse`).
    _mi_bitmap_unclaim_across(arena->blocks_purge, arena->field_count, needed_bcount, bitmap_index);
  }

  // set the dirty bits (todo: no need for an atomic op here?)
  if (arena->memid.initially_zero && arena->blocks_dirty != NULL) {
    memid->initially_zero = _mi_bitmap_claim_across(arena->blocks_dirty, arena->field_count, needed_bcount, bitmap_index, NULL);
  }

  // set commit state
  if (arena->blocks_committed == NULL) {
    // always committed
    memid->initially_committed = true;
  }
  else if (commit) {
    // commit requested, but the range may not be committed as a whole: ensure it is committed now
    memid->initially_committed = true;
    bool any_uncommitted;
    _mi_bitmap_claim_across(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index, &any_uncommitted);
    if (any_uncommitted) {
      bool commit_zero = false;
      if (!_mi_os_commit(p, mi_arena_block_size(needed_bcount), &commit_zero, tld->stats)) {
        memid->initially_committed = false;
      }
      else {
        if (commit_zero) { memid->initially_zero = true; }
      }
    }
  }
  else {
    // no need to commit, but check if already fully committed
    memid->initially_committed = _mi_bitmap_is_claimed_across(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index);
  }

  return p;
}

// allocate in a specific arena
static void* mi_arena_try_alloc_at_id(mi_arena_id_t arena_id, bool match_numa_node, int numa_node, size_t size, size_t alignment, bool commit, bool allow_large, mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld )
{
  MI_UNUSED_RELEASE(alignment);
  mi_assert_internal(alignment <= MI_SEGMENT_ALIGN);
  const size_t bcount = mi_block_count_of_size(size);
  const size_t arena_index = mi_arena_id_index(arena_id);
  mi_assert_internal(arena_index < mi_atomic_load_relaxed(&mi_arena_count));
  mi_assert_internal(size <= mi_arena_block_size(bcount));

  // Check arena suitability
  mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[arena_index]);
  if (arena == NULL) return NULL;
  if (!allow_large && arena->is_large) return NULL;
  if (!mi_arena_id_is_suitable(arena->id, arena->exclusive, req_arena_id)) return NULL;
  if (req_arena_id == _mi_arena_id_none()) {
    // if not specific, check numa affinity
    const bool numa_suitable = (numa_node < 0 || arena->numa_node < 0 || arena->numa_node == numa_node);
    if (match_numa_node) { if (!numa_suitable) return NULL; }
    else                 { if (numa_suitable) return NULL; }  // second pass: only try non-local arenas
  }

  // try to allocate
  void* p = mi_arena_try_alloc_at(arena, arena_index, bcount, commit, memid, tld);
  mi_assert_internal(p == NULL || _mi_is_aligned(p, alignment));
  return p;
}

// allocate from an arena with fallback to the OS
static mi_decl_noinline void* mi_arena_try_alloc(int numa_node, size_t size, size_t alignment, bool commit, bool allow_large, mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld )
{
  MI_UNUSED(alignment);
  mi_assert_internal(alignment <= MI_SEGMENT_ALIGN);
  const size_t max_arena = mi_atomic_load_relaxed(&mi_arena_count);
  if mi_likely(max_arena == 0) return NULL;

  if (req_arena_id != _mi_arena_id_none()) {
    // try a specific arena if requested
    if (mi_arena_id_index(req_arena_id) < max_arena) {
      void* p = mi_arena_try_alloc_at_id(req_arena_id, true, numa_node, size, alignment, commit, allow_large, req_arena_id, memid, tld);
      if (p != NULL) return p;
    }
  }
  else {
    // try numa affine allocation
    for (size_t i = 0; i < max_arena; i++) {
      void* p = mi_arena_try_alloc_at_id(mi_arena_id_create(i), true, numa_node, size, alignment, commit, allow_large, req_arena_id, memid, tld);
      if (p != NULL) return p;
    }

    // try from another numa node instead..
    if (numa_node >= 0) {  // if numa_node was < 0 (no specific affinity requested), all arena's have been tried already
      for (size_t i = 0; i < max_arena; i++) {
        void* p = mi_arena_try_alloc_at_id(mi_arena_id_create(i), false /* only proceed if not numa local */, numa_node, size, alignment, commit, allow_large, req_arena_id, memid, tld);
        if (p != NULL) return p;
      }
    }
  }
  return NULL;
}

// try to reserve a fresh arena space
static bool mi_arena_reserve(size_t req_size, bool allow_large, mi_arena_id_t req_arena_id, mi_arena_id_t *arena_id)
{
  if (_mi_preloading()) return false;  // use OS only while pre loading
  if (req_arena_id != _mi_arena_id_none()) return false;

  const size_t arena_count = mi_atomic_load_acquire(&mi_arena_count);
  if (arena_count > (MI_MAX_ARENAS - 4)) return false;  // keep a few slots in reserve

  size_t arena_reserve = mi_option_get_size(mi_option_arena_reserve);
  if (arena_reserve == 0) return false;

  if (!_mi_os_has_virtual_reserve()) {
    arena_reserve = arena_reserve/4;  // be conservative if virtual reserve is not supported (for some embedded systems for example)
  }
  arena_reserve = _mi_align_up(arena_reserve, MI_ARENA_BLOCK_SIZE);
  if (arena_count >= 8 && arena_count <= 128) {
    arena_reserve = ((size_t)1<<(arena_count/8)) * arena_reserve;  // scale up the arena sizes exponentially
  }
  if (arena_reserve < req_size) return false;  // should be able to at least handle the current allocation size

  // commit eagerly?
  bool arena_commit = false;
  if (mi_option_get(mi_option_arena_eager_commit) == 2)      { arena_commit = _mi_os_has_overcommit(); }
  else if (mi_option_get(mi_option_arena_eager_commit) == 1) { arena_commit = true; }

  return (mi_reserve_os_memory_ex(arena_reserve, arena_commit, allow_large, false /* exclusive */, arena_id) == 0);
}

// Main entry: allocate `size` bytes aligned to `alignment` (+`align_offset`),
// preferring an arena, possibly reserving a fresh arena, finally the OS.
void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset, bool commit, bool allow_large, mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld)
{
  mi_assert_internal(memid != NULL && tld != NULL);
  mi_assert_internal(size > 0);
  *memid = _mi_memid_none();

  const int numa_node = _mi_os_numa_node(tld);  // current numa node

  // try to allocate in an arena if the alignment is small enough and the object is not too small (as for heap meta data)
  if (size >= MI_ARENA_MIN_OBJ_SIZE && alignment <= MI_SEGMENT_ALIGN && align_offset == 0) {
    void* p = mi_arena_try_alloc(numa_node, size, alignment, commit, allow_large, req_arena_id, memid, tld);
    if (p != NULL) return p;

    // otherwise, try to first eagerly reserve a new arena
    if (req_arena_id == _mi_arena_id_none()) {
      mi_arena_id_t arena_id = 0;
      if (mi_arena_reserve(size, allow_large, req_arena_id, &arena_id)) {
        // and try allocate in there
        mi_assert_internal(req_arena_id == _mi_arena_id_none());
        p = mi_arena_try_alloc_at_id(arena_id, true, numa_node, size, alignment, commit, allow_large, req_arena_id, memid, tld);
        if (p != NULL) return p;
      }
    }
  }

  // if we cannot use OS allocation, return NULL
  if (mi_option_is_enabled(mi_option_limit_os_alloc) || req_arena_id != _mi_arena_id_none()) {
    errno = ENOMEM;
    return NULL;
  }

  // finally, fall back to the OS
  if (align_offset > 0) {
    return _mi_os_alloc_aligned_at_offset(size, alignment, align_offset, commit, allow_large, memid, tld->stats);
  }
  else {
    return _mi_os_alloc_aligned(size, alignment, commit, allow_large, memid, tld->stats);
  }
}

// Convenience wrapper: block-aligned arena allocation without offset.
void* _mi_arena_alloc(size_t size, bool commit, bool allow_large, mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld)
{
  return _mi_arena_alloc_aligned(size, MI_ARENA_BLOCK_SIZE, 0, commit, allow_large, req_arena_id, memid, tld);
}

// Public query: return start address (and optionally byte size) of an arena.
void* mi_arena_area(mi_arena_id_t arena_id, size_t* size) {
  if (size != NULL) *size = 0;
  size_t arena_index = mi_arena_id_index(arena_id);
  if (arena_index >= MI_MAX_ARENAS) return NULL;
  mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[arena_index]);
  if (arena == NULL) return NULL;
  if (size != NULL) { *size = mi_arena_block_size(arena->block_count); }
  return arena->start;
}

/* -----------------------------------------------------------
  Arena purge
----------------------------------------------------------- */

static long mi_arena_purge_delay(void) {
  // <0 = no purging allowed, 0=immediate purging, >0=milli-second delay
  return (mi_option_get(mi_option_purge_delay) * mi_option_get(mi_option_arena_purge_mult));
}

// reset or decommit in an arena and update the committed/decommit bitmaps
// assumes we own the area (i.e. blocks_in_use is claimed by us)
static void mi_arena_purge(mi_arena_t* arena, size_t bitmap_idx, size_t blocks, mi_stats_t* stats) {
  mi_assert_internal(arena->blocks_committed != NULL);
  mi_assert_internal(arena->blocks_purge != NULL);
  mi_assert_internal(!arena->memid.is_pinned);
  const size_t size = mi_arena_block_size(blocks);
  void* const p = mi_arena_block_start(arena, bitmap_idx);
  bool needs_recommit;
  if (_mi_bitmap_is_claimed_across(arena->blocks_committed, arena->field_count, blocks, bitmap_idx)) {
    // all blocks are committed, we can purge freely
    needs_recommit = _mi_os_purge(p, size, stats);
  }
  else {
    // some blocks are not committed -- this can happen when a partially committed block is freed
    // in `_mi_arena_free` and it is conservatively marked as uncommitted but still scheduled for a purge
    // we need to ensure we do not try to reset (as that may be invalid for uncommitted memory),
    // and also undo the decommit stats (as it was already adjusted)
    mi_assert_internal(mi_option_is_enabled(mi_option_purge_decommits));
    needs_recommit = _mi_os_purge_ex(p, size, false /* allow reset? */, stats);
    _mi_stat_increase(&stats->committed, size);
  }

  // clear the purged blocks
  _mi_bitmap_unclaim_across(arena->blocks_purge, arena->field_count, blocks, bitmap_idx);
  // update committed bitmap
  if (needs_recommit) {
    _mi_bitmap_unclaim_across(arena->blocks_committed, arena->field_count, blocks, bitmap_idx);
  }
}

// Schedule a purge. This is usually delayed to avoid repeated decommit/commit calls.
// Note: assumes we (still) own the area as we may purge immediately
static void mi_arena_schedule_purge(mi_arena_t* arena, size_t bitmap_idx, size_t blocks, mi_stats_t* stats) {
  mi_assert_internal(arena->blocks_purge != NULL);
  const long delay = mi_arena_purge_delay();
  if (delay < 0) return;  // is purging allowed at all?

  if (_mi_preloading() || delay == 0) {
    // decommit directly
    mi_arena_purge(arena, bitmap_idx, blocks, stats);
  }
  else {
    // schedule decommit
    mi_msecs_t expire = mi_atomic_loadi64_relaxed(&arena->purge_expire);
    if (expire != 0) {
      mi_atomic_addi64_acq_rel(&arena->purge_expire, delay/10);  // add smallish extra delay
    }
    else {
      mi_atomic_storei64_release(&arena->purge_expire, _mi_clock_now() + delay);
    }
    _mi_bitmap_claim_across(arena->blocks_purge, arena->field_count, blocks, bitmap_idx, NULL);
  }
}

// purge a range of blocks
// return true if the full range was purged.
// assumes we own the area (i.e.
// blocks_in_use is claimed by us)
static bool mi_arena_purge_range(mi_arena_t* arena, size_t idx, size_t startidx, size_t bitlen, size_t purge, mi_stats_t* stats) {
  const size_t endidx = startidx + bitlen;
  size_t bitidx = startidx;
  bool all_purged = false;
  while (bitidx < endidx) {
    // count consecutive ones in the purge mask
    size_t count = 0;
    while (bitidx + count < endidx && (purge & ((size_t)1 << (bitidx + count))) != 0) {
      count++;
    }
    if (count > 0) {
      // found range to be purged
      const mi_bitmap_index_t range_idx = mi_bitmap_index_create(idx, bitidx);
      mi_arena_purge(arena, range_idx, count, stats);
      if (count == bitlen) {
        all_purged = true;
      }
    }
    bitidx += (count+1);  // +1 to skip the zero bit (or end)
  }
  return all_purged;
}

// returns true if anything was purged
static bool mi_arena_try_purge(mi_arena_t* arena, mi_msecs_t now, bool force, mi_stats_t* stats)
{
  if (arena->memid.is_pinned || arena->blocks_purge == NULL) return false;
  mi_msecs_t expire = mi_atomic_loadi64_relaxed(&arena->purge_expire);
  if (expire == 0) return false;
  if (!force && expire > now) return false;

  // reset expire (if not already set concurrently)
  mi_atomic_casi64_strong_acq_rel(&arena->purge_expire, &expire, 0);

  // potential purges scheduled, walk through the bitmap
  bool any_purged = false;
  bool full_purge = true;
  for (size_t i = 0; i < arena->field_count; i++) {
    size_t purge = mi_atomic_load_relaxed(&arena->blocks_purge[i]);
    if (purge != 0) {
      size_t bitidx = 0;
      while (bitidx < MI_BITMAP_FIELD_BITS) {
        // find consecutive range of ones in the purge mask
        size_t bitlen = 0;
        while (bitidx + bitlen < MI_BITMAP_FIELD_BITS && (purge & ((size_t)1 << (bitidx + bitlen))) != 0) {
          bitlen++;
        }
        // try to claim the longest range of corresponding in_use bits
        // (shrink the range until the claim succeeds: only unclaimed-in-use blocks may be purged)
        const mi_bitmap_index_t bitmap_index = mi_bitmap_index_create(i, bitidx);
        while( bitlen > 0 ) {
          if (_mi_bitmap_try_claim(arena->blocks_inuse, arena->field_count, bitlen, bitmap_index)) {
            break;
          }
          bitlen--;
        }
        // actual claimed bits at `in_use`
        if (bitlen > 0) {
          // read purge again now that we have the in_use bits
          purge = mi_atomic_load_acquire(&arena->blocks_purge[i]);
          if (!mi_arena_purge_range(arena, i, bitidx, bitlen, purge, stats)) {
            full_purge = false;
          }
          any_purged = true;
          // release the claimed `in_use` bits again
          _mi_bitmap_unclaim(arena->blocks_inuse, arena->field_count, bitlen, bitmap_index);
        }
        bitidx += (bitlen+1);  // +1 to skip the zero (or end)
      } // while bitidx
    } // purge != 0
  }
  // if not fully purged, make sure to purge again in the future
  if (!full_purge) {
    const long delay = mi_arena_purge_delay();
    mi_msecs_t expected = 0;
    mi_atomic_casi64_strong_acq_rel(&arena->purge_expire,&expected,_mi_clock_now() + delay);
  }
  return any_purged;
}

// Walk all arenas and purge those whose delay expired (`force` ignores the delay);
// `visit_all` purges every arena instead of stopping after the first success.
static void mi_arenas_try_purge( bool force, bool visit_all, mi_stats_t* stats ) {
  if (_mi_preloading() || mi_arena_purge_delay() <= 0) return;  // nothing will be scheduled

  const size_t max_arena = mi_atomic_load_acquire(&mi_arena_count);
  if (max_arena == 0) return;

  // allow only one thread to purge at a time
  static mi_atomic_guard_t purge_guard;
  mi_atomic_guard(&purge_guard)
  {
    mi_msecs_t now = _mi_clock_now();
    size_t max_purge_count = (visit_all ? max_arena : 1);
    for (size_t i = 0; i < max_arena; i++) {
      mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[i]);
      if (arena != NULL) {
        if (mi_arena_try_purge(arena, now, force, stats)) {
          if (max_purge_count <= 1) break;
          max_purge_count--;
        }
      }
    }
  }
}

/* -----------------------------------------------------------
  Arena free
----------------------------------------------------------- */

// Return memory described by `memid` to its origin: pass OS allocations
// straight back to the OS, release arena blocks in the in-use bitmap (with a
// scheduled purge), and ignore static/external memory. Also triggers expired
// purges as a side effect.
void _mi_arena_free(void* p, size_t size, size_t committed_size, mi_memid_t memid, mi_stats_t* stats) {
  mi_assert_internal(size > 0 && stats != NULL);
  mi_assert_internal(committed_size <= size);
  if (p==NULL) return;
  if (size==0) return;
  const bool all_committed = (committed_size == size);

  if (mi_memkind_is_os(memid.memkind)) {
    // was a direct OS allocation, pass through
    if (!all_committed && committed_size > 0) {
      // if partially committed, adjust the committed stats (as `_mi_os_free` will increase decommit by the full size)
      _mi_stat_decrease(&stats->committed, committed_size);
    }
    _mi_os_free(p, size, memid, stats);
  }
  else if (memid.memkind == MI_MEM_ARENA) {
    // allocated in an arena
    size_t arena_idx;
    size_t bitmap_idx;
    mi_arena_memid_indices(memid, &arena_idx, &bitmap_idx);
    mi_assert_internal(arena_idx < MI_MAX_ARENAS);
    mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t,&mi_arenas[arena_idx]);
    mi_assert_internal(arena != NULL);
    const size_t blocks = mi_block_count_of_size(size);

    // checks
    if (arena == NULL) {
      _mi_error_message(EINVAL, "trying to free from non-existent arena: %p, size %zu, memid: 0x%zx\n", p, size, memid);
      return;
    }
    mi_assert_internal(arena->field_count > mi_bitmap_index_field(bitmap_idx));
    if (arena->field_count <= mi_bitmap_index_field(bitmap_idx)) {
      _mi_error_message(EINVAL, "trying to free from non-existent arena block: %p, size %zu, memid: 0x%zx\n", p, size, memid);
      return;
    }

    // need to set all memory to undefined as some parts may still be marked as no_access (like padding etc.)
    mi_track_mem_undefined(p,size);

    // potentially decommit
    if (arena->memid.is_pinned || arena->blocks_committed == NULL) {
      mi_assert_internal(all_committed);
    }
    else {
      mi_assert_internal(arena->blocks_committed != NULL);
      mi_assert_internal(arena->blocks_purge != NULL);

      if (!all_committed) {
        // mark the entire range as no longer committed (so we recommit the full range when re-using)
        _mi_bitmap_unclaim_across(arena->blocks_committed, arena->field_count, blocks, bitmap_idx);
        mi_track_mem_noaccess(p,size);
        if (committed_size > 0) {
          // if partially committed, adjust the committed stats (is it will be recommitted when re-using)
          // in the delayed purge, we now need to not count a decommit if the range is not marked as committed.
          _mi_stat_decrease(&stats->committed, committed_size);
        }
        // note: if not all committed, it may be that the purge will reset/decommit the entire range
        // that contains already decommitted parts. Since purge consistently uses reset or decommit that
        // works (as we should never reset decommitted parts).
      }
      // (delay) purge the entire range
      mi_arena_schedule_purge(arena, bitmap_idx, blocks, stats);
    }

    // and make it available to others again
    bool all_inuse = _mi_bitmap_unclaim_across(arena->blocks_inuse, arena->field_count, blocks, bitmap_idx);
    if (!all_inuse) {
      _mi_error_message(EAGAIN, "trying to free an already freed arena block: %p, size %zu\n", p, size);
      return;
    };
  }
  else {
    // arena was none, external, or static; nothing to do
    mi_assert_internal(memid.memkind < MI_MEM_OS);
  }

  // purge expired decommits
  mi_arenas_try_purge(false, false, stats);
}

// destroy owned arenas; this is unsafe and should only be done using `mi_option_destroy_on_exit`
// for dynamic libraries that are unloaded and need to release all their allocated memory.
static void mi_arenas_unsafe_destroy(void) { const size_t max_arena = mi_atomic_load_relaxed(&mi_arena_count); size_t new_max_arena = 0; for (size_t i = 0; i < max_arena; i++) { mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[i]); if (arena != NULL) { if (arena->start != NULL && mi_memkind_is_os(arena->memid.memkind)) { mi_atomic_store_ptr_release(mi_arena_t, &mi_arenas[i], NULL); _mi_os_free(arena->start, mi_arena_size(arena), arena->memid, &_mi_stats_main); } else { new_max_arena = i; } mi_arena_meta_free(arena, arena->meta_memid, arena->meta_size, &_mi_stats_main); } } // try to lower the max arena. size_t expected = max_arena; mi_atomic_cas_strong_acq_rel(&mi_arena_count, &expected, new_max_arena); } // Purge the arenas; if `force_purge` is true, amenable parts are purged even if not yet expired void _mi_arena_collect(bool force_purge, mi_stats_t* stats) { mi_arenas_try_purge(force_purge, true /* visit all */, stats); } // destroy owned arenas; this is unsafe and should only be done using `mi_option_destroy_on_exit` // for dynamic libraries that are unloaded and need to release all their allocated memory. void _mi_arena_unsafe_destroy_all(mi_stats_t* stats) { mi_arenas_unsafe_destroy(); _mi_arena_collect(true /* force purge */, stats); // purge non-owned arenas } // Is a pointer inside any of our arenas? bool _mi_arena_contains(const void* p) { const size_t max_arena = mi_atomic_load_relaxed(&mi_arena_count); for (size_t i = 0; i < max_arena; i++) { mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[i]); if (arena != NULL && arena->start <= (const uint8_t*)p && arena->start + mi_arena_block_size(arena->block_count) > (const uint8_t*)p) { return true; } } return false; } /* ----------------------------------------------------------- Add an arena. 
----------------------------------------------------------- */ static bool mi_arena_add(mi_arena_t* arena, mi_arena_id_t* arena_id) { mi_assert_internal(arena != NULL); mi_assert_internal((uintptr_t)mi_atomic_load_ptr_relaxed(uint8_t,&arena->start) % MI_SEGMENT_ALIGN == 0); mi_assert_internal(arena->block_count > 0); if (arena_id != NULL) { *arena_id = -1; } size_t i = mi_atomic_increment_acq_rel(&mi_arena_count); if (i >= MI_MAX_ARENAS) { mi_atomic_decrement_acq_rel(&mi_arena_count); return false; } arena->id = mi_arena_id_create(i); mi_atomic_store_ptr_release(mi_arena_t,&mi_arenas[i], arena); if (arena_id != NULL) { *arena_id = arena->id; } return true; } static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_large, int numa_node, bool exclusive, mi_memid_t memid, mi_arena_id_t* arena_id) mi_attr_noexcept { if (arena_id != NULL) *arena_id = _mi_arena_id_none(); if (size < MI_ARENA_BLOCK_SIZE) return false; if (is_large) { mi_assert_internal(memid.initially_committed && memid.is_pinned); } const size_t bcount = size / MI_ARENA_BLOCK_SIZE; const size_t fields = _mi_divide_up(bcount, MI_BITMAP_FIELD_BITS); const size_t bitmaps = (memid.is_pinned ? 2 : 4); const size_t asize = sizeof(mi_arena_t) + (bitmaps*fields*sizeof(mi_bitmap_field_t)); mi_memid_t meta_memid; mi_arena_t* arena = (mi_arena_t*)mi_arena_meta_zalloc(asize, &meta_memid, &_mi_stats_main); // TODO: can we avoid allocating from the OS? if (arena == NULL) return false; // already zero'd due to os_alloc // _mi_memzero(arena, asize); arena->id = _mi_arena_id_none(); arena->memid = memid; arena->exclusive = exclusive; arena->meta_size = asize; arena->meta_memid = meta_memid; arena->block_count = bcount; arena->field_count = fields; arena->start = (uint8_t*)start; arena->numa_node = numa_node; // TODO: or get the current numa node if -1? 
(now it allows anyone to allocate on -1) arena->is_large = is_large; arena->purge_expire = 0; arena->search_idx = 0; arena->blocks_dirty = &arena->blocks_inuse[fields]; // just after inuse bitmap arena->blocks_committed = (arena->memid.is_pinned ? NULL : &arena->blocks_inuse[2*fields]); // just after dirty bitmap arena->blocks_purge = (arena->memid.is_pinned ? NULL : &arena->blocks_inuse[3*fields]); // just after committed bitmap // initialize committed bitmap? if (arena->blocks_committed != NULL && arena->memid.initially_committed) { memset((void*)arena->blocks_committed, 0xFF, fields*sizeof(mi_bitmap_field_t)); // cast to void* to avoid atomic warning } // and claim leftover blocks if needed (so we never allocate there) ptrdiff_t post = (fields * MI_BITMAP_FIELD_BITS) - bcount; mi_assert_internal(post >= 0); if (post > 0) { // don't use leftover bits at the end mi_bitmap_index_t postidx = mi_bitmap_index_create(fields - 1, MI_BITMAP_FIELD_BITS - post); _mi_bitmap_claim(arena->blocks_inuse, fields, post, postidx, NULL); } return mi_arena_add(arena, arena_id); } bool mi_manage_os_memory_ex(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept { mi_memid_t memid = _mi_memid_create(MI_MEM_EXTERNAL); memid.initially_committed = is_committed; memid.initially_zero = is_zero; memid.is_pinned = is_large; return mi_manage_os_memory_ex2(start,size,is_large,numa_node,exclusive,memid, arena_id); } // Reserve a range of regular OS memory int mi_reserve_os_memory_ex(size_t size, bool commit, bool allow_large, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept { if (arena_id != NULL) *arena_id = _mi_arena_id_none(); size = _mi_align_up(size, MI_ARENA_BLOCK_SIZE); // at least one block mi_memid_t memid; void* start = _mi_os_alloc_aligned(size, MI_SEGMENT_ALIGN, commit, allow_large, &memid, &_mi_stats_main); if (start == NULL) return ENOMEM; const bool is_large = memid.is_pinned; // 
todo: use separate is_large field? if (!mi_manage_os_memory_ex2(start, size, is_large, -1 /* numa node */, exclusive, memid, arena_id)) { _mi_os_free_ex(start, size, commit, memid, &_mi_stats_main); _mi_verbose_message("failed to reserve %zu k memory\n", _mi_divide_up(size, 1024)); return ENOMEM; } _mi_verbose_message("reserved %zu KiB memory%s\n", _mi_divide_up(size, 1024), is_large ? " (in large os pages)" : ""); return 0; } // Manage a range of regular OS memory bool mi_manage_os_memory(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node) mi_attr_noexcept { return mi_manage_os_memory_ex(start, size, is_committed, is_large, is_zero, numa_node, false /* exclusive? */, NULL); } // Reserve a range of regular OS memory int mi_reserve_os_memory(size_t size, bool commit, bool allow_large) mi_attr_noexcept { return mi_reserve_os_memory_ex(size, commit, allow_large, false, NULL); } /* ----------------------------------------------------------- Debugging ----------------------------------------------------------- */ static size_t mi_debug_show_bitmap(const char* prefix, mi_bitmap_field_t* fields, size_t field_count ) { size_t inuse_count = 0; for (size_t i = 0; i < field_count; i++) { char buf[MI_BITMAP_FIELD_BITS + 1]; uintptr_t field = mi_atomic_load_relaxed(&fields[i]); for (size_t bit = 0; bit < MI_BITMAP_FIELD_BITS; bit++) { bool inuse = ((((uintptr_t)1 << bit) & field) != 0); if (inuse) inuse_count++; buf[MI_BITMAP_FIELD_BITS - 1 - bit] = (inuse ? 
'x' : '.'); } buf[MI_BITMAP_FIELD_BITS] = 0; _mi_verbose_message("%s%s\n", prefix, buf); } return inuse_count; } void mi_debug_show_arenas(void) mi_attr_noexcept { size_t max_arenas = mi_atomic_load_relaxed(&mi_arena_count); for (size_t i = 0; i < max_arenas; i++) { mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]); if (arena == NULL) break; size_t inuse_count = 0; _mi_verbose_message("arena %zu: %zu blocks with %zu fields\n", i, arena->block_count, arena->field_count); inuse_count += mi_debug_show_bitmap(" ", arena->blocks_inuse, arena->field_count); _mi_verbose_message(" blocks in use ('x'): %zu\n", inuse_count); } } /* ----------------------------------------------------------- Reserve a huge page arena. ----------------------------------------------------------- */ // reserve at a specific numa node int mi_reserve_huge_os_pages_at_ex(size_t pages, int numa_node, size_t timeout_msecs, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept { if (arena_id != NULL) *arena_id = -1; if (pages==0) return 0; if (numa_node < -1) numa_node = -1; if (numa_node >= 0) numa_node = numa_node % _mi_os_numa_node_count(); size_t hsize = 0; size_t pages_reserved = 0; mi_memid_t memid; void* p = _mi_os_alloc_huge_os_pages(pages, numa_node, timeout_msecs, &pages_reserved, &hsize, &memid); if (p==NULL || pages_reserved==0) { _mi_warning_message("failed to reserve %zu GiB huge pages\n", pages); return ENOMEM; } _mi_verbose_message("numa node %i: reserved %zu GiB huge pages (of the %zu GiB requested)\n", numa_node, pages_reserved, pages); if (!mi_manage_os_memory_ex2(p, hsize, true, numa_node, exclusive, memid, arena_id)) { _mi_os_free(p, hsize, memid, &_mi_stats_main); return ENOMEM; } return 0; } int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msecs) mi_attr_noexcept { return mi_reserve_huge_os_pages_at_ex(pages, numa_node, timeout_msecs, false, NULL); } // reserve huge pages evenly among the given number of numa nodes (or use 
the available ones as detected) int mi_reserve_huge_os_pages_interleave(size_t pages, size_t numa_nodes, size_t timeout_msecs) mi_attr_noexcept { if (pages == 0) return 0; // pages per numa node size_t numa_count = (numa_nodes > 0 ? numa_nodes : _mi_os_numa_node_count()); if (numa_count <= 0) numa_count = 1; const size_t pages_per = pages / numa_count; const size_t pages_mod = pages % numa_count; const size_t timeout_per = (timeout_msecs==0 ? 0 : (timeout_msecs / numa_count) + 50); // reserve evenly among numa nodes for (size_t numa_node = 0; numa_node < numa_count && pages > 0; numa_node++) { size_t node_pages = pages_per; // can be 0 if (numa_node < pages_mod) node_pages++; int err = mi_reserve_huge_os_pages_at(node_pages, (int)numa_node, timeout_per); if (err) return err; if (pages < node_pages) { pages = 0; } else { pages -= node_pages; } } return 0; } int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept { MI_UNUSED(max_secs); _mi_warning_message("mi_reserve_huge_os_pages is deprecated: use mi_reserve_huge_os_pages_interleave/at instead\n"); if (pages_reserved != NULL) *pages_reserved = 0; int err = mi_reserve_huge_os_pages_interleave(pages, 0, (size_t)(max_secs * 1000.0)); if (err==0 && pages_reserved!=NULL) *pages_reserved = pages; return err; } luametatex-2.10.08/source/libraries/mimalloc/src/bitmap.c000066400000000000000000000442771442250314700233330ustar00rootroot00000000000000/* ---------------------------------------------------------------------------- Copyright (c) 2019-2023 Microsoft Research, Daan Leijen This is free software; you can redistribute it and/or modify it under the terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. 
-----------------------------------------------------------------------------*/ /* ---------------------------------------------------------------------------- Concurrent bitmap that can set/reset sequences of bits atomically, represeted as an array of fields where each field is a machine word (`size_t`) There are two api's; the standard one cannot have sequences that cross between the bitmap fields (and a sequence must be <= MI_BITMAP_FIELD_BITS). The `_across` postfixed functions do allow sequences that can cross over between the fields. (This is used in arena allocation) ---------------------------------------------------------------------------- */ #include "mimalloc.h" #include "mimalloc/internal.h" #include "bitmap.h" /* ----------------------------------------------------------- Bitmap definition ----------------------------------------------------------- */ // The bit mask for a given number of blocks at a specified bit index. static inline size_t mi_bitmap_mask_(size_t count, size_t bitidx) { mi_assert_internal(count + bitidx <= MI_BITMAP_FIELD_BITS); mi_assert_internal(count > 0); if (count >= MI_BITMAP_FIELD_BITS) return MI_BITMAP_FIELD_FULL; if (count == 0) return 0; return ((((size_t)1 << count) - 1) << bitidx); } /* ----------------------------------------------------------- Claim a bit sequence atomically ----------------------------------------------------------- */ // Try to atomically claim a sequence of `count` bits in a single // field at `idx` in `bitmap`. Returns `true` on success. 
inline bool _mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx, const size_t count, mi_bitmap_index_t* bitmap_idx) { mi_assert_internal(bitmap_idx != NULL); mi_assert_internal(count <= MI_BITMAP_FIELD_BITS); mi_assert_internal(count > 0); mi_bitmap_field_t* field = &bitmap[idx]; size_t map = mi_atomic_load_relaxed(field); if (map==MI_BITMAP_FIELD_FULL) return false; // short cut // search for 0-bit sequence of length count const size_t mask = mi_bitmap_mask_(count, 0); const size_t bitidx_max = MI_BITMAP_FIELD_BITS - count; #ifdef MI_HAVE_FAST_BITSCAN size_t bitidx = mi_ctz(~map); // quickly find the first zero bit if possible #else size_t bitidx = 0; // otherwise start at 0 #endif size_t m = (mask << bitidx); // invariant: m == mask shifted by bitidx // scan linearly for a free range of zero bits while (bitidx <= bitidx_max) { const size_t mapm = (map & m); if (mapm == 0) { // are the mask bits free at bitidx? mi_assert_internal((m >> bitidx) == mask); // no overflow? const size_t newmap = (map | m); mi_assert_internal((newmap^map) >> bitidx == mask); if (!mi_atomic_cas_strong_acq_rel(field, &map, newmap)) { // TODO: use weak cas here? // no success, another thread claimed concurrently.. keep going (with updated `map`) continue; } else { // success, we claimed the bits! *bitmap_idx = mi_bitmap_index_create(idx, bitidx); return true; } } else { // on to the next bit range #ifdef MI_HAVE_FAST_BITSCAN mi_assert_internal(mapm != 0); const size_t shift = (count == 1 ? 1 : (MI_INTPTR_BITS - mi_clz(mapm) - bitidx)); mi_assert_internal(shift > 0 && shift <= count); #else const size_t shift = 1; #endif bitidx += shift; m <<= shift; } } // no bits found return false; } // Find `count` bits of 0 and set them to 1 atomically; returns `true` on success. // Starts at idx, and wraps around to search in all `bitmap_fields` fields. // `count` can be at most MI_BITMAP_FIELD_BITS and will never cross fields. 
bool _mi_bitmap_try_find_from_claim(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_index_t* bitmap_idx) { size_t idx = start_field_idx; for (size_t visited = 0; visited < bitmap_fields; visited++, idx++) { if (idx >= bitmap_fields) { idx = 0; } // wrap if (_mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx)) { return true; } } return false; } // Like _mi_bitmap_try_find_from_claim but with an extra predicate that must be fullfilled bool _mi_bitmap_try_find_from_claim_pred(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_pred_fun_t pred_fun, void* pred_arg, mi_bitmap_index_t* bitmap_idx) { size_t idx = start_field_idx; for (size_t visited = 0; visited < bitmap_fields; visited++, idx++) { if (idx >= bitmap_fields) idx = 0; // wrap if (_mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx)) { if (pred_fun == NULL || pred_fun(*bitmap_idx, pred_arg)) { return true; } // predicate returned false, unclaim and look further _mi_bitmap_unclaim(bitmap, bitmap_fields, count, *bitmap_idx); } } return false; } // Set `count` bits at `bitmap_idx` to 0 atomically // Returns `true` if all `count` bits were 1 previously. bool _mi_bitmap_unclaim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) { const size_t idx = mi_bitmap_index_field(bitmap_idx); const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx); const size_t mask = mi_bitmap_mask_(count, bitidx); mi_assert_internal(bitmap_fields > idx); MI_UNUSED(bitmap_fields); // mi_assert_internal((bitmap[idx] & mask) == mask); const size_t prev = mi_atomic_and_acq_rel(&bitmap[idx], ~mask); return ((prev & mask) == mask); } // Set `count` bits at `bitmap_idx` to 1 atomically // Returns `true` if all `count` bits were 0 previously. `any_zero` is `true` if there was at least one zero bit. 
bool _mi_bitmap_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* any_zero) { const size_t idx = mi_bitmap_index_field(bitmap_idx); const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx); const size_t mask = mi_bitmap_mask_(count, bitidx); mi_assert_internal(bitmap_fields > idx); MI_UNUSED(bitmap_fields); //mi_assert_internal(any_zero != NULL || (bitmap[idx] & mask) == 0); size_t prev = mi_atomic_or_acq_rel(&bitmap[idx], mask); if (any_zero != NULL) { *any_zero = ((prev & mask) != mask); } return ((prev & mask) == 0); } // Returns `true` if all `count` bits were 1. `any_ones` is `true` if there was at least one bit set to one. static bool mi_bitmap_is_claimedx(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* any_ones) { const size_t idx = mi_bitmap_index_field(bitmap_idx); const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx); const size_t mask = mi_bitmap_mask_(count, bitidx); mi_assert_internal(bitmap_fields > idx); MI_UNUSED(bitmap_fields); const size_t field = mi_atomic_load_relaxed(&bitmap[idx]); if (any_ones != NULL) { *any_ones = ((field & mask) != 0); } return ((field & mask) == mask); } // Try to set `count` bits at `bitmap_idx` from 0 to 1 atomically. // Returns `true` if successful when all previous `count` bits were 0. 
bool _mi_bitmap_try_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) { const size_t idx = mi_bitmap_index_field(bitmap_idx); const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx); const size_t mask = mi_bitmap_mask_(count, bitidx); mi_assert_internal(bitmap_fields > idx); MI_UNUSED(bitmap_fields); size_t expected = mi_atomic_load_relaxed(&bitmap[idx]); do { if ((expected & mask) != 0) return false; } while (!mi_atomic_cas_strong_acq_rel(&bitmap[idx], &expected, expected | mask)); mi_assert_internal((expected & mask) == 0); return true; } bool _mi_bitmap_is_claimed(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) { return mi_bitmap_is_claimedx(bitmap, bitmap_fields, count, bitmap_idx, NULL); } bool _mi_bitmap_is_any_claimed(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) { bool any_ones; mi_bitmap_is_claimedx(bitmap, bitmap_fields, count, bitmap_idx, &any_ones); return any_ones; } //-------------------------------------------------------------------------- // the `_across` functions work on bitmaps where sequences can cross over // between the fields. This is used in arena allocation //-------------------------------------------------------------------------- // Try to atomically claim a sequence of `count` bits starting from the field // at `idx` in `bitmap` and crossing into subsequent fields. Returns `true` on success. 
// Only needs to consider crossing into the next fields (see `mi_bitmap_try_find_from_claim_across`) static bool mi_bitmap_try_find_claim_field_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t idx, const size_t count, const size_t retries, mi_bitmap_index_t* bitmap_idx) { mi_assert_internal(bitmap_idx != NULL); // check initial trailing zeros mi_bitmap_field_t* field = &bitmap[idx]; size_t map = mi_atomic_load_relaxed(field); const size_t initial = mi_clz(map); // count of initial zeros starting at idx mi_assert_internal(initial <= MI_BITMAP_FIELD_BITS); if (initial == 0) return false; if (initial >= count) return _mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx); // no need to cross fields (this case won't happen for us) if (_mi_divide_up(count - initial, MI_BITMAP_FIELD_BITS) >= (bitmap_fields - idx)) return false; // not enough entries // scan ahead size_t found = initial; size_t mask = 0; // mask bits for the final field while(found < count) { field++; map = mi_atomic_load_relaxed(field); const size_t mask_bits = (found + MI_BITMAP_FIELD_BITS <= count ? 
MI_BITMAP_FIELD_BITS : (count - found)); mi_assert_internal(mask_bits > 0 && mask_bits <= MI_BITMAP_FIELD_BITS); mask = mi_bitmap_mask_(mask_bits, 0); if ((map & mask) != 0) return false; // some part is already claimed found += mask_bits; } mi_assert_internal(field < &bitmap[bitmap_fields]); // we found a range of contiguous zeros up to the final field; mask contains mask in the final field // now try to claim the range atomically mi_bitmap_field_t* const final_field = field; const size_t final_mask = mask; mi_bitmap_field_t* const initial_field = &bitmap[idx]; const size_t initial_idx = MI_BITMAP_FIELD_BITS - initial; const size_t initial_mask = mi_bitmap_mask_(initial, initial_idx); // initial field size_t newmap; field = initial_field; map = mi_atomic_load_relaxed(field); do { newmap = (map | initial_mask); if ((map & initial_mask) != 0) { goto rollback; }; } while (!mi_atomic_cas_strong_acq_rel(field, &map, newmap)); // intermediate fields while (++field < final_field) { newmap = MI_BITMAP_FIELD_FULL; map = 0; if (!mi_atomic_cas_strong_acq_rel(field, &map, newmap)) { goto rollback; } } // final field mi_assert_internal(field == final_field); map = mi_atomic_load_relaxed(field); do { newmap = (map | final_mask); if ((map & final_mask) != 0) { goto rollback; } } while (!mi_atomic_cas_strong_acq_rel(field, &map, newmap)); // claimed! 
*bitmap_idx = mi_bitmap_index_create(idx, initial_idx); return true; rollback: // roll back intermediate fields // (we just failed to claim `field` so decrement first) while (--field > initial_field) { newmap = 0; map = MI_BITMAP_FIELD_FULL; mi_assert_internal(mi_atomic_load_relaxed(field) == map); mi_atomic_store_release(field, newmap); } if (field == initial_field) { // (if we failed on the initial field, `field + 1 == initial_field`) map = mi_atomic_load_relaxed(field); do { mi_assert_internal((map & initial_mask) == initial_mask); newmap = (map & ~initial_mask); } while (!mi_atomic_cas_strong_acq_rel(field, &map, newmap)); } // retry? (we make a recursive call instead of goto to be able to use const declarations) if (retries <= 2) { return mi_bitmap_try_find_claim_field_across(bitmap, bitmap_fields, idx, count, retries+1, bitmap_idx); } else { return false; } } // Find `count` bits of zeros and set them to 1 atomically; returns `true` on success. // Starts at idx, and wraps around to search in all `bitmap_fields` fields. 
bool _mi_bitmap_try_find_from_claim_across(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_index_t* bitmap_idx) { mi_assert_internal(count > 0); if (count <= 2) { // we don't bother with crossover fields for small counts return _mi_bitmap_try_find_from_claim(bitmap, bitmap_fields, start_field_idx, count, bitmap_idx); } // visit the fields size_t idx = start_field_idx; for (size_t visited = 0; visited < bitmap_fields; visited++, idx++) { if (idx >= bitmap_fields) { idx = 0; } // wrap // first try to claim inside a field if (count <= MI_BITMAP_FIELD_BITS) { if (_mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx)) { return true; } } // if that fails, then try to claim across fields if (mi_bitmap_try_find_claim_field_across(bitmap, bitmap_fields, idx, count, 0, bitmap_idx)) { return true; } } return false; } // Helper for masks across fields; returns the mid count, post_mask may be 0 static size_t mi_bitmap_mask_across(mi_bitmap_index_t bitmap_idx, size_t bitmap_fields, size_t count, size_t* pre_mask, size_t* mid_mask, size_t* post_mask) { MI_UNUSED(bitmap_fields); const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx); if mi_likely(bitidx + count <= MI_BITMAP_FIELD_BITS) { *pre_mask = mi_bitmap_mask_(count, bitidx); *mid_mask = 0; *post_mask = 0; mi_assert_internal(mi_bitmap_index_field(bitmap_idx) < bitmap_fields); return 0; } else { const size_t pre_bits = MI_BITMAP_FIELD_BITS - bitidx; mi_assert_internal(pre_bits < count); *pre_mask = mi_bitmap_mask_(pre_bits, bitidx); count -= pre_bits; const size_t mid_count = (count / MI_BITMAP_FIELD_BITS); *mid_mask = MI_BITMAP_FIELD_FULL; count %= MI_BITMAP_FIELD_BITS; *post_mask = (count==0 ? 0 : mi_bitmap_mask_(count, 0)); mi_assert_internal(mi_bitmap_index_field(bitmap_idx) + mid_count + (count==0 ? 
0 : 1) < bitmap_fields); return mid_count; } } // Set `count` bits at `bitmap_idx` to 0 atomically // Returns `true` if all `count` bits were 1 previously. bool _mi_bitmap_unclaim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) { size_t idx = mi_bitmap_index_field(bitmap_idx); size_t pre_mask; size_t mid_mask; size_t post_mask; size_t mid_count = mi_bitmap_mask_across(bitmap_idx, bitmap_fields, count, &pre_mask, &mid_mask, &post_mask); bool all_one = true; mi_bitmap_field_t* field = &bitmap[idx]; size_t prev = mi_atomic_and_acq_rel(field++, ~pre_mask); // clear first part if ((prev & pre_mask) != pre_mask) all_one = false; while(mid_count-- > 0) { prev = mi_atomic_and_acq_rel(field++, ~mid_mask); // clear mid part if ((prev & mid_mask) != mid_mask) all_one = false; } if (post_mask!=0) { prev = mi_atomic_and_acq_rel(field, ~post_mask); // clear end part if ((prev & post_mask) != post_mask) all_one = false; } return all_one; } // Set `count` bits at `bitmap_idx` to 1 atomically // Returns `true` if all `count` bits were 0 previously. `any_zero` is `true` if there was at least one zero bit. 
bool _mi_bitmap_claim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* pany_zero) { size_t idx = mi_bitmap_index_field(bitmap_idx); size_t pre_mask; size_t mid_mask; size_t post_mask; size_t mid_count = mi_bitmap_mask_across(bitmap_idx, bitmap_fields, count, &pre_mask, &mid_mask, &post_mask); bool all_zero = true; bool any_zero = false; _Atomic(size_t)*field = &bitmap[idx]; size_t prev = mi_atomic_or_acq_rel(field++, pre_mask); if ((prev & pre_mask) != 0) all_zero = false; if ((prev & pre_mask) != pre_mask) any_zero = true; while (mid_count-- > 0) { prev = mi_atomic_or_acq_rel(field++, mid_mask); if ((prev & mid_mask) != 0) all_zero = false; if ((prev & mid_mask) != mid_mask) any_zero = true; } if (post_mask!=0) { prev = mi_atomic_or_acq_rel(field, post_mask); if ((prev & post_mask) != 0) all_zero = false; if ((prev & post_mask) != post_mask) any_zero = true; } if (pany_zero != NULL) { *pany_zero = any_zero; } return all_zero; } // Returns `true` if all `count` bits were 1. // `any_ones` is `true` if there was at least one bit set to one. 
static bool mi_bitmap_is_claimedx_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* pany_ones) { size_t idx = mi_bitmap_index_field(bitmap_idx); size_t pre_mask; size_t mid_mask; size_t post_mask; size_t mid_count = mi_bitmap_mask_across(bitmap_idx, bitmap_fields, count, &pre_mask, &mid_mask, &post_mask); bool all_ones = true; bool any_ones = false; mi_bitmap_field_t* field = &bitmap[idx]; size_t prev = mi_atomic_load_relaxed(field++); if ((prev & pre_mask) != pre_mask) all_ones = false; if ((prev & pre_mask) != 0) any_ones = true; while (mid_count-- > 0) { prev = mi_atomic_load_relaxed(field++); if ((prev & mid_mask) != mid_mask) all_ones = false; if ((prev & mid_mask) != 0) any_ones = true; } if (post_mask!=0) { prev = mi_atomic_load_relaxed(field); if ((prev & post_mask) != post_mask) all_ones = false; if ((prev & post_mask) != 0) any_ones = true; } if (pany_ones != NULL) { *pany_ones = any_ones; } return all_ones; } bool _mi_bitmap_is_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) { return mi_bitmap_is_claimedx_across(bitmap, bitmap_fields, count, bitmap_idx, NULL); } bool _mi_bitmap_is_any_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) { bool any_ones; mi_bitmap_is_claimedx_across(bitmap, bitmap_fields, count, bitmap_idx, &any_ones); return any_ones; } luametatex-2.10.08/source/libraries/mimalloc/src/bitmap.h000066400000000000000000000135401442250314700233250ustar00rootroot00000000000000/* ---------------------------------------------------------------------------- Copyright (c) 2019-2023 Microsoft Research, Daan Leijen This is free software; you can redistribute it and/or modify it under the terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. 
-----------------------------------------------------------------------------*/ /* ---------------------------------------------------------------------------- Concurrent bitmap that can set/reset sequences of bits atomically, represeted as an array of fields where each field is a machine word (`size_t`) There are two api's; the standard one cannot have sequences that cross between the bitmap fields (and a sequence must be <= MI_BITMAP_FIELD_BITS). (this is used in region allocation) The `_across` postfixed functions do allow sequences that can cross over between the fields. (This is used in arena allocation) ---------------------------------------------------------------------------- */ #pragma once #ifndef MI_BITMAP_H #define MI_BITMAP_H /* ----------------------------------------------------------- Bitmap definition ----------------------------------------------------------- */ #define MI_BITMAP_FIELD_BITS (8*MI_SIZE_SIZE) #define MI_BITMAP_FIELD_FULL (~((size_t)0)) // all bits set // An atomic bitmap of `size_t` fields typedef _Atomic(size_t) mi_bitmap_field_t; typedef mi_bitmap_field_t* mi_bitmap_t; // A bitmap index is the index of the bit in a bitmap. typedef size_t mi_bitmap_index_t; // Create a bit index. static inline mi_bitmap_index_t mi_bitmap_index_create(size_t idx, size_t bitidx) { mi_assert_internal(bitidx < MI_BITMAP_FIELD_BITS); return (idx*MI_BITMAP_FIELD_BITS) + bitidx; } // Create a bit index. static inline mi_bitmap_index_t mi_bitmap_index_create_from_bit(size_t full_bitidx) { return mi_bitmap_index_create(full_bitidx / MI_BITMAP_FIELD_BITS, full_bitidx % MI_BITMAP_FIELD_BITS); } // Get the field index from a bit index. 
static inline size_t mi_bitmap_index_field(mi_bitmap_index_t bitmap_idx) { return (bitmap_idx / MI_BITMAP_FIELD_BITS); } // Get the bit index in a bitmap field static inline size_t mi_bitmap_index_bit_in_field(mi_bitmap_index_t bitmap_idx) { return (bitmap_idx % MI_BITMAP_FIELD_BITS); } // Get the full bit index static inline size_t mi_bitmap_index_bit(mi_bitmap_index_t bitmap_idx) { return bitmap_idx; } /* ----------------------------------------------------------- Claim a bit sequence atomically ----------------------------------------------------------- */ // Try to atomically claim a sequence of `count` bits in a single // field at `idx` in `bitmap`. Returns `true` on success. bool _mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx, const size_t count, mi_bitmap_index_t* bitmap_idx); // Starts at idx, and wraps around to search in all `bitmap_fields` fields. // For now, `count` can be at most MI_BITMAP_FIELD_BITS and will never cross fields. bool _mi_bitmap_try_find_from_claim(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_index_t* bitmap_idx); // Like _mi_bitmap_try_find_from_claim but with an extra predicate that must be fullfilled typedef bool (mi_cdecl *mi_bitmap_pred_fun_t)(mi_bitmap_index_t bitmap_idx, void* pred_arg); bool _mi_bitmap_try_find_from_claim_pred(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_pred_fun_t pred_fun, void* pred_arg, mi_bitmap_index_t* bitmap_idx); // Set `count` bits at `bitmap_idx` to 0 atomically // Returns `true` if all `count` bits were 1 previously. bool _mi_bitmap_unclaim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx); // Try to set `count` bits at `bitmap_idx` from 0 to 1 atomically. // Returns `true` if successful when all previous `count` bits were 0. 
bool _mi_bitmap_try_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx); // Set `count` bits at `bitmap_idx` to 1 atomically // Returns `true` if all `count` bits were 0 previously. `any_zero` is `true` if there was at least one zero bit. bool _mi_bitmap_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* any_zero); bool _mi_bitmap_is_claimed(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx); bool _mi_bitmap_is_any_claimed(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx); //-------------------------------------------------------------------------- // the `_across` functions work on bitmaps where sequences can cross over // between the fields. This is used in arena allocation //-------------------------------------------------------------------------- // Find `count` bits of zeros and set them to 1 atomically; returns `true` on success. // Starts at idx, and wraps around to search in all `bitmap_fields` fields. bool _mi_bitmap_try_find_from_claim_across(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_index_t* bitmap_idx); // Set `count` bits at `bitmap_idx` to 0 atomically // Returns `true` if all `count` bits were 1 previously. bool _mi_bitmap_unclaim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx); // Set `count` bits at `bitmap_idx` to 1 atomically // Returns `true` if all `count` bits were 0 previously. `any_zero` is `true` if there was at least one zero bit. 
bool _mi_bitmap_claim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* pany_zero); bool _mi_bitmap_is_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx); bool _mi_bitmap_is_any_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx); #endif luametatex-2.10.08/source/libraries/mimalloc/src/heap.c000066400000000000000000000516161442250314700227670ustar00rootroot00000000000000/*---------------------------------------------------------------------------- Copyright (c) 2018-2021, Microsoft Research, Daan Leijen This is free software; you can redistribute it and/or modify it under the terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. -----------------------------------------------------------------------------*/ #include "mimalloc.h" #include "mimalloc/internal.h" #include "mimalloc/atomic.h" #include "mimalloc/prim.h" // mi_prim_get_default_heap #include // memset, memcpy #if defined(_MSC_VER) && (_MSC_VER < 1920) #pragma warning(disable:4204) // non-constant aggregate initializer #endif /* ----------------------------------------------------------- Helpers ----------------------------------------------------------- */ // return `true` if ok, `false` to break typedef bool (heap_page_visitor_fun)(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* arg1, void* arg2); // Visit all pages in a heap; returns `false` if break was called. 
static bool mi_heap_visit_pages(mi_heap_t* heap, heap_page_visitor_fun* fn, void* arg1, void* arg2) { if (heap==NULL || heap->page_count==0) return 0; // visit all pages #if MI_DEBUG>1 size_t total = heap->page_count; size_t count = 0; #endif for (size_t i = 0; i <= MI_BIN_FULL; i++) { mi_page_queue_t* pq = &heap->pages[i]; mi_page_t* page = pq->first; while(page != NULL) { mi_page_t* next = page->next; // save next in case the page gets removed from the queue mi_assert_internal(mi_page_heap(page) == heap); #if MI_DEBUG>1 count++; #endif if (!fn(heap, pq, page, arg1, arg2)) return false; page = next; // and continue } } mi_assert_internal(count == total); return true; } #if MI_DEBUG>=2 static bool mi_heap_page_is_valid(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* arg1, void* arg2) { MI_UNUSED(arg1); MI_UNUSED(arg2); MI_UNUSED(pq); mi_assert_internal(mi_page_heap(page) == heap); mi_segment_t* segment = _mi_page_segment(page); mi_assert_internal(segment->thread_id == heap->thread_id); mi_assert_expensive(_mi_page_is_valid(page)); return true; } #endif #if MI_DEBUG>=3 static bool mi_heap_is_valid(mi_heap_t* heap) { mi_assert_internal(heap!=NULL); mi_heap_visit_pages(heap, &mi_heap_page_is_valid, NULL, NULL); return true; } #endif /* ----------------------------------------------------------- "Collect" pages by migrating `local_free` and `thread_free` lists and freeing empty pages. 
This is done when a thread stops (and in that case abandons pages if there are still blocks alive) ----------------------------------------------------------- */ typedef enum mi_collect_e { MI_NORMAL, MI_FORCE, MI_ABANDON } mi_collect_t; static bool mi_heap_page_collect(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* arg_collect, void* arg2 ) { MI_UNUSED(arg2); MI_UNUSED(heap); mi_assert_internal(mi_heap_page_is_valid(heap, pq, page, NULL, NULL)); mi_collect_t collect = *((mi_collect_t*)arg_collect); _mi_page_free_collect(page, collect >= MI_FORCE); if (mi_page_all_free(page)) { // no more used blocks, free the page. // note: this will free retired pages as well. _mi_page_free(page, pq, collect >= MI_FORCE); } else if (collect == MI_ABANDON) { // still used blocks but the thread is done; abandon the page _mi_page_abandon(page, pq); } return true; // don't break } static bool mi_heap_page_never_delayed_free(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* arg1, void* arg2) { MI_UNUSED(arg1); MI_UNUSED(arg2); MI_UNUSED(heap); MI_UNUSED(pq); _mi_page_use_delayed_free(page, MI_NEVER_DELAYED_FREE, false); return true; // don't break } static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect) { if (heap==NULL || !mi_heap_is_initialized(heap)) return; const bool force = collect >= MI_FORCE; _mi_deferred_free(heap, force); // note: never reclaim on collect but leave it to threads that need storage to reclaim const bool force_main = #ifdef NDEBUG collect == MI_FORCE #else collect >= MI_FORCE #endif && _mi_is_main_thread() && mi_heap_is_backing(heap) && !heap->no_reclaim; if (force_main) { // the main thread is abandoned (end-of-program), try to reclaim all abandoned segments. // if all memory is freed by now, all segments should be freed. 
_mi_abandoned_reclaim_all(heap, &heap->tld->segments); } // if abandoning, mark all pages to no longer add to delayed_free if (collect == MI_ABANDON) { mi_heap_visit_pages(heap, &mi_heap_page_never_delayed_free, NULL, NULL); } // free all current thread delayed blocks. // (if abandoning, after this there are no more thread-delayed references into the pages.) _mi_heap_delayed_free_all(heap); // collect retired pages _mi_heap_collect_retired(heap, force); // collect all pages owned by this thread mi_heap_visit_pages(heap, &mi_heap_page_collect, &collect, NULL); mi_assert_internal( collect != MI_ABANDON || mi_atomic_load_ptr_acquire(mi_block_t,&heap->thread_delayed_free) == NULL ); // collect abandoned segments (in particular, purge expired parts of segments in the abandoned segment list) // note: forced purge can be quite expensive if many threads are created/destroyed so we do not force on abandonment _mi_abandoned_collect(heap, collect == MI_FORCE /* force? */, &heap->tld->segments); // collect segment local caches if (force) { _mi_segment_thread_collect(&heap->tld->segments); } // collect regions on program-exit (or shared library unload) if (force && _mi_is_main_thread() && mi_heap_is_backing(heap)) { _mi_thread_data_collect(); // collect thread data cache _mi_arena_collect(true /* force purge */, &heap->tld->stats); } } void _mi_heap_collect_abandon(mi_heap_t* heap) { mi_heap_collect_ex(heap, MI_ABANDON); } void mi_heap_collect(mi_heap_t* heap, bool force) mi_attr_noexcept { mi_heap_collect_ex(heap, (force ? 
MI_FORCE : MI_NORMAL)); } void mi_collect(bool force) mi_attr_noexcept { mi_heap_collect(mi_prim_get_default_heap(), force); } /* ----------------------------------------------------------- Heap new ----------------------------------------------------------- */ mi_heap_t* mi_heap_get_default(void) { mi_thread_init(); return mi_prim_get_default_heap(); } static bool mi_heap_is_default(const mi_heap_t* heap) { return (heap == mi_prim_get_default_heap()); } mi_heap_t* mi_heap_get_backing(void) { mi_heap_t* heap = mi_heap_get_default(); mi_assert_internal(heap!=NULL); mi_heap_t* bheap = heap->tld->heap_backing; mi_assert_internal(bheap!=NULL); mi_assert_internal(bheap->thread_id == _mi_thread_id()); return bheap; } mi_decl_nodiscard mi_heap_t* mi_heap_new_in_arena(mi_arena_id_t arena_id) { mi_heap_t* bheap = mi_heap_get_backing(); mi_heap_t* heap = mi_heap_malloc_tp(bheap, mi_heap_t); // todo: OS allocate in secure mode? if (heap == NULL) return NULL; _mi_memcpy_aligned(heap, &_mi_heap_empty, sizeof(mi_heap_t)); heap->tld = bheap->tld; heap->thread_id = _mi_thread_id(); heap->arena_id = arena_id; _mi_random_split(&bheap->random, &heap->random); heap->cookie = _mi_heap_random_next(heap) | 1; heap->keys[0] = _mi_heap_random_next(heap); heap->keys[1] = _mi_heap_random_next(heap); heap->no_reclaim = true; // don't reclaim abandoned pages or otherwise destroy is unsafe // push on the thread local heaps list heap->next = heap->tld->heaps; heap->tld->heaps = heap; return heap; } mi_decl_nodiscard mi_heap_t* mi_heap_new(void) { return mi_heap_new_in_arena(_mi_arena_id_none()); } bool _mi_heap_memid_is_suitable(mi_heap_t* heap, mi_memid_t memid) { return _mi_arena_memid_is_suitable(memid, heap->arena_id); } uintptr_t _mi_heap_random_next(mi_heap_t* heap) { return _mi_random_next(&heap->random); } // zero out the page queues static void mi_heap_reset_pages(mi_heap_t* heap) { mi_assert_internal(heap != NULL); mi_assert_internal(mi_heap_is_initialized(heap)); // TODO: copy full 
empty heap instead? memset(&heap->pages_free_direct, 0, sizeof(heap->pages_free_direct)); _mi_memcpy_aligned(&heap->pages, &_mi_heap_empty.pages, sizeof(heap->pages)); heap->thread_delayed_free = NULL; heap->page_count = 0; } // called from `mi_heap_destroy` and `mi_heap_delete` to free the internal heap resources. static void mi_heap_free(mi_heap_t* heap) { mi_assert(heap != NULL); mi_assert_internal(mi_heap_is_initialized(heap)); if (heap==NULL || !mi_heap_is_initialized(heap)) return; if (mi_heap_is_backing(heap)) return; // dont free the backing heap // reset default if (mi_heap_is_default(heap)) { _mi_heap_set_default_direct(heap->tld->heap_backing); } // remove ourselves from the thread local heaps list // linear search but we expect the number of heaps to be relatively small mi_heap_t* prev = NULL; mi_heap_t* curr = heap->tld->heaps; while (curr != heap && curr != NULL) { prev = curr; curr = curr->next; } mi_assert_internal(curr == heap); if (curr == heap) { if (prev != NULL) { prev->next = heap->next; } else { heap->tld->heaps = heap->next; } } mi_assert_internal(heap->tld->heaps != NULL); // and free the used memory mi_free(heap); } /* ----------------------------------------------------------- Heap destroy ----------------------------------------------------------- */ static bool _mi_heap_page_destroy(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* arg1, void* arg2) { MI_UNUSED(arg1); MI_UNUSED(arg2); MI_UNUSED(heap); MI_UNUSED(pq); // ensure no more thread_delayed_free will be added _mi_page_use_delayed_free(page, MI_NEVER_DELAYED_FREE, false); // stats const size_t bsize = mi_page_block_size(page); if (bsize > MI_MEDIUM_OBJ_SIZE_MAX) { if (bsize <= MI_LARGE_OBJ_SIZE_MAX) { mi_heap_stat_decrease(heap, large, bsize); } else { mi_heap_stat_decrease(heap, huge, bsize); } } #if (MI_STAT) _mi_page_free_collect(page, false); // update used count const size_t inuse = page->used; if (bsize <= MI_LARGE_OBJ_SIZE_MAX) { mi_heap_stat_decrease(heap, 
normal, bsize * inuse); #if (MI_STAT>1) mi_heap_stat_decrease(heap, normal_bins[_mi_bin(bsize)], inuse); #endif } mi_heap_stat_decrease(heap, malloc, bsize * inuse); // todo: off for aligned blocks... #endif /// pretend it is all free now mi_assert_internal(mi_page_thread_free(page) == NULL); page->used = 0; // and free the page // mi_page_free(page,false); page->next = NULL; page->prev = NULL; _mi_segment_page_free(page,false /* no force? */, &heap->tld->segments); return true; // keep going } void _mi_heap_destroy_pages(mi_heap_t* heap) { mi_heap_visit_pages(heap, &_mi_heap_page_destroy, NULL, NULL); mi_heap_reset_pages(heap); } #if MI_TRACK_HEAP_DESTROY static bool mi_cdecl mi_heap_track_block_free(const mi_heap_t* heap, const mi_heap_area_t* area, void* block, size_t block_size, void* arg) { MI_UNUSED(heap); MI_UNUSED(area); MI_UNUSED(arg); MI_UNUSED(block_size); mi_track_free_size(block,mi_usable_size(block)); return true; } #endif void mi_heap_destroy(mi_heap_t* heap) { mi_assert(heap != NULL); mi_assert(mi_heap_is_initialized(heap)); mi_assert(heap->no_reclaim); mi_assert_expensive(mi_heap_is_valid(heap)); if (heap==NULL || !mi_heap_is_initialized(heap)) return; if (!heap->no_reclaim) { // don't free in case it may contain reclaimed pages mi_heap_delete(heap); } else { // track all blocks as freed #if MI_TRACK_HEAP_DESTROY mi_heap_visit_blocks(heap, true, mi_heap_track_block_free, NULL); #endif // free all pages _mi_heap_destroy_pages(heap); mi_heap_free(heap); } } // forcefully destroy all heaps in the current thread void _mi_heap_unsafe_destroy_all(void) { mi_heap_t* bheap = mi_heap_get_backing(); mi_heap_t* curr = bheap->tld->heaps; while (curr != NULL) { mi_heap_t* next = curr->next; if (curr->no_reclaim) { mi_heap_destroy(curr); } else { _mi_heap_destroy_pages(curr); } curr = next; } } /* ----------------------------------------------------------- Safe Heap delete ----------------------------------------------------------- */ // Transfer the pages from 
one heap to the other static void mi_heap_absorb(mi_heap_t* heap, mi_heap_t* from) { mi_assert_internal(heap!=NULL); if (from==NULL || from->page_count == 0) return; // reduce the size of the delayed frees _mi_heap_delayed_free_partial(from); // transfer all pages by appending the queues; this will set a new heap field // so threads may do delayed frees in either heap for a while. // note: appending waits for each page to not be in the `MI_DELAYED_FREEING` state // so after this only the new heap will get delayed frees for (size_t i = 0; i <= MI_BIN_FULL; i++) { mi_page_queue_t* pq = &heap->pages[i]; mi_page_queue_t* append = &from->pages[i]; size_t pcount = _mi_page_queue_append(heap, pq, append); heap->page_count += pcount; from->page_count -= pcount; } mi_assert_internal(from->page_count == 0); // and do outstanding delayed frees in the `from` heap // note: be careful here as the `heap` field in all those pages no longer point to `from`, // turns out to be ok as `_mi_heap_delayed_free` only visits the list and calls a // the regular `_mi_free_delayed_block` which is safe. _mi_heap_delayed_free_all(from); #if !defined(_MSC_VER) || (_MSC_VER > 1900) // somehow the following line gives an error in VS2015, issue #353 mi_assert_internal(mi_atomic_load_ptr_relaxed(mi_block_t,&from->thread_delayed_free) == NULL); #endif // and reset the `from` heap mi_heap_reset_pages(from); } // Safe delete a heap without freeing any still allocated blocks in that heap. 
void mi_heap_delete(mi_heap_t* heap) { mi_assert(heap != NULL); mi_assert(mi_heap_is_initialized(heap)); mi_assert_expensive(mi_heap_is_valid(heap)); if (heap==NULL || !mi_heap_is_initialized(heap)) return; if (!mi_heap_is_backing(heap)) { // tranfer still used pages to the backing heap mi_heap_absorb(heap->tld->heap_backing, heap); } else { // the backing heap abandons its pages _mi_heap_collect_abandon(heap); } mi_assert_internal(heap->page_count==0); mi_heap_free(heap); } mi_heap_t* mi_heap_set_default(mi_heap_t* heap) { mi_assert(heap != NULL); mi_assert(mi_heap_is_initialized(heap)); if (heap==NULL || !mi_heap_is_initialized(heap)) return NULL; mi_assert_expensive(mi_heap_is_valid(heap)); mi_heap_t* old = mi_prim_get_default_heap(); _mi_heap_set_default_direct(heap); return old; } /* ----------------------------------------------------------- Analysis ----------------------------------------------------------- */ // static since it is not thread safe to access heaps from other threads. 
static mi_heap_t* mi_heap_of_block(const void* p) { if (p == NULL) return NULL; mi_segment_t* segment = _mi_ptr_segment(p); bool valid = (_mi_ptr_cookie(segment) == segment->cookie); mi_assert_internal(valid); if mi_unlikely(!valid) return NULL; return mi_page_heap(_mi_segment_page_of(segment,p)); } bool mi_heap_contains_block(mi_heap_t* heap, const void* p) { mi_assert(heap != NULL); if (heap==NULL || !mi_heap_is_initialized(heap)) return false; return (heap == mi_heap_of_block(p)); } static bool mi_heap_page_check_owned(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* p, void* vfound) { MI_UNUSED(heap); MI_UNUSED(pq); bool* found = (bool*)vfound; mi_segment_t* segment = _mi_page_segment(page); void* start = _mi_page_start(segment, page, NULL); void* end = (uint8_t*)start + (page->capacity * mi_page_block_size(page)); *found = (p >= start && p < end); return (!*found); // continue if not found } bool mi_heap_check_owned(mi_heap_t* heap, const void* p) { mi_assert(heap != NULL); if (heap==NULL || !mi_heap_is_initialized(heap)) return false; if (((uintptr_t)p & (MI_INTPTR_SIZE - 1)) != 0) return false; // only aligned pointers bool found = false; mi_heap_visit_pages(heap, &mi_heap_page_check_owned, (void*)p, &found); return found; } bool mi_check_owned(const void* p) { return mi_heap_check_owned(mi_prim_get_default_heap(), p); } /* ----------------------------------------------------------- Visit all heap blocks and areas Todo: enable visiting abandoned pages, and enable visiting all blocks of all heaps across threads ----------------------------------------------------------- */ // Separate struct to keep `mi_page_t` out of the public interface typedef struct mi_heap_area_ex_s { mi_heap_area_t area; mi_page_t* page; } mi_heap_area_ex_t; static bool mi_heap_area_visit_blocks(const mi_heap_area_ex_t* xarea, mi_block_visit_fun* visitor, void* arg) { mi_assert(xarea != NULL); if (xarea==NULL) return true; const mi_heap_area_t* area = &xarea->area; 
mi_page_t* page = xarea->page; mi_assert(page != NULL); if (page == NULL) return true; _mi_page_free_collect(page,true); mi_assert_internal(page->local_free == NULL); if (page->used == 0) return true; const size_t bsize = mi_page_block_size(page); const size_t ubsize = mi_page_usable_block_size(page); // without padding size_t psize; uint8_t* pstart = _mi_page_start(_mi_page_segment(page), page, &psize); if (page->capacity == 1) { // optimize page with one block mi_assert_internal(page->used == 1 && page->free == NULL); return visitor(mi_page_heap(page), area, pstart, ubsize, arg); } // create a bitmap of free blocks. #define MI_MAX_BLOCKS (MI_SMALL_PAGE_SIZE / sizeof(void*)) uintptr_t free_map[MI_MAX_BLOCKS / sizeof(uintptr_t)]; memset(free_map, 0, sizeof(free_map)); #if MI_DEBUG>1 size_t free_count = 0; #endif for (mi_block_t* block = page->free; block != NULL; block = mi_block_next(page,block)) { #if MI_DEBUG>1 free_count++; #endif mi_assert_internal((uint8_t*)block >= pstart && (uint8_t*)block < (pstart + psize)); size_t offset = (uint8_t*)block - pstart; mi_assert_internal(offset % bsize == 0); size_t blockidx = offset / bsize; // Todo: avoid division? 
mi_assert_internal( blockidx < MI_MAX_BLOCKS); size_t bitidx = (blockidx / sizeof(uintptr_t)); size_t bit = blockidx - (bitidx * sizeof(uintptr_t)); free_map[bitidx] |= ((uintptr_t)1 << bit); } mi_assert_internal(page->capacity == (free_count + page->used)); // walk through all blocks skipping the free ones #if MI_DEBUG>1 size_t used_count = 0; #endif for (size_t i = 0; i < page->capacity; i++) { size_t bitidx = (i / sizeof(uintptr_t)); size_t bit = i - (bitidx * sizeof(uintptr_t)); uintptr_t m = free_map[bitidx]; if (bit == 0 && m == UINTPTR_MAX) { i += (sizeof(uintptr_t) - 1); // skip a run of free blocks } else if ((m & ((uintptr_t)1 << bit)) == 0) { #if MI_DEBUG>1 used_count++; #endif uint8_t* block = pstart + (i * bsize); if (!visitor(mi_page_heap(page), area, block, ubsize, arg)) return false; } } mi_assert_internal(page->used == used_count); return true; } typedef bool (mi_heap_area_visit_fun)(const mi_heap_t* heap, const mi_heap_area_ex_t* area, void* arg); static bool mi_heap_visit_areas_page(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* vfun, void* arg) { MI_UNUSED(heap); MI_UNUSED(pq); mi_heap_area_visit_fun* fun = (mi_heap_area_visit_fun*)vfun; mi_heap_area_ex_t xarea; const size_t bsize = mi_page_block_size(page); const size_t ubsize = mi_page_usable_block_size(page); xarea.page = page; xarea.area.reserved = page->reserved * bsize; xarea.area.committed = page->capacity * bsize; xarea.area.blocks = _mi_page_start(_mi_page_segment(page), page, NULL); xarea.area.used = page->used; // number of blocks in use (#553) xarea.area.block_size = ubsize; xarea.area.full_block_size = bsize; return fun(heap, &xarea, arg); } // Visit all heap pages as areas static bool mi_heap_visit_areas(const mi_heap_t* heap, mi_heap_area_visit_fun* visitor, void* arg) { if (visitor == NULL) return false; return mi_heap_visit_pages((mi_heap_t*)heap, &mi_heap_visit_areas_page, (void*)(visitor), arg); // note: function pointer to void* :-{ } // Just to pass arguments 
typedef struct mi_visit_blocks_args_s { bool visit_blocks; mi_block_visit_fun* visitor; void* arg; } mi_visit_blocks_args_t; static bool mi_heap_area_visitor(const mi_heap_t* heap, const mi_heap_area_ex_t* xarea, void* arg) { mi_visit_blocks_args_t* args = (mi_visit_blocks_args_t*)arg; if (!args->visitor(heap, &xarea->area, NULL, xarea->area.block_size, args->arg)) return false; if (args->visit_blocks) { return mi_heap_area_visit_blocks(xarea, args->visitor, args->arg); } else { return true; } } // Visit all blocks in a heap bool mi_heap_visit_blocks(const mi_heap_t* heap, bool visit_blocks, mi_block_visit_fun* visitor, void* arg) { mi_visit_blocks_args_t args = { visit_blocks, visitor, arg }; return mi_heap_visit_areas(heap, &mi_heap_area_visitor, &args); } luametatex-2.10.08/source/libraries/mimalloc/src/init.c000066400000000000000000000620111442250314700230040ustar00rootroot00000000000000/* ---------------------------------------------------------------------------- Copyright (c) 2018-2022, Microsoft Research, Daan Leijen This is free software; you can redistribute it and/or modify it under the terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. 
-----------------------------------------------------------------------------*/ #include "mimalloc.h" #include "mimalloc/internal.h" #include "mimalloc/prim.h" #include // memcpy, memset #include // atexit // Empty page used to initialize the small free pages array const mi_page_t _mi_page_empty = { 0, false, false, false, 0, // capacity 0, // reserved capacity { 0 }, // flags false, // is_zero 0, // retire_expire NULL, // free 0, // used 0, // xblock_size NULL, // local_free #if (MI_PADDING || MI_ENCODE_FREELIST) { 0, 0 }, #endif MI_ATOMIC_VAR_INIT(0), // xthread_free MI_ATOMIC_VAR_INIT(0), // xheap NULL, NULL #if MI_INTPTR_SIZE==8 , { 0 } // padding #endif }; #define MI_PAGE_EMPTY() ((mi_page_t*)&_mi_page_empty) #if (MI_SMALL_WSIZE_MAX==128) #if (MI_PADDING>0) && (MI_INTPTR_SIZE >= 8) #define MI_SMALL_PAGES_EMPTY { MI_INIT128(MI_PAGE_EMPTY), MI_PAGE_EMPTY(), MI_PAGE_EMPTY() } #elif (MI_PADDING>0) #define MI_SMALL_PAGES_EMPTY { MI_INIT128(MI_PAGE_EMPTY), MI_PAGE_EMPTY(), MI_PAGE_EMPTY(), MI_PAGE_EMPTY() } #else #define MI_SMALL_PAGES_EMPTY { MI_INIT128(MI_PAGE_EMPTY), MI_PAGE_EMPTY() } #endif #else #error "define right initialization sizes corresponding to MI_SMALL_WSIZE_MAX" #endif // Empty page queues for every bin #define QNULL(sz) { NULL, NULL, (sz)*sizeof(uintptr_t) } #define MI_PAGE_QUEUES_EMPTY \ { QNULL(1), \ QNULL( 1), QNULL( 2), QNULL( 3), QNULL( 4), QNULL( 5), QNULL( 6), QNULL( 7), QNULL( 8), /* 8 */ \ QNULL( 10), QNULL( 12), QNULL( 14), QNULL( 16), QNULL( 20), QNULL( 24), QNULL( 28), QNULL( 32), /* 16 */ \ QNULL( 40), QNULL( 48), QNULL( 56), QNULL( 64), QNULL( 80), QNULL( 96), QNULL( 112), QNULL( 128), /* 24 */ \ QNULL( 160), QNULL( 192), QNULL( 224), QNULL( 256), QNULL( 320), QNULL( 384), QNULL( 448), QNULL( 512), /* 32 */ \ QNULL( 640), QNULL( 768), QNULL( 896), QNULL( 1024), QNULL( 1280), QNULL( 1536), QNULL( 1792), QNULL( 2048), /* 40 */ \ QNULL( 2560), QNULL( 3072), QNULL( 3584), QNULL( 4096), QNULL( 5120), QNULL( 6144), QNULL( 7168), QNULL( 
8192), /* 48 */ \ QNULL( 10240), QNULL( 12288), QNULL( 14336), QNULL( 16384), QNULL( 20480), QNULL( 24576), QNULL( 28672), QNULL( 32768), /* 56 */ \ QNULL( 40960), QNULL( 49152), QNULL( 57344), QNULL( 65536), QNULL( 81920), QNULL( 98304), QNULL(114688), QNULL(131072), /* 64 */ \ QNULL(163840), QNULL(196608), QNULL(229376), QNULL(262144), QNULL(327680), QNULL(393216), QNULL(458752), QNULL(524288), /* 72 */ \ QNULL(MI_MEDIUM_OBJ_WSIZE_MAX + 1 /* 655360, Huge queue */), \ QNULL(MI_MEDIUM_OBJ_WSIZE_MAX + 2) /* Full queue */ } #define MI_STAT_COUNT_NULL() {0,0,0,0} // Empty statistics #if MI_STAT>1 #define MI_STAT_COUNT_END_NULL() , { MI_STAT_COUNT_NULL(), MI_INIT32(MI_STAT_COUNT_NULL) } #else #define MI_STAT_COUNT_END_NULL() #endif #define MI_STATS_NULL \ MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \ MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \ MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \ MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \ MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \ MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \ MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \ MI_STAT_COUNT_NULL(), \ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, \ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 } \ MI_STAT_COUNT_END_NULL() // Empty slice span queues for every bin #define SQNULL(sz) { NULL, NULL, sz } #define MI_SEGMENT_SPAN_QUEUES_EMPTY \ { SQNULL(1), \ SQNULL( 1), SQNULL( 2), SQNULL( 3), SQNULL( 4), SQNULL( 5), SQNULL( 6), SQNULL( 7), SQNULL( 10), /* 8 */ \ SQNULL( 12), SQNULL( 14), SQNULL( 16), SQNULL( 20), SQNULL( 24), SQNULL( 28), SQNULL( 32), SQNULL( 40), /* 16 */ \ SQNULL( 48), SQNULL( 56), SQNULL( 64), SQNULL( 80), SQNULL( 96), SQNULL( 112), SQNULL( 128), SQNULL( 160), /* 24 */ \ SQNULL( 192), SQNULL( 224), SQNULL( 256), SQNULL( 320), SQNULL( 384), SQNULL( 448), SQNULL( 512), SQNULL( 640), /* 32 */ \ SQNULL( 768), SQNULL( 896), SQNULL( 1024) /* 35 */ } // -------------------------------------------------------- // Statically allocate an empty heap as the initial // 
thread local value for the default heap, // and statically allocate the backing heap for the main // thread so it can function without doing any allocation // itself (as accessing a thread local for the first time // may lead to allocation itself on some platforms) // -------------------------------------------------------- mi_decl_cache_align const mi_heap_t _mi_heap_empty = { NULL, MI_SMALL_PAGES_EMPTY, MI_PAGE_QUEUES_EMPTY, MI_ATOMIC_VAR_INIT(NULL), 0, // tid 0, // cookie 0, // arena id { 0, 0 }, // keys { {0}, {0}, 0, true }, // random 0, // page count MI_BIN_FULL, 0, // page retired min/max NULL, // next false }; #define tld_empty_stats ((mi_stats_t*)((uint8_t*)&tld_empty + offsetof(mi_tld_t,stats))) #define tld_empty_os ((mi_os_tld_t*)((uint8_t*)&tld_empty + offsetof(mi_tld_t,os))) mi_decl_cache_align static const mi_tld_t tld_empty = { 0, false, NULL, NULL, { MI_SEGMENT_SPAN_QUEUES_EMPTY, 0, 0, 0, 0, tld_empty_stats, tld_empty_os }, // segments { 0, tld_empty_stats }, // os { MI_STATS_NULL } // stats }; mi_threadid_t _mi_thread_id(void) mi_attr_noexcept { return _mi_prim_thread_id(); } // the thread-local default heap for allocation mi_decl_thread mi_heap_t* _mi_heap_default = (mi_heap_t*)&_mi_heap_empty; extern mi_heap_t _mi_heap_main; static mi_tld_t tld_main = { 0, false, &_mi_heap_main, & _mi_heap_main, { MI_SEGMENT_SPAN_QUEUES_EMPTY, 0, 0, 0, 0, &tld_main.stats, &tld_main.os }, // segments { 0, &tld_main.stats }, // os { MI_STATS_NULL } // stats }; mi_heap_t _mi_heap_main = { &tld_main, MI_SMALL_PAGES_EMPTY, MI_PAGE_QUEUES_EMPTY, MI_ATOMIC_VAR_INIT(NULL), 0, // thread id 0, // initial cookie 0, // arena id { 0, 0 }, // the key of the main heap can be fixed (unlike page keys that need to be secure!) { {0x846ca68b}, {0}, 0, true }, // random 0, // page count MI_BIN_FULL, 0, // page retired min/max NULL, // next heap false // can reclaim }; bool _mi_process_is_initialized = false; // set to `true` in `mi_process_init`. 
mi_stats_t _mi_stats_main = { MI_STATS_NULL }; static void mi_heap_main_init(void) { if (_mi_heap_main.cookie == 0) { _mi_heap_main.thread_id = _mi_thread_id(); _mi_heap_main.cookie = 1; #if defined(_WIN32) && !defined(MI_SHARED_LIB) _mi_random_init_weak(&_mi_heap_main.random); // prevent allocation failure during bcrypt dll initialization with static linking #else _mi_random_init(&_mi_heap_main.random); #endif _mi_heap_main.cookie = _mi_heap_random_next(&_mi_heap_main); _mi_heap_main.keys[0] = _mi_heap_random_next(&_mi_heap_main); _mi_heap_main.keys[1] = _mi_heap_random_next(&_mi_heap_main); } } mi_heap_t* _mi_heap_main_get(void) { mi_heap_main_init(); return &_mi_heap_main; } /* ----------------------------------------------------------- Initialization and freeing of the thread local heaps ----------------------------------------------------------- */ // note: in x64 in release build `sizeof(mi_thread_data_t)` is under 4KiB (= OS page size). typedef struct mi_thread_data_s { mi_heap_t heap; // must come first due to cast in `_mi_heap_done` mi_tld_t tld; mi_memid_t memid; } mi_thread_data_t; // Thread meta-data is allocated directly from the OS. For // some programs that do not use thread pools and allocate and // destroy many OS threads, this may causes too much overhead // per thread so we maintain a small cache of recently freed metadata. 
#define TD_CACHE_SIZE (16)
// Small lock-free cache of recently freed thread metadata blocks.
static _Atomic(mi_thread_data_t*) td_cache[TD_CACHE_SIZE];

// Obtain a zeroed `mi_thread_data_t`: first try to reuse a cached block
// (claimed via atomic exchange), otherwise allocate directly from the OS.
// Returns NULL only if the OS allocation fails twice.
static mi_thread_data_t* mi_thread_data_zalloc(void) {
  // try to find thread metadata in the cache
  bool is_zero = false;
  mi_thread_data_t* td = NULL;
  for (int i = 0; i < TD_CACHE_SIZE; i++) {
    td = mi_atomic_load_ptr_relaxed(mi_thread_data_t, &td_cache[i]);
    if (td != NULL) {
      // found cached allocation, try use it
      // (exchange may lose the race to another thread and return NULL)
      td = mi_atomic_exchange_ptr_acq_rel(mi_thread_data_t, &td_cache[i], NULL);
      if (td != NULL) {
        break;
      }
    }
  }

  // if that fails, allocate as meta data
  if (td == NULL) {
    mi_memid_t memid;
    td = (mi_thread_data_t*)_mi_os_alloc(sizeof(mi_thread_data_t), &memid, &_mi_stats_main);
    if (td == NULL) {
      // if this fails, try once more. (issue #257)
      td = (mi_thread_data_t*)_mi_os_alloc(sizeof(mi_thread_data_t), &memid, &_mi_stats_main);
      if (td == NULL) {
        // really out of memory
        _mi_error_message(ENOMEM, "unable to allocate thread local heap metadata (%zu bytes)\n", sizeof(mi_thread_data_t));
      }
    }
    if (td != NULL) {
      td->memid = memid;
      is_zero = memid.initially_zero;
    }
  }

  // cached blocks (and OS memory not known to be zeroed) must be cleared
  if (td != NULL && !is_zero) {
    _mi_memzero_aligned(td, sizeof(*td));
  }
  return td;
}

// Return a thread metadata block: park it in an empty cache slot (CAS) so a
// future thread can reuse it, or free it back to the OS if the cache is full.
static void mi_thread_data_free( mi_thread_data_t* tdfree ) {
  // try to add the thread metadata to the cache
  for (int i = 0; i < TD_CACHE_SIZE; i++) {
    mi_thread_data_t* td = mi_atomic_load_ptr_relaxed(mi_thread_data_t, &td_cache[i]);
    if (td == NULL) {
      mi_thread_data_t* expected = NULL;
      if (mi_atomic_cas_ptr_weak_acq_rel(mi_thread_data_t, &td_cache[i], &expected, tdfree)) {
        return;
      }
    }
  }
  // if that fails, just free it directly
  _mi_os_free(tdfree, sizeof(mi_thread_data_t), tdfree->memid, &_mi_stats_main);
}

// Drain the thread-data cache, releasing every cached block back to the OS.
void _mi_thread_data_collect(void) {
  // free all thread metadata from the cache
  for (int i = 0; i < TD_CACHE_SIZE; i++) {
    mi_thread_data_t* td = mi_atomic_load_ptr_relaxed(mi_thread_data_t, &td_cache[i]);
    if (td != NULL) {
      // claim the slot; another thread may have taken it in the meantime
      td = mi_atomic_exchange_ptr_acq_rel(mi_thread_data_t, &td_cache[i], NULL);
      if (td != NULL) {
        _mi_os_free(td, sizeof(mi_thread_data_t), td->memid, &_mi_stats_main);
      }
    }
  }
}

// Initialize the thread local default heap, called from `mi_thread_init`
// Returns true if the heap was already initialized (nothing to do).
static bool _mi_heap_init(void) {
  if (mi_heap_is_initialized(mi_prim_get_default_heap())) return true;
  if (_mi_is_main_thread()) {
    // mi_assert_internal(_mi_heap_main.thread_id != 0); // can happen on freeBSD where alloc is called before any initialization
    // the main heap is statically allocated
    mi_heap_main_init();
    _mi_heap_set_default_direct(&_mi_heap_main);
    //mi_assert_internal(_mi_heap_default->tld->heap_backing == mi_prim_get_default_heap());
  }
  else {
    // use `_mi_os_alloc` to allocate directly from the OS
    mi_thread_data_t* td = mi_thread_data_zalloc();
    if (td == NULL) return false;

    // seed the new heap/tld from the empty templates, then wire them together
    mi_tld_t* tld = &td->tld;
    mi_heap_t* heap = &td->heap;
    _mi_memcpy_aligned(tld, &tld_empty, sizeof(*tld));
    _mi_memcpy_aligned(heap, &_mi_heap_empty, sizeof(*heap));
    heap->thread_id = _mi_thread_id();
    _mi_random_init(&heap->random);
    heap->cookie = _mi_heap_random_next(heap) | 1;  // | 1 keeps the cookie non-zero
    heap->keys[0] = _mi_heap_random_next(heap);
    heap->keys[1] = _mi_heap_random_next(heap);
    heap->tld = tld;
    tld->heap_backing = heap;
    tld->heaps = heap;
    tld->segments.stats = &tld->stats;
    tld->segments.os = &tld->os;
    tld->os.stats = &tld->stats;
    _mi_heap_set_default_direct(heap);
  }
  return false;
}

// Free the thread local default heap (called from `mi_thread_done`)
static bool _mi_heap_done(mi_heap_t* heap) {
  if (!mi_heap_is_initialized(heap)) return true;

  // reset default heap
  _mi_heap_set_default_direct(_mi_is_main_thread() ? &_mi_heap_main : (mi_heap_t*)&_mi_heap_empty);

  // switch to backing heap
  heap = heap->tld->heap_backing;
  if (!mi_heap_is_initialized(heap)) return false;

  // delete all non-backing heaps in this thread
  mi_heap_t* curr = heap->tld->heaps;
  while (curr != NULL) {
    mi_heap_t* next = curr->next; // save `next` as `curr` will be freed
    if (curr != heap) {
      mi_assert_internal(!mi_heap_is_backing(curr));
      mi_heap_delete(curr);
    }
    curr = next;
  }
  mi_assert_internal(heap->tld->heaps == heap && heap->next == NULL);
  mi_assert_internal(mi_heap_is_backing(heap));

  // collect if not the main thread
  if (heap != &_mi_heap_main) {
    _mi_heap_collect_abandon(heap);
  }

  // merge stats
  _mi_stats_done(&heap->tld->stats);

  // free if not the main thread
  if (heap != &_mi_heap_main) {
    // the following assertion does not always hold for huge segments as those are always treated
    // as abandoned: one may allocate it in one thread, but deallocate in another in which case
    // the count can be too large or negative. todo: perhaps not count huge segments? see issue #363
    // mi_assert_internal(heap->tld->segments.count == 0 || heap->thread_id != _mi_thread_id());
    mi_thread_data_free((mi_thread_data_t*)heap);  // valid cast: `heap` is the first field of mi_thread_data_t
  }
  else {
    #if 0
    // never free the main thread even in debug mode; if a dll is linked statically with mimalloc,
    // there may still be delete/free calls after the mi_fls_done is called. Issue #207
    _mi_heap_destroy_pages(heap);
    mi_assert_internal(heap->tld->heap_backing == &_mi_heap_main);
    #endif
  }
  return false;
}

// --------------------------------------------------------
// Try to run `mi_thread_done()` automatically so any memory
// owned by the thread but not yet released can be abandoned
// and re-owned by another thread.
//
// 1. windows dynamic library:
//    call from DllMain on DLL_THREAD_DETACH
// 2. windows static library:
//    use `FlsAlloc` to call a destructor when the thread is done
// 3.
// unix, pthreads:
//    use a pthread key to call a destructor when a pthread is done
//
// In the last two cases we also need to call `mi_process_init`
// to set up the thread local keys.
// --------------------------------------------------------

// Set up handlers so `mi_thread_done` is called automatically
static void mi_process_setup_auto_thread_done(void) {
  static bool tls_initialized = false; // fine if it races
  if (tls_initialized) return;
  tls_initialized = true;
  _mi_prim_thread_init_auto_done();
  _mi_heap_set_default_direct(&_mi_heap_main);
}

// True for the main thread, or before any thread id was recorded
// (thread_id == 0 means `mi_heap_main_init` has not run yet).
bool _mi_is_main_thread(void) {
  return (_mi_heap_main.thread_id==0 || _mi_heap_main.thread_id == _mi_thread_id());
}

// Live thread count; starts at 1 to account for the main thread.
static _Atomic(size_t) thread_count = MI_ATOMIC_VAR_INIT(1);

size_t _mi_current_thread_count(void) {
  return mi_atomic_load_relaxed(&thread_count);
}

// This is called from the `mi_malloc_generic`
void mi_thread_init(void) mi_attr_noexcept {
  // ensure our process has started already
  mi_process_init();

  // initialize the thread local default heap
  // (this will call `_mi_heap_set_default_direct` and thus set the
  //  fiber/pthread key to a non-zero value, ensuring `_mi_thread_done` is called)
  if (_mi_heap_init()) return;  // returns true if already initialized

  _mi_stat_increase(&_mi_stats_main.threads, 1);
  mi_atomic_increment_relaxed(&thread_count);
  //_mi_verbose_message("thread init: 0x%zx\n", _mi_thread_id());
}

void mi_thread_done(void) mi_attr_noexcept {
  _mi_thread_done(NULL);
}

// Tear down a thread's heap; `heap == NULL` means the current default heap.
void _mi_thread_done(mi_heap_t* heap) {
  // calling with NULL implies using the default heap
  if (heap == NULL) {
    heap = mi_prim_get_default_heap();
    if (heap == NULL) return;
  }

  // prevent re-entrancy through heap_done/heap_set_default_direct (issue #699)
  if (!mi_heap_is_initialized(heap)) {
    return;
  }

  // adjust stats
  mi_atomic_decrement_relaxed(&thread_count);
  _mi_stat_decrease(&_mi_stats_main.threads, 1);

  // check thread-id as on Windows shutdown with FLS the main (exit) thread may call this on thread-local heaps...
  if (heap->thread_id != _mi_thread_id()) return;

  // abandon the thread local heap
  if (_mi_heap_done(heap)) return;  // returns true if already ran
}

// Install `heap` as the thread's default heap. The storage mechanism depends
// on the platform TLS configuration selected at compile time.
void _mi_heap_set_default_direct(mi_heap_t* heap)  {
  mi_assert_internal(heap != NULL);
  #if defined(MI_TLS_SLOT)
  mi_prim_tls_slot_set(MI_TLS_SLOT,heap);
  #elif defined(MI_TLS_PTHREAD_SLOT_OFS)
  *mi_tls_pthread_heap_slot() = heap;
  #elif defined(MI_TLS_PTHREAD)
  // we use _mi_heap_default_key
  #else
  _mi_heap_default = heap;
  #endif

  // ensure the default heap is passed to `_mi_thread_done`
  // setting to a non-NULL value also ensures `mi_thread_done` is called.
  _mi_prim_thread_associate_default_heap(heap);
}

// --------------------------------------------------------
// Run functions on process init/done, and thread init/done
// --------------------------------------------------------
static void mi_cdecl mi_process_done(void);

static bool os_preloading = true;    // true until this module is initialized
static bool mi_redirected = false;   // true if malloc redirects to mi_malloc

// Returns true if this module has not been initialized; Don't use C runtime routines until it returns false.
bool mi_decl_noinline _mi_preloading(void) { return os_preloading; } mi_decl_nodiscard bool mi_is_redirected(void) mi_attr_noexcept { return mi_redirected; } // Communicate with the redirection module on Windows #if defined(_WIN32) && defined(MI_SHARED_LIB) && !defined(MI_WIN_NOREDIRECT) #ifdef __cplusplus extern "C" { #endif mi_decl_export void _mi_redirect_entry(DWORD reason) { // called on redirection; careful as this may be called before DllMain if (reason == DLL_PROCESS_ATTACH) { mi_redirected = true; } else if (reason == DLL_PROCESS_DETACH) { mi_redirected = false; } else if (reason == DLL_THREAD_DETACH) { mi_thread_done(); } } __declspec(dllimport) bool mi_cdecl mi_allocator_init(const char** message); __declspec(dllimport) void mi_cdecl mi_allocator_done(void); #ifdef __cplusplus } #endif #else static bool mi_allocator_init(const char** message) { if (message != NULL) *message = NULL; return true; } static void mi_allocator_done(void) { // nothing to do } #endif // Called once by the process loader static void mi_process_load(void) { mi_heap_main_init(); #if defined(__APPLE__) || defined(MI_TLS_RECURSE_GUARD) volatile mi_heap_t* dummy = _mi_heap_default; // access TLS to allocate it before setting tls_initialized to true; if (dummy == NULL) return; // use dummy or otherwise the access may get optimized away (issue #697) #endif os_preloading = false; mi_assert_internal(_mi_is_main_thread()); #if !(defined(_WIN32) && defined(MI_SHARED_LIB)) // use Dll process detach (see below) instead of atexit (issue #521) atexit(&mi_process_done); #endif _mi_options_init(); mi_process_setup_auto_thread_done(); mi_process_init(); if (mi_redirected) _mi_verbose_message("malloc is redirected.\n"); // show message from the redirector (if present) const char* msg = NULL; mi_allocator_init(&msg); if (msg != NULL && (mi_option_is_enabled(mi_option_verbose) || mi_option_is_enabled(mi_option_show_errors))) { _mi_fputs(NULL,NULL,NULL,msg); } // reseed random 
_mi_random_reinit_if_weak(&_mi_heap_main.random); } #if defined(_WIN32) && (defined(_M_IX86) || defined(_M_X64)) #include mi_decl_cache_align bool _mi_cpu_has_fsrm = false; static void mi_detect_cpu_features(void) { // FSRM for fast rep movsb support (AMD Zen3+ (~2020) or Intel Ice Lake+ (~2017)) int32_t cpu_info[4]; __cpuid(cpu_info, 7); _mi_cpu_has_fsrm = ((cpu_info[3] & (1 << 4)) != 0); // bit 4 of EDX : see } #else static void mi_detect_cpu_features(void) { // nothing } #endif // Initialize the process; called by thread_init or the process loader void mi_process_init(void) mi_attr_noexcept { // ensure we are called once static mi_atomic_once_t process_init; #if _MSC_VER < 1920 mi_heap_main_init(); // vs2017 can dynamically re-initialize _mi_heap_main #endif if (!mi_atomic_once(&process_init)) return; _mi_process_is_initialized = true; _mi_verbose_message("process init: 0x%zx\n", _mi_thread_id()); mi_process_setup_auto_thread_done(); mi_detect_cpu_features(); _mi_os_init(); mi_heap_main_init(); #if MI_DEBUG _mi_verbose_message("debug level : %d\n", MI_DEBUG); #endif _mi_verbose_message("secure level: %d\n", MI_SECURE); _mi_verbose_message("mem tracking: %s\n", MI_TRACK_TOOL); #if MI_TSAN _mi_verbose_message("thread santizer enabled\n"); #endif mi_thread_init(); #if defined(_WIN32) // On windows, when building as a static lib the FLS cleanup happens to early for the main thread. // To avoid this, set the FLS value for the main thread to NULL so the fls cleanup // will not call _mi_thread_done on the (still executing) main thread. See issue #508. 
_mi_prim_thread_associate_default_heap(NULL); #endif mi_stats_reset(); // only call stat reset *after* thread init (or the heap tld == NULL) mi_track_init(); if (mi_option_is_enabled(mi_option_reserve_huge_os_pages)) { size_t pages = mi_option_get_clamp(mi_option_reserve_huge_os_pages, 0, 128*1024); long reserve_at = mi_option_get(mi_option_reserve_huge_os_pages_at); if (reserve_at != -1) { mi_reserve_huge_os_pages_at(pages, reserve_at, pages*500); } else { mi_reserve_huge_os_pages_interleave(pages, 0, pages*500); } } if (mi_option_is_enabled(mi_option_reserve_os_memory)) { long ksize = mi_option_get(mi_option_reserve_os_memory); if (ksize > 0) { mi_reserve_os_memory((size_t)ksize*MI_KiB, true /* commit? */, true /* allow large pages? */); } } } // Called when the process is done (through `at_exit`) static void mi_cdecl mi_process_done(void) { // only shutdown if we were initialized if (!_mi_process_is_initialized) return; // ensure we are called once static bool process_done = false; if (process_done) return; process_done = true; // release any thread specific resources and ensure _mi_thread_done is called on all but the main thread _mi_prim_thread_done_auto_done(); #ifndef MI_SKIP_COLLECT_ON_EXIT #if (MI_DEBUG || !defined(MI_SHARED_LIB)) // free all memory if possible on process exit. This is not needed for a stand-alone process // but should be done if mimalloc is statically linked into another shared library which // is repeatedly loaded/unloaded, see issue #281. mi_collect(true /* force */ ); #endif #endif // Forcefully release all retained memory; this can be dangerous in general if overriding regular malloc/free // since after process_done there might still be other code running that calls `free` (like at_exit routines, // or C-runtime termination code. if (mi_option_is_enabled(mi_option_destroy_on_exit)) { mi_collect(true /* force */); _mi_heap_unsafe_destroy_all(); // forcefully release all memory held by all heaps (of this thread only!) 
_mi_arena_unsafe_destroy_all(& _mi_heap_main_get()->tld->stats); } if (mi_option_is_enabled(mi_option_show_stats) || mi_option_is_enabled(mi_option_verbose)) { mi_stats_print(NULL); } mi_allocator_done(); _mi_verbose_message("process done: 0x%zx\n", _mi_heap_main.thread_id); os_preloading = true; // don't call the C runtime anymore } #if defined(_WIN32) && defined(MI_SHARED_LIB) // Windows DLL: easy to hook into process_init and thread_done __declspec(dllexport) BOOL WINAPI DllMain(HINSTANCE inst, DWORD reason, LPVOID reserved) { MI_UNUSED(reserved); MI_UNUSED(inst); if (reason==DLL_PROCESS_ATTACH) { mi_process_load(); } else if (reason==DLL_PROCESS_DETACH) { mi_process_done(); } else if (reason==DLL_THREAD_DETACH) { if (!mi_is_redirected()) { mi_thread_done(); } } return TRUE; } #elif defined(_MSC_VER) // MSVC: use data section magic for static libraries // See static int _mi_process_init(void) { mi_process_load(); return 0; } typedef int(*_mi_crt_callback_t)(void); #if defined(_M_X64) || defined(_M_ARM64) __pragma(comment(linker, "/include:" "_mi_msvc_initu")) #pragma section(".CRT$XIU", long, read) #else __pragma(comment(linker, "/include:" "__mi_msvc_initu")) #endif #pragma data_seg(".CRT$XIU") mi_decl_externc _mi_crt_callback_t _mi_msvc_initu[] = { &_mi_process_init }; #pragma data_seg() #elif defined(__cplusplus) // C++: use static initialization to detect process start static bool _mi_process_init(void) { mi_process_load(); return (_mi_heap_main.thread_id != 0); } static bool mi_initialized = _mi_process_init(); #elif defined(__GNUC__) || defined(__clang__) // GCC,Clang: use the constructor attribute static void __attribute__((constructor)) _mi_process_init(void) { mi_process_load(); } #else #pragma message("define a way to call mi_process_load on your platform") #endif luametatex-2.10.08/source/libraries/mimalloc/src/options.c000066400000000000000000000513421442250314700235410ustar00rootroot00000000000000/* 
---------------------------------------------------------------------------- Copyright (c) 2018-2021, Microsoft Research, Daan Leijen This is free software; you can redistribute it and/or modify it under the terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. -----------------------------------------------------------------------------*/ #include "mimalloc.h" #include "mimalloc/internal.h" #include "mimalloc/atomic.h" #include "mimalloc/prim.h" // mi_prim_out_stderr #include // FILE #include // abort #include static long mi_max_error_count = 16; // stop outputting errors after this (use < 0 for no limit) static long mi_max_warning_count = 16; // stop outputting warnings after this (use < 0 for no limit) static void mi_add_stderr_output(void); int mi_version(void) mi_attr_noexcept { return MI_MALLOC_VERSION; } // -------------------------------------------------------- // Options // These can be accessed by multiple threads and may be // concurrently initialized, but an initializing data race // is ok since they resolve to the same value. // -------------------------------------------------------- typedef enum mi_init_e { UNINIT, // not yet initialized DEFAULTED, // not found in the environment, use default value INITIALIZED // found in environment or set explicitly } mi_init_t; typedef struct mi_option_desc_s { long value; // the value mi_init_t init; // is it initialized yet? 
(from the environment) mi_option_t option; // for debugging: the option index should match the option const char* name; // option name without `mimalloc_` prefix const char* legacy_name; // potential legacy option name } mi_option_desc_t; #define MI_OPTION(opt) mi_option_##opt, #opt, NULL #define MI_OPTION_LEGACY(opt,legacy) mi_option_##opt, #opt, #legacy static mi_option_desc_t options[_mi_option_last] = { // stable options #if MI_DEBUG || defined(MI_SHOW_ERRORS) { 1, UNINIT, MI_OPTION(show_errors) }, #else { 0, UNINIT, MI_OPTION(show_errors) }, #endif { 0, UNINIT, MI_OPTION(show_stats) }, { 0, UNINIT, MI_OPTION(verbose) }, // the following options are experimental and not all combinations make sense. { 1, UNINIT, MI_OPTION(eager_commit) }, // commit per segment directly (4MiB) (but see also `eager_commit_delay`) { 2, UNINIT, MI_OPTION_LEGACY(arena_eager_commit,eager_region_commit) }, // eager commit arena's? 2 is used to enable this only on an OS that has overcommit (i.e. linux) { 1, UNINIT, MI_OPTION_LEGACY(purge_decommits,reset_decommits) }, // purge decommits memory (instead of reset) (note: on linux this uses MADV_DONTNEED for decommit) { 0, UNINIT, MI_OPTION_LEGACY(allow_large_os_pages,large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's { 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, // per 1GiB huge pages {-1, UNINIT, MI_OPTION(reserve_huge_os_pages_at) }, // reserve huge pages at node N { 0, UNINIT, MI_OPTION(reserve_os_memory) }, { 0, UNINIT, MI_OPTION(deprecated_segment_cache) }, // cache N segments per thread { 0, UNINIT, MI_OPTION(deprecated_page_reset) }, // reset page memory on free { 0, UNINIT, MI_OPTION_LEGACY(abandoned_page_purge,abandoned_page_reset) }, // reset free page memory when a thread terminates { 0, UNINIT, MI_OPTION(deprecated_segment_reset) }, // reset segment memory on free (needs eager commit) #if defined(__NetBSD__) { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N 
segments per thread are not eagerly committed #else { 1, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed (but per page in the segment on demand) #endif { 10, UNINIT, MI_OPTION_LEGACY(purge_delay,reset_delay) }, // purge delay in milli-seconds { 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes. { 0, UNINIT, MI_OPTION(limit_os_alloc) }, // 1 = do not use OS memory for allocation (but only reserved arenas) { 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose { 16, UNINIT, MI_OPTION(max_errors) }, // maximum errors that are output { 16, UNINIT, MI_OPTION(max_warnings) }, // maximum warnings that are output { 8, UNINIT, MI_OPTION(max_segment_reclaim)}, // max. number of segment reclaims from the abandoned segments per try. { 0, UNINIT, MI_OPTION(destroy_on_exit)}, // release all OS memory on process exit; careful with dangling pointer or after-exit frees! #if (MI_INTPTR_SIZE>4) { 1024L * 1024L, UNINIT, MI_OPTION(arena_reserve) }, // reserve memory N KiB at a time #else { 128L * 1024L, UNINIT, MI_OPTION(arena_reserve) }, #endif { 10, UNINIT, MI_OPTION(arena_purge_mult) }, // purge delay multiplier for arena's { 1, UNINIT, MI_OPTION_LEGACY(purge_extend_delay, decommit_extend_delay) }, }; static void mi_option_init(mi_option_desc_t* desc); void _mi_options_init(void) { // called on process load; should not be called before the CRT is initialized! // (e.g. 
do not call this from process_init as that may run before CRT initialization) mi_add_stderr_output(); // now it safe to use stderr for output for(int i = 0; i < _mi_option_last; i++ ) { mi_option_t option = (mi_option_t)i; long l = mi_option_get(option); MI_UNUSED(l); // initialize // if (option != mi_option_verbose) { mi_option_desc_t* desc = &options[option]; _mi_verbose_message("option '%s': %ld\n", desc->name, desc->value); } } mi_max_error_count = mi_option_get(mi_option_max_errors); mi_max_warning_count = mi_option_get(mi_option_max_warnings); } mi_decl_nodiscard long mi_option_get(mi_option_t option) { mi_assert(option >= 0 && option < _mi_option_last); if (option < 0 || option >= _mi_option_last) return 0; mi_option_desc_t* desc = &options[option]; mi_assert(desc->option == option); // index should match the option if mi_unlikely(desc->init == UNINIT) { mi_option_init(desc); } return desc->value; } mi_decl_nodiscard long mi_option_get_clamp(mi_option_t option, long min, long max) { long x = mi_option_get(option); return (x < min ? min : (x > max ? max : x)); } mi_decl_nodiscard size_t mi_option_get_size(mi_option_t option) { mi_assert_internal(option == mi_option_reserve_os_memory || option == mi_option_arena_reserve); long x = mi_option_get(option); return (x < 0 ? 
0 : (size_t)x * MI_KiB); } void mi_option_set(mi_option_t option, long value) { mi_assert(option >= 0 && option < _mi_option_last); if (option < 0 || option >= _mi_option_last) return; mi_option_desc_t* desc = &options[option]; mi_assert(desc->option == option); // index should match the option desc->value = value; desc->init = INITIALIZED; } void mi_option_set_default(mi_option_t option, long value) { mi_assert(option >= 0 && option < _mi_option_last); if (option < 0 || option >= _mi_option_last) return; mi_option_desc_t* desc = &options[option]; if (desc->init != INITIALIZED) { desc->value = value; } } mi_decl_nodiscard bool mi_option_is_enabled(mi_option_t option) { return (mi_option_get(option) != 0); } void mi_option_set_enabled(mi_option_t option, bool enable) { mi_option_set(option, (enable ? 1 : 0)); } void mi_option_set_enabled_default(mi_option_t option, bool enable) { mi_option_set_default(option, (enable ? 1 : 0)); } void mi_option_enable(mi_option_t option) { mi_option_set_enabled(option,true); } void mi_option_disable(mi_option_t option) { mi_option_set_enabled(option,false); } static void mi_cdecl mi_out_stderr(const char* msg, void* arg) { MI_UNUSED(arg); if (msg != NULL && msg[0] != 0) { _mi_prim_out_stderr(msg); } } // Since an output function can be registered earliest in the `main` // function we also buffer output that happens earlier. When // an output function is registered it is called immediately with // the output up to that point. 
#ifndef MI_MAX_DELAY_OUTPUT
#define MI_MAX_DELAY_OUTPUT ((size_t)(32*1024))
#endif
// Delayed-output buffer used before any output handler is registered.
static char out_buf[MI_MAX_DELAY_OUTPUT+1];
static _Atomic(size_t) out_len;

// Append `msg` to the delayed-output buffer. Space is claimed with a single
// atomic add on `out_len` so concurrent writers get disjoint regions; output
// past the buffer capacity is silently truncated.
static void mi_cdecl mi_out_buf(const char* msg, void* arg) {
  MI_UNUSED(arg);
  if (msg==NULL) return;
  if (mi_atomic_load_relaxed(&out_len)>=MI_MAX_DELAY_OUTPUT) return;
  size_t n = _mi_strlen(msg);
  if (n==0) return;
  // claim space
  size_t start = mi_atomic_add_acq_rel(&out_len, n);
  if (start >= MI_MAX_DELAY_OUTPUT) return;
  // check bound
  if (start+n >= MI_MAX_DELAY_OUTPUT) {
    n = MI_MAX_DELAY_OUTPUT-start-1;
  }
  _mi_memcpy(&out_buf[start], msg, n);
}

// Flush the delayed buffer to `out`. When `no_more_buf` is set the length is
// pushed past capacity so no further buffering can occur.
static void mi_out_buf_flush(mi_output_fun* out, bool no_more_buf, void* arg) {
  if (out==NULL) return;
  // claim (if `no_more_buf == true`, no more output will be added after this point)
  size_t count = mi_atomic_add_acq_rel(&out_len, (no_more_buf ? MI_MAX_DELAY_OUTPUT : 1));
  // and output the current contents
  if (count>MI_MAX_DELAY_OUTPUT) count = MI_MAX_DELAY_OUTPUT;
  out_buf[count] = 0;
  out(out_buf,arg);
  if (!no_more_buf) {
    out_buf[count] = '\n'; // if continue with the buffer, insert a newline
  }
}

// Once this module is loaded, switch to this routine
// which outputs to stderr and the delayed output buffer.
static void mi_cdecl mi_out_buf_stderr(const char* msg, void* arg) {
  mi_out_stderr(msg,arg);
  mi_out_buf(msg,arg);
}

// --------------------------------------------------------
// Default output handler
// --------------------------------------------------------

// Should be atomic but gives errors on many platforms as generally we cannot cast a function pointer to a uintptr_t.
// For now, don't register output from multiple threads.
static mi_output_fun* volatile mi_out_default; // = NULL
static _Atomic(void*) mi_out_arg;              // = NULL

// Return the currently registered output function (and its argument via
// `parg`), falling back to the delayed buffer when none is registered.
static mi_output_fun* mi_out_get_default(void** parg) {
  if (parg != NULL) {
    *parg = mi_atomic_load_ptr_acquire(void,&mi_out_arg);
  }
  mi_output_fun* out = mi_out_default;
  return (out == NULL ? &mi_out_buf : out);
}

// Register a user output function; flushes any buffered early output to it.
void mi_register_output(mi_output_fun* out, void* arg) mi_attr_noexcept {
  mi_out_default = (out == NULL ? &mi_out_stderr : out); // stop using the delayed output buffer
  mi_atomic_store_ptr_release(void,&mi_out_arg, arg);
  if (out!=NULL) mi_out_buf_flush(out,true,arg); // output all the delayed output now
}

// add stderr to the delayed output after the module is loaded
static void mi_add_stderr_output(void) {
  mi_assert_internal(mi_out_default == NULL);
  mi_out_buf_flush(&mi_out_stderr, false, NULL); // flush current contents to stderr
  mi_out_default = &mi_out_buf_stderr;           // and add stderr to the delayed output
}

// --------------------------------------------------------
// Messages, all end up calling `_mi_fputs`.
// --------------------------------------------------------
static _Atomic(size_t) error_count;   // = 0;  // when >= max_error_count stop emitting errors
static _Atomic(size_t) warning_count; // = 0;  // when >= max_warning_count stop emitting warnings

// When overriding malloc, we may recurse into mi_vfprintf if an allocation
// inside the C runtime causes another message.
// In some cases (like on macOS) the loader already allocates which
// calls into mimalloc; if we then access thread locals (like `recurse`)
// this may crash as the access may call _tlv_bootstrap that tries to
// (recursively) invoke malloc again to allocate space for the thread local
// variables on demand. This is why we use a _mi_preloading test on such
// platforms. However, C code generator may move the initial thread local address
// load before the `if` and we therefore split it out in a separate function.
static mi_decl_thread bool recurse = false; static mi_decl_noinline bool mi_recurse_enter_prim(void) { if (recurse) return false; recurse = true; return true; } static mi_decl_noinline void mi_recurse_exit_prim(void) { recurse = false; } static bool mi_recurse_enter(void) { #if defined(__APPLE__) || defined(MI_TLS_RECURSE_GUARD) if (_mi_preloading()) return false; #endif return mi_recurse_enter_prim(); } static void mi_recurse_exit(void) { #if defined(__APPLE__) || defined(MI_TLS_RECURSE_GUARD) if (_mi_preloading()) return; #endif mi_recurse_exit_prim(); } void _mi_fputs(mi_output_fun* out, void* arg, const char* prefix, const char* message) { if (out==NULL || (void*)out==(void*)stdout || (void*)out==(void*)stderr) { // TODO: use mi_out_stderr for stderr? if (!mi_recurse_enter()) return; out = mi_out_get_default(&arg); if (prefix != NULL) out(prefix, arg); out(message, arg); mi_recurse_exit(); } else { if (prefix != NULL) out(prefix, arg); out(message, arg); } } // Define our own limited `fprintf` that avoids memory allocation. // We do this using `snprintf` with a limited buffer. static void mi_vfprintf( mi_output_fun* out, void* arg, const char* prefix, const char* fmt, va_list args ) { char buf[512]; if (fmt==NULL) return; if (!mi_recurse_enter()) return; vsnprintf(buf,sizeof(buf)-1,fmt,args); mi_recurse_exit(); _mi_fputs(out,arg,prefix,buf); } void _mi_fprintf( mi_output_fun* out, void* arg, const char* fmt, ... ) { va_list args; va_start(args,fmt); mi_vfprintf(out,arg,NULL,fmt,args); va_end(args); } static void mi_vfprintf_thread(mi_output_fun* out, void* arg, const char* prefix, const char* fmt, va_list args) { if (prefix != NULL && _mi_strnlen(prefix,33) <= 32 && !_mi_is_main_thread()) { char tprefix[64]; snprintf(tprefix, sizeof(tprefix), "%sthread 0x%llx: ", prefix, (unsigned long long)_mi_thread_id()); mi_vfprintf(out, arg, tprefix, fmt, args); } else { mi_vfprintf(out, arg, prefix, fmt, args); } } void _mi_trace_message(const char* fmt, ...) 
{ if (mi_option_get(mi_option_verbose) <= 1) return; // only with verbose level 2 or higher va_list args; va_start(args, fmt); mi_vfprintf_thread(NULL, NULL, "mimalloc: ", fmt, args); va_end(args); } void _mi_verbose_message(const char* fmt, ...) { if (!mi_option_is_enabled(mi_option_verbose)) return; va_list args; va_start(args,fmt); mi_vfprintf(NULL, NULL, "mimalloc: ", fmt, args); va_end(args); } static void mi_show_error_message(const char* fmt, va_list args) { if (!mi_option_is_enabled(mi_option_verbose)) { if (!mi_option_is_enabled(mi_option_show_errors)) return; if (mi_max_error_count >= 0 && (long)mi_atomic_increment_acq_rel(&error_count) > mi_max_error_count) return; } mi_vfprintf_thread(NULL, NULL, "mimalloc: error: ", fmt, args); } void _mi_warning_message(const char* fmt, ...) { if (!mi_option_is_enabled(mi_option_verbose)) { if (!mi_option_is_enabled(mi_option_show_errors)) return; if (mi_max_warning_count >= 0 && (long)mi_atomic_increment_acq_rel(&warning_count) > mi_max_warning_count) return; } va_list args; va_start(args,fmt); mi_vfprintf_thread(NULL, NULL, "mimalloc: warning: ", fmt, args); va_end(args); } #if MI_DEBUG void _mi_assert_fail(const char* assertion, const char* fname, unsigned line, const char* func ) { _mi_fprintf(NULL, NULL, "mimalloc: assertion failed: at \"%s\":%u, %s\n assertion: \"%s\"\n", fname, line, (func==NULL?"":func), assertion); abort(); } #endif // -------------------------------------------------------- // Errors // -------------------------------------------------------- static mi_error_fun* volatile mi_error_handler; // = NULL static _Atomic(void*) mi_error_arg; // = NULL static void mi_error_default(int err) { MI_UNUSED(err); #if (MI_DEBUG>0) if (err==EFAULT) { #ifdef _MSC_VER __debugbreak(); #endif abort(); } #endif #if (MI_SECURE>0) if (err==EFAULT) { // abort on serious errors in secure mode (corrupted meta-data) abort(); } #endif #if defined(MI_XMALLOC) if (err==ENOMEM || err==EOVERFLOW) { // abort on memory 
allocation fails in xmalloc mode abort(); } #endif } void mi_register_error(mi_error_fun* fun, void* arg) { mi_error_handler = fun; // can be NULL mi_atomic_store_ptr_release(void,&mi_error_arg, arg); } void _mi_error_message(int err, const char* fmt, ...) { // show detailed error message va_list args; va_start(args, fmt); mi_show_error_message(fmt, args); va_end(args); // and call the error handler which may abort (or return normally) if (mi_error_handler != NULL) { mi_error_handler(err, mi_atomic_load_ptr_acquire(void,&mi_error_arg)); } else { mi_error_default(err); } } // -------------------------------------------------------- // Initialize options by checking the environment // -------------------------------------------------------- char _mi_toupper(char c) { if (c >= 'a' && c <= 'z') return (c - 'a' + 'A'); else return c; } int _mi_strnicmp(const char* s, const char* t, size_t n) { if (n == 0) return 0; for (; *s != 0 && *t != 0 && n > 0; s++, t++, n--) { if (_mi_toupper(*s) != _mi_toupper(*t)) break; } return (n == 0 ? 
0 : *s - *t); } void _mi_strlcpy(char* dest, const char* src, size_t dest_size) { if (dest==NULL || src==NULL || dest_size == 0) return; // copy until end of src, or when dest is (almost) full while (*src != 0 && dest_size > 1) { *dest++ = *src++; dest_size--; } // always zero terminate *dest = 0; } void _mi_strlcat(char* dest, const char* src, size_t dest_size) { if (dest==NULL || src==NULL || dest_size == 0) return; // find end of string in the dest buffer while (*dest != 0 && dest_size > 1) { dest++; dest_size--; } // and catenate _mi_strlcpy(dest, src, dest_size); } size_t _mi_strlen(const char* s) { if (s==NULL) return 0; size_t len = 0; while(s[len] != 0) { len++; } return len; } size_t _mi_strnlen(const char* s, size_t max_len) { if (s==NULL) return 0; size_t len = 0; while(s[len] != 0 && len < max_len) { len++; } return len; } #ifdef MI_NO_GETENV static bool mi_getenv(const char* name, char* result, size_t result_size) { MI_UNUSED(name); MI_UNUSED(result); MI_UNUSED(result_size); return false; } #else static bool mi_getenv(const char* name, char* result, size_t result_size) { if (name==NULL || result == NULL || result_size < 64) return false; return _mi_prim_getenv(name,result,result_size); } #endif // TODO: implement ourselves to reduce dependencies on the C runtime #include // strtol #include // strstr static void mi_option_init(mi_option_desc_t* desc) { // Read option value from the environment char s[64 + 1]; char buf[64+1]; _mi_strlcpy(buf, "mimalloc_", sizeof(buf)); _mi_strlcat(buf, desc->name, sizeof(buf)); bool found = mi_getenv(buf, s, sizeof(s)); if (!found && desc->legacy_name != NULL) { _mi_strlcpy(buf, "mimalloc_", sizeof(buf)); _mi_strlcat(buf, desc->legacy_name, sizeof(buf)); found = mi_getenv(buf, s, sizeof(s)); if (found) { _mi_warning_message("environment option \"mimalloc_%s\" is deprecated -- use \"mimalloc_%s\" instead.\n", desc->legacy_name, desc->name); } } if (found) { size_t len = _mi_strnlen(s, sizeof(buf) - 1); for (size_t i = 0; 
i < len; i++) { buf[i] = _mi_toupper(s[i]); } buf[len] = 0; if (buf[0] == 0 || strstr("1;TRUE;YES;ON", buf) != NULL) { desc->value = 1; desc->init = INITIALIZED; } else if (strstr("0;FALSE;NO;OFF", buf) != NULL) { desc->value = 0; desc->init = INITIALIZED; } else { char* end = buf; long value = strtol(buf, &end, 10); if (desc->option == mi_option_reserve_os_memory || desc->option == mi_option_arena_reserve) { // this option is interpreted in KiB to prevent overflow of `long` if (*end == 'K') { end++; } else if (*end == 'M') { value *= MI_KiB; end++; } else if (*end == 'G') { value *= MI_MiB; end++; } else { value = (value + MI_KiB - 1) / MI_KiB; } if (end[0] == 'I' && end[1] == 'B') { end += 2; } else if (*end == 'B') { end++; } } if (*end == 0) { desc->value = value; desc->init = INITIALIZED; } else { // set `init` first to avoid recursion through _mi_warning_message on mimalloc_verbose. desc->init = DEFAULTED; if (desc->option == mi_option_verbose && desc->value == 0) { // if the 'mimalloc_verbose' env var has a bogus value we'd never know // (since the value defaults to 'off') so in that case briefly enable verbose desc->value = 1; _mi_warning_message("environment option mimalloc_%s has an invalid value.\n", desc->name); desc->value = 0; } else { _mi_warning_message("environment option mimalloc_%s has an invalid value.\n", desc->name); } } } mi_assert_internal(desc->init != UNINIT); } else if (!_mi_preloading()) { desc->init = DEFAULTED; } } luametatex-2.10.08/source/libraries/mimalloc/src/os.c000066400000000000000000000664361442250314700225010ustar00rootroot00000000000000/* ---------------------------------------------------------------------------- Copyright (c) 2018-2023, Microsoft Research, Daan Leijen This is free software; you can redistribute it and/or modify it under the terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. 
-----------------------------------------------------------------------------*/ #include "mimalloc.h" #include "mimalloc/internal.h" #include "mimalloc/atomic.h" #include "mimalloc/prim.h" /* ----------------------------------------------------------- Initialization. On windows initializes support for aligned allocation and large OS pages (if MIMALLOC_LARGE_OS_PAGES is true). ----------------------------------------------------------- */ static mi_os_mem_config_t mi_os_mem_config = { 4096, // page size 0, // large page size (usually 2MiB) 4096, // allocation granularity true, // has overcommit? (if true we use MAP_NORESERVE on mmap systems) false, // must free whole? (on mmap systems we can free anywhere in a mapped range, but on Windows we must free the entire span) true // has virtual reserve? (if true we can reserve virtual address space without using commit or physical memory) }; bool _mi_os_has_overcommit(void) { return mi_os_mem_config.has_overcommit; } bool _mi_os_has_virtual_reserve(void) { return mi_os_mem_config.has_virtual_reserve; } // OS (small) page size size_t _mi_os_page_size(void) { return mi_os_mem_config.page_size; } // if large OS pages are supported (2 or 4MiB), then return the size, otherwise return the small page size (4KiB) size_t _mi_os_large_page_size(void) { return (mi_os_mem_config.large_page_size != 0 ? 
mi_os_mem_config.large_page_size : _mi_os_page_size()); } bool _mi_os_use_large_page(size_t size, size_t alignment) { // if we have access, check the size and alignment requirements if (mi_os_mem_config.large_page_size == 0 || !mi_option_is_enabled(mi_option_allow_large_os_pages)) return false; return ((size % mi_os_mem_config.large_page_size) == 0 && (alignment % mi_os_mem_config.large_page_size) == 0); } // round to a good OS allocation size (bounded by max 12.5% waste) size_t _mi_os_good_alloc_size(size_t size) { size_t align_size; if (size < 512*MI_KiB) align_size = _mi_os_page_size(); else if (size < 2*MI_MiB) align_size = 64*MI_KiB; else if (size < 8*MI_MiB) align_size = 256*MI_KiB; else if (size < 32*MI_MiB) align_size = 1*MI_MiB; else align_size = 4*MI_MiB; if mi_unlikely(size >= (SIZE_MAX - align_size)) return size; // possible overflow? return _mi_align_up(size, align_size); } void _mi_os_init(void) { _mi_prim_mem_init(&mi_os_mem_config); } /* ----------------------------------------------------------- Util -------------------------------------------------------------- */ bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* stats); bool _mi_os_commit(void* addr, size_t size, bool* is_zero, mi_stats_t* tld_stats); static void* mi_align_up_ptr(void* p, size_t alignment) { return (void*)_mi_align_up((uintptr_t)p, alignment); } static void* mi_align_down_ptr(void* p, size_t alignment) { return (void*)_mi_align_down((uintptr_t)p, alignment); } /* ----------------------------------------------------------- aligned hinting -------------------------------------------------------------- */ // On 64-bit systems, we can do efficient aligned allocation by using // the 2TiB to 30TiB area to allocate those. #if (MI_INTPTR_SIZE >= 8) static mi_decl_cache_align _Atomic(uintptr_t)aligned_base; // Return a MI_SEGMENT_SIZE aligned address that is probably available. 
// If this returns NULL, the OS will determine the address but on some OS's that may not be // properly aligned which can be more costly as it needs to be adjusted afterwards. // For a size > 1GiB this always returns NULL in order to guarantee good ASLR randomization; // (otherwise an initial large allocation of say 2TiB has a 50% chance to include (known) addresses // in the middle of the 2TiB - 6TiB address range (see issue #372)) #define MI_HINT_BASE ((uintptr_t)2 << 40) // 2TiB start #define MI_HINT_AREA ((uintptr_t)4 << 40) // upto 6TiB (since before win8 there is "only" 8TiB available to processes) #define MI_HINT_MAX ((uintptr_t)30 << 40) // wrap after 30TiB (area after 32TiB is used for huge OS pages) void* _mi_os_get_aligned_hint(size_t try_alignment, size_t size) { if (try_alignment <= 1 || try_alignment > MI_SEGMENT_SIZE) return NULL; size = _mi_align_up(size, MI_SEGMENT_SIZE); if (size > 1*MI_GiB) return NULL; // guarantee the chance of fixed valid address is at most 1/(MI_HINT_AREA / 1<<30) = 1/4096. #if (MI_SECURE>0) size += MI_SEGMENT_SIZE; // put in `MI_SEGMENT_SIZE` virtual gaps between hinted blocks; this splits VLA's but increases guarded areas. 
#endif uintptr_t hint = mi_atomic_add_acq_rel(&aligned_base, size); if (hint == 0 || hint > MI_HINT_MAX) { // wrap or initialize uintptr_t init = MI_HINT_BASE; #if (MI_SECURE>0 || MI_DEBUG==0) // security: randomize start of aligned allocations unless in debug mode uintptr_t r = _mi_heap_random_next(mi_prim_get_default_heap()); init = init + ((MI_SEGMENT_SIZE * ((r>>17) & 0xFFFFF)) % MI_HINT_AREA); // (randomly 20 bits)*4MiB == 0 to 4TiB #endif uintptr_t expected = hint + size; mi_atomic_cas_strong_acq_rel(&aligned_base, &expected, init); hint = mi_atomic_add_acq_rel(&aligned_base, size); // this may still give 0 or > MI_HINT_MAX but that is ok, it is a hint after all } if (hint%try_alignment != 0) return NULL; return (void*)hint; } #else void* _mi_os_get_aligned_hint(size_t try_alignment, size_t size) { MI_UNUSED(try_alignment); MI_UNUSED(size); return NULL; } #endif /* ----------------------------------------------------------- Free memory -------------------------------------------------------------- */ static void mi_os_free_huge_os_pages(void* p, size_t size, mi_stats_t* stats); static void mi_os_prim_free(void* addr, size_t size, bool still_committed, mi_stats_t* tld_stats) { MI_UNUSED(tld_stats); mi_assert_internal((size % _mi_os_page_size()) == 0); if (addr == NULL || size == 0) return; // || _mi_os_is_huge_reserved(addr) int err = _mi_prim_free(addr, size); if (err != 0) { _mi_warning_message("unable to free OS memory (error: %d (0x%x), size: 0x%zx bytes, address: %p)\n", err, err, size, addr); } mi_stats_t* stats = &_mi_stats_main; if (still_committed) { _mi_stat_decrease(&stats->committed, size); } _mi_stat_decrease(&stats->reserved, size); } void _mi_os_free_ex(void* addr, size_t size, bool still_committed, mi_memid_t memid, mi_stats_t* tld_stats) { if (mi_memkind_is_os(memid.memkind)) { size_t csize = _mi_os_good_alloc_size(size); void* base = addr; // different base? 
(due to alignment) if (memid.mem.os.base != NULL) { mi_assert(memid.mem.os.base <= addr); mi_assert((uint8_t*)memid.mem.os.base + memid.mem.os.alignment >= (uint8_t*)addr); base = memid.mem.os.base; csize += ((uint8_t*)addr - (uint8_t*)memid.mem.os.base); } // free it if (memid.memkind == MI_MEM_OS_HUGE) { mi_assert(memid.is_pinned); mi_os_free_huge_os_pages(base, csize, tld_stats); } else { mi_os_prim_free(base, csize, still_committed, tld_stats); } } else { // nothing to do mi_assert(memid.memkind < MI_MEM_OS); } } void _mi_os_free(void* p, size_t size, mi_memid_t memid, mi_stats_t* tld_stats) { _mi_os_free_ex(p, size, true, memid, tld_stats); } /* ----------------------------------------------------------- Primitive allocation from the OS. -------------------------------------------------------------- */ // Note: the `try_alignment` is just a hint and the returned pointer is not guaranteed to be aligned. static void* mi_os_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, mi_stats_t* stats) { mi_assert_internal(size > 0 && (size % _mi_os_page_size()) == 0); mi_assert_internal(is_zero != NULL); mi_assert_internal(is_large != NULL); if (size == 0) return NULL; if (!commit) { allow_large = false; } if (try_alignment == 0) { try_alignment = 1; } // avoid 0 to ensure there will be no divide by zero when aligning *is_zero = false; void* p = NULL; int err = _mi_prim_alloc(size, try_alignment, commit, allow_large, is_large, is_zero, &p); if (err != 0) { _mi_warning_message("unable to allocate OS memory (error: %d (0x%x), size: 0x%zx bytes, align: 0x%zx, commit: %d, allow large: %d)\n", err, err, size, try_alignment, commit, allow_large); } mi_stat_counter_increase(stats->mmap_calls, 1); if (p != NULL) { _mi_stat_increase(&stats->reserved, size); if (commit) { _mi_stat_increase(&stats->committed, size); // seems needed for asan (or `mimalloc-test-api` fails) #ifdef MI_TRACK_ASAN if (*is_zero) { 
mi_track_mem_defined(p,size); } else { mi_track_mem_undefined(p,size); } #endif } } return p; } // Primitive aligned allocation from the OS. // This function guarantees the allocated memory is aligned. static void* mi_os_prim_alloc_aligned(size_t size, size_t alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, void** base, mi_stats_t* stats) { mi_assert_internal(alignment >= _mi_os_page_size() && ((alignment & (alignment - 1)) == 0)); mi_assert_internal(size > 0 && (size % _mi_os_page_size()) == 0); mi_assert_internal(is_large != NULL); mi_assert_internal(is_zero != NULL); mi_assert_internal(base != NULL); if (!commit) allow_large = false; if (!(alignment >= _mi_os_page_size() && ((alignment & (alignment - 1)) == 0))) return NULL; size = _mi_align_up(size, _mi_os_page_size()); // try first with a hint (this will be aligned directly on Win 10+ or BSD) void* p = mi_os_prim_alloc(size, alignment, commit, allow_large, is_large, is_zero, stats); if (p == NULL) return NULL; // aligned already? if (((uintptr_t)p % alignment) == 0) { *base = p; } else { // if not aligned, free it, overallocate, and unmap around it _mi_warning_message("unable to allocate aligned OS memory directly, fall back to over-allocation (size: 0x%zx bytes, address: %p, alignment: 0x%zx, commit: %d)\n", size, p, alignment, commit); mi_os_prim_free(p, size, commit, stats); if (size >= (SIZE_MAX - alignment)) return NULL; // overflow const size_t over_size = size + alignment; if (mi_os_mem_config.must_free_whole) { // win32 virtualAlloc cannot free parts of an allocate block // over-allocate uncommitted (virtual) memory p = mi_os_prim_alloc(over_size, 1 /*alignment*/, false /* commit? 
*/, false /* allow_large */, is_large, is_zero, stats); if (p == NULL) return NULL; // set p to the aligned part in the full region // note: this is dangerous on Windows as VirtualFree needs the actual base pointer // this is handled though by having the `base` field in the memid's *base = p; // remember the base p = mi_align_up_ptr(p, alignment); // explicitly commit only the aligned part if (commit) { _mi_os_commit(p, size, NULL, stats); } } else { // mmap can free inside an allocation // overallocate... p = mi_os_prim_alloc(over_size, 1, commit, false, is_large, is_zero, stats); if (p == NULL) return NULL; // and selectively unmap parts around the over-allocated area. (noop on sbrk) void* aligned_p = mi_align_up_ptr(p, alignment); size_t pre_size = (uint8_t*)aligned_p - (uint8_t*)p; size_t mid_size = _mi_align_up(size, _mi_os_page_size()); size_t post_size = over_size - pre_size - mid_size; mi_assert_internal(pre_size < over_size&& post_size < over_size&& mid_size >= size); if (pre_size > 0) { mi_os_prim_free(p, pre_size, commit, stats); } if (post_size > 0) { mi_os_prim_free((uint8_t*)aligned_p + mid_size, post_size, commit, stats); } // we can return the aligned pointer on `mmap` (and sbrk) systems p = aligned_p; *base = aligned_p; // since we freed the pre part, `*base == p`. 
} } mi_assert_internal(p == NULL || (p != NULL && *base != NULL && ((uintptr_t)p % alignment) == 0)); return p; } /* ----------------------------------------------------------- OS API: alloc and alloc_aligned ----------------------------------------------------------- */ void* _mi_os_alloc(size_t size, mi_memid_t* memid, mi_stats_t* tld_stats) { MI_UNUSED(tld_stats); *memid = _mi_memid_none(); mi_stats_t* stats = &_mi_stats_main; if (size == 0) return NULL; size = _mi_os_good_alloc_size(size); bool os_is_large = false; bool os_is_zero = false; void* p = mi_os_prim_alloc(size, 0, true, false, &os_is_large, &os_is_zero, stats); if (p != NULL) { *memid = _mi_memid_create_os(true, os_is_zero, os_is_large); } return p; } void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool allow_large, mi_memid_t* memid, mi_stats_t* tld_stats) { MI_UNUSED(&_mi_os_get_aligned_hint); // suppress unused warnings MI_UNUSED(tld_stats); *memid = _mi_memid_none(); if (size == 0) return NULL; size = _mi_os_good_alloc_size(size); alignment = _mi_align_up(alignment, _mi_os_page_size()); bool os_is_large = false; bool os_is_zero = false; void* os_base = NULL; void* p = mi_os_prim_alloc_aligned(size, alignment, commit, allow_large, &os_is_large, &os_is_zero, &os_base, &_mi_stats_main /*tld->stats*/ ); if (p != NULL) { *memid = _mi_memid_create_os(commit, os_is_zero, os_is_large); memid->mem.os.base = os_base; memid->mem.os.alignment = alignment; } return p; } /* ----------------------------------------------------------- OS aligned allocation with an offset. This is used for large alignments > MI_ALIGNMENT_MAX. We use a large mimalloc page where the object can be aligned at an offset from the start of the segment. As we may need to overallocate, we need to free such pointers using `mi_free_aligned` to use the actual start of the memory region. 
----------------------------------------------------------- */ void* _mi_os_alloc_aligned_at_offset(size_t size, size_t alignment, size_t offset, bool commit, bool allow_large, mi_memid_t* memid, mi_stats_t* tld_stats) { mi_assert(offset <= MI_SEGMENT_SIZE); mi_assert(offset <= size); mi_assert((alignment % _mi_os_page_size()) == 0); *memid = _mi_memid_none(); if (offset > MI_SEGMENT_SIZE) return NULL; if (offset == 0) { // regular aligned allocation return _mi_os_alloc_aligned(size, alignment, commit, allow_large, memid, tld_stats); } else { // overallocate to align at an offset const size_t extra = _mi_align_up(offset, alignment) - offset; const size_t oversize = size + extra; void* const start = _mi_os_alloc_aligned(oversize, alignment, commit, allow_large, memid, tld_stats); if (start == NULL) return NULL; void* const p = (uint8_t*)start + extra; mi_assert(_mi_is_aligned((uint8_t*)p + offset, alignment)); // decommit the overallocation at the start if (commit && extra > _mi_os_page_size()) { _mi_os_decommit(start, extra, tld_stats); } return p; } } /* ----------------------------------------------------------- OS memory API: reset, commit, decommit, protect, unprotect. ----------------------------------------------------------- */ // OS page align within a given area, either conservative (pages inside the area only), // or not (straddling pages outside the area is possible) static void* mi_os_page_align_areax(bool conservative, void* addr, size_t size, size_t* newsize) { mi_assert(addr != NULL && size > 0); if (newsize != NULL) *newsize = 0; if (size == 0 || addr == NULL) return NULL; // page align conservatively within the range void* start = (conservative ? mi_align_up_ptr(addr, _mi_os_page_size()) : mi_align_down_ptr(addr, _mi_os_page_size())); void* end = (conservative ? 
mi_align_down_ptr((uint8_t*)addr + size, _mi_os_page_size()) : mi_align_up_ptr((uint8_t*)addr + size, _mi_os_page_size())); ptrdiff_t diff = (uint8_t*)end - (uint8_t*)start; if (diff <= 0) return NULL; mi_assert_internal((conservative && (size_t)diff <= size) || (!conservative && (size_t)diff >= size)); if (newsize != NULL) *newsize = (size_t)diff; return start; } static void* mi_os_page_align_area_conservative(void* addr, size_t size, size_t* newsize) { return mi_os_page_align_areax(true, addr, size, newsize); } bool _mi_os_commit(void* addr, size_t size, bool* is_zero, mi_stats_t* tld_stats) { MI_UNUSED(tld_stats); mi_stats_t* stats = &_mi_stats_main; if (is_zero != NULL) { *is_zero = false; } _mi_stat_increase(&stats->committed, size); // use size for precise commit vs. decommit _mi_stat_counter_increase(&stats->commit_calls, 1); // page align range size_t csize; void* start = mi_os_page_align_areax(false /* conservative? */, addr, size, &csize); if (csize == 0) return true; // commit bool os_is_zero = false; int err = _mi_prim_commit(start, csize, &os_is_zero); if (err != 0) { _mi_warning_message("cannot commit OS memory (error: %d (0x%x), address: %p, size: 0x%zx bytes)\n", err, err, start, csize); return false; } if (os_is_zero && is_zero != NULL) { *is_zero = true; mi_assert_expensive(mi_mem_is_zero(start, csize)); } // note: the following seems required for asan (otherwise `mimalloc-test-stress` fails) #ifdef MI_TRACK_ASAN if (os_is_zero) { mi_track_mem_defined(start,csize); } else { mi_track_mem_undefined(start,csize); } #endif return true; } static bool mi_os_decommit_ex(void* addr, size_t size, bool* needs_recommit, mi_stats_t* tld_stats) { MI_UNUSED(tld_stats); mi_stats_t* stats = &_mi_stats_main; mi_assert_internal(needs_recommit!=NULL); _mi_stat_decrease(&stats->committed, size); // page align size_t csize; void* start = mi_os_page_align_area_conservative(addr, size, &csize); if (csize == 0) return true; // decommit *needs_recommit = true; int err = 
_mi_prim_decommit(start,csize,needs_recommit); if (err != 0) { _mi_warning_message("cannot decommit OS memory (error: %d (0x%x), address: %p, size: 0x%zx bytes)\n", err, err, start, csize); } mi_assert_internal(err == 0); return (err == 0); } bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* tld_stats) { bool needs_recommit; return mi_os_decommit_ex(addr, size, &needs_recommit, tld_stats); } // Signal to the OS that the address range is no longer in use // but may be used later again. This will release physical memory // pages and reduce swapping while keeping the memory committed. // We page align to a conservative area inside the range to reset. bool _mi_os_reset(void* addr, size_t size, mi_stats_t* stats) { // page align conservatively within the range size_t csize; void* start = mi_os_page_align_area_conservative(addr, size, &csize); if (csize == 0) return true; // || _mi_os_is_huge_reserved(addr) _mi_stat_increase(&stats->reset, csize); _mi_stat_counter_increase(&stats->reset_calls, 1); #if (MI_DEBUG>1) && !MI_SECURE && !MI_TRACK_ENABLED // && !MI_TSAN memset(start, 0, csize); // pretend it is eagerly reset #endif int err = _mi_prim_reset(start, csize); if (err != 0) { _mi_warning_message("cannot reset OS memory (error: %d (0x%x), address: %p, size: 0x%zx bytes)\n", err, err, start, csize); } return (err == 0); } // either resets or decommits memory, returns true if the memory needs // to be recommitted if it is to be re-used later on. bool _mi_os_purge_ex(void* p, size_t size, bool allow_reset, mi_stats_t* stats) { if (mi_option_get(mi_option_purge_delay) < 0) return false; // is purging allowed? _mi_stat_counter_increase(&stats->purge_calls, 1); _mi_stat_increase(&stats->purged, size); if (mi_option_is_enabled(mi_option_purge_decommits) && // should decommit? 
!_mi_preloading()) // don't decommit during preloading (unsafe) { bool needs_recommit = true; mi_os_decommit_ex(p, size, &needs_recommit, stats); return needs_recommit; } else { if (allow_reset) { // this can sometimes be not allowed if the range is not fully committed _mi_os_reset(p, size, stats); } return false; // needs no recommit } } // either resets or decommits memory, returns true if the memory needs // to be recommitted if it is to be re-used later on. bool _mi_os_purge(void* p, size_t size, mi_stats_t * stats) { return _mi_os_purge_ex(p, size, true, stats); } // Protect a region in memory to be not accessible. static bool mi_os_protectx(void* addr, size_t size, bool protect) { // page align conservatively within the range size_t csize = 0; void* start = mi_os_page_align_area_conservative(addr, size, &csize); if (csize == 0) return false; /* if (_mi_os_is_huge_reserved(addr)) { _mi_warning_message("cannot mprotect memory allocated in huge OS pages\n"); } */ int err = _mi_prim_protect(start,csize,protect); if (err != 0) { _mi_warning_message("cannot %s OS memory (error: %d (0x%x), address: %p, size: 0x%zx bytes)\n", (protect ? "protect" : "unprotect"), err, err, start, csize); } return (err == 0); } bool _mi_os_protect(void* addr, size_t size) { return mi_os_protectx(addr, size, true); } bool _mi_os_unprotect(void* addr, size_t size) { return mi_os_protectx(addr, size, false); } /* ---------------------------------------------------------------------------- Support for allocating huge OS pages (1Gib) that are reserved up-front and possibly associated with a specific NUMA node. 
(use `numa_node>=0`) -----------------------------------------------------------------------------*/ #define MI_HUGE_OS_PAGE_SIZE (MI_GiB) #if (MI_INTPTR_SIZE >= 8) // To ensure proper alignment, use our own area for huge OS pages static mi_decl_cache_align _Atomic(uintptr_t) mi_huge_start; // = 0 // Claim an aligned address range for huge pages static uint8_t* mi_os_claim_huge_pages(size_t pages, size_t* total_size) { if (total_size != NULL) *total_size = 0; const size_t size = pages * MI_HUGE_OS_PAGE_SIZE; uintptr_t start = 0; uintptr_t end = 0; uintptr_t huge_start = mi_atomic_load_relaxed(&mi_huge_start); do { start = huge_start; if (start == 0) { // Initialize the start address after the 32TiB area start = ((uintptr_t)32 << 40); // 32TiB virtual start address #if (MI_SECURE>0 || MI_DEBUG==0) // security: randomize start of huge pages unless in debug mode uintptr_t r = _mi_heap_random_next(mi_prim_get_default_heap()); start = start + ((uintptr_t)MI_HUGE_OS_PAGE_SIZE * ((r>>17) & 0x0FFF)); // (randomly 12bits)*1GiB == between 0 to 4TiB #endif } end = start + size; mi_assert_internal(end % MI_SEGMENT_SIZE == 0); } while (!mi_atomic_cas_strong_acq_rel(&mi_huge_start, &huge_start, end)); if (total_size != NULL) *total_size = size; return (uint8_t*)start; } #else static uint8_t* mi_os_claim_huge_pages(size_t pages, size_t* total_size) { MI_UNUSED(pages); if (total_size != NULL) *total_size = 0; return NULL; } #endif // Allocate MI_SEGMENT_SIZE aligned huge pages void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_msecs, size_t* pages_reserved, size_t* psize, mi_memid_t* memid) { *memid = _mi_memid_none(); if (psize != NULL) *psize = 0; if (pages_reserved != NULL) *pages_reserved = 0; size_t size = 0; uint8_t* start = mi_os_claim_huge_pages(pages, &size); if (start == NULL) return NULL; // or 32-bit systems // Allocate one page at the time but try to place them contiguously // We allocate one page at the time to be able to abort if it takes 
too long // or to at least allocate as many as available on the system. mi_msecs_t start_t = _mi_clock_start(); size_t page = 0; bool all_zero = true; while (page < pages) { // allocate a page bool is_zero = false; void* addr = start + (page * MI_HUGE_OS_PAGE_SIZE); void* p = NULL; int err = _mi_prim_alloc_huge_os_pages(addr, MI_HUGE_OS_PAGE_SIZE, numa_node, &is_zero, &p); if (!is_zero) { all_zero = false; } if (err != 0) { _mi_warning_message("unable to allocate huge OS page (error: %d (0x%x), address: %p, size: %zx bytes)\n", err, err, addr, MI_HUGE_OS_PAGE_SIZE); break; } // Did we succeed at a contiguous address? if (p != addr) { // no success, issue a warning and break if (p != NULL) { _mi_warning_message("could not allocate contiguous huge OS page %zu at %p\n", page, addr); mi_os_prim_free(p, MI_HUGE_OS_PAGE_SIZE, true, &_mi_stats_main); } break; } // success, record it page++; // increase before timeout check (see issue #711) _mi_stat_increase(&_mi_stats_main.committed, MI_HUGE_OS_PAGE_SIZE); _mi_stat_increase(&_mi_stats_main.reserved, MI_HUGE_OS_PAGE_SIZE); // check for timeout if (max_msecs > 0) { mi_msecs_t elapsed = _mi_clock_end(start_t); if (page >= 1) { mi_msecs_t estimate = ((elapsed / (page+1)) * pages); if (estimate > 2*max_msecs) { // seems like we are going to timeout, break elapsed = max_msecs + 1; } } if (elapsed > max_msecs) { _mi_warning_message("huge OS page allocation timed out (after allocating %zu page(s))\n", page); break; } } } mi_assert_internal(page*MI_HUGE_OS_PAGE_SIZE <= size); if (pages_reserved != NULL) { *pages_reserved = page; } if (psize != NULL) { *psize = page * MI_HUGE_OS_PAGE_SIZE; } if (page != 0) { mi_assert(start != NULL); *memid = _mi_memid_create_os(true /* is committed */, all_zero, true /* is_large */); memid->memkind = MI_MEM_OS_HUGE; mi_assert(memid->is_pinned); #ifdef MI_TRACK_ASAN if (all_zero) { mi_track_mem_defined(start,size); } #endif } return (page == 0 ? 
NULL : start); } // free every huge page in a range individually (as we allocated per page) // note: needed with VirtualAlloc but could potentially be done in one go on mmap'd systems. static void mi_os_free_huge_os_pages(void* p, size_t size, mi_stats_t* stats) { if (p==NULL || size==0) return; uint8_t* base = (uint8_t*)p; while (size >= MI_HUGE_OS_PAGE_SIZE) { mi_os_prim_free(base, MI_HUGE_OS_PAGE_SIZE, true, stats); size -= MI_HUGE_OS_PAGE_SIZE; base += MI_HUGE_OS_PAGE_SIZE; } } /* ---------------------------------------------------------------------------- Support NUMA aware allocation -----------------------------------------------------------------------------*/ _Atomic(size_t) _mi_numa_node_count; // = 0 // cache the node count size_t _mi_os_numa_node_count_get(void) { size_t count = mi_atomic_load_acquire(&_mi_numa_node_count); if (count <= 0) { long ncount = mi_option_get(mi_option_use_numa_nodes); // given explicitly? if (ncount > 0) { count = (size_t)ncount; } else { count = _mi_prim_numa_node_count(); // or detect dynamically if (count == 0) count = 1; } mi_atomic_store_release(&_mi_numa_node_count, count); // save it _mi_verbose_message("using %zd numa regions\n", count); } return count; } int _mi_os_numa_node_get(mi_os_tld_t* tld) { MI_UNUSED(tld); size_t numa_count = _mi_os_numa_node_count(); if (numa_count<=1) return 0; // optimize on single numa node systems: always node 0 // never more than the node count and >= 0 size_t numa_node = _mi_prim_numa_node(); if (numa_node >= numa_count) { numa_node = numa_node % numa_count; } return (int)numa_node; } luametatex-2.10.08/source/libraries/mimalloc/src/page-queue.c000066400000000000000000000264631442250314700241120ustar00rootroot00000000000000/*---------------------------------------------------------------------------- Copyright (c) 2018-2020, Microsoft Research, Daan Leijen This is free software; you can redistribute it and/or modify it under the terms of the MIT license. 
A copy of the license can be found in the file "LICENSE" at the root of this distribution. -----------------------------------------------------------------------------*/ /* ----------------------------------------------------------- Definition of page queues for each block size ----------------------------------------------------------- */ #ifndef MI_IN_PAGE_C #error "this file should be included from 'page.c'" #endif /* ----------------------------------------------------------- Minimal alignment in machine words (i.e. `sizeof(void*)`) ----------------------------------------------------------- */ #if (MI_MAX_ALIGN_SIZE > 4*MI_INTPTR_SIZE) #error "define alignment for more than 4x word size for this platform" #elif (MI_MAX_ALIGN_SIZE > 2*MI_INTPTR_SIZE) #define MI_ALIGN4W // 4 machine words minimal alignment #elif (MI_MAX_ALIGN_SIZE > MI_INTPTR_SIZE) #define MI_ALIGN2W // 2 machine words minimal alignment #else // ok, default alignment is 1 word #endif /* ----------------------------------------------------------- Queue query ----------------------------------------------------------- */ static inline bool mi_page_queue_is_huge(const mi_page_queue_t* pq) { return (pq->block_size == (MI_MEDIUM_OBJ_SIZE_MAX+sizeof(uintptr_t))); } static inline bool mi_page_queue_is_full(const mi_page_queue_t* pq) { return (pq->block_size == (MI_MEDIUM_OBJ_SIZE_MAX+(2*sizeof(uintptr_t)))); } static inline bool mi_page_queue_is_special(const mi_page_queue_t* pq) { return (pq->block_size > MI_MEDIUM_OBJ_SIZE_MAX); } /* ----------------------------------------------------------- Bins ----------------------------------------------------------- */ // Return the bin for a given field size. // Returns MI_BIN_HUGE if the size is too large. // We use `wsize` for the size in "machine word sizes", // i.e. byte size == `wsize*sizeof(void*)`. 
static inline uint8_t mi_bin(size_t size) { size_t wsize = _mi_wsize_from_size(size); uint8_t bin; if (wsize <= 1) { bin = 1; } #if defined(MI_ALIGN4W) else if (wsize <= 4) { bin = (uint8_t)((wsize+1)&~1); // round to double word sizes } #elif defined(MI_ALIGN2W) else if (wsize <= 8) { bin = (uint8_t)((wsize+1)&~1); // round to double word sizes } #else else if (wsize <= 8) { bin = (uint8_t)wsize; } #endif else if (wsize > MI_MEDIUM_OBJ_WSIZE_MAX) { bin = MI_BIN_HUGE; } else { #if defined(MI_ALIGN4W) if (wsize <= 16) { wsize = (wsize+3)&~3; } // round to 4x word sizes #endif wsize--; // find the highest bit uint8_t b = (uint8_t)mi_bsr(wsize); // note: wsize != 0 // and use the top 3 bits to determine the bin (~12.5% worst internal fragmentation). // - adjust with 3 because we use do not round the first 8 sizes // which each get an exact bin bin = ((b << 2) + (uint8_t)((wsize >> (b - 2)) & 0x03)) - 3; mi_assert_internal(bin < MI_BIN_HUGE); } mi_assert_internal(bin > 0 && bin <= MI_BIN_HUGE); return bin; } /* ----------------------------------------------------------- Queue of pages with free blocks ----------------------------------------------------------- */ uint8_t _mi_bin(size_t size) { return mi_bin(size); } size_t _mi_bin_size(uint8_t bin) { return _mi_heap_empty.pages[bin].block_size; } // Good size for allocation size_t mi_good_size(size_t size) mi_attr_noexcept { if (size <= MI_MEDIUM_OBJ_SIZE_MAX) { return _mi_bin_size(mi_bin(size)); } else { return _mi_align_up(size,_mi_os_page_size()); } } #if (MI_DEBUG>1) static bool mi_page_queue_contains(mi_page_queue_t* queue, const mi_page_t* page) { mi_assert_internal(page != NULL); mi_page_t* list = queue->first; while (list != NULL) { mi_assert_internal(list->next == NULL || list->next->prev == list); mi_assert_internal(list->prev == NULL || list->prev->next == list); if (list == page) break; list = list->next; } return (list == page); } #endif #if (MI_DEBUG>1) static bool mi_heap_contains_queue(const mi_heap_t* 
heap, const mi_page_queue_t* pq) { return (pq >= &heap->pages[0] && pq <= &heap->pages[MI_BIN_FULL]); } #endif static mi_page_queue_t* mi_page_queue_of(const mi_page_t* page) { uint8_t bin = (mi_page_is_in_full(page) ? MI_BIN_FULL : mi_bin(page->xblock_size)); mi_heap_t* heap = mi_page_heap(page); mi_assert_internal(heap != NULL && bin <= MI_BIN_FULL); mi_page_queue_t* pq = &heap->pages[bin]; mi_assert_internal(bin >= MI_BIN_HUGE || page->xblock_size == pq->block_size); mi_assert_expensive(mi_page_queue_contains(pq, page)); return pq; } static mi_page_queue_t* mi_heap_page_queue_of(mi_heap_t* heap, const mi_page_t* page) { uint8_t bin = (mi_page_is_in_full(page) ? MI_BIN_FULL : mi_bin(page->xblock_size)); mi_assert_internal(bin <= MI_BIN_FULL); mi_page_queue_t* pq = &heap->pages[bin]; mi_assert_internal(mi_page_is_in_full(page) || page->xblock_size == pq->block_size); return pq; } // The current small page array is for efficiency and for each // small size (up to 256) it points directly to the page for that // size without having to compute the bin. This means when the // current free page queue is updated for a small bin, we need to update a // range of entries in `_mi_page_small_free`. 
static inline void mi_heap_queue_first_update(mi_heap_t* heap, const mi_page_queue_t* pq) { mi_assert_internal(mi_heap_contains_queue(heap,pq)); size_t size = pq->block_size; if (size > MI_SMALL_SIZE_MAX) return; mi_page_t* page = pq->first; if (pq->first == NULL) page = (mi_page_t*)&_mi_page_empty; // find index in the right direct page array size_t start; size_t idx = _mi_wsize_from_size(size); mi_page_t** pages_free = heap->pages_free_direct; if (pages_free[idx] == page) return; // already set // find start slot if (idx<=1) { start = 0; } else { // find previous size; due to minimal alignment upto 3 previous bins may need to be skipped uint8_t bin = mi_bin(size); const mi_page_queue_t* prev = pq - 1; while( bin == mi_bin(prev->block_size) && prev > &heap->pages[0]) { prev--; } start = 1 + _mi_wsize_from_size(prev->block_size); if (start > idx) start = idx; } // set size range to the right page mi_assert(start <= idx); for (size_t sz = start; sz <= idx; sz++) { pages_free[sz] = page; } } /* static bool mi_page_queue_is_empty(mi_page_queue_t* queue) { return (queue->first == NULL); } */ static void mi_page_queue_remove(mi_page_queue_t* queue, mi_page_t* page) { mi_assert_internal(page != NULL); mi_assert_expensive(mi_page_queue_contains(queue, page)); mi_assert_internal(page->xblock_size == queue->block_size || (page->xblock_size > MI_MEDIUM_OBJ_SIZE_MAX && mi_page_queue_is_huge(queue)) || (mi_page_is_in_full(page) && mi_page_queue_is_full(queue))); mi_heap_t* heap = mi_page_heap(page); if (page->prev != NULL) page->prev->next = page->next; if (page->next != NULL) page->next->prev = page->prev; if (page == queue->last) queue->last = page->prev; if (page == queue->first) { queue->first = page->next; // update first mi_assert_internal(mi_heap_contains_queue(heap, queue)); mi_heap_queue_first_update(heap,queue); } heap->page_count--; page->next = NULL; page->prev = NULL; // mi_atomic_store_ptr_release(mi_atomic_cast(void*, &page->heap), NULL); 
mi_page_set_in_full(page,false); } static void mi_page_queue_push(mi_heap_t* heap, mi_page_queue_t* queue, mi_page_t* page) { mi_assert_internal(mi_page_heap(page) == heap); mi_assert_internal(!mi_page_queue_contains(queue, page)); #if MI_HUGE_PAGE_ABANDON mi_assert_internal(_mi_page_segment(page)->kind != MI_SEGMENT_HUGE); #endif mi_assert_internal(page->xblock_size == queue->block_size || (page->xblock_size > MI_MEDIUM_OBJ_SIZE_MAX) || (mi_page_is_in_full(page) && mi_page_queue_is_full(queue))); mi_page_set_in_full(page, mi_page_queue_is_full(queue)); // mi_atomic_store_ptr_release(mi_atomic_cast(void*, &page->heap), heap); page->next = queue->first; page->prev = NULL; if (queue->first != NULL) { mi_assert_internal(queue->first->prev == NULL); queue->first->prev = page; queue->first = page; } else { queue->first = queue->last = page; } // update direct mi_heap_queue_first_update(heap, queue); heap->page_count++; } static void mi_page_queue_enqueue_from(mi_page_queue_t* to, mi_page_queue_t* from, mi_page_t* page) { mi_assert_internal(page != NULL); mi_assert_expensive(mi_page_queue_contains(from, page)); mi_assert_expensive(!mi_page_queue_contains(to, page)); mi_assert_internal((page->xblock_size == to->block_size && page->xblock_size == from->block_size) || (page->xblock_size == to->block_size && mi_page_queue_is_full(from)) || (page->xblock_size == from->block_size && mi_page_queue_is_full(to)) || (page->xblock_size > MI_LARGE_OBJ_SIZE_MAX && mi_page_queue_is_huge(to)) || (page->xblock_size > MI_LARGE_OBJ_SIZE_MAX && mi_page_queue_is_full(to))); mi_heap_t* heap = mi_page_heap(page); if (page->prev != NULL) page->prev->next = page->next; if (page->next != NULL) page->next->prev = page->prev; if (page == from->last) from->last = page->prev; if (page == from->first) { from->first = page->next; // update first mi_assert_internal(mi_heap_contains_queue(heap, from)); mi_heap_queue_first_update(heap, from); } page->prev = to->last; page->next = NULL; if (to->last != 
NULL) { mi_assert_internal(heap == mi_page_heap(to->last)); to->last->next = page; to->last = page; } else { to->first = page; to->last = page; mi_heap_queue_first_update(heap, to); } mi_page_set_in_full(page, mi_page_queue_is_full(to)); } // Only called from `mi_heap_absorb`. size_t _mi_page_queue_append(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_queue_t* append) { mi_assert_internal(mi_heap_contains_queue(heap,pq)); mi_assert_internal(pq->block_size == append->block_size); if (append->first==NULL) return 0; // set append pages to new heap and count size_t count = 0; for (mi_page_t* page = append->first; page != NULL; page = page->next) { // inline `mi_page_set_heap` to avoid wrong assertion during absorption; // in this case it is ok to be delayed freeing since both "to" and "from" heap are still alive. mi_atomic_store_release(&page->xheap, (uintptr_t)heap); // set the flag to delayed free (not overriding NEVER_DELAYED_FREE) which has as a // side effect that it spins until any DELAYED_FREEING is finished. This ensures // that after appending only the new heap will be used for delayed free operations. _mi_page_use_delayed_free(page, MI_USE_DELAYED_FREE, false); count++; } if (pq->last==NULL) { // take over afresh mi_assert_internal(pq->first==NULL); pq->first = append->first; pq->last = append->last; mi_heap_queue_first_update(heap, pq); } else { // append to end mi_assert_internal(pq->last!=NULL); mi_assert_internal(append->first!=NULL); pq->last->next = append->first; append->first->prev = pq->last; pq->last = append->last; } return count; } luametatex-2.10.08/source/libraries/mimalloc/src/page.c000066400000000000000000001072301442250314700227600ustar00rootroot00000000000000/*---------------------------------------------------------------------------- Copyright (c) 2018-2020, Microsoft Research, Daan Leijen This is free software; you can redistribute it and/or modify it under the terms of the MIT license. 
A copy of the license can be found in the file "LICENSE" at the root of this distribution. -----------------------------------------------------------------------------*/ /* ----------------------------------------------------------- The core of the allocator. Every segment contains pages of a certain block size. The main function exported is `mi_malloc_generic`. ----------------------------------------------------------- */ #include "mimalloc.h" #include "mimalloc/internal.h" #include "mimalloc/atomic.h" /* ----------------------------------------------------------- Definition of page queues for each block size ----------------------------------------------------------- */ #define MI_IN_PAGE_C #include "page-queue.c" #undef MI_IN_PAGE_C /* ----------------------------------------------------------- Page helpers ----------------------------------------------------------- */ // Index a block in a page static inline mi_block_t* mi_page_block_at(const mi_page_t* page, void* page_start, size_t block_size, size_t i) { MI_UNUSED(page); mi_assert_internal(page != NULL); mi_assert_internal(i <= page->reserved); return (mi_block_t*)((uint8_t*)page_start + (i * block_size)); } static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t size, mi_tld_t* tld); static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld); #if (MI_DEBUG>=3) static size_t mi_page_list_count(mi_page_t* page, mi_block_t* head) { size_t count = 0; while (head != NULL) { mi_assert_internal(page == _mi_ptr_page(head)); count++; head = mi_block_next(page, head); } return count; } /* // Start of the page available memory static inline uint8_t* mi_page_area(const mi_page_t* page) { return _mi_page_start(_mi_page_segment(page), page, NULL); } */ static bool mi_page_list_is_valid(mi_page_t* page, mi_block_t* p) { size_t psize; uint8_t* page_area = _mi_page_start(_mi_page_segment(page), page, &psize); mi_block_t* start = (mi_block_t*)page_area; mi_block_t* end = 
(mi_block_t*)(page_area + psize); while(p != NULL) { if (p < start || p >= end) return false; p = mi_block_next(page, p); } #if MI_DEBUG>3 // generally too expensive to check this if (page->free_is_zero) { const size_t ubsize = mi_page_usable_block_size(page); for (mi_block_t* block = page->free; block != NULL; block = mi_block_next(page, block)) { mi_assert_expensive(mi_mem_is_zero(block + 1, ubsize - sizeof(mi_block_t))); } } #endif return true; } static bool mi_page_is_valid_init(mi_page_t* page) { mi_assert_internal(page->xblock_size > 0); mi_assert_internal(page->used <= page->capacity); mi_assert_internal(page->capacity <= page->reserved); mi_segment_t* segment = _mi_page_segment(page); uint8_t* start = _mi_page_start(segment,page,NULL); mi_assert_internal(start == _mi_segment_page_start(segment,page,NULL)); //const size_t bsize = mi_page_block_size(page); //mi_assert_internal(start + page->capacity*page->block_size == page->top); mi_assert_internal(mi_page_list_is_valid(page,page->free)); mi_assert_internal(mi_page_list_is_valid(page,page->local_free)); #if MI_DEBUG>3 // generally too expensive to check this if (page->free_is_zero) { const size_t ubsize = mi_page_usable_block_size(page); for(mi_block_t* block = page->free; block != NULL; block = mi_block_next(page,block)) { mi_assert_expensive(mi_mem_is_zero(block + 1, ubsize - sizeof(mi_block_t))); } } #endif #if !MI_TRACK_ENABLED && !MI_TSAN mi_block_t* tfree = mi_page_thread_free(page); mi_assert_internal(mi_page_list_is_valid(page, tfree)); //size_t tfree_count = mi_page_list_count(page, tfree); //mi_assert_internal(tfree_count <= page->thread_freed + 1); #endif size_t free_count = mi_page_list_count(page, page->free) + mi_page_list_count(page, page->local_free); mi_assert_internal(page->used + free_count == page->capacity); return true; } extern bool _mi_process_is_initialized; // has mi_process_init been called? 
bool _mi_page_is_valid(mi_page_t* page) { mi_assert_internal(mi_page_is_valid_init(page)); #if MI_SECURE mi_assert_internal(page->keys[0] != 0); #endif if (mi_page_heap(page)!=NULL) { mi_segment_t* segment = _mi_page_segment(page); mi_assert_internal(!_mi_process_is_initialized || segment->thread_id==0 || segment->thread_id == mi_page_heap(page)->thread_id); #if MI_HUGE_PAGE_ABANDON if (segment->kind != MI_SEGMENT_HUGE) #endif { mi_page_queue_t* pq = mi_page_queue_of(page); mi_assert_internal(mi_page_queue_contains(pq, page)); mi_assert_internal(pq->block_size==mi_page_block_size(page) || mi_page_block_size(page) > MI_MEDIUM_OBJ_SIZE_MAX || mi_page_is_in_full(page)); mi_assert_internal(mi_heap_contains_queue(mi_page_heap(page),pq)); } } return true; } #endif void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay, bool override_never) { while (!_mi_page_try_use_delayed_free(page, delay, override_never)) { mi_atomic_yield(); } } bool _mi_page_try_use_delayed_free(mi_page_t* page, mi_delayed_t delay, bool override_never) { mi_thread_free_t tfreex; mi_delayed_t old_delay; mi_thread_free_t tfree; size_t yield_count = 0; do { tfree = mi_atomic_load_acquire(&page->xthread_free); // note: must acquire as we can break/repeat this loop and not do a CAS; tfreex = mi_tf_set_delayed(tfree, delay); old_delay = mi_tf_delayed(tfree); if mi_unlikely(old_delay == MI_DELAYED_FREEING) { if (yield_count >= 4) return false; // give up after 4 tries yield_count++; mi_atomic_yield(); // delay until outstanding MI_DELAYED_FREEING are done. 
// tfree = mi_tf_set_delayed(tfree, MI_NO_DELAYED_FREE); // will cause CAS to busy fail } else if (delay == old_delay) { break; // avoid atomic operation if already equal } else if (!override_never && old_delay == MI_NEVER_DELAYED_FREE) { break; // leave never-delayed flag set } } while ((old_delay == MI_DELAYED_FREEING) || !mi_atomic_cas_weak_release(&page->xthread_free, &tfree, tfreex)); return true; // success } /* ----------------------------------------------------------- Page collect the `local_free` and `thread_free` lists ----------------------------------------------------------- */ // Collect the local `thread_free` list using an atomic exchange. // Note: The exchange must be done atomically as this is used right after // moving to the full list in `mi_page_collect_ex` and we need to // ensure that there was no race where the page became unfull just before the move. static void _mi_page_thread_free_collect(mi_page_t* page) { mi_block_t* head; mi_thread_free_t tfreex; mi_thread_free_t tfree = mi_atomic_load_relaxed(&page->xthread_free); do { head = mi_tf_block(tfree); tfreex = mi_tf_set_block(tfree,NULL); } while (!mi_atomic_cas_weak_acq_rel(&page->xthread_free, &tfree, tfreex)); // return if the list is empty if (head == NULL) return; // find the tail -- also to get a proper count (without data races) uint32_t max_count = page->capacity; // cannot collect more than capacity uint32_t count = 1; mi_block_t* tail = head; mi_block_t* next; while ((next = mi_block_next(page,tail)) != NULL && count <= max_count) { count++; tail = next; } // if `count > max_count` there was a memory corruption (possibly infinite list due to double multi-threaded free) if (count > max_count) { _mi_error_message(EFAULT, "corrupted thread-free list\n"); return; // the thread-free items cannot be freed } // and append the current local free list mi_block_set_next(page,tail, page->local_free); page->local_free = head; // update counts now page->used -= count; } void 
_mi_page_free_collect(mi_page_t* page, bool force) { mi_assert_internal(page!=NULL); // collect the thread free list if (force || mi_page_thread_free(page) != NULL) { // quick test to avoid an atomic operation _mi_page_thread_free_collect(page); } // and the local free list if (page->local_free != NULL) { if mi_likely(page->free == NULL) { // usual case page->free = page->local_free; page->local_free = NULL; page->free_is_zero = false; } else if (force) { // append -- only on shutdown (force) as this is a linear operation mi_block_t* tail = page->local_free; mi_block_t* next; while ((next = mi_block_next(page, tail)) != NULL) { tail = next; } mi_block_set_next(page, tail, page->free); page->free = page->local_free; page->local_free = NULL; page->free_is_zero = false; } } mi_assert_internal(!force || page->local_free == NULL); } /* ----------------------------------------------------------- Page fresh and retire ----------------------------------------------------------- */ // called from segments when reclaiming abandoned pages void _mi_page_reclaim(mi_heap_t* heap, mi_page_t* page) { mi_assert_expensive(mi_page_is_valid_init(page)); mi_assert_internal(mi_page_heap(page) == heap); mi_assert_internal(mi_page_thread_free_flag(page) != MI_NEVER_DELAYED_FREE); #if MI_HUGE_PAGE_ABANDON mi_assert_internal(_mi_page_segment(page)->kind != MI_SEGMENT_HUGE); #endif // TODO: push on full queue immediately if it is full? 
mi_page_queue_t* pq = mi_page_queue(heap, mi_page_block_size(page)); mi_page_queue_push(heap, pq, page); mi_assert_expensive(_mi_page_is_valid(page)); } // allocate a fresh page from a segment static mi_page_t* mi_page_fresh_alloc(mi_heap_t* heap, mi_page_queue_t* pq, size_t block_size, size_t page_alignment) { #if !MI_HUGE_PAGE_ABANDON mi_assert_internal(pq != NULL); mi_assert_internal(mi_heap_contains_queue(heap, pq)); mi_assert_internal(page_alignment > 0 || block_size > MI_MEDIUM_OBJ_SIZE_MAX || block_size == pq->block_size); #endif mi_page_t* page = _mi_segment_page_alloc(heap, block_size, page_alignment, &heap->tld->segments, &heap->tld->os); if (page == NULL) { // this may be out-of-memory, or an abandoned page was reclaimed (and in our queue) return NULL; } mi_assert_internal(page_alignment >0 || block_size > MI_MEDIUM_OBJ_SIZE_MAX || _mi_page_segment(page)->kind != MI_SEGMENT_HUGE); mi_assert_internal(pq!=NULL || page->xblock_size != 0); mi_assert_internal(pq!=NULL || mi_page_block_size(page) >= block_size); // a fresh page was found, initialize it const size_t full_block_size = ((pq == NULL || mi_page_queue_is_huge(pq)) ? 
mi_page_block_size(page) : block_size); // see also: mi_segment_huge_page_alloc mi_assert_internal(full_block_size >= block_size); mi_page_init(heap, page, full_block_size, heap->tld); mi_heap_stat_increase(heap, pages, 1); if (pq != NULL) { mi_page_queue_push(heap, pq, page); } mi_assert_expensive(_mi_page_is_valid(page)); return page; } // Get a fresh page to use static mi_page_t* mi_page_fresh(mi_heap_t* heap, mi_page_queue_t* pq) { mi_assert_internal(mi_heap_contains_queue(heap, pq)); mi_page_t* page = mi_page_fresh_alloc(heap, pq, pq->block_size, 0); if (page==NULL) return NULL; mi_assert_internal(pq->block_size==mi_page_block_size(page)); mi_assert_internal(pq==mi_page_queue(heap, mi_page_block_size(page))); return page; } /* ----------------------------------------------------------- Do any delayed frees (put there by other threads if they deallocated in a full page) ----------------------------------------------------------- */ void _mi_heap_delayed_free_all(mi_heap_t* heap) { while (!_mi_heap_delayed_free_partial(heap)) { mi_atomic_yield(); } } // returns true if all delayed frees were processed bool _mi_heap_delayed_free_partial(mi_heap_t* heap) { // take over the list (note: no atomic exchange since it is often NULL) mi_block_t* block = mi_atomic_load_ptr_relaxed(mi_block_t, &heap->thread_delayed_free); while (block != NULL && !mi_atomic_cas_ptr_weak_acq_rel(mi_block_t, &heap->thread_delayed_free, &block, NULL)) { /* nothing */ }; bool all_freed = true; // and free them all while(block != NULL) { mi_block_t* next = mi_block_nextx(heap,block, heap->keys); // use internal free instead of regular one to keep stats etc correct if (!_mi_free_delayed_block(block)) { // we might already start delayed freeing while another thread has not yet // reset the delayed_freeing flag; in that case delay it further by reinserting the current block // into the delayed free list all_freed = false; mi_block_t* dfree = mi_atomic_load_ptr_relaxed(mi_block_t, 
&heap->thread_delayed_free); do { mi_block_set_nextx(heap, block, dfree, heap->keys); } while (!mi_atomic_cas_ptr_weak_release(mi_block_t,&heap->thread_delayed_free, &dfree, block)); } block = next; } return all_freed; } /* ----------------------------------------------------------- Unfull, abandon, free and retire ----------------------------------------------------------- */ // Move a page from the full list back to a regular list void _mi_page_unfull(mi_page_t* page) { mi_assert_internal(page != NULL); mi_assert_expensive(_mi_page_is_valid(page)); mi_assert_internal(mi_page_is_in_full(page)); if (!mi_page_is_in_full(page)) return; mi_heap_t* heap = mi_page_heap(page); mi_page_queue_t* pqfull = &heap->pages[MI_BIN_FULL]; mi_page_set_in_full(page, false); // to get the right queue mi_page_queue_t* pq = mi_heap_page_queue_of(heap, page); mi_page_set_in_full(page, true); mi_page_queue_enqueue_from(pq, pqfull, page); } static void mi_page_to_full(mi_page_t* page, mi_page_queue_t* pq) { mi_assert_internal(pq == mi_page_queue_of(page)); mi_assert_internal(!mi_page_immediate_available(page)); mi_assert_internal(!mi_page_is_in_full(page)); if (mi_page_is_in_full(page)) return; mi_page_queue_enqueue_from(&mi_page_heap(page)->pages[MI_BIN_FULL], pq, page); _mi_page_free_collect(page,false); // try to collect right away in case another thread freed just before MI_USE_DELAYED_FREE was set } // Abandon a page with used blocks at the end of a thread. // Note: only call if it is ensured that no references exist from // the `page->heap->thread_delayed_free` into this page. // Currently only called through `mi_heap_collect_ex` which ensures this. 
void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq) { mi_assert_internal(page != NULL); mi_assert_expensive(_mi_page_is_valid(page)); mi_assert_internal(pq == mi_page_queue_of(page)); mi_assert_internal(mi_page_heap(page) != NULL); mi_heap_t* pheap = mi_page_heap(page); // remove from our page list mi_segments_tld_t* segments_tld = &pheap->tld->segments; mi_page_queue_remove(pq, page); // page is no longer associated with our heap mi_assert_internal(mi_page_thread_free_flag(page)==MI_NEVER_DELAYED_FREE); mi_page_set_heap(page, NULL); #if (MI_DEBUG>1) && !MI_TRACK_ENABLED // check there are no references left.. for (mi_block_t* block = (mi_block_t*)pheap->thread_delayed_free; block != NULL; block = mi_block_nextx(pheap, block, pheap->keys)) { mi_assert_internal(_mi_ptr_page(block) != page); } #endif // and abandon it mi_assert_internal(mi_page_heap(page) == NULL); _mi_segment_page_abandon(page,segments_tld); } // Free a page with no more free blocks void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force) { mi_assert_internal(page != NULL); mi_assert_expensive(_mi_page_is_valid(page)); mi_assert_internal(pq == mi_page_queue_of(page)); mi_assert_internal(mi_page_all_free(page)); mi_assert_internal(mi_page_thread_free_flag(page)!=MI_DELAYED_FREEING); // no more aligned blocks in here mi_page_set_has_aligned(page, false); mi_heap_t* heap = mi_page_heap(page); // remove from the page list // (no need to do _mi_heap_delayed_free first as all blocks are already free) mi_segments_tld_t* segments_tld = &heap->tld->segments; mi_page_queue_remove(pq, page); // and free it mi_page_set_heap(page,NULL); _mi_segment_page_free(page, force, segments_tld); } // Retire parameters #define MI_MAX_RETIRE_SIZE (MI_MEDIUM_OBJ_SIZE_MAX) #define MI_RETIRE_CYCLES (16) // Retire a page with no more used blocks // Important to not retire too quickly though as new // allocations might coming. 
// Note: called from `mi_free` and benchmarks often // trigger this due to freeing everything and then // allocating again so careful when changing this. void _mi_page_retire(mi_page_t* page) mi_attr_noexcept { mi_assert_internal(page != NULL); mi_assert_expensive(_mi_page_is_valid(page)); mi_assert_internal(mi_page_all_free(page)); mi_page_set_has_aligned(page, false); // don't retire too often.. // (or we end up retiring and re-allocating most of the time) // NOTE: refine this more: we should not retire if this // is the only page left with free blocks. It is not clear // how to check this efficiently though... // for now, we don't retire if it is the only page left of this size class. mi_page_queue_t* pq = mi_page_queue_of(page); if mi_likely(page->xblock_size <= MI_MAX_RETIRE_SIZE && !mi_page_queue_is_special(pq)) { // not too large && not full or huge queue? if (pq->last==page && pq->first==page) { // the only page in the queue? mi_stat_counter_increase(_mi_stats_main.page_no_retire,1); page->retire_expire = 1 + (page->xblock_size <= MI_SMALL_OBJ_SIZE_MAX ? MI_RETIRE_CYCLES : MI_RETIRE_CYCLES/4); mi_heap_t* heap = mi_page_heap(page); mi_assert_internal(pq >= heap->pages); const size_t index = pq - heap->pages; mi_assert_internal(index < MI_BIN_FULL && index < MI_BIN_HUGE); if (index < heap->page_retired_min) heap->page_retired_min = index; if (index > heap->page_retired_max) heap->page_retired_max = index; mi_assert_internal(mi_page_all_free(page)); return; // dont't free after all } } _mi_page_free(page, pq, false); } // free retired pages: we don't need to look at the entire queues // since we only retire pages that are at the head position in a queue. 
// Sweep the [page_retired_min, page_retired_max] bin window and free pages
// whose retire delay expired (or all of them when `force`); narrows the
// window to the bins that still hold pending-retire pages.
void _mi_heap_collect_retired(mi_heap_t* heap, bool force) {
  size_t min = MI_BIN_FULL;
  size_t max = 0;
  for(size_t bin = heap->page_retired_min; bin <= heap->page_retired_max; bin++) {
    mi_page_queue_t* pq   = &heap->pages[bin];
    mi_page_t*       page = pq->first;
    if (page != NULL && page->retire_expire != 0) {
      if (mi_page_all_free(page)) {
        page->retire_expire--;
        if (force || page->retire_expire == 0) {
          _mi_page_free(pq->first, pq, force);
        }
        else {
          // keep retired, update min/max
          if (bin < min) min = bin;
          if (bin > max) max = bin;
        }
      }
      else {
        // page was used again; cancel the pending retire
        page->retire_expire = 0;
      }
    }
  }
  heap->page_retired_min = min;
  heap->page_retired_max = max;
}

/* -----------------------------------------------------------
  Initialize the initial free list in a page.
  In secure mode we initialize a randomized list by
  alternating between slices.
----------------------------------------------------------- */

#define MI_MAX_SLICE_SHIFT  (6)   // at most 64 slices
#define MI_MAX_SLICES       (1UL << MI_MAX_SLICE_SHIFT)
#define MI_MIN_SLICES       (2)

// Extend the page's free list with `extend` fresh blocks, threading them in a
// randomized order across up to MI_MAX_SLICES slices (secure-mode hardening
// so free-list order is unpredictable).
static void mi_page_free_list_extend_secure(mi_heap_t* const heap, mi_page_t* const page, const size_t bsize, const size_t extend, mi_stats_t* const stats) {
  MI_UNUSED(stats);
  #if (MI_SECURE<=2)
  mi_assert_internal(page->free == NULL);
  mi_assert_internal(page->local_free == NULL);
  #endif
  mi_assert_internal(page->capacity + extend <= page->reserved);
  mi_assert_internal(bsize == mi_page_block_size(page));
  void* const page_area = _mi_page_start(_mi_page_segment(page), page, NULL);

  // initialize a randomized free list
  // set up `slice_count` slices to alternate between
  size_t shift = MI_MAX_SLICE_SHIFT;
  while ((extend >> shift) == 0) {
    shift--;
  }
  const size_t slice_count = (size_t)1U << shift;
  const size_t slice_extend = extend / slice_count;
  mi_assert_internal(slice_extend >= 1);
  mi_block_t* blocks[MI_MAX_SLICES];   // current start of the slice
  size_t      counts[MI_MAX_SLICES];   // available objects in the slice
  for (size_t i = 0; i < slice_count; i++) {
    blocks[i] = mi_page_block_at(page, page_area, bsize, page->capacity + i*slice_extend);
    counts[i] = slice_extend;
  }
  counts[slice_count-1] += (extend % slice_count);  // final slice holds the modulus too (todo: distribute evenly?)

  // and initialize the free list by randomly threading through them
  // set up first element
  const uintptr_t r = _mi_heap_random_next(heap);
  size_t current = r % slice_count;
  counts[current]--;
  mi_block_t* const free_start = blocks[current];
  // and iterate through the rest; use `random_shuffle` for performance
  uintptr_t rnd = _mi_random_shuffle(r|1); // ensure not 0
  for (size_t i = 1; i < extend; i++) {
    // call random_shuffle only every INTPTR_SIZE rounds
    const size_t round = i%MI_INTPTR_SIZE;
    if (round == 0) rnd = _mi_random_shuffle(rnd);
    // select a random next slice index
    size_t next = ((rnd >> 8*round) & (slice_count-1));
    while (counts[next]==0) {                            // ensure it still has space
      next++;
      if (next==slice_count) next = 0;
    }
    // and link the current block to it
    counts[next]--;
    mi_block_t* const block = blocks[current];
    blocks[current] = (mi_block_t*)((uint8_t*)block + bsize);  // bump to the following block
    mi_block_set_next(page, block, blocks[next]);   // and set next; note: we may have `current == next`
    current = next;
  }
  // prepend to the free list (usually NULL)
  mi_block_set_next(page, blocks[current], page->free);  // end of the list
  page->free = free_start;
}

// Extend the page's free list with `extend` fresh blocks linked in plain
// sequential (address) order; the common non-secure path.
static mi_decl_noinline void mi_page_free_list_extend( mi_page_t* const page, const size_t bsize, const size_t extend, mi_stats_t* const stats)
{
  MI_UNUSED(stats);
  #if (MI_SECURE <= 2)
  mi_assert_internal(page->free == NULL);
  mi_assert_internal(page->local_free == NULL);
  #endif
  mi_assert_internal(page->capacity + extend <= page->reserved);
  mi_assert_internal(bsize == mi_page_block_size(page));
  void* const page_area = _mi_page_start(_mi_page_segment(page), page, NULL );

  mi_block_t* const start = mi_page_block_at(page, page_area, bsize, page->capacity);

  // initialize a sequential free list
  mi_block_t* const last = mi_page_block_at(page, page_area, bsize, page->capacity + extend - 1);
  mi_block_t* block = start;
  while(block <= last) {
    mi_block_t* next = (mi_block_t*)((uint8_t*)block + bsize);
    mi_block_set_next(page,block,next);
    block = next;
  }
  // prepend to free list (usually `NULL`)
  mi_block_set_next(page, last, page->free);
  page->free = start;
}

/* -----------------------------------------------------------
  Page initialize and extend the capacity
----------------------------------------------------------- */

#define MI_MAX_EXTEND_SIZE    (4*1024)      // heuristic, one OS page seems to work well.
#if (MI_SECURE>0)
#define MI_MIN_EXTEND         (8*MI_SECURE) // extend at least by this many
#else
#define MI_MIN_EXTEND         (4)
#endif

// Extend the capacity (up to reserved) by initializing a free list
// We do at most `MI_MAX_EXTEND` to avoid touching too much memory
// Note: we also experimented with "bump" allocation on the first
// allocations but this did not speed up any benchmark (due to an
// extra test in malloc? or cache effects?)
static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld) {
  MI_UNUSED(tld);
  mi_assert_expensive(mi_page_is_valid_init(page));
  #if (MI_SECURE<=2)
  mi_assert(page->free == NULL);
  mi_assert(page->local_free == NULL);
  if (page->free != NULL) return;
  #endif
  if (page->capacity >= page->reserved) return;

  size_t page_size;
  _mi_page_start(_mi_page_segment(page), page, &page_size);
  mi_stat_counter_increase(tld->stats.pages_extended, 1);

  // calculate the extend count
  const size_t bsize = (page->xblock_size < MI_HUGE_BLOCK_SIZE ? page->xblock_size : page_size);
  size_t extend = page->reserved - page->capacity;
  mi_assert_internal(extend > 0);

  size_t max_extend = (bsize >= MI_MAX_EXTEND_SIZE ? MI_MIN_EXTEND : MI_MAX_EXTEND_SIZE/(uint32_t)bsize);
  if (max_extend < MI_MIN_EXTEND) { max_extend = MI_MIN_EXTEND; }
  mi_assert_internal(max_extend > 0);

  if (extend > max_extend) {
    // ensure we don't touch memory beyond the page to reduce page commit.
    // the `lean` benchmark tests this. Going from 1 to 8 increases rss by 50%.
    extend = max_extend;
  }

  mi_assert_internal(extend > 0 && extend + page->capacity <= page->reserved);
  mi_assert_internal(extend < (1UL<<16));

  // and append the extend to the free list (randomized in secure mode)
  if (extend < MI_MIN_SLICES || MI_SECURE==0) { //!mi_option_is_enabled(mi_option_secure)) {
    mi_page_free_list_extend(page, bsize, extend, &tld->stats );
  }
  else {
    mi_page_free_list_extend_secure(heap, page, bsize, extend, &tld->stats);
  }
  // enable the new free list
  page->capacity += (uint16_t)extend;
  mi_stat_increase(tld->stats.page_committed, extend * bsize);
  mi_assert_expensive(mi_page_is_valid_init(page));
}

// Initialize a fresh page
static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi_tld_t* tld) {
  mi_assert(page != NULL);
  mi_segment_t* segment = _mi_page_segment(page);
  mi_assert(segment != NULL);
  mi_assert_internal(block_size > 0);
  // set fields
  mi_page_set_heap(page, heap);
  page->xblock_size = (block_size < MI_HUGE_BLOCK_SIZE ? (uint32_t)block_size : MI_HUGE_BLOCK_SIZE); // initialize before _mi_segment_page_start
  size_t page_size;
  const void* page_start = _mi_segment_page_start(segment, page, &page_size);
  MI_UNUSED(page_start);
  mi_track_mem_noaccess(page_start,page_size);
  mi_assert_internal(mi_page_block_size(page) <= page_size);
  mi_assert_internal(page_size <= page->slice_count*MI_SEGMENT_SLICE_SIZE);
  mi_assert_internal(page_size / block_size < (1L<<16));
  page->reserved = (uint16_t)(page_size / block_size);
  mi_assert_internal(page->reserved > 0);
  #if (MI_PADDING || MI_ENCODE_FREELIST)
  // free-list encoding / padding keys (randomized per page)
  page->keys[0] = _mi_heap_random_next(heap);
  page->keys[1] = _mi_heap_random_next(heap);
  #endif
  page->free_is_zero = page->is_zero_init;
  #if MI_DEBUG>2
  if (page->is_zero_init) {
    mi_track_mem_defined(page_start, page_size);
    mi_assert_expensive(mi_mem_is_zero(page_start, page_size));
  }
  #endif

  mi_assert_internal(page->is_committed);
  mi_assert_internal(page->capacity == 0);
  mi_assert_internal(page->free == NULL);
  mi_assert_internal(page->used == 0);
  mi_assert_internal(page->xthread_free == 0);
  mi_assert_internal(page->next == NULL);
  mi_assert_internal(page->prev == NULL);
  mi_assert_internal(page->retire_expire == 0);
  mi_assert_internal(!mi_page_has_aligned(page));
  #if (MI_PADDING || MI_ENCODE_FREELIST)
  mi_assert_internal(page->keys[0] != 0);
  mi_assert_internal(page->keys[1] != 0);
  #endif
  mi_assert_expensive(mi_page_is_valid_init(page));

  // initialize an initial free list
  mi_page_extend_free(heap,page,tld);
  mi_assert(mi_page_immediate_available(page));
}


/* -----------------------------------------------------------
  Find pages with free blocks
-------------------------------------------------------------*/

// Find a page with free blocks of `page->block_size`.
// Search the queue in "next fit" order for a page with an available block;
// full pages met along the way are moved to the full queue, and when the
// queue is exhausted a fresh page is allocated (retrying once on reclaim).
static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* pq, bool first_try)
{
  // search through the pages in "next fit" order
  #if MI_STAT
  size_t count = 0;
  #endif
  mi_page_t* page = pq->first;
  while (page != NULL)
  {
    mi_page_t* next = page->next; // remember next
    #if MI_STAT
    count++;
    #endif

    // 0. collect freed blocks by us and other threads
    _mi_page_free_collect(page, false);

    // 1. if the page contains free blocks, we are done
    if (mi_page_immediate_available(page)) {
      break;  // pick this one
    }

    // 2. Try to extend
    if (page->capacity < page->reserved) {
      mi_page_extend_free(heap, page, heap->tld);
      mi_assert_internal(mi_page_immediate_available(page));
      break;
    }

    // 3. If the page is completely full, move it to the `mi_pages_full`
    // queue so we don't visit long-lived pages too often.
    mi_assert_internal(!mi_page_is_in_full(page) && !mi_page_immediate_available(page));
    mi_page_to_full(page, pq);

    page = next;
  } // for each page

  mi_heap_stat_counter_increase(heap, searches, count);

  if (page == NULL) {
    _mi_heap_collect_retired(heap, false); // perhaps make a page available?
    page = mi_page_fresh(heap, pq);
    if (page == NULL && first_try) {
      // out-of-memory _or_ an abandoned page with free blocks was reclaimed, try once again
      page = mi_page_queue_find_free_ex(heap, pq, false);
    }
  }
  else {
    mi_assert(pq->first == page);
    page->retire_expire = 0;  // the page is live again; cancel any pending retire
  }
  mi_assert_internal(page == NULL || mi_page_immediate_available(page));
  return page;
}

// Find a page with free blocks of `size`.
// Fast path: check the head of the size-class queue first, and only fall back
// to the full "next fit" search when it has no immediately available block.
static inline mi_page_t* mi_find_free_page(mi_heap_t* heap, size_t size) {
  mi_page_queue_t* pq = mi_page_queue(heap,size);
  mi_page_t* page = pq->first;
  if (page != NULL) {
   #if (MI_SECURE>=3) // in secure mode, we extend half the time to increase randomness
    if (page->capacity < page->reserved && ((_mi_heap_random_next(heap) & 1) == 1)) {
      mi_page_extend_free(heap, page, heap->tld);
      mi_assert_internal(mi_page_immediate_available(page));
    }
    else
   #endif
    {
      _mi_page_free_collect(page,false);
    }

    if (mi_page_immediate_available(page)) {
      page->retire_expire = 0;
      return page; // fast path
    }
  }
  return mi_page_queue_find_free_ex(heap, pq, true);
}


/* -----------------------------------------------------------
  Users can register a deferred free function called
  when the `free` list is empty. Since the `local_free`
  is separate this is deterministically called after
  a certain number of allocations.
----------------------------------------------------------- */

static mi_deferred_free_fun* volatile deferred_free = NULL;
static _Atomic(void*) deferred_arg; // = NULL

// Invoke the user-registered deferred-free hook (if any); `recurse` guards
// against re-entry when the hook itself allocates or frees.
void _mi_deferred_free(mi_heap_t* heap, bool force) {
  heap->tld->heartbeat++;
  if (deferred_free != NULL && !heap->tld->recurse) {
    heap->tld->recurse = true;
    deferred_free(force, heap->tld->heartbeat, mi_atomic_load_ptr_relaxed(void,&deferred_arg));
    heap->tld->recurse = false;
  }
}

// Register (or replace) the process-wide deferred-free hook and its argument.
void mi_register_deferred_free(mi_deferred_free_fun* fn, void* arg) mi_attr_noexcept {
  deferred_free = fn;
  mi_atomic_store_ptr_release(void,&deferred_arg, arg);
}

/* -----------------------------------------------------------
  General allocation
----------------------------------------------------------- */

// Large and huge page allocation.
// Huge pages are allocated directly without being in a queue.
// Because huge pages contain just one block, and the segment contains
// just that page, we always treat them as abandoned and any thread
// that frees the block can free the whole page and segment directly.
// Huge pages are also used if the requested alignment is very large (> MI_ALIGNMENT_MAX).
static mi_page_t* mi_large_huge_page_alloc(mi_heap_t* heap, size_t size, size_t page_alignment) {
  size_t block_size = _mi_os_good_alloc_size(size);
  mi_assert_internal(mi_bin(block_size) == MI_BIN_HUGE || page_alignment > 0);
  bool is_huge = (block_size > MI_LARGE_OBJ_SIZE_MAX || page_alignment > 0);
  #if MI_HUGE_PAGE_ABANDON
  mi_page_queue_t* pq = (is_huge ? NULL : mi_page_queue(heap, block_size));
  #else
  mi_page_queue_t* pq = mi_page_queue(heap, is_huge ? MI_HUGE_BLOCK_SIZE : block_size); // not block_size as that can be low if the page_alignment > 0
  mi_assert_internal(!is_huge || mi_page_queue_is_huge(pq));
  #endif
  mi_page_t* page = mi_page_fresh_alloc(heap, pq, block_size, page_alignment);
  if (page != NULL) {
    mi_assert_internal(mi_page_immediate_available(page));

    if (is_huge) {
      mi_assert_internal(_mi_page_segment(page)->kind == MI_SEGMENT_HUGE);
      mi_assert_internal(_mi_page_segment(page)->used==1);
      #if MI_HUGE_PAGE_ABANDON
      mi_assert_internal(_mi_page_segment(page)->thread_id==0); // abandoned, not in the huge queue
      mi_page_set_heap(page, NULL);
      #endif
    }
    else {
      mi_assert_internal(_mi_page_segment(page)->kind != MI_SEGMENT_HUGE);
    }

    const size_t bsize = mi_page_usable_block_size(page);  // note: not `mi_page_block_size` to account for padding
    if (bsize <= MI_LARGE_OBJ_SIZE_MAX) {
      mi_heap_stat_increase(heap, large, bsize);
      mi_heap_stat_counter_increase(heap, large_count, 1);
    }
    else {
      mi_heap_stat_increase(heap, huge, bsize);
      mi_heap_stat_counter_increase(heap, huge_count, 1);
    }
  }
  return page;
}


// Allocate a page
// Note: in debug mode the size includes MI_PADDING_SIZE and might have overflowed.
static mi_page_t* mi_find_page(mi_heap_t* heap, size_t size, size_t huge_alignment) mi_attr_noexcept {
  // huge allocation?
  const size_t req_size = size - MI_PADDING_SIZE;  // correct for padding_size in case of an overflow on `size`
  if mi_unlikely(req_size > (MI_MEDIUM_OBJ_SIZE_MAX - MI_PADDING_SIZE) || huge_alignment > 0) {
    if mi_unlikely(req_size > PTRDIFF_MAX) {  // we don't allocate more than PTRDIFF_MAX (see )
      _mi_error_message(EOVERFLOW, "allocation request is too large (%zu bytes)\n", req_size);
      return NULL;
    }
    else {
      return mi_large_huge_page_alloc(heap,size,huge_alignment);
    }
  }
  else {
    // otherwise find a page with free blocks in our size segregated queues
    #if MI_PADDING
    mi_assert_internal(size >= MI_PADDING_SIZE);
    #endif
    return mi_find_free_page(heap, size);
  }
}

// Generic allocation routine if the fast path (`alloc.c:mi_page_malloc`) does not succeed.
// Note: in debug mode the size includes MI_PADDING_SIZE and might have overflowed.
// The `huge_alignment` is normally 0 but is set to a multiple of MI_SEGMENT_SIZE for
// very large requested alignments in which case we use a huge segment.
void* _mi_malloc_generic(mi_heap_t* heap, size_t size, bool zero, size_t huge_alignment) mi_attr_noexcept
{
  mi_assert_internal(heap != NULL);

  // initialize if necessary
  if mi_unlikely(!mi_heap_is_initialized(heap)) {
    heap = mi_heap_get_default(); // calls mi_thread_init
    if mi_unlikely(!mi_heap_is_initialized(heap)) { return NULL; }
  }
  mi_assert_internal(mi_heap_is_initialized(heap));

  // call potential deferred free routines
  _mi_deferred_free(heap, false);

  // free delayed frees from other threads (but skip contended ones)
  _mi_heap_delayed_free_partial(heap);

  // find (or allocate) a page of the right size
  mi_page_t* page = mi_find_page(heap, size, huge_alignment);
  if mi_unlikely(page == NULL) { // first time out of memory, try to collect and retry the allocation once more
    mi_heap_collect(heap, true /* force */);
    page = mi_find_page(heap, size, huge_alignment);
  }

  if mi_unlikely(page == NULL) { // out of memory
    const size_t req_size = size - MI_PADDING_SIZE;  // correct for padding_size in case of an overflow on `size`
    _mi_error_message(ENOMEM, "unable to allocate memory (%zu bytes)\n", req_size);
    return NULL;
  }

  mi_assert_internal(mi_page_immediate_available(page));
  mi_assert_internal(mi_page_block_size(page) >= size);

  // and try again, this time succeeding! (i.e. this should never recurse through _mi_page_malloc)
  if mi_unlikely(zero && page->xblock_size == 0) {
    // note: we cannot call _mi_page_malloc with zeroing for huge blocks; we zero it afterwards in that case.
    void* p = _mi_page_malloc(heap, page, size, false);
    mi_assert_internal(p != NULL);
    _mi_memzero_aligned(p, mi_page_usable_block_size(page));
    return p;
  }
  else {
    return _mi_page_malloc(heap, page, size, zero);
  }
}
luametatex-2.10.08/source/libraries/mimalloc/src/prim/000077500000000000000000000000001442250314700226445ustar00rootroot00000000000000luametatex-2.10.08/source/libraries/mimalloc/src/prim/osx/000077500000000000000000000000001442250314700234555ustar00rootroot00000000000000luametatex-2.10.08/source/libraries/mimalloc/src/prim/osx/alloc-override-zone.c000066400000000000000000000323431442250314700275060ustar00rootroot00000000000000/* ----------------------------------------------------------------------------
Copyright (c) 2018-2022, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
-----------------------------------------------------------------------------*/

#include "mimalloc.h"
#include "mimalloc/internal.h"

#if defined(MI_MALLOC_OVERRIDE)

#if !defined(__APPLE__)
#error "this file should only be included on macOS"
#endif

/* ------------------------------------------------------
   Override system malloc on macOS
   This is done through the malloc zone interface.
It seems to be most robust in combination with interposing though or otherwise we may get zone errors as there are could be allocations done by the time we take over the zone. ------------------------------------------------------ */ #include #include #include // memset #include #ifdef __cplusplus extern "C" { #endif #if defined(MAC_OS_X_VERSION_10_6) && (MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_6) // only available from OSX 10.6 extern malloc_zone_t* malloc_default_purgeable_zone(void) __attribute__((weak_import)); #endif /* ------------------------------------------------------ malloc zone members ------------------------------------------------------ */ static size_t zone_size(malloc_zone_t* zone, const void* p) { MI_UNUSED(zone); if (!mi_is_in_heap_region(p)){ return 0; } // not our pointer, bail out return mi_usable_size(p); } static void* zone_malloc(malloc_zone_t* zone, size_t size) { MI_UNUSED(zone); return mi_malloc(size); } static void* zone_calloc(malloc_zone_t* zone, size_t count, size_t size) { MI_UNUSED(zone); return mi_calloc(count, size); } static void* zone_valloc(malloc_zone_t* zone, size_t size) { MI_UNUSED(zone); return mi_malloc_aligned(size, _mi_os_page_size()); } static void zone_free(malloc_zone_t* zone, void* p) { MI_UNUSED(zone); mi_cfree(p); } static void* zone_realloc(malloc_zone_t* zone, void* p, size_t newsize) { MI_UNUSED(zone); return mi_realloc(p, newsize); } static void* zone_memalign(malloc_zone_t* zone, size_t alignment, size_t size) { MI_UNUSED(zone); return mi_malloc_aligned(size,alignment); } static void zone_destroy(malloc_zone_t* zone) { MI_UNUSED(zone); // todo: ignore for now? 
} static unsigned zone_batch_malloc(malloc_zone_t* zone, size_t size, void** ps, unsigned count) { size_t i; for (i = 0; i < count; i++) { ps[i] = zone_malloc(zone, size); if (ps[i] == NULL) break; } return i; } static void zone_batch_free(malloc_zone_t* zone, void** ps, unsigned count) { for(size_t i = 0; i < count; i++) { zone_free(zone, ps[i]); ps[i] = NULL; } } static size_t zone_pressure_relief(malloc_zone_t* zone, size_t size) { MI_UNUSED(zone); MI_UNUSED(size); mi_collect(false); return 0; } static void zone_free_definite_size(malloc_zone_t* zone, void* p, size_t size) { MI_UNUSED(size); zone_free(zone,p); } static boolean_t zone_claimed_address(malloc_zone_t* zone, void* p) { MI_UNUSED(zone); return mi_is_in_heap_region(p); } /* ------------------------------------------------------ Introspection members ------------------------------------------------------ */ static kern_return_t intro_enumerator(task_t task, void* p, unsigned type_mask, vm_address_t zone_address, memory_reader_t reader, vm_range_recorder_t recorder) { // todo: enumerate all memory MI_UNUSED(task); MI_UNUSED(p); MI_UNUSED(type_mask); MI_UNUSED(zone_address); MI_UNUSED(reader); MI_UNUSED(recorder); return KERN_SUCCESS; } static size_t intro_good_size(malloc_zone_t* zone, size_t size) { MI_UNUSED(zone); return mi_good_size(size); } static boolean_t intro_check(malloc_zone_t* zone) { MI_UNUSED(zone); return true; } static void intro_print(malloc_zone_t* zone, boolean_t verbose) { MI_UNUSED(zone); MI_UNUSED(verbose); mi_stats_print(NULL); } static void intro_log(malloc_zone_t* zone, void* p) { MI_UNUSED(zone); MI_UNUSED(p); // todo? } static void intro_force_lock(malloc_zone_t* zone) { MI_UNUSED(zone); // todo? } static void intro_force_unlock(malloc_zone_t* zone) { MI_UNUSED(zone); // todo? } static void intro_statistics(malloc_zone_t* zone, malloc_statistics_t* stats) { MI_UNUSED(zone); // todo... 
stats->blocks_in_use = 0; stats->size_in_use = 0; stats->max_size_in_use = 0; stats->size_allocated = 0; } static boolean_t intro_zone_locked(malloc_zone_t* zone) { MI_UNUSED(zone); return false; } /* ------------------------------------------------------ At process start, override the default allocator ------------------------------------------------------ */ #if defined(__GNUC__) && !defined(__clang__) #pragma GCC diagnostic ignored "-Wmissing-field-initializers" #endif #if defined(__clang__) #pragma clang diagnostic ignored "-Wc99-extensions" #endif static malloc_introspection_t mi_introspect = { .enumerator = &intro_enumerator, .good_size = &intro_good_size, .check = &intro_check, .print = &intro_print, .log = &intro_log, .force_lock = &intro_force_lock, .force_unlock = &intro_force_unlock, #if defined(MAC_OS_X_VERSION_10_6) && (MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_6) && !defined(__ppc__) .statistics = &intro_statistics, .zone_locked = &intro_zone_locked, #endif }; static malloc_zone_t mi_malloc_zone = { // note: even with designators, the order is important for C++ compilation //.reserved1 = NULL, //.reserved2 = NULL, .size = &zone_size, .malloc = &zone_malloc, .calloc = &zone_calloc, .valloc = &zone_valloc, .free = &zone_free, .realloc = &zone_realloc, .destroy = &zone_destroy, .zone_name = "mimalloc", .batch_malloc = &zone_batch_malloc, .batch_free = &zone_batch_free, .introspect = &mi_introspect, #if defined(MAC_OS_X_VERSION_10_6) && (MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_6) && !defined(__ppc__) #if defined(MAC_OS_X_VERSION_10_14) && (MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_14) .version = 10, #else .version = 9, #endif // switch to version 9+ on OSX 10.6 to support memalign. 
.memalign = &zone_memalign, .free_definite_size = &zone_free_definite_size, .pressure_relief = &zone_pressure_relief, #if defined(MAC_OS_X_VERSION_10_14) && (MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_14) .claimed_address = &zone_claimed_address, #endif #else .version = 4, #endif }; #ifdef __cplusplus } #endif #if defined(MI_OSX_INTERPOSE) && defined(MI_SHARED_LIB_EXPORT) // ------------------------------------------------------ // Override malloc_xxx and malloc_zone_xxx api's to use only // our mimalloc zone. Since even the loader uses malloc // on macOS, this ensures that all allocations go through // mimalloc (as all calls are interposed). // The main `malloc`, `free`, etc calls are interposed in `alloc-override.c`, // Here, we also override macOS specific API's like // `malloc_zone_calloc` etc. see // ------------------------------------------------------ static inline malloc_zone_t* mi_get_default_zone(void) { static bool init; if mi_unlikely(!init) { init = true; malloc_zone_register(&mi_malloc_zone); // by calling register we avoid a zone error on free (see ) } return &mi_malloc_zone; } mi_decl_externc int malloc_jumpstart(uintptr_t cookie); mi_decl_externc void _malloc_fork_prepare(void); mi_decl_externc void _malloc_fork_parent(void); mi_decl_externc void _malloc_fork_child(void); static malloc_zone_t* mi_malloc_create_zone(vm_size_t size, unsigned flags) { MI_UNUSED(size); MI_UNUSED(flags); return mi_get_default_zone(); } static malloc_zone_t* mi_malloc_default_zone (void) { return mi_get_default_zone(); } static malloc_zone_t* mi_malloc_default_purgeable_zone(void) { return mi_get_default_zone(); } static void mi_malloc_destroy_zone(malloc_zone_t* zone) { MI_UNUSED(zone); // nothing. 
} static kern_return_t mi_malloc_get_all_zones (task_t task, memory_reader_t mr, vm_address_t** addresses, unsigned* count) { MI_UNUSED(task); MI_UNUSED(mr); if (addresses != NULL) *addresses = NULL; if (count != NULL) *count = 0; return KERN_SUCCESS; } static const char* mi_malloc_get_zone_name(malloc_zone_t* zone) { return (zone == NULL ? mi_malloc_zone.zone_name : zone->zone_name); } static void mi_malloc_set_zone_name(malloc_zone_t* zone, const char* name) { MI_UNUSED(zone); MI_UNUSED(name); } static int mi_malloc_jumpstart(uintptr_t cookie) { MI_UNUSED(cookie); return 1; // or 0 for no error? } static void mi__malloc_fork_prepare(void) { // nothing } static void mi__malloc_fork_parent(void) { // nothing } static void mi__malloc_fork_child(void) { // nothing } static void mi_malloc_printf(const char* fmt, ...) { MI_UNUSED(fmt); } static bool zone_check(malloc_zone_t* zone) { MI_UNUSED(zone); return true; } static malloc_zone_t* zone_from_ptr(const void* p) { MI_UNUSED(p); return mi_get_default_zone(); } static void zone_log(malloc_zone_t* zone, void* p) { MI_UNUSED(zone); MI_UNUSED(p); } static void zone_print(malloc_zone_t* zone, bool b) { MI_UNUSED(zone); MI_UNUSED(b); } static void zone_print_ptr_info(void* p) { MI_UNUSED(p); } static void zone_register(malloc_zone_t* zone) { MI_UNUSED(zone); } static void zone_unregister(malloc_zone_t* zone) { MI_UNUSED(zone); } // use interposing so `DYLD_INSERT_LIBRARIES` works without `DYLD_FORCE_FLAT_NAMESPACE=1` // See: struct mi_interpose_s { const void* replacement; const void* target; }; #define MI_INTERPOSE_FUN(oldfun,newfun) { (const void*)&newfun, (const void*)&oldfun } #define MI_INTERPOSE_MI(fun) MI_INTERPOSE_FUN(fun,mi_##fun) #define MI_INTERPOSE_ZONE(fun) MI_INTERPOSE_FUN(malloc_##fun,fun) __attribute__((used)) static const struct mi_interpose_s _mi_zone_interposes[] __attribute__((section("__DATA, __interpose"))) = { MI_INTERPOSE_MI(malloc_create_zone), MI_INTERPOSE_MI(malloc_default_purgeable_zone), 
MI_INTERPOSE_MI(malloc_default_zone), MI_INTERPOSE_MI(malloc_destroy_zone), MI_INTERPOSE_MI(malloc_get_all_zones), MI_INTERPOSE_MI(malloc_get_zone_name), MI_INTERPOSE_MI(malloc_jumpstart), MI_INTERPOSE_MI(malloc_printf), MI_INTERPOSE_MI(malloc_set_zone_name), MI_INTERPOSE_MI(_malloc_fork_child), MI_INTERPOSE_MI(_malloc_fork_parent), MI_INTERPOSE_MI(_malloc_fork_prepare), MI_INTERPOSE_ZONE(zone_batch_free), MI_INTERPOSE_ZONE(zone_batch_malloc), MI_INTERPOSE_ZONE(zone_calloc), MI_INTERPOSE_ZONE(zone_check), MI_INTERPOSE_ZONE(zone_free), MI_INTERPOSE_ZONE(zone_from_ptr), MI_INTERPOSE_ZONE(zone_log), MI_INTERPOSE_ZONE(zone_malloc), MI_INTERPOSE_ZONE(zone_memalign), MI_INTERPOSE_ZONE(zone_print), MI_INTERPOSE_ZONE(zone_print_ptr_info), MI_INTERPOSE_ZONE(zone_realloc), MI_INTERPOSE_ZONE(zone_register), MI_INTERPOSE_ZONE(zone_unregister), MI_INTERPOSE_ZONE(zone_valloc) }; #else // ------------------------------------------------------ // hook into the zone api's without interposing // This is the official way of adding an allocator but // it seems less robust than using interpose. // ------------------------------------------------------ static inline malloc_zone_t* mi_get_default_zone(void) { // The first returned zone is the real default malloc_zone_t** zones = NULL; unsigned count = 0; kern_return_t ret = malloc_get_all_zones(0, NULL, (vm_address_t**)&zones, &count); if (ret == KERN_SUCCESS && count > 0) { return zones[0]; } else { // fallback return malloc_default_zone(); } } #if defined(__clang__) __attribute__((constructor(0))) #else __attribute__((constructor)) // seems not supported by g++-11 on the M1 #endif static void _mi_macos_override_malloc(void) { malloc_zone_t* purgeable_zone = NULL; #if defined(MAC_OS_X_VERSION_10_6) && (MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_6) // force the purgeable zone to exist to avoid strange bugs if (malloc_default_purgeable_zone) { purgeable_zone = malloc_default_purgeable_zone(); } #endif // Register our zone. 
// thomcc: I think this is still needed to put us in the zone list. malloc_zone_register(&mi_malloc_zone); // Unregister the default zone, this makes our zone the new default // as that was the last registered. malloc_zone_t *default_zone = mi_get_default_zone(); // thomcc: Unsure if the next test is *always* false or just false in the // cases I've tried. I'm also unsure if the code inside is needed. at all if (default_zone != &mi_malloc_zone) { malloc_zone_unregister(default_zone); // Reregister the default zone so free and realloc in that zone keep working. malloc_zone_register(default_zone); } // Unregister, and re-register the purgeable_zone to avoid bugs if it occurs // earlier than the default zone. if (purgeable_zone != NULL) { malloc_zone_unregister(purgeable_zone); malloc_zone_register(purgeable_zone); } } #endif // MI_OSX_INTERPOSE #endif // MI_MALLOC_OVERRIDE luametatex-2.10.08/source/libraries/mimalloc/src/prim/osx/prim.c000066400000000000000000000007511442250314700245730ustar00rootroot00000000000000/* ---------------------------------------------------------------------------- Copyright (c) 2018-2023, Microsoft Research, Daan Leijen This is free software; you can redistribute it and/or modify it under the terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. -----------------------------------------------------------------------------*/ // We use the unix/prim.c with the mmap API on macOSX #include "../unix/prim.c" luametatex-2.10.08/source/libraries/mimalloc/src/prim/prim.c000066400000000000000000000015431442250314700237620ustar00rootroot00000000000000/* ---------------------------------------------------------------------------- Copyright (c) 2018-2023, Microsoft Research, Daan Leijen This is free software; you can redistribute it and/or modify it under the terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. 
-----------------------------------------------------------------------------*/ // Select the implementation of the primitives // depending on the OS. #if defined(_WIN32) #include "windows/prim.c" // VirtualAlloc (Windows) #elif defined(__APPLE__) #include "osx/prim.c" // macOSX (actually defers to mmap in unix/prim.c) #elif defined(__wasi__) #define MI_USE_SBRK #include "wasi/prim.c" // memory-grow or sbrk (Wasm) #else #include "unix/prim.c" // mmap() (Linux, macOSX, BSD, Illumnos, Haiku, DragonFly, etc.) #endif luametatex-2.10.08/source/libraries/mimalloc/src/prim/readme.md000066400000000000000000000007101442250314700244210ustar00rootroot00000000000000## Portability Primitives This is the portability layer where all primitives needed from the OS are defined. - `include/mimalloc/prim.h`: primitive portability API definition. - `prim.c`: Selects one of `unix/prim.c`, `wasi/prim.c`, or `windows/prim.c` depending on the host platform (and on macOS, `osx/prim.c` defers to `unix/prim.c`). Note: still work in progress, there may still be places in the sources that still depend on OS ifdef's.luametatex-2.10.08/source/libraries/mimalloc/src/prim/unix/000077500000000000000000000000001442250314700236275ustar00rootroot00000000000000luametatex-2.10.08/source/libraries/mimalloc/src/prim/unix/prim.c000066400000000000000000000671521442250314700247550ustar00rootroot00000000000000/* ---------------------------------------------------------------------------- Copyright (c) 2018-2023, Microsoft Research, Daan Leijen This is free software; you can redistribute it and/or modify it under the terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. 
-----------------------------------------------------------------------------*/ // This file is included in `src/prim/prim.c` #ifndef _DEFAULT_SOURCE #define _DEFAULT_SOURCE // ensure mmap flags and syscall are defined #endif #if defined(__sun) // illumos provides new mman.h api when any of these are defined // otherwise the old api based on caddr_t which predates the void pointers one. // stock solaris provides only the former, chose to atomically to discard those // flags only here rather than project wide tough. #undef _XOPEN_SOURCE #undef _POSIX_C_SOURCE #endif #include "mimalloc.h" #include "mimalloc/internal.h" #include "mimalloc/atomic.h" #include "mimalloc/prim.h" #include // mmap #include // sysconf #if defined(__linux__) #include #include #if defined(__GLIBC__) #include // linux mmap flags #else #include #endif #elif defined(__APPLE__) #include #if !TARGET_IOS_IPHONE && !TARGET_IOS_SIMULATOR #include #endif #elif defined(__FreeBSD__) || defined(__DragonFly__) #include #if __FreeBSD_version >= 1200000 #include #include #endif #include #endif #if !defined(__HAIKU__) && !defined(__APPLE__) && !defined(__CYGWIN__) #define MI_HAS_SYSCALL_H #include #endif //------------------------------------------------------------------------------------ // Use syscalls for some primitives to allow for libraries that override open/read/close etc. 
// and do allocation themselves; using syscalls prevents recursion when mimalloc is // still initializing (issue #713) //------------------------------------------------------------------------------------ #if defined(MI_HAS_SYSCALL_H) && defined(SYS_open) && defined(SYS_close) && defined(SYS_read) && defined(SYS_access) static int mi_prim_open(const char* fpath, int open_flags) { return syscall(SYS_open,fpath,open_flags,0); } static ssize_t mi_prim_read(int fd, void* buf, size_t bufsize) { return syscall(SYS_read,fd,buf,bufsize); } static int mi_prim_close(int fd) { return syscall(SYS_close,fd); } static int mi_prim_access(const char *fpath, int mode) { return syscall(SYS_access,fpath,mode); } #elif !defined(__APPLE__) // avoid unused warnings static int mi_prim_open(const char* fpath, int open_flags) { return open(fpath,open_flags); } static ssize_t mi_prim_read(int fd, void* buf, size_t bufsize) { return read(fd,buf,bufsize); } static int mi_prim_close(int fd) { return close(fd); } static int mi_prim_access(const char *fpath, int mode) { return access(fpath,mode); } #endif //--------------------------------------------- // init //--------------------------------------------- static bool unix_detect_overcommit(void) { bool os_overcommit = true; #if defined(__linux__) int fd = mi_prim_open("/proc/sys/vm/overcommit_memory", O_RDONLY); if (fd >= 0) { char buf[32]; ssize_t nread = mi_prim_read(fd, &buf, sizeof(buf)); mi_prim_close(fd); // // 0: heuristic overcommit, 1: always overcommit, 2: never overcommit (ignore NORESERVE) if (nread >= 1) { os_overcommit = (buf[0] == '0' || buf[0] == '1'); } } #elif defined(__FreeBSD__) int val = 0; size_t olen = sizeof(val); if (sysctlbyname("vm.overcommit", &val, &olen, NULL, 0) == 0) { os_overcommit = (val != 0); } #else // default: overcommit is true #endif return os_overcommit; } void _mi_prim_mem_init( mi_os_mem_config_t* config ) { long psize = sysconf(_SC_PAGESIZE); if (psize > 0) { config->page_size = (size_t)psize; 
config->alloc_granularity = (size_t)psize; } config->large_page_size = 2*MI_MiB; // TODO: can we query the OS for this? config->has_overcommit = unix_detect_overcommit(); config->must_free_whole = false; // mmap can free in parts config->has_virtual_reserve = true; // todo: check if this true for NetBSD? (for anonymous mmap with PROT_NONE) } //--------------------------------------------- // free //--------------------------------------------- int _mi_prim_free(void* addr, size_t size ) { bool err = (munmap(addr, size) == -1); return (err ? errno : 0); } //--------------------------------------------- // mmap //--------------------------------------------- static int unix_madvise(void* addr, size_t size, int advice) { #if defined(__sun) return madvise((caddr_t)addr, size, advice); // Solaris needs cast (issue #520) #else return madvise(addr, size, advice); #endif } static void* unix_mmap_prim(void* addr, size_t size, size_t try_alignment, int protect_flags, int flags, int fd) { MI_UNUSED(try_alignment); void* p = NULL; #if defined(MAP_ALIGNED) // BSD if (addr == NULL && try_alignment > 1 && (try_alignment % _mi_os_page_size()) == 0) { size_t n = mi_bsr(try_alignment); if (((size_t)1 << n) == try_alignment && n >= 12 && n <= 30) { // alignment is a power of 2 and 4096 <= alignment <= 1GiB p = mmap(addr, size, protect_flags, flags | MAP_ALIGNED(n), fd, 0); if (p==MAP_FAILED || !_mi_is_aligned(p,try_alignment)) { int err = errno; _mi_warning_message("unable to directly request aligned OS memory (error: %d (0x%x), size: 0x%zx bytes, alignment: 0x%zx, hint address: %p)\n", err, err, size, try_alignment, addr); } if (p!=MAP_FAILED) return p; // fall back to regular mmap } } #elif defined(MAP_ALIGN) // Solaris if (addr == NULL && try_alignment > 1 && (try_alignment % _mi_os_page_size()) == 0) { p = mmap((void*)try_alignment, size, protect_flags, flags | MAP_ALIGN, fd, 0); // addr parameter is the required alignment if (p!=MAP_FAILED) return p; // fall back to regular mmap 
} #endif #if (MI_INTPTR_SIZE >= 8) && !defined(MAP_ALIGNED) // on 64-bit systems, use the virtual address area after 2TiB for 4MiB aligned allocations if (addr == NULL) { void* hint = _mi_os_get_aligned_hint(try_alignment, size); if (hint != NULL) { p = mmap(hint, size, protect_flags, flags, fd, 0); if (p==MAP_FAILED || !_mi_is_aligned(p,try_alignment)) { #if MI_TRACK_ENABLED // asan sometimes does not instrument errno correctly? int err = 0; #else int err = errno; #endif _mi_warning_message("unable to directly request hinted aligned OS memory (error: %d (0x%x), size: 0x%zx bytes, alignment: 0x%zx, hint address: %p)\n", err, err, size, try_alignment, hint); } if (p!=MAP_FAILED) return p; // fall back to regular mmap } } #endif // regular mmap p = mmap(addr, size, protect_flags, flags, fd, 0); if (p!=MAP_FAILED) return p; // failed to allocate return NULL; } static int unix_mmap_fd(void) { #if defined(VM_MAKE_TAG) // macOS: tracking anonymous page with a specific ID. (All up to 98 are taken officially but LLVM sanitizers had taken 99) int os_tag = (int)mi_option_get(mi_option_os_tag); if (os_tag < 100 || os_tag > 255) { os_tag = 100; } return VM_MAKE_TAG(os_tag); #else return -1; #endif } static void* unix_mmap(void* addr, size_t size, size_t try_alignment, int protect_flags, bool large_only, bool allow_large, bool* is_large) { #if !defined(MAP_ANONYMOUS) #define MAP_ANONYMOUS MAP_ANON #endif #if !defined(MAP_NORESERVE) #define MAP_NORESERVE 0 #endif void* p = NULL; const int fd = unix_mmap_fd(); int flags = MAP_PRIVATE | MAP_ANONYMOUS; if (_mi_os_has_overcommit()) { flags |= MAP_NORESERVE; } #if defined(PROT_MAX) protect_flags |= PROT_MAX(PROT_READ | PROT_WRITE); // BSD #endif // huge page allocation if ((large_only || _mi_os_use_large_page(size, try_alignment)) && allow_large) { static _Atomic(size_t) large_page_try_ok; // = 0; size_t try_ok = mi_atomic_load_acquire(&large_page_try_ok); if (!large_only && try_ok > 0) { // If the OS is not configured for large OS 
pages, or the user does not have // enough permission, the `mmap` will always fail (but it might also fail for other reasons). // Therefore, once a large page allocation failed, we don't try again for `large_page_try_ok` times // to avoid too many failing calls to mmap. mi_atomic_cas_strong_acq_rel(&large_page_try_ok, &try_ok, try_ok - 1); } else { int lflags = flags & ~MAP_NORESERVE; // using NORESERVE on huge pages seems to fail on Linux int lfd = fd; #ifdef MAP_ALIGNED_SUPER lflags |= MAP_ALIGNED_SUPER; #endif #ifdef MAP_HUGETLB lflags |= MAP_HUGETLB; #endif #ifdef MAP_HUGE_1GB static bool mi_huge_pages_available = true; if ((size % MI_GiB) == 0 && mi_huge_pages_available) { lflags |= MAP_HUGE_1GB; } else #endif { #ifdef MAP_HUGE_2MB lflags |= MAP_HUGE_2MB; #endif } #ifdef VM_FLAGS_SUPERPAGE_SIZE_2MB lfd |= VM_FLAGS_SUPERPAGE_SIZE_2MB; #endif if (large_only || lflags != flags) { // try large OS page allocation *is_large = true; p = unix_mmap_prim(addr, size, try_alignment, protect_flags, lflags, lfd); #ifdef MAP_HUGE_1GB if (p == NULL && (lflags & MAP_HUGE_1GB) != 0) { mi_huge_pages_available = false; // don't try huge 1GiB pages again _mi_warning_message("unable to allocate huge (1GiB) page, trying large (2MiB) pages instead (errno: %i)\n", errno); lflags = ((lflags & ~MAP_HUGE_1GB) | MAP_HUGE_2MB); p = unix_mmap_prim(addr, size, try_alignment, protect_flags, lflags, lfd); } #endif if (large_only) return p; if (p == NULL) { mi_atomic_store_release(&large_page_try_ok, (size_t)8); // on error, don't try again for the next N allocations } } } } // regular allocation if (p == NULL) { *is_large = false; p = unix_mmap_prim(addr, size, try_alignment, protect_flags, flags, fd); if (p != NULL) { #if defined(MADV_HUGEPAGE) // Many Linux systems don't allow MAP_HUGETLB but they support instead // transparent huge pages (THP). 
Generally, it is not required to call `madvise` with MADV_HUGE // though since properly aligned allocations will already use large pages if available // in that case -- in particular for our large regions (in `memory.c`). // However, some systems only allow THP if called with explicit `madvise`, so // when large OS pages are enabled for mimalloc, we call `madvise` anyways. if (allow_large && _mi_os_use_large_page(size, try_alignment)) { if (unix_madvise(p, size, MADV_HUGEPAGE) == 0) { *is_large = true; // possibly }; } #elif defined(__sun) if (allow_large && _mi_os_use_large_page(size, try_alignment)) { struct memcntl_mha cmd = {0}; cmd.mha_pagesize = large_os_page_size; cmd.mha_cmd = MHA_MAPSIZE_VA; if (memcntl((caddr_t)p, size, MC_HAT_ADVISE, (caddr_t)&cmd, 0, 0) == 0) { *is_large = true; } } #endif } } return p; } // Note: the `try_alignment` is just a hint and the returned pointer is not guaranteed to be aligned. int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, void** addr) { mi_assert_internal(size > 0 && (size % _mi_os_page_size()) == 0); mi_assert_internal(commit || !allow_large); mi_assert_internal(try_alignment > 0); *is_zero = true; int protect_flags = (commit ? (PROT_WRITE | PROT_READ) : PROT_NONE); *addr = unix_mmap(NULL, size, try_alignment, protect_flags, false, allow_large, is_large); return (*addr != NULL ? 
0 : errno); } //--------------------------------------------- // Commit/Reset //--------------------------------------------- static void unix_mprotect_hint(int err) { #if defined(__linux__) && (MI_SECURE>=2) // guard page around every mimalloc page if (err == ENOMEM) { _mi_warning_message("The next warning may be caused by a low memory map limit.\n" " On Linux this is controlled by the vm.max_map_count -- maybe increase it?\n" " For example: sudo sysctl -w vm.max_map_count=262144\n"); } #else MI_UNUSED(err); #endif } int _mi_prim_commit(void* start, size_t size, bool* is_zero) { // commit: ensure we can access the area // note: we may think that *is_zero can be true since the memory // was either from mmap PROT_NONE, or from decommit MADV_DONTNEED, but // we sometimes call commit on a range with still partially committed // memory and `mprotect` does not zero the range. *is_zero = false; int err = mprotect(start, size, (PROT_READ | PROT_WRITE)); if (err != 0) { err = errno; unix_mprotect_hint(err); } return err; } int _mi_prim_decommit(void* start, size_t size, bool* needs_recommit) { int err = 0; // decommit: use MADV_DONTNEED as it decreases rss immediately (unlike MADV_FREE) err = unix_madvise(start, size, MADV_DONTNEED); #if !MI_DEBUG && !MI_SECURE *needs_recommit = false; #else *needs_recommit = true; mprotect(start, size, PROT_NONE); #endif /* // decommit: use mmap with MAP_FIXED and PROT_NONE to discard the existing memory (and reduce rss) *needs_recommit = true; const int fd = unix_mmap_fd(); void* p = mmap(start, size, PROT_NONE, (MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE), fd, 0); if (p != start) { err = errno; } */ return err; } int _mi_prim_reset(void* start, size_t size) { // We try to use `MADV_FREE` as that is the fastest. A drawback though is that it // will not reduce the `rss` stats in tools like `top` even though the memory is available // to other processes. 
With the default `MIMALLOC_PURGE_DECOMMITS=1` we ensure that by // default `MADV_DONTNEED` is used though. #if defined(MADV_FREE) static _Atomic(size_t) advice = MI_ATOMIC_VAR_INIT(MADV_FREE); int oadvice = (int)mi_atomic_load_relaxed(&advice); int err; while ((err = unix_madvise(start, size, oadvice)) != 0 && errno == EAGAIN) { errno = 0; }; if (err != 0 && errno == EINVAL && oadvice == MADV_FREE) { // if MADV_FREE is not supported, fall back to MADV_DONTNEED from now on mi_atomic_store_release(&advice, (size_t)MADV_DONTNEED); err = unix_madvise(start, size, MADV_DONTNEED); } #else int err = unix_madvise(start, size, MADV_DONTNEED); #endif return err; } int _mi_prim_protect(void* start, size_t size, bool protect) { int err = mprotect(start, size, protect ? PROT_NONE : (PROT_READ | PROT_WRITE)); if (err != 0) { err = errno; } unix_mprotect_hint(err); return err; } //--------------------------------------------- // Huge page allocation //--------------------------------------------- #if (MI_INTPTR_SIZE >= 8) && !defined(__HAIKU__) && !defined(__CYGWIN__) #ifndef MPOL_PREFERRED #define MPOL_PREFERRED 1 #endif #if defined(MI_HAS_SYSCALL_H) && defined(SYS_mbind) static long mi_prim_mbind(void* start, unsigned long len, unsigned long mode, const unsigned long* nmask, unsigned long maxnode, unsigned flags) { return syscall(SYS_mbind, start, len, mode, nmask, maxnode, flags); } #else static long mi_prim_mbind(void* start, unsigned long len, unsigned long mode, const unsigned long* nmask, unsigned long maxnode, unsigned flags) { MI_UNUSED(start); MI_UNUSED(len); MI_UNUSED(mode); MI_UNUSED(nmask); MI_UNUSED(maxnode); MI_UNUSED(flags); return 0; } #endif int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, bool* is_zero, void** addr) { bool is_large = true; *is_zero = true; *addr = unix_mmap(hint_addr, size, MI_SEGMENT_SIZE, PROT_READ | PROT_WRITE, true, true, &is_large); if (*addr != NULL && numa_node >= 0 && numa_node < 8*MI_INTPTR_SIZE) { // at 
most 64 nodes unsigned long numa_mask = (1UL << numa_node); // TODO: does `mbind` work correctly for huge OS pages? should we // use `set_mempolicy` before calling mmap instead? // see: long err = mi_prim_mbind(*addr, size, MPOL_PREFERRED, &numa_mask, 8*MI_INTPTR_SIZE, 0); if (err != 0) { err = errno; _mi_warning_message("failed to bind huge (1GiB) pages to numa node %d (error: %d (0x%x))\n", numa_node, err, err); } } return (*addr != NULL ? 0 : errno); } #else int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, bool* is_zero, void** addr) { MI_UNUSED(hint_addr); MI_UNUSED(size); MI_UNUSED(numa_node); *is_zero = false; *addr = NULL; return ENOMEM; } #endif //--------------------------------------------- // NUMA nodes //--------------------------------------------- #if defined(__linux__) #include // snprintf size_t _mi_prim_numa_node(void) { #if defined(MI_HAS_SYSCALL_H) && defined(SYS_getcpu) unsigned long node = 0; unsigned long ncpu = 0; long err = syscall(SYS_getcpu, &ncpu, &node, NULL); if (err != 0) return 0; return node; #else return 0; #endif } size_t _mi_prim_numa_node_count(void) { char buf[128]; unsigned node = 0; for(node = 0; node < 256; node++) { // enumerate node entries -- todo: it there a more efficient way to do this? 
(but ensure there is no allocation) snprintf(buf, 127, "/sys/devices/system/node/node%u", node + 1); if (mi_prim_access(buf,R_OK) != 0) break; } return (node+1); } #elif defined(__FreeBSD__) && __FreeBSD_version >= 1200000 size_t _mi_prim_numa_node(void) { domainset_t dom; size_t node; int policy; if (cpuset_getdomain(CPU_LEVEL_CPUSET, CPU_WHICH_PID, -1, sizeof(dom), &dom, &policy) == -1) return 0ul; for (node = 0; node < MAXMEMDOM; node++) { if (DOMAINSET_ISSET(node, &dom)) return node; } return 0ul; } size_t _mi_prim_numa_node_count(void) { size_t ndomains = 0; size_t len = sizeof(ndomains); if (sysctlbyname("vm.ndomains", &ndomains, &len, NULL, 0) == -1) return 0ul; return ndomains; } #elif defined(__DragonFly__) size_t _mi_prim_numa_node(void) { // TODO: DragonFly does not seem to provide any userland means to get this information. return 0ul; } size_t _mi_prim_numa_node_count(void) { size_t ncpus = 0, nvirtcoresperphys = 0; size_t len = sizeof(size_t); if (sysctlbyname("hw.ncpu", &ncpus, &len, NULL, 0) == -1) return 0ul; if (sysctlbyname("hw.cpu_topology_ht_ids", &nvirtcoresperphys, &len, NULL, 0) == -1) return 0ul; return nvirtcoresperphys * ncpus; } #else size_t _mi_prim_numa_node(void) { return 0; } size_t _mi_prim_numa_node_count(void) { return 1; } #endif // ---------------------------------------------------------------- // Clock // ---------------------------------------------------------------- #include #if defined(CLOCK_REALTIME) || defined(CLOCK_MONOTONIC) mi_msecs_t _mi_prim_clock_now(void) { struct timespec t; #ifdef CLOCK_MONOTONIC clock_gettime(CLOCK_MONOTONIC, &t); #else clock_gettime(CLOCK_REALTIME, &t); #endif return ((mi_msecs_t)t.tv_sec * 1000) + ((mi_msecs_t)t.tv_nsec / 1000000); } #else // low resolution timer mi_msecs_t _mi_prim_clock_now(void) { #if !defined(CLOCKS_PER_SEC) || (CLOCKS_PER_SEC == 1000) || (CLOCKS_PER_SEC == 0) return (mi_msecs_t)clock(); #elif (CLOCKS_PER_SEC < 1000) return (mi_msecs_t)clock() * (1000 / 
(mi_msecs_t)CLOCKS_PER_SEC); #else return (mi_msecs_t)clock() / ((mi_msecs_t)CLOCKS_PER_SEC / 1000); #endif } #endif //---------------------------------------------------------------- // Process info //---------------------------------------------------------------- #if defined(__unix__) || defined(__unix) || defined(unix) || defined(__APPLE__) || defined(__HAIKU__) #include #include #include #if defined(__APPLE__) #include #endif #if defined(__HAIKU__) #include #endif static mi_msecs_t timeval_secs(const struct timeval* tv) { return ((mi_msecs_t)tv->tv_sec * 1000L) + ((mi_msecs_t)tv->tv_usec / 1000L); } void _mi_prim_process_info(mi_process_info_t* pinfo) { struct rusage rusage; getrusage(RUSAGE_SELF, &rusage); pinfo->utime = timeval_secs(&rusage.ru_utime); pinfo->stime = timeval_secs(&rusage.ru_stime); #if !defined(__HAIKU__) pinfo->page_faults = rusage.ru_majflt; #endif #if defined(__HAIKU__) // Haiku does not have (yet?) a way to // get these stats per process thread_info tid; area_info mem; ssize_t c; get_thread_info(find_thread(0), &tid); while (get_next_area_info(tid.team, &c, &mem) == B_OK) { pinfo->peak_rss += mem.ram_size; } pinfo->page_faults = 0; #elif defined(__APPLE__) pinfo->peak_rss = rusage.ru_maxrss; // macos reports in bytes #ifdef MACH_TASK_BASIC_INFO struct mach_task_basic_info info; mach_msg_type_number_t infoCount = MACH_TASK_BASIC_INFO_COUNT; if (task_info(mach_task_self(), MACH_TASK_BASIC_INFO, (task_info_t)&info, &infoCount) == KERN_SUCCESS) { pinfo->current_rss = (size_t)info.resident_size; } #else struct task_basic_info info; mach_msg_type_number_t infoCount = TASK_BASIC_INFO_COUNT; if (task_info(mach_task_self(), TASK_BASIC_INFO, (task_info_t)&info, &infoCount) == KERN_SUCCESS) { pinfo->current_rss = (size_t)info.resident_size; } #endif #else pinfo->peak_rss = rusage.ru_maxrss * 1024; // Linux/BSD report in KiB #endif // use defaults for commit } #else #ifndef __wasi__ // WebAssembly instances are not processes #pragma message("define a 
way to get process info") #endif void _mi_prim_process_info(mi_process_info_t* pinfo) { // use defaults MI_UNUSED(pinfo); } #endif //---------------------------------------------------------------- // Output //---------------------------------------------------------------- void _mi_prim_out_stderr( const char* msg ) { fputs(msg,stderr); } //---------------------------------------------------------------- // Environment //---------------------------------------------------------------- #if !defined(MI_USE_ENVIRON) || (MI_USE_ENVIRON!=0) // On Posix systemsr use `environ` to access environment variables // even before the C runtime is initialized. #if defined(__APPLE__) && defined(__has_include) && __has_include() #include static char** mi_get_environ(void) { return (*_NSGetEnviron()); } #else extern char** environ; static char** mi_get_environ(void) { return environ; } #endif bool _mi_prim_getenv(const char* name, char* result, size_t result_size) { if (name==NULL) return false; const size_t len = _mi_strlen(name); if (len == 0) return false; char** env = mi_get_environ(); if (env == NULL) return false; // compare up to 10000 entries for (int i = 0; i < 10000 && env[i] != NULL; i++) { const char* s = env[i]; if (_mi_strnicmp(name, s, len) == 0 && s[len] == '=') { // case insensitive // found it _mi_strlcpy(result, s + len + 1, result_size); return true; } } return false; } #else // fallback: use standard C `getenv` but this cannot be used while initializing the C runtime bool _mi_prim_getenv(const char* name, char* result, size_t result_size) { // cannot call getenv() when still initializing the C runtime. if (_mi_preloading()) return false; const char* s = getenv(name); if (s == NULL) { // we check the upper case name too. 
char buf[64+1]; size_t len = _mi_strnlen(name,sizeof(buf)-1); for (size_t i = 0; i < len; i++) { buf[i] = _mi_toupper(name[i]); } buf[len] = 0; s = getenv(buf); } if (s == NULL || _mi_strnlen(s,result_size) >= result_size) return false; _mi_strlcpy(result, s, result_size); return true; } #endif // !MI_USE_ENVIRON //---------------------------------------------------------------- // Random //---------------------------------------------------------------- #if defined(__APPLE__) #include #if defined(MAC_OS_X_VERSION_10_10) && MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_10 #include #include #endif bool _mi_prim_random_buf(void* buf, size_t buf_len) { #if defined(MAC_OS_X_VERSION_10_15) && MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_15 // We prefere CCRandomGenerateBytes as it returns an error code while arc4random_buf // may fail silently on macOS. See PR #390, and return (CCRandomGenerateBytes(buf, buf_len) == kCCSuccess); #else // fall back on older macOS arc4random_buf(buf, buf_len); return true; #endif } #elif defined(__ANDROID__) || defined(__DragonFly__) || \ defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \ defined(__sun) #include bool _mi_prim_random_buf(void* buf, size_t buf_len) { arc4random_buf(buf, buf_len); return true; } #elif defined(__linux__) || defined(__HAIKU__) #include #include #include #include bool _mi_prim_random_buf(void* buf, size_t buf_len) { // Modern Linux provides `getrandom` but different distributions either use `sys/random.h` or `linux/random.h` // and for the latter the actual `getrandom` call is not always defined. // (see ) // We therefore use a syscall directly and fall back dynamically to /dev/urandom when needed. 
#if defined(MI_HAS_SYSCALL_H) && defined(SYS_getrandom) #ifndef GRND_NONBLOCK #define GRND_NONBLOCK (1) #endif static _Atomic(uintptr_t) no_getrandom; // = 0 if (mi_atomic_load_acquire(&no_getrandom)==0) { ssize_t ret = syscall(SYS_getrandom, buf, buf_len, GRND_NONBLOCK); if (ret >= 0) return (buf_len == (size_t)ret); if (errno != ENOSYS) return false; mi_atomic_store_release(&no_getrandom, (uintptr_t)1); // don't call again, and fall back to /dev/urandom } #endif int flags = O_RDONLY; #if defined(O_CLOEXEC) flags |= O_CLOEXEC; #endif int fd = mi_prim_open("/dev/urandom", flags); if (fd < 0) return false; size_t count = 0; while(count < buf_len) { ssize_t ret = mi_prim_read(fd, (char*)buf + count, buf_len - count); if (ret<=0) { if (errno!=EAGAIN && errno!=EINTR) break; } else { count += ret; } } mi_prim_close(fd); return (count==buf_len); } #else bool _mi_prim_random_buf(void* buf, size_t buf_len) { return false; } #endif //---------------------------------------------------------------- // Thread init/done //---------------------------------------------------------------- #if defined(MI_USE_PTHREADS) // use pthread local storage keys to detect thread ending // (and used with MI_TLS_PTHREADS for the default heap) pthread_key_t _mi_heap_default_key = (pthread_key_t)(-1); static void mi_pthread_done(void* value) { if (value!=NULL) { _mi_thread_done((mi_heap_t*)value); } } void _mi_prim_thread_init_auto_done(void) { mi_assert_internal(_mi_heap_default_key == (pthread_key_t)(-1)); pthread_key_create(&_mi_heap_default_key, &mi_pthread_done); } void _mi_prim_thread_done_auto_done(void) { // nothing to do } void _mi_prim_thread_associate_default_heap(mi_heap_t* heap) { if (_mi_heap_default_key != (pthread_key_t)(-1)) { // can happen during recursive invocation on freeBSD pthread_setspecific(_mi_heap_default_key, heap); } } #else void _mi_prim_thread_init_auto_done(void) { // nothing } void _mi_prim_thread_done_auto_done(void) { // nothing } void 
_mi_prim_thread_associate_default_heap(mi_heap_t* heap) { MI_UNUSED(heap); } #endif luametatex-2.10.08/source/libraries/mimalloc/src/prim/wasi/000077500000000000000000000000001442250314700236075ustar00rootroot00000000000000luametatex-2.10.08/source/libraries/mimalloc/src/prim/wasi/prim.c000066400000000000000000000177771442250314700247450ustar00rootroot00000000000000/* ---------------------------------------------------------------------------- Copyright (c) 2018-2023, Microsoft Research, Daan Leijen This is free software; you can redistribute it and/or modify it under the terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. -----------------------------------------------------------------------------*/ // This file is included in `src/prim/prim.c` #include "mimalloc.h" #include "mimalloc/internal.h" #include "mimalloc/atomic.h" #include "mimalloc/prim.h" //--------------------------------------------- // Initialize //--------------------------------------------- void _mi_prim_mem_init( mi_os_mem_config_t* config ) { config->page_size = 64*MI_KiB; // WebAssembly has a fixed page size: 64KiB config->alloc_granularity = 16; config->has_overcommit = false; config->must_free_whole = true; config->has_virtual_reserve = false; } //--------------------------------------------- // Free //--------------------------------------------- int _mi_prim_free(void* addr, size_t size ) { MI_UNUSED(addr); MI_UNUSED(size); // wasi heap cannot be shrunk return 0; } //--------------------------------------------- // Allocation: sbrk or memory_grow //--------------------------------------------- #if defined(MI_USE_SBRK) static void* mi_memory_grow( size_t size ) { void* p = sbrk(size); if (p == (void*)(-1)) return NULL; #if !defined(__wasi__) // on wasi this is always zero initialized already (?) memset(p,0,size); #endif return p; } #elif defined(__wasi__) static void* mi_memory_grow( size_t size ) { size_t base = (size > 0 ? 
__builtin_wasm_memory_grow(0,_mi_divide_up(size, _mi_os_page_size())) : __builtin_wasm_memory_size(0)); if (base == SIZE_MAX) return NULL; return (void*)(base * _mi_os_page_size()); } #endif #if defined(MI_USE_PTHREADS) static pthread_mutex_t mi_heap_grow_mutex = PTHREAD_MUTEX_INITIALIZER; #endif static void* mi_prim_mem_grow(size_t size, size_t try_alignment) { void* p = NULL; if (try_alignment <= 1) { // `sbrk` is not thread safe in general so try to protect it (we could skip this on WASM but leave it in for now) #if defined(MI_USE_PTHREADS) pthread_mutex_lock(&mi_heap_grow_mutex); #endif p = mi_memory_grow(size); #if defined(MI_USE_PTHREADS) pthread_mutex_unlock(&mi_heap_grow_mutex); #endif } else { void* base = NULL; size_t alloc_size = 0; // to allocate aligned use a lock to try to avoid thread interaction // between getting the current size and actual allocation // (also, `sbrk` is not thread safe in general) #if defined(MI_USE_PTHREADS) pthread_mutex_lock(&mi_heap_grow_mutex); #endif { void* current = mi_memory_grow(0); // get current size if (current != NULL) { void* aligned_current = mi_align_up_ptr(current, try_alignment); // and align from there to minimize wasted space alloc_size = _mi_align_up( ((uint8_t*)aligned_current - (uint8_t*)current) + size, _mi_os_page_size()); base = mi_memory_grow(alloc_size); } } #if defined(MI_USE_PTHREADS) pthread_mutex_unlock(&mi_heap_grow_mutex); #endif if (base != NULL) { p = mi_align_up_ptr(base, try_alignment); if ((uint8_t*)p + size > (uint8_t*)base + alloc_size) { // another thread used wasm_memory_grow/sbrk in-between and we do not have enough // space after alignment. 
Give up (and waste the space as we cannot shrink :-( ) // (in `mi_os_mem_alloc_aligned` this will fall back to overallocation to align) p = NULL; } } } /* if (p == NULL) { _mi_warning_message("unable to allocate sbrk/wasm_memory_grow OS memory (%zu bytes, %zu alignment)\n", size, try_alignment); errno = ENOMEM; return NULL; } */ mi_assert_internal( p == NULL || try_alignment == 0 || (uintptr_t)p % try_alignment == 0 ); return p; } // Note: the `try_alignment` is just a hint and the returned pointer is not guaranteed to be aligned. int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, void** addr) { MI_UNUSED(allow_large); MI_UNUSED(commit); *is_large = false; *is_zero = false; *addr = mi_prim_mem_grow(size, try_alignment); return (*addr != NULL ? 0 : ENOMEM); } //--------------------------------------------- // Commit/Reset/Protect //--------------------------------------------- int _mi_prim_commit(void* addr, size_t size, bool* is_zero) { MI_UNUSED(addr); MI_UNUSED(size); *is_zero = false; return 0; } int _mi_prim_decommit(void* addr, size_t size, bool* needs_recommit) { MI_UNUSED(addr); MI_UNUSED(size); *needs_recommit = false; return 0; } int _mi_prim_reset(void* addr, size_t size) { MI_UNUSED(addr); MI_UNUSED(size); return 0; } int _mi_prim_protect(void* addr, size_t size, bool protect) { MI_UNUSED(addr); MI_UNUSED(size); MI_UNUSED(protect); return 0; } //--------------------------------------------- // Huge pages and NUMA nodes //--------------------------------------------- int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, bool* is_zero, void** addr) { MI_UNUSED(hint_addr); MI_UNUSED(size); MI_UNUSED(numa_node); *is_zero = true; *addr = NULL; return ENOSYS; } size_t _mi_prim_numa_node(void) { return 0; } size_t _mi_prim_numa_node_count(void) { return 1; } //---------------------------------------------------------------- // Clock 
//---------------------------------------------------------------- #include #if defined(CLOCK_REALTIME) || defined(CLOCK_MONOTONIC) mi_msecs_t _mi_prim_clock_now(void) { struct timespec t; #ifdef CLOCK_MONOTONIC clock_gettime(CLOCK_MONOTONIC, &t); #else clock_gettime(CLOCK_REALTIME, &t); #endif return ((mi_msecs_t)t.tv_sec * 1000) + ((mi_msecs_t)t.tv_nsec / 1000000); } #else // low resolution timer mi_msecs_t _mi_prim_clock_now(void) { #if !defined(CLOCKS_PER_SEC) || (CLOCKS_PER_SEC == 1000) || (CLOCKS_PER_SEC == 0) return (mi_msecs_t)clock(); #elif (CLOCKS_PER_SEC < 1000) return (mi_msecs_t)clock() * (1000 / (mi_msecs_t)CLOCKS_PER_SEC); #else return (mi_msecs_t)clock() / ((mi_msecs_t)CLOCKS_PER_SEC / 1000); #endif } #endif //---------------------------------------------------------------- // Process info //---------------------------------------------------------------- void _mi_prim_process_info(mi_process_info_t* pinfo) { // use defaults MI_UNUSED(pinfo); } //---------------------------------------------------------------- // Output //---------------------------------------------------------------- void _mi_prim_out_stderr( const char* msg ) { fputs(msg,stderr); } //---------------------------------------------------------------- // Environment //---------------------------------------------------------------- bool _mi_prim_getenv(const char* name, char* result, size_t result_size) { // cannot call getenv() when still initializing the C runtime. if (_mi_preloading()) return false; const char* s = getenv(name); if (s == NULL) { // we check the upper case name too. 
char buf[64+1]; size_t len = _mi_strnlen(name,sizeof(buf)-1); for (size_t i = 0; i < len; i++) { buf[i] = _mi_toupper(name[i]); } buf[len] = 0; s = getenv(buf); } if (s == NULL || _mi_strnlen(s,result_size) >= result_size) return false; _mi_strlcpy(result, s, result_size); return true; } //---------------------------------------------------------------- // Random //---------------------------------------------------------------- bool _mi_prim_random_buf(void* buf, size_t buf_len) { return false; } //---------------------------------------------------------------- // Thread init/done //---------------------------------------------------------------- void _mi_prim_thread_init_auto_done(void) { // nothing } void _mi_prim_thread_done_auto_done(void) { // nothing } void _mi_prim_thread_associate_default_heap(mi_heap_t* heap) { MI_UNUSED(heap); } luametatex-2.10.08/source/libraries/mimalloc/src/prim/windows/000077500000000000000000000000001442250314700243365ustar00rootroot00000000000000luametatex-2.10.08/source/libraries/mimalloc/src/prim/windows/etw-mimalloc.wprp000066400000000000000000000047351442250314700276530ustar00rootroot00000000000000 luametatex-2.10.08/source/libraries/mimalloc/src/prim/windows/etw.h000066400000000000000000001065421442250314700253160ustar00rootroot00000000000000//**********************************************************************` //* This is an include file generated by Message Compiler. *` //* *` //* Copyright (c) Microsoft Corporation. All Rights Reserved. *` //**********************************************************************` #pragma once //***************************************************************************** // // Notes on the ETW event code generated by MC: // // - Structures and arrays of structures are treated as an opaque binary blob. // The caller is responsible for packing the data for the structure into a // single region of memory, with no padding between values. 
The macro will // have an extra parameter for the length of the blob. // - Arrays of nul-terminated strings must be packed by the caller into a // single binary blob containing the correct number of strings, with a nul // after each string. The size of the blob is specified in characters, and // includes the final nul. // - Arrays of SID are treated as a single binary blob. The caller is // responsible for packing the SID values into a single region of memory with // no padding. // - The length attribute on the data element in the manifest is significant // for values with intype win:UnicodeString, win:AnsiString, or win:Binary. // The length attribute must be specified for win:Binary, and is optional for // win:UnicodeString and win:AnsiString (if no length is given, the strings // are assumed to be nul-terminated). For win:UnicodeString, the length is // measured in characters, not bytes. // - For an array of win:UnicodeString, win:AnsiString, or win:Binary, the // length attribute applies to every value in the array, so every value in // the array must have the same length. The values in the array are provided // to the macro via a single pointer -- the caller is responsible for packing // all of the values into a single region of memory with no padding between // values. // - Values of type win:CountedUnicodeString, win:CountedAnsiString, and // win:CountedBinary can be generated and collected on Vista or later. // However, they may not decode properly without the Windows 10 2018 Fall // Update. // - Arrays of type win:CountedUnicodeString, win:CountedAnsiString, and // win:CountedBinary must be packed by the caller into a single region of // memory. The format for each item is a UINT16 byte-count followed by that // many bytes of data. When providing the array to the generated macro, you // must provide the total size of the packed array data, including the UINT16 // sizes for each item. 
In the case of win:CountedUnicodeString, the data // size is specified in WCHAR (16-bit) units. In the case of // win:CountedAnsiString and win:CountedBinary, the data size is specified in // bytes. // //***************************************************************************** #include #include #include #ifndef ETW_INLINE #ifdef _ETW_KM_ // In kernel mode, save stack space by never inlining templates. #define ETW_INLINE DECLSPEC_NOINLINE __inline #else // In user mode, save code size by inlining templates as appropriate. #define ETW_INLINE __inline #endif #endif // ETW_INLINE #if defined(__cplusplus) extern "C" { #endif // // MCGEN_DISABLE_PROVIDER_CODE_GENERATION macro: // Define this macro to have the compiler skip the generated functions in this // header. // #ifndef MCGEN_DISABLE_PROVIDER_CODE_GENERATION // // MCGEN_USE_KERNEL_MODE_APIS macro: // Controls whether the generated code uses kernel-mode or user-mode APIs. // - Set to 0 to use Windows user-mode APIs such as EventRegister. // - Set to 1 to use Windows kernel-mode APIs such as EtwRegister. // Default is based on whether the _ETW_KM_ macro is defined (i.e. by wdm.h). // Note that the APIs can also be overridden directly, e.g. by setting the // MCGEN_EVENTWRITETRANSFER or MCGEN_EVENTREGISTER macros. // #ifndef MCGEN_USE_KERNEL_MODE_APIS #ifdef _ETW_KM_ #define MCGEN_USE_KERNEL_MODE_APIS 1 #else #define MCGEN_USE_KERNEL_MODE_APIS 0 #endif #endif // MCGEN_USE_KERNEL_MODE_APIS // // MCGEN_HAVE_EVENTSETINFORMATION macro: // Controls how McGenEventSetInformation uses the EventSetInformation API. // - Set to 0 to disable the use of EventSetInformation // (McGenEventSetInformation will always return an error). // - Set to 1 to directly invoke MCGEN_EVENTSETINFORMATION. // - Set to 2 to to locate EventSetInformation at runtime via GetProcAddress // (user-mode) or MmGetSystemRoutineAddress (kernel-mode). 
// Default is determined as follows: // - If MCGEN_EVENTSETINFORMATION has been customized, set to 1 // (i.e. use MCGEN_EVENTSETINFORMATION). // - Else if the target OS version has EventSetInformation, set to 1 // (i.e. use MCGEN_EVENTSETINFORMATION). // - Else set to 2 (i.e. try to dynamically locate EventSetInformation). // Note that an McGenEventSetInformation function will only be generated if one // or more provider in a manifest has provider traits. // #ifndef MCGEN_HAVE_EVENTSETINFORMATION #ifdef MCGEN_EVENTSETINFORMATION // if MCGEN_EVENTSETINFORMATION has been customized, #define MCGEN_HAVE_EVENTSETINFORMATION 1 // directly invoke MCGEN_EVENTSETINFORMATION(...). #elif MCGEN_USE_KERNEL_MODE_APIS // else if using kernel-mode APIs, #if NTDDI_VERSION >= 0x06040000 // if target OS is Windows 10 or later, #define MCGEN_HAVE_EVENTSETINFORMATION 1 // directly invoke MCGEN_EVENTSETINFORMATION(...). #else // else #define MCGEN_HAVE_EVENTSETINFORMATION 2 // find "EtwSetInformation" via MmGetSystemRoutineAddress. #endif // else (using user-mode APIs) #else // if target OS and SDK is Windows 8 or later, #if WINVER >= 0x0602 && defined(EVENT_FILTER_TYPE_SCHEMATIZED) #define MCGEN_HAVE_EVENTSETINFORMATION 1 // directly invoke MCGEN_EVENTSETINFORMATION(...). #else // else #define MCGEN_HAVE_EVENTSETINFORMATION 2 // find "EventSetInformation" via GetModuleHandleExW/GetProcAddress. #endif #endif #endif // MCGEN_HAVE_EVENTSETINFORMATION // // MCGEN Override Macros // // The following override macros may be defined before including this header // to control the APIs used by this header: // // - MCGEN_EVENTREGISTER // - MCGEN_EVENTUNREGISTER // - MCGEN_EVENTSETINFORMATION // - MCGEN_EVENTWRITETRANSFER // // If the the macro is undefined, the MC implementation will default to the // corresponding ETW APIs. 
For example, if the MCGEN_EVENTREGISTER macro is // undefined, the EventRegister[MyProviderName] macro will use EventRegister // in user mode and will use EtwRegister in kernel mode. // // To prevent issues from conflicting definitions of these macros, the value // of the override macro will be used as a suffix in certain internal function // names. Because of this, the override macros must follow certain rules: // // - The macro must be defined before any MC-generated header is included and // must not be undefined or redefined after any MC-generated header is // included. Different translation units (i.e. different .c or .cpp files) // may set the macros to different values, but within a translation unit // (within a single .c or .cpp file), the macro must be set once and not // changed. // - The override must be an object-like macro, not a function-like macro // (i.e. the override macro must not have a parameter list). // - The override macro's value must be a simple identifier, i.e. must be // something that starts with a letter or '_' and contains only letters, // numbers, and '_' characters. // - If the override macro's value is the name of a second object-like macro, // the second object-like macro must follow the same rules. (The override // macro's value can also be the name of a function-like macro, in which // case the function-like macro does not need to follow the same rules.) // // For example, the following will cause compile errors: // // #define MCGEN_EVENTWRITETRANSFER MyNamespace::MyClass::MyFunction // Value has non-identifier characters (colon). // #define MCGEN_EVENTWRITETRANSFER GetEventWriteFunctionPointer(7) // Value has non-identifier characters (parentheses). // #define MCGEN_EVENTWRITETRANSFER(h,e,a,r,c,d) EventWrite(h,e,c,d) // Override is defined as a function-like macro. 
// #define MY_OBJECT_LIKE_MACRO MyNamespace::MyClass::MyEventWriteFunction // #define MCGEN_EVENTWRITETRANSFER MY_OBJECT_LIKE_MACRO // Evaluates to something with non-identifier characters (colon). // // The following would be ok: // // #define MCGEN_EVENTWRITETRANSFER MyEventWriteFunction1 // OK, suffix will be "MyEventWriteFunction1". // #define MY_OBJECT_LIKE_MACRO MyEventWriteFunction2 // #define MCGEN_EVENTWRITETRANSFER MY_OBJECT_LIKE_MACRO // OK, suffix will be "MyEventWriteFunction2". // #define MY_FUNCTION_LIKE_MACRO(h,e,a,r,c,d) MyNamespace::MyClass::MyEventWriteFunction3(h,e,c,d) // #define MCGEN_EVENTWRITETRANSFER MY_FUNCTION_LIKE_MACRO // OK, suffix will be "MY_FUNCTION_LIKE_MACRO". // #ifndef MCGEN_EVENTREGISTER #if MCGEN_USE_KERNEL_MODE_APIS #define MCGEN_EVENTREGISTER EtwRegister #else #define MCGEN_EVENTREGISTER EventRegister #endif #endif // MCGEN_EVENTREGISTER #ifndef MCGEN_EVENTUNREGISTER #if MCGEN_USE_KERNEL_MODE_APIS #define MCGEN_EVENTUNREGISTER EtwUnregister #else #define MCGEN_EVENTUNREGISTER EventUnregister #endif #endif // MCGEN_EVENTUNREGISTER #ifndef MCGEN_EVENTSETINFORMATION #if MCGEN_USE_KERNEL_MODE_APIS #define MCGEN_EVENTSETINFORMATION EtwSetInformation #else #define MCGEN_EVENTSETINFORMATION EventSetInformation #endif #endif // MCGEN_EVENTSETINFORMATION #ifndef MCGEN_EVENTWRITETRANSFER #if MCGEN_USE_KERNEL_MODE_APIS #define MCGEN_EVENTWRITETRANSFER EtwWriteTransfer #else #define MCGEN_EVENTWRITETRANSFER EventWriteTransfer #endif #endif // MCGEN_EVENTWRITETRANSFER // // MCGEN_EVENT_ENABLED macro: // Override to control how the EventWrite[EventName] macros determine whether // an event is enabled. The default behavior is for EventWrite[EventName] to // use the EventEnabled[EventName] macros. 
// #ifndef MCGEN_EVENT_ENABLED #define MCGEN_EVENT_ENABLED(EventName) EventEnabled##EventName() #endif // // MCGEN_EVENT_ENABLED_FORCONTEXT macro: // Override to control how the EventWrite[EventName]_ForContext macros // determine whether an event is enabled. The default behavior is for // EventWrite[EventName]_ForContext to use the // EventEnabled[EventName]_ForContext macros. // #ifndef MCGEN_EVENT_ENABLED_FORCONTEXT #define MCGEN_EVENT_ENABLED_FORCONTEXT(pContext, EventName) EventEnabled##EventName##_ForContext(pContext) #endif // // MCGEN_ENABLE_CHECK macro: // Determines whether the specified event would be considered as enabled // based on the state of the specified context. Slightly faster than calling // McGenEventEnabled directly. // #ifndef MCGEN_ENABLE_CHECK #define MCGEN_ENABLE_CHECK(Context, Descriptor) (Context.IsEnabled && McGenEventEnabled(&Context, &Descriptor)) #endif #if !defined(MCGEN_TRACE_CONTEXT_DEF) #define MCGEN_TRACE_CONTEXT_DEF // This structure is for use by MC-generated code and should not be used directly. typedef struct _MCGEN_TRACE_CONTEXT { TRACEHANDLE RegistrationHandle; TRACEHANDLE Logger; // Used as pointer to provider traits. ULONGLONG MatchAnyKeyword; ULONGLONG MatchAllKeyword; ULONG Flags; ULONG IsEnabled; UCHAR Level; UCHAR Reserve; USHORT EnableBitsCount; PULONG EnableBitMask; const ULONGLONG* EnableKeyWords; const UCHAR* EnableLevel; } MCGEN_TRACE_CONTEXT, *PMCGEN_TRACE_CONTEXT; #endif // MCGEN_TRACE_CONTEXT_DEF #if !defined(MCGEN_LEVEL_KEYWORD_ENABLED_DEF) #define MCGEN_LEVEL_KEYWORD_ENABLED_DEF // // Determines whether an event with a given Level and Keyword would be // considered as enabled based on the state of the specified context. // Note that you may want to use MCGEN_ENABLE_CHECK instead of calling this // function directly. 
// FORCEINLINE BOOLEAN McGenLevelKeywordEnabled( _In_ PMCGEN_TRACE_CONTEXT EnableInfo, _In_ UCHAR Level, _In_ ULONGLONG Keyword ) { // // Check if the event Level is lower than the level at which // the channel is enabled. // If the event Level is 0 or the channel is enabled at level 0, // all levels are enabled. // if ((Level <= EnableInfo->Level) || // This also covers the case of Level == 0. (EnableInfo->Level == 0)) { // // Check if Keyword is enabled // if ((Keyword == (ULONGLONG)0) || ((Keyword & EnableInfo->MatchAnyKeyword) && ((Keyword & EnableInfo->MatchAllKeyword) == EnableInfo->MatchAllKeyword))) { return TRUE; } } return FALSE; } #endif // MCGEN_LEVEL_KEYWORD_ENABLED_DEF #if !defined(MCGEN_EVENT_ENABLED_DEF) #define MCGEN_EVENT_ENABLED_DEF // // Determines whether the specified event would be considered as enabled based // on the state of the specified context. Note that you may want to use // MCGEN_ENABLE_CHECK instead of calling this function directly. // FORCEINLINE BOOLEAN McGenEventEnabled( _In_ PMCGEN_TRACE_CONTEXT EnableInfo, _In_ PCEVENT_DESCRIPTOR EventDescriptor ) { return McGenLevelKeywordEnabled(EnableInfo, EventDescriptor->Level, EventDescriptor->Keyword); } #endif // MCGEN_EVENT_ENABLED_DEF #if !defined(MCGEN_CONTROL_CALLBACK) #define MCGEN_CONTROL_CALLBACK // This function is for use by MC-generated code and should not be used directly. DECLSPEC_NOINLINE __inline VOID __stdcall McGenControlCallbackV2( _In_ LPCGUID SourceId, _In_ ULONG ControlCode, _In_ UCHAR Level, _In_ ULONGLONG MatchAnyKeyword, _In_ ULONGLONG MatchAllKeyword, _In_opt_ PEVENT_FILTER_DESCRIPTOR FilterData, _Inout_opt_ PVOID CallbackContext ) /*++ Routine Description: This is the notification callback for Windows Vista and later. Arguments: SourceId - The GUID that identifies the session that enabled the provider. ControlCode - The parameter indicates whether the provider is being enabled or disabled. Level - The level at which the event is enabled. 
MatchAnyKeyword - The bitmask of keywords that the provider uses to determine the category of events that it writes. MatchAllKeyword - This bitmask additionally restricts the category of events that the provider writes. FilterData - The provider-defined data. CallbackContext - The context of the callback that is defined when the provider called EtwRegister to register itself. Remarks: ETW calls this function to notify provider of enable/disable --*/ { PMCGEN_TRACE_CONTEXT Ctx = (PMCGEN_TRACE_CONTEXT)CallbackContext; ULONG Ix; #ifndef MCGEN_PRIVATE_ENABLE_CALLBACK_V2 UNREFERENCED_PARAMETER(SourceId); UNREFERENCED_PARAMETER(FilterData); #endif if (Ctx == NULL) { return; } switch (ControlCode) { case EVENT_CONTROL_CODE_ENABLE_PROVIDER: Ctx->Level = Level; Ctx->MatchAnyKeyword = MatchAnyKeyword; Ctx->MatchAllKeyword = MatchAllKeyword; Ctx->IsEnabled = EVENT_CONTROL_CODE_ENABLE_PROVIDER; for (Ix = 0; Ix < Ctx->EnableBitsCount; Ix += 1) { if (McGenLevelKeywordEnabled(Ctx, Ctx->EnableLevel[Ix], Ctx->EnableKeyWords[Ix]) != FALSE) { Ctx->EnableBitMask[Ix >> 5] |= (1 << (Ix % 32)); } else { Ctx->EnableBitMask[Ix >> 5] &= ~(1 << (Ix % 32)); } } break; case EVENT_CONTROL_CODE_DISABLE_PROVIDER: Ctx->IsEnabled = EVENT_CONTROL_CODE_DISABLE_PROVIDER; Ctx->Level = 0; Ctx->MatchAnyKeyword = 0; Ctx->MatchAllKeyword = 0; if (Ctx->EnableBitsCount > 0) { #pragma warning(suppress: 26451) // Arithmetic overflow cannot occur, no matter the value of EnableBitCount RtlZeroMemory(Ctx->EnableBitMask, (((Ctx->EnableBitsCount - 1) / 32) + 1) * sizeof(ULONG)); } break; default: break; } #ifdef MCGEN_PRIVATE_ENABLE_CALLBACK_V2 // // Call user defined callback // MCGEN_PRIVATE_ENABLE_CALLBACK_V2( SourceId, ControlCode, Level, MatchAnyKeyword, MatchAllKeyword, FilterData, CallbackContext ); #endif // MCGEN_PRIVATE_ENABLE_CALLBACK_V2 return; } #endif // MCGEN_CONTROL_CALLBACK #ifndef _mcgen_PENABLECALLBACK #if MCGEN_USE_KERNEL_MODE_APIS #define _mcgen_PENABLECALLBACK PETWENABLECALLBACK #else #define 
_mcgen_PENABLECALLBACK PENABLECALLBACK #endif #endif // _mcgen_PENABLECALLBACK #if !defined(_mcgen_PASTE2) // This macro is for use by MC-generated code and should not be used directly. #define _mcgen_PASTE2(a, b) _mcgen_PASTE2_imp(a, b) #define _mcgen_PASTE2_imp(a, b) a##b #endif // _mcgen_PASTE2 #if !defined(_mcgen_PASTE3) // This macro is for use by MC-generated code and should not be used directly. #define _mcgen_PASTE3(a, b, c) _mcgen_PASTE3_imp(a, b, c) #define _mcgen_PASTE3_imp(a, b, c) a##b##_##c #endif // _mcgen_PASTE3 // // Macro validation // // Validate MCGEN_EVENTREGISTER: // Trigger an error if MCGEN_EVENTREGISTER is not an unqualified (simple) identifier: struct _mcgen_PASTE2(MCGEN_EVENTREGISTER_definition_must_be_an_unqualified_identifier_, MCGEN_EVENTREGISTER); // Trigger an error if MCGEN_EVENTREGISTER is redefined: typedef struct _mcgen_PASTE2(MCGEN_EVENTREGISTER_definition_must_be_an_unqualified_identifier_, MCGEN_EVENTREGISTER) MCGEN_EVENTREGISTER_must_not_be_redefined_between_headers; // Trigger an error if MCGEN_EVENTREGISTER is defined as a function-like macro: typedef void MCGEN_EVENTREGISTER_must_not_be_a_functionLike_macro_MCGEN_EVENTREGISTER; typedef int _mcgen_PASTE2(MCGEN_EVENTREGISTER_must_not_be_a_functionLike_macro_, MCGEN_EVENTREGISTER); // Validate MCGEN_EVENTUNREGISTER: // Trigger an error if MCGEN_EVENTUNREGISTER is not an unqualified (simple) identifier: struct _mcgen_PASTE2(MCGEN_EVENTUNREGISTER_definition_must_be_an_unqualified_identifier_, MCGEN_EVENTUNREGISTER); // Trigger an error if MCGEN_EVENTUNREGISTER is redefined: typedef struct _mcgen_PASTE2(MCGEN_EVENTUNREGISTER_definition_must_be_an_unqualified_identifier_, MCGEN_EVENTUNREGISTER) MCGEN_EVENTUNREGISTER_must_not_be_redefined_between_headers; // Trigger an error if MCGEN_EVENTUNREGISTER is defined as a function-like macro: typedef void MCGEN_EVENTUNREGISTER_must_not_be_a_functionLike_macro_MCGEN_EVENTUNREGISTER; typedef int 
_mcgen_PASTE2(MCGEN_EVENTUNREGISTER_must_not_be_a_functionLike_macro_, MCGEN_EVENTUNREGISTER); // Validate MCGEN_EVENTSETINFORMATION: // Trigger an error if MCGEN_EVENTSETINFORMATION is not an unqualified (simple) identifier: struct _mcgen_PASTE2(MCGEN_EVENTSETINFORMATION_definition_must_be_an_unqualified_identifier_, MCGEN_EVENTSETINFORMATION); // Trigger an error if MCGEN_EVENTSETINFORMATION is redefined: typedef struct _mcgen_PASTE2(MCGEN_EVENTSETINFORMATION_definition_must_be_an_unqualified_identifier_, MCGEN_EVENTSETINFORMATION) MCGEN_EVENTSETINFORMATION_must_not_be_redefined_between_headers; // Trigger an error if MCGEN_EVENTSETINFORMATION is defined as a function-like macro: typedef void MCGEN_EVENTSETINFORMATION_must_not_be_a_functionLike_macro_MCGEN_EVENTSETINFORMATION; typedef int _mcgen_PASTE2(MCGEN_EVENTSETINFORMATION_must_not_be_a_functionLike_macro_, MCGEN_EVENTSETINFORMATION); // Validate MCGEN_EVENTWRITETRANSFER: // Trigger an error if MCGEN_EVENTWRITETRANSFER is not an unqualified (simple) identifier: struct _mcgen_PASTE2(MCGEN_EVENTWRITETRANSFER_definition_must_be_an_unqualified_identifier_, MCGEN_EVENTWRITETRANSFER); // Trigger an error if MCGEN_EVENTWRITETRANSFER is redefined: typedef struct _mcgen_PASTE2(MCGEN_EVENTWRITETRANSFER_definition_must_be_an_unqualified_identifier_, MCGEN_EVENTWRITETRANSFER) MCGEN_EVENTWRITETRANSFER_must_not_be_redefined_between_headers;; // Trigger an error if MCGEN_EVENTWRITETRANSFER is defined as a function-like macro: typedef void MCGEN_EVENTWRITETRANSFER_must_not_be_a_functionLike_macro_MCGEN_EVENTWRITETRANSFER; typedef int _mcgen_PASTE2(MCGEN_EVENTWRITETRANSFER_must_not_be_a_functionLike_macro_, MCGEN_EVENTWRITETRANSFER); #ifndef McGenEventWrite_def #define McGenEventWrite_def // This macro is for use by MC-generated code and should not be used directly. 
#define McGenEventWrite _mcgen_PASTE2(McGenEventWrite_, MCGEN_EVENTWRITETRANSFER) // This function is for use by MC-generated code and should not be used directly. DECLSPEC_NOINLINE __inline ULONG __stdcall McGenEventWrite( _In_ PMCGEN_TRACE_CONTEXT Context, _In_ PCEVENT_DESCRIPTOR Descriptor, _In_opt_ LPCGUID ActivityId, _In_range_(1, 128) ULONG EventDataCount, _Pre_cap_(EventDataCount) EVENT_DATA_DESCRIPTOR* EventData ) { const USHORT UNALIGNED* Traits; // Some customized MCGEN_EVENTWRITETRANSFER macros might ignore ActivityId. UNREFERENCED_PARAMETER(ActivityId); Traits = (const USHORT UNALIGNED*)(UINT_PTR)Context->Logger; if (Traits == NULL) { EventData[0].Ptr = 0; EventData[0].Size = 0; EventData[0].Reserved = 0; } else { EventData[0].Ptr = (ULONG_PTR)Traits; EventData[0].Size = *Traits; EventData[0].Reserved = 2; // EVENT_DATA_DESCRIPTOR_TYPE_PROVIDER_METADATA } return MCGEN_EVENTWRITETRANSFER( Context->RegistrationHandle, Descriptor, ActivityId, NULL, EventDataCount, EventData); } #endif // McGenEventWrite_def #if !defined(McGenEventRegisterUnregister) #define McGenEventRegisterUnregister // This macro is for use by MC-generated code and should not be used directly. #define McGenEventRegister _mcgen_PASTE2(McGenEventRegister_, MCGEN_EVENTREGISTER) #pragma warning(push) #pragma warning(disable:6103) // This function is for use by MC-generated code and should not be used directly. DECLSPEC_NOINLINE __inline ULONG __stdcall McGenEventRegister( _In_ LPCGUID ProviderId, _In_opt_ _mcgen_PENABLECALLBACK EnableCallback, _In_opt_ PVOID CallbackContext, _Inout_ PREGHANDLE RegHandle ) /*++ Routine Description: This function registers the provider with ETW. Arguments: ProviderId - Provider ID to register with ETW. EnableCallback - Callback to be used. CallbackContext - Context for the callback. RegHandle - Pointer to registration handle. Remarks: Should not be called if the provider is already registered (i.e. should not be called if *RegHandle != 0). 
Repeatedly registering a provider is a bug and may indicate a race condition. However, for compatibility with previous behavior, this function will return SUCCESS in this case. --*/ { ULONG Error; if (*RegHandle != 0) { Error = 0; // ERROR_SUCCESS } else { Error = MCGEN_EVENTREGISTER(ProviderId, EnableCallback, CallbackContext, RegHandle); } return Error; } #pragma warning(pop) // This macro is for use by MC-generated code and should not be used directly. #define McGenEventUnregister _mcgen_PASTE2(McGenEventUnregister_, MCGEN_EVENTUNREGISTER) // This function is for use by MC-generated code and should not be used directly. DECLSPEC_NOINLINE __inline ULONG __stdcall McGenEventUnregister(_Inout_ PREGHANDLE RegHandle) /*++ Routine Description: Unregister from ETW and set *RegHandle = 0. Arguments: RegHandle - the pointer to the provider registration handle Remarks: If provider has not been registered (i.e. if *RegHandle == 0), return SUCCESS. It is safe to call McGenEventUnregister even if the call to McGenEventRegister returned an error. --*/ { ULONG Error; if(*RegHandle == 0) { Error = 0; // ERROR_SUCCESS } else { Error = MCGEN_EVENTUNREGISTER(*RegHandle); *RegHandle = (REGHANDLE)0; } return Error; } #endif // McGenEventRegisterUnregister #ifndef _mcgen_EVENT_BIT_SET #if defined(_M_IX86) || defined(_M_X64) // This macro is for use by MC-generated code and should not be used directly. #define _mcgen_EVENT_BIT_SET(EnableBits, BitPosition) ((((const unsigned char*)EnableBits)[BitPosition >> 3] & (1u << (BitPosition & 7))) != 0) #else // CPU type // This macro is for use by MC-generated code and should not be used directly. 
#define _mcgen_EVENT_BIT_SET(EnableBits, BitPosition) ((EnableBits[BitPosition >> 5] & (1u << (BitPosition & 31))) != 0) #endif // CPU type #endif // _mcgen_EVENT_BIT_SET #endif // MCGEN_DISABLE_PROVIDER_CODE_GENERATION //+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ // Provider "microsoft-windows-mimalloc" event count 2 //+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ // Provider GUID = 138f4dbb-ee04-4899-aa0a-572ad4475779 EXTERN_C __declspec(selectany) const GUID ETW_MI_Provider = {0x138f4dbb, 0xee04, 0x4899, {0xaa, 0x0a, 0x57, 0x2a, 0xd4, 0x47, 0x57, 0x79}}; #ifndef ETW_MI_Provider_Traits #define ETW_MI_Provider_Traits NULL #endif // ETW_MI_Provider_Traits // // Event Descriptors // EXTERN_C __declspec(selectany) const EVENT_DESCRIPTOR ETW_MI_ALLOC = {0x64, 0x1, 0x0, 0x4, 0x0, 0x0, 0x0}; #define ETW_MI_ALLOC_value 0x64 EXTERN_C __declspec(selectany) const EVENT_DESCRIPTOR ETW_MI_FREE = {0x65, 0x1, 0x0, 0x4, 0x0, 0x0, 0x0}; #define ETW_MI_FREE_value 0x65 // // MCGEN_DISABLE_PROVIDER_CODE_GENERATION macro: // Define this macro to have the compiler skip the generated functions in this // header. // #ifndef MCGEN_DISABLE_PROVIDER_CODE_GENERATION // // Event Enablement Bits // These variables are for use by MC-generated code and should not be used directly. 
// EXTERN_C __declspec(selectany) DECLSPEC_CACHEALIGN ULONG microsoft_windows_mimallocEnableBits[1]; EXTERN_C __declspec(selectany) const ULONGLONG microsoft_windows_mimallocKeywords[1] = {0x0}; EXTERN_C __declspec(selectany) const unsigned char microsoft_windows_mimallocLevels[1] = {4}; // // Provider context // EXTERN_C __declspec(selectany) MCGEN_TRACE_CONTEXT ETW_MI_Provider_Context = {0, (ULONG_PTR)ETW_MI_Provider_Traits, 0, 0, 0, 0, 0, 0, 1, microsoft_windows_mimallocEnableBits, microsoft_windows_mimallocKeywords, microsoft_windows_mimallocLevels}; // // Provider REGHANDLE // #define microsoft_windows_mimallocHandle (ETW_MI_Provider_Context.RegistrationHandle) // // This macro is set to 0, indicating that the EventWrite[Name] macros do not // have an Activity parameter. This is controlled by the -km and -um options. // #define ETW_MI_Provider_EventWriteActivity 0 // // Register with ETW using the control GUID specified in the manifest. // Invoke this macro during module initialization (i.e. program startup, // DLL process attach, or driver load) to initialize the provider. // Note that if this function returns an error, the error means that // will not work, but no action needs to be taken -- even if EventRegister // returns an error, it is generally safe to use EventWrite and // EventUnregister macros (they will be no-ops if EventRegister failed). // #ifndef EventRegistermicrosoft_windows_mimalloc #define EventRegistermicrosoft_windows_mimalloc() McGenEventRegister(&ETW_MI_Provider, McGenControlCallbackV2, &ETW_MI_Provider_Context, µsoft_windows_mimallocHandle) #endif // // Register with ETW using a specific control GUID (i.e. a GUID other than what // is specified in the manifest). Advanced scenarios only. 
// #ifndef EventRegisterByGuidmicrosoft_windows_mimalloc #define EventRegisterByGuidmicrosoft_windows_mimalloc(Guid) McGenEventRegister(&(Guid), McGenControlCallbackV2, &ETW_MI_Provider_Context, µsoft_windows_mimallocHandle) #endif // // Unregister with ETW and close the provider. // Invoke this macro during module shutdown (i.e. program exit, DLL process // detach, or driver unload) to unregister the provider. // Note that you MUST call EventUnregister before DLL or driver unload // (not optional): failure to unregister a provider before DLL or driver unload // will result in crashes. // #ifndef EventUnregistermicrosoft_windows_mimalloc #define EventUnregistermicrosoft_windows_mimalloc() McGenEventUnregister(µsoft_windows_mimallocHandle) #endif // // MCGEN_ENABLE_FORCONTEXT_CODE_GENERATION macro: // Define this macro to enable support for caller-allocated provider context. // #ifdef MCGEN_ENABLE_FORCONTEXT_CODE_GENERATION // // Advanced scenarios: Caller-allocated provider context. // Use when multiple differently-configured provider handles are needed, // e.g. for container-aware drivers, one context per container. // // Usage: // // - Caller enables the feature before including this header, e.g. // #define MCGEN_ENABLE_FORCONTEXT_CODE_GENERATION 1 // - Caller allocates memory, e.g. pContext = malloc(sizeof(McGenContext_microsoft_windows_mimalloc)); // - Caller registers the provider, e.g. EventRegistermicrosoft_windows_mimalloc_ForContext(pContext); // - Caller writes events, e.g. EventWriteMyEvent_ForContext(pContext, ...); // - Caller unregisters, e.g. EventUnregistermicrosoft_windows_mimalloc_ForContext(pContext); // - Caller frees memory, e.g. free(pContext); // typedef struct tagMcGenContext_microsoft_windows_mimalloc { // The fields of this structure are subject to change and should // not be accessed directly. To access the provider's REGHANDLE, // use microsoft_windows_mimallocHandle_ForContext(pContext). 
MCGEN_TRACE_CONTEXT Context; ULONG EnableBits[1]; } McGenContext_microsoft_windows_mimalloc; #define EventRegistermicrosoft_windows_mimalloc_ForContext(pContext) _mcgen_PASTE2(_mcgen_RegisterForContext_microsoft_windows_mimalloc_, MCGEN_EVENTREGISTER)(&ETW_MI_Provider, pContext) #define EventRegisterByGuidmicrosoft_windows_mimalloc_ForContext(Guid, pContext) _mcgen_PASTE2(_mcgen_RegisterForContext_microsoft_windows_mimalloc_, MCGEN_EVENTREGISTER)(&(Guid), pContext) #define EventUnregistermicrosoft_windows_mimalloc_ForContext(pContext) McGenEventUnregister(&(pContext)->Context.RegistrationHandle) // // Provider REGHANDLE for caller-allocated context. // #define microsoft_windows_mimallocHandle_ForContext(pContext) ((pContext)->Context.RegistrationHandle) // This function is for use by MC-generated code and should not be used directly. // Initialize and register the caller-allocated context. __inline ULONG __stdcall _mcgen_PASTE2(_mcgen_RegisterForContext_microsoft_windows_mimalloc_, MCGEN_EVENTREGISTER)( _In_ LPCGUID pProviderId, _Out_ McGenContext_microsoft_windows_mimalloc* pContext) { RtlZeroMemory(pContext, sizeof(*pContext)); pContext->Context.Logger = (ULONG_PTR)ETW_MI_Provider_Traits; pContext->Context.EnableBitsCount = 1; pContext->Context.EnableBitMask = pContext->EnableBits; pContext->Context.EnableKeyWords = microsoft_windows_mimallocKeywords; pContext->Context.EnableLevel = microsoft_windows_mimallocLevels; return McGenEventRegister( pProviderId, McGenControlCallbackV2, &pContext->Context, &pContext->Context.RegistrationHandle); } // This function is for use by MC-generated code and should not be used directly. // Trigger a compile error if called with the wrong parameter type. 
FORCEINLINE _Ret_ McGenContext_microsoft_windows_mimalloc* _mcgen_CheckContextType_microsoft_windows_mimalloc(_In_ McGenContext_microsoft_windows_mimalloc* pContext) { return pContext; } #endif // MCGEN_ENABLE_FORCONTEXT_CODE_GENERATION // // Enablement check macro for event "ETW_MI_ALLOC" // #define EventEnabledETW_MI_ALLOC() _mcgen_EVENT_BIT_SET(microsoft_windows_mimallocEnableBits, 0) #define EventEnabledETW_MI_ALLOC_ForContext(pContext) _mcgen_EVENT_BIT_SET(_mcgen_CheckContextType_microsoft_windows_mimalloc(pContext)->EnableBits, 0) // // Event write macros for event "ETW_MI_ALLOC" // #define EventWriteETW_MI_ALLOC(Address, Size) \ MCGEN_EVENT_ENABLED(ETW_MI_ALLOC) \ ? _mcgen_TEMPLATE_FOR_ETW_MI_ALLOC(&ETW_MI_Provider_Context, &ETW_MI_ALLOC, Address, Size) : 0 #define EventWriteETW_MI_ALLOC_AssumeEnabled(Address, Size) \ _mcgen_TEMPLATE_FOR_ETW_MI_ALLOC(&ETW_MI_Provider_Context, &ETW_MI_ALLOC, Address, Size) #define EventWriteETW_MI_ALLOC_ForContext(pContext, Address, Size) \ MCGEN_EVENT_ENABLED_FORCONTEXT(pContext, ETW_MI_ALLOC) \ ? _mcgen_TEMPLATE_FOR_ETW_MI_ALLOC(&(pContext)->Context, &ETW_MI_ALLOC, Address, Size) : 0 #define EventWriteETW_MI_ALLOC_ForContextAssumeEnabled(pContext, Address, Size) \ _mcgen_TEMPLATE_FOR_ETW_MI_ALLOC(&_mcgen_CheckContextType_microsoft_windows_mimalloc(pContext)->Context, &ETW_MI_ALLOC, Address, Size) // This macro is for use by MC-generated code and should not be used directly. #define _mcgen_TEMPLATE_FOR_ETW_MI_ALLOC _mcgen_PASTE2(McTemplateU0xx_, MCGEN_EVENTWRITETRANSFER) // // Enablement check macro for event "ETW_MI_FREE" // #define EventEnabledETW_MI_FREE() _mcgen_EVENT_BIT_SET(microsoft_windows_mimallocEnableBits, 0) #define EventEnabledETW_MI_FREE_ForContext(pContext) _mcgen_EVENT_BIT_SET(_mcgen_CheckContextType_microsoft_windows_mimalloc(pContext)->EnableBits, 0) // // Event write macros for event "ETW_MI_FREE" // #define EventWriteETW_MI_FREE(Address, Size) \ MCGEN_EVENT_ENABLED(ETW_MI_FREE) \ ? 
_mcgen_TEMPLATE_FOR_ETW_MI_FREE(&ETW_MI_Provider_Context, &ETW_MI_FREE, Address, Size) : 0 #define EventWriteETW_MI_FREE_AssumeEnabled(Address, Size) \ _mcgen_TEMPLATE_FOR_ETW_MI_FREE(&ETW_MI_Provider_Context, &ETW_MI_FREE, Address, Size) #define EventWriteETW_MI_FREE_ForContext(pContext, Address, Size) \ MCGEN_EVENT_ENABLED_FORCONTEXT(pContext, ETW_MI_FREE) \ ? _mcgen_TEMPLATE_FOR_ETW_MI_FREE(&(pContext)->Context, &ETW_MI_FREE, Address, Size) : 0 #define EventWriteETW_MI_FREE_ForContextAssumeEnabled(pContext, Address, Size) \ _mcgen_TEMPLATE_FOR_ETW_MI_FREE(&_mcgen_CheckContextType_microsoft_windows_mimalloc(pContext)->Context, &ETW_MI_FREE, Address, Size) // This macro is for use by MC-generated code and should not be used directly. #define _mcgen_TEMPLATE_FOR_ETW_MI_FREE _mcgen_PASTE2(McTemplateU0xx_, MCGEN_EVENTWRITETRANSFER) #endif // MCGEN_DISABLE_PROVIDER_CODE_GENERATION // // MCGEN_DISABLE_PROVIDER_CODE_GENERATION macro: // Define this macro to have the compiler skip the generated functions in this // header. // #ifndef MCGEN_DISABLE_PROVIDER_CODE_GENERATION // // Template Functions // // // Function for template "ETW_CUSTOM_HEAP_ALLOC_DATA" (and possibly others). // This function is for use by MC-generated code and should not be used directly. 
// #ifndef McTemplateU0xx_def #define McTemplateU0xx_def ETW_INLINE ULONG _mcgen_PASTE2(McTemplateU0xx_, MCGEN_EVENTWRITETRANSFER)( _In_ PMCGEN_TRACE_CONTEXT Context, _In_ PCEVENT_DESCRIPTOR Descriptor, _In_ const unsigned __int64 _Arg0, _In_ const unsigned __int64 _Arg1 ) { #define McTemplateU0xx_ARGCOUNT 2 EVENT_DATA_DESCRIPTOR EventData[McTemplateU0xx_ARGCOUNT + 1]; EventDataDescCreate(&EventData[1],&_Arg0, sizeof(const unsigned __int64) ); EventDataDescCreate(&EventData[2],&_Arg1, sizeof(const unsigned __int64) ); return McGenEventWrite(Context, Descriptor, NULL, McTemplateU0xx_ARGCOUNT + 1, EventData); } #endif // McTemplateU0xx_def #endif // MCGEN_DISABLE_PROVIDER_CODE_GENERATION #if defined(__cplusplus) } #endif luametatex-2.10.08/source/libraries/mimalloc/src/prim/windows/etw.man000066400000000000000000000075261442250314700256440ustar00rootroot00000000000000<?xml version="1.0" encoding="UTF-16"?> <!-- under visual studio command prompt, run: mc .\etw.man --> <instrumentationManifest xsi:schemaLocation="http://schemas.microsoft.com/win/2004/08/events eventman.xsd" xmlns="http://schemas.microsoft.com/win/2004/08/events" xmlns:win="http://manifests.microsoft.com/win/2004/08/windows/events" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:trace="http://schemas.microsoft.com/win/2004/08/events/trace"> <instrumentation> <events> <provider name="microsoft-windows-mimalloc" guid="{138f4dbb-ee04-4899-aa0a-572ad4475779}" symbol="ETW_MI_Provider" resourceFileName="dummy" messageFileName="dummy"> <events> <event symbol="ETW_MI_ALLOC" value="100" version="1" level="win:Informational" template="ETW_CUSTOM_HEAP_ALLOC_DATA"> </event> <event symbol="ETW_MI_FREE" value="101" version="1" level="win:Informational" template="ETW_CUSTOM_HEAP_FREE_DATA"> </event> </events> <templates> <template tid="ETW_CUSTOM_HEAP_ALLOC_DATA"> <data name="Address" inType="win:UInt64" outType="xs:unsignedLong"> </data> <data name="Size" 
inType="win:UInt64" outType="xs:unsignedLong"> </data> </template> <template tid="ETW_CUSTOM_HEAP_FREE_DATA"> <data name="Address" inType="win:UInt64" outType="xs:unsignedLong"> </data> <data name="Size" inType="win:UInt64" outType="xs:unsignedLong"> </data> </template> </templates> </provider> </events> </instrumentation> </instrumentationManifest> luametatex-2.10.08/source/libraries/mimalloc/src/prim/windows/prim.c000066400000000000000000000570711442250314700254630ustar00rootroot00000000000000/* ---------------------------------------------------------------------------- Copyright (c) 2018-2023, Microsoft Research, Daan Leijen This is free software; you can redistribute it and/or modify it under the terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. -----------------------------------------------------------------------------*/ // This file is included in `src/prim/prim.c` #include "mimalloc.h" #include "mimalloc/internal.h" #include "mimalloc/atomic.h" #include "mimalloc/prim.h" #include // fputs, stderr //--------------------------------------------- // Dynamically bind Windows API points for portability //--------------------------------------------- // We use VirtualAlloc2 for aligned allocation, but it is only supported on Windows 10 and Windows Server 2016. // So, we need to look it up dynamically to run on older systems. (use __stdcall for 32-bit compatibility) // NtAllocateVirtualAllocEx is used for huge OS page allocation (1GiB) // We define a minimal MEM_EXTENDED_PARAMETER ourselves in order to be able to compile with older SDK's. 
typedef enum MI_MEM_EXTENDED_PARAMETER_TYPE_E { MiMemExtendedParameterInvalidType = 0, MiMemExtendedParameterAddressRequirements, MiMemExtendedParameterNumaNode, MiMemExtendedParameterPartitionHandle, MiMemExtendedParameterUserPhysicalHandle, MiMemExtendedParameterAttributeFlags, MiMemExtendedParameterMax } MI_MEM_EXTENDED_PARAMETER_TYPE; typedef struct DECLSPEC_ALIGN(8) MI_MEM_EXTENDED_PARAMETER_S { struct { DWORD64 Type : 8; DWORD64 Reserved : 56; } Type; union { DWORD64 ULong64; PVOID Pointer; SIZE_T Size; HANDLE Handle; DWORD ULong; } Arg; } MI_MEM_EXTENDED_PARAMETER; typedef struct MI_MEM_ADDRESS_REQUIREMENTS_S { PVOID LowestStartingAddress; PVOID HighestEndingAddress; SIZE_T Alignment; } MI_MEM_ADDRESS_REQUIREMENTS; #define MI_MEM_EXTENDED_PARAMETER_NONPAGED_HUGE 0x00000010 #include typedef PVOID (__stdcall *PVirtualAlloc2)(HANDLE, PVOID, SIZE_T, ULONG, ULONG, MI_MEM_EXTENDED_PARAMETER*, ULONG); typedef NTSTATUS (__stdcall *PNtAllocateVirtualMemoryEx)(HANDLE, PVOID*, SIZE_T*, ULONG, ULONG, MI_MEM_EXTENDED_PARAMETER*, ULONG); static PVirtualAlloc2 pVirtualAlloc2 = NULL; static PNtAllocateVirtualMemoryEx pNtAllocateVirtualMemoryEx = NULL; // Similarly, GetNumaProcesorNodeEx is only supported since Windows 7 typedef struct MI_PROCESSOR_NUMBER_S { WORD Group; BYTE Number; BYTE Reserved; } MI_PROCESSOR_NUMBER; typedef VOID (__stdcall *PGetCurrentProcessorNumberEx)(MI_PROCESSOR_NUMBER* ProcNumber); typedef BOOL (__stdcall *PGetNumaProcessorNodeEx)(MI_PROCESSOR_NUMBER* Processor, PUSHORT NodeNumber); typedef BOOL (__stdcall* PGetNumaNodeProcessorMaskEx)(USHORT Node, PGROUP_AFFINITY ProcessorMask); typedef BOOL (__stdcall *PGetNumaProcessorNode)(UCHAR Processor, PUCHAR NodeNumber); static PGetCurrentProcessorNumberEx pGetCurrentProcessorNumberEx = NULL; static PGetNumaProcessorNodeEx pGetNumaProcessorNodeEx = NULL; static PGetNumaNodeProcessorMaskEx pGetNumaNodeProcessorMaskEx = NULL; static PGetNumaProcessorNode pGetNumaProcessorNode = NULL; 
//--------------------------------------------- // Enable large page support dynamically (if possible) //--------------------------------------------- static bool win_enable_large_os_pages(size_t* large_page_size) { static bool large_initialized = false; if (large_initialized) return (_mi_os_large_page_size() > 0); large_initialized = true; // Try to see if large OS pages are supported // To use large pages on Windows, we first need access permission // Set "Lock pages in memory" permission in the group policy editor // unsigned long err = 0; HANDLE token = NULL; BOOL ok = OpenProcessToken(GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &token); if (ok) { TOKEN_PRIVILEGES tp; ok = LookupPrivilegeValue(NULL, TEXT("SeLockMemoryPrivilege"), &tp.Privileges[0].Luid); if (ok) { tp.PrivilegeCount = 1; tp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED; ok = AdjustTokenPrivileges(token, FALSE, &tp, 0, (PTOKEN_PRIVILEGES)NULL, 0); if (ok) { err = GetLastError(); ok = (err == ERROR_SUCCESS); if (ok && large_page_size != NULL) { *large_page_size = GetLargePageMinimum(); } } } CloseHandle(token); } if (!ok) { if (err == 0) err = GetLastError(); _mi_warning_message("cannot enable large OS page support, error %lu\n", err); } return (ok!=0); } //--------------------------------------------- // Initialize //--------------------------------------------- void _mi_prim_mem_init( mi_os_mem_config_t* config ) { config->has_overcommit = false; config->must_free_whole = true; config->has_virtual_reserve = true; // get the page size SYSTEM_INFO si; GetSystemInfo(&si); if (si.dwPageSize > 0) { config->page_size = si.dwPageSize; } if (si.dwAllocationGranularity > 0) { config->alloc_granularity = si.dwAllocationGranularity; } // get the VirtualAlloc2 function HINSTANCE hDll; hDll = LoadLibrary(TEXT("kernelbase.dll")); if (hDll != NULL) { // use VirtualAlloc2FromApp if possible as it is available to Windows store apps pVirtualAlloc2 = (PVirtualAlloc2)(void 
(*)(void))GetProcAddress(hDll, "VirtualAlloc2FromApp"); if (pVirtualAlloc2==NULL) pVirtualAlloc2 = (PVirtualAlloc2)(void (*)(void))GetProcAddress(hDll, "VirtualAlloc2"); FreeLibrary(hDll); } // NtAllocateVirtualMemoryEx is used for huge page allocation hDll = LoadLibrary(TEXT("ntdll.dll")); if (hDll != NULL) { pNtAllocateVirtualMemoryEx = (PNtAllocateVirtualMemoryEx)(void (*)(void))GetProcAddress(hDll, "NtAllocateVirtualMemoryEx"); FreeLibrary(hDll); } // Try to use Win7+ numa API hDll = LoadLibrary(TEXT("kernel32.dll")); if (hDll != NULL) { pGetCurrentProcessorNumberEx = (PGetCurrentProcessorNumberEx)(void (*)(void))GetProcAddress(hDll, "GetCurrentProcessorNumberEx"); pGetNumaProcessorNodeEx = (PGetNumaProcessorNodeEx)(void (*)(void))GetProcAddress(hDll, "GetNumaProcessorNodeEx"); pGetNumaNodeProcessorMaskEx = (PGetNumaNodeProcessorMaskEx)(void (*)(void))GetProcAddress(hDll, "GetNumaNodeProcessorMaskEx"); pGetNumaProcessorNode = (PGetNumaProcessorNode)(void (*)(void))GetProcAddress(hDll, "GetNumaProcessorNode"); FreeLibrary(hDll); } if (mi_option_is_enabled(mi_option_allow_large_os_pages) || mi_option_is_enabled(mi_option_reserve_huge_os_pages)) { win_enable_large_os_pages(&config->large_page_size); } } //--------------------------------------------- // Free //--------------------------------------------- int _mi_prim_free(void* addr, size_t size ) { MI_UNUSED(size); DWORD errcode = 0; bool err = (VirtualFree(addr, 0, MEM_RELEASE) == 0); if (err) { errcode = GetLastError(); } if (errcode == ERROR_INVALID_ADDRESS) { // In mi_os_mem_alloc_aligned the fallback path may have returned a pointer inside // the memory region returned by VirtualAlloc; in that case we need to free using // the start of the region. 
MEMORY_BASIC_INFORMATION info = { 0 }; VirtualQuery(addr, &info, sizeof(info)); if (info.AllocationBase < addr && ((uint8_t*)addr - (uint8_t*)info.AllocationBase) < (ptrdiff_t)MI_SEGMENT_SIZE) { errcode = 0; err = (VirtualFree(info.AllocationBase, 0, MEM_RELEASE) == 0); if (err) { errcode = GetLastError(); } } } return (int)errcode; } //--------------------------------------------- // VirtualAlloc //--------------------------------------------- static void* win_virtual_alloc_prim(void* addr, size_t size, size_t try_alignment, DWORD flags) { #if (MI_INTPTR_SIZE >= 8) // on 64-bit systems, try to use the virtual address area after 2TiB for 4MiB aligned allocations if (addr == NULL) { void* hint = _mi_os_get_aligned_hint(try_alignment,size); if (hint != NULL) { void* p = VirtualAlloc(hint, size, flags, PAGE_READWRITE); if (p != NULL) return p; _mi_verbose_message("warning: unable to allocate hinted aligned OS memory (%zu bytes, error code: 0x%x, address: %p, alignment: %zu, flags: 0x%x)\n", size, GetLastError(), hint, try_alignment, flags); // fall through on error } } #endif // on modern Windows try use VirtualAlloc2 for aligned allocation if (try_alignment > 1 && (try_alignment % _mi_os_page_size()) == 0 && pVirtualAlloc2 != NULL) { MI_MEM_ADDRESS_REQUIREMENTS reqs = { 0, 0, 0 }; reqs.Alignment = try_alignment; MI_MEM_EXTENDED_PARAMETER param = { {0, 0}, {0} }; param.Type.Type = MiMemExtendedParameterAddressRequirements; param.Arg.Pointer = &reqs; void* p = (*pVirtualAlloc2)(GetCurrentProcess(), addr, size, flags, PAGE_READWRITE, ¶m, 1); if (p != NULL) return p; _mi_warning_message("unable to allocate aligned OS memory (%zu bytes, error code: 0x%x, address: %p, alignment: %zu, flags: 0x%x)\n", size, GetLastError(), addr, try_alignment, flags); // fall through on error } // last resort return VirtualAlloc(addr, size, flags, PAGE_READWRITE); } static void* win_virtual_alloc(void* addr, size_t size, size_t try_alignment, DWORD flags, bool large_only, bool allow_large, 
bool* is_large) { mi_assert_internal(!(large_only && !allow_large)); static _Atomic(size_t) large_page_try_ok; // = 0; void* p = NULL; // Try to allocate large OS pages (2MiB) if allowed or required. if ((large_only || _mi_os_use_large_page(size, try_alignment)) && allow_large && (flags&MEM_COMMIT)!=0 && (flags&MEM_RESERVE)!=0) { size_t try_ok = mi_atomic_load_acquire(&large_page_try_ok); if (!large_only && try_ok > 0) { // if a large page allocation fails, it seems the calls to VirtualAlloc get very expensive. // therefore, once a large page allocation failed, we don't try again for `large_page_try_ok` times. mi_atomic_cas_strong_acq_rel(&large_page_try_ok, &try_ok, try_ok - 1); } else { // large OS pages must always reserve and commit. *is_large = true; p = win_virtual_alloc_prim(addr, size, try_alignment, flags | MEM_LARGE_PAGES); if (large_only) return p; // fall back to non-large page allocation on error (`p == NULL`). if (p == NULL) { mi_atomic_store_release(&large_page_try_ok,10UL); // on error, don't try again for the next N allocations } } } // Fall back to regular page allocation if (p == NULL) { *is_large = ((flags&MEM_LARGE_PAGES) != 0); p = win_virtual_alloc_prim(addr, size, try_alignment, flags); } //if (p == NULL) { _mi_warning_message("unable to allocate OS memory (%zu bytes, error code: 0x%x, address: %p, alignment: %zu, flags: 0x%x, large only: %d, allow large: %d)\n", size, GetLastError(), addr, try_alignment, flags, large_only, allow_large); } return p; } int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, void** addr) { mi_assert_internal(size > 0 && (size % _mi_os_page_size()) == 0); mi_assert_internal(commit || !allow_large); mi_assert_internal(try_alignment > 0); *is_zero = true; int flags = MEM_RESERVE; if (commit) { flags |= MEM_COMMIT; } *addr = win_virtual_alloc(NULL, size, try_alignment, flags, false, allow_large, is_large); return (*addr != NULL ? 
0 : (int)GetLastError()); } //--------------------------------------------- // Commit/Reset/Protect //--------------------------------------------- #ifdef _MSC_VER #pragma warning(disable:6250) // suppress warning calling VirtualFree without MEM_RELEASE (for decommit) #endif int _mi_prim_commit(void* addr, size_t size, bool* is_zero) { *is_zero = false; /* // zero'ing only happens on an initial commit... but checking upfront seems expensive.. _MEMORY_BASIC_INFORMATION meminfo; _mi_memzero_var(meminfo); if (VirtualQuery(addr, &meminfo, size) > 0) { if ((meminfo.State & MEM_COMMIT) == 0) { *is_zero = true; } } */ // commit void* p = VirtualAlloc(addr, size, MEM_COMMIT, PAGE_READWRITE); if (p == NULL) return (int)GetLastError(); return 0; } int _mi_prim_decommit(void* addr, size_t size, bool* needs_recommit) { BOOL ok = VirtualFree(addr, size, MEM_DECOMMIT); *needs_recommit = true; // for safety, assume always decommitted even in the case of an error. return (ok ? 0 : (int)GetLastError()); } int _mi_prim_reset(void* addr, size_t size) { void* p = VirtualAlloc(addr, size, MEM_RESET, PAGE_READWRITE); mi_assert_internal(p == addr); #if 0 if (p != NULL) { VirtualUnlock(addr,size); // VirtualUnlock after MEM_RESET removes the memory directly from the working set } #endif return (p != NULL ? 0 : (int)GetLastError()); } int _mi_prim_protect(void* addr, size_t size, bool protect) { DWORD oldprotect = 0; BOOL ok = VirtualProtect(addr, size, protect ? PAGE_NOACCESS : PAGE_READWRITE, &oldprotect); return (ok ? 
0 : (int)GetLastError()); } //--------------------------------------------- // Huge page allocation //--------------------------------------------- static void* _mi_prim_alloc_huge_os_pagesx(void* hint_addr, size_t size, int numa_node) { const DWORD flags = MEM_LARGE_PAGES | MEM_COMMIT | MEM_RESERVE; win_enable_large_os_pages(NULL); MI_MEM_EXTENDED_PARAMETER params[3] = { {{0,0},{0}},{{0,0},{0}},{{0,0},{0}} }; // on modern Windows try use NtAllocateVirtualMemoryEx for 1GiB huge pages static bool mi_huge_pages_available = true; if (pNtAllocateVirtualMemoryEx != NULL && mi_huge_pages_available) { params[0].Type.Type = MiMemExtendedParameterAttributeFlags; params[0].Arg.ULong64 = MI_MEM_EXTENDED_PARAMETER_NONPAGED_HUGE; ULONG param_count = 1; if (numa_node >= 0) { param_count++; params[1].Type.Type = MiMemExtendedParameterNumaNode; params[1].Arg.ULong = (unsigned)numa_node; } SIZE_T psize = size; void* base = hint_addr; NTSTATUS err = (*pNtAllocateVirtualMemoryEx)(GetCurrentProcess(), &base, &psize, flags, PAGE_READWRITE, params, param_count); if (err == 0 && base != NULL) { return base; } else { // fall back to regular large pages mi_huge_pages_available = false; // don't try further huge pages _mi_warning_message("unable to allocate using huge (1GiB) pages, trying large (2MiB) pages instead (status 0x%lx)\n", err); } } // on modern Windows try use VirtualAlloc2 for numa aware large OS page allocation if (pVirtualAlloc2 != NULL && numa_node >= 0) { params[0].Type.Type = MiMemExtendedParameterNumaNode; params[0].Arg.ULong = (unsigned)numa_node; return (*pVirtualAlloc2)(GetCurrentProcess(), hint_addr, size, flags, PAGE_READWRITE, params, 1); } // otherwise use regular virtual alloc on older windows return VirtualAlloc(hint_addr, size, flags, PAGE_READWRITE); } int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, bool* is_zero, void** addr) { *is_zero = true; *addr = _mi_prim_alloc_huge_os_pagesx(hint_addr,size,numa_node); return (*addr != NULL 
? 0 : (int)GetLastError()); } //--------------------------------------------- // Numa nodes //--------------------------------------------- size_t _mi_prim_numa_node(void) { USHORT numa_node = 0; if (pGetCurrentProcessorNumberEx != NULL && pGetNumaProcessorNodeEx != NULL) { // Extended API is supported MI_PROCESSOR_NUMBER pnum; (*pGetCurrentProcessorNumberEx)(&pnum); USHORT nnode = 0; BOOL ok = (*pGetNumaProcessorNodeEx)(&pnum, &nnode); if (ok) { numa_node = nnode; } } else if (pGetNumaProcessorNode != NULL) { // Vista or earlier, use older API that is limited to 64 processors. Issue #277 DWORD pnum = GetCurrentProcessorNumber(); UCHAR nnode = 0; BOOL ok = pGetNumaProcessorNode((UCHAR)pnum, &nnode); if (ok) { numa_node = nnode; } } return numa_node; } size_t _mi_prim_numa_node_count(void) { ULONG numa_max = 0; GetNumaHighestNodeNumber(&numa_max); // find the highest node number that has actual processors assigned to it. Issue #282 while(numa_max > 0) { if (pGetNumaNodeProcessorMaskEx != NULL) { // Extended API is supported GROUP_AFFINITY affinity; if ((*pGetNumaNodeProcessorMaskEx)((USHORT)numa_max, &affinity)) { if (affinity.Mask != 0) break; // found the maximum non-empty node } } else { // Vista or earlier, use older API that is limited to 64 processors. 
ULONGLONG mask; if (GetNumaNodeProcessorMask((UCHAR)numa_max, &mask)) { if (mask != 0) break; // found the maximum non-empty node }; } // max node was invalid or had no processor assigned, try again numa_max--; } return ((size_t)numa_max + 1); } //---------------------------------------------------------------- // Clock //---------------------------------------------------------------- static mi_msecs_t mi_to_msecs(LARGE_INTEGER t) { static LARGE_INTEGER mfreq; // = 0 if (mfreq.QuadPart == 0LL) { LARGE_INTEGER f; QueryPerformanceFrequency(&f); mfreq.QuadPart = f.QuadPart/1000LL; if (mfreq.QuadPart == 0) mfreq.QuadPart = 1; } return (mi_msecs_t)(t.QuadPart / mfreq.QuadPart); } mi_msecs_t _mi_prim_clock_now(void) { LARGE_INTEGER t; QueryPerformanceCounter(&t); return mi_to_msecs(t); } //---------------------------------------------------------------- // Process Info //---------------------------------------------------------------- #include #include static mi_msecs_t filetime_msecs(const FILETIME* ftime) { ULARGE_INTEGER i; i.LowPart = ftime->dwLowDateTime; i.HighPart = ftime->dwHighDateTime; mi_msecs_t msecs = (i.QuadPart / 10000); // FILETIME is in 100 nano seconds return msecs; } typedef BOOL (WINAPI *PGetProcessMemoryInfo)(HANDLE, PPROCESS_MEMORY_COUNTERS, DWORD); static PGetProcessMemoryInfo pGetProcessMemoryInfo = NULL; void _mi_prim_process_info(mi_process_info_t* pinfo) { FILETIME ct; FILETIME ut; FILETIME st; FILETIME et; GetProcessTimes(GetCurrentProcess(), &ct, &et, &st, &ut); pinfo->utime = filetime_msecs(&ut); pinfo->stime = filetime_msecs(&st); // load psapi on demand if (pGetProcessMemoryInfo == NULL) { HINSTANCE hDll = LoadLibrary(TEXT("psapi.dll")); if (hDll != NULL) { pGetProcessMemoryInfo = (PGetProcessMemoryInfo)(void (*)(void))GetProcAddress(hDll, "GetProcessMemoryInfo"); } } // get process info PROCESS_MEMORY_COUNTERS info; memset(&info, 0, sizeof(info)); if (pGetProcessMemoryInfo != NULL) { pGetProcessMemoryInfo(GetCurrentProcess(), &info, 
sizeof(info)); } pinfo->current_rss = (size_t)info.WorkingSetSize; pinfo->peak_rss = (size_t)info.PeakWorkingSetSize; pinfo->current_commit = (size_t)info.PagefileUsage; pinfo->peak_commit = (size_t)info.PeakPagefileUsage; pinfo->page_faults = (size_t)info.PageFaultCount; } //---------------------------------------------------------------- // Output //---------------------------------------------------------------- void _mi_prim_out_stderr( const char* msg ) { // on windows with redirection, the C runtime cannot handle locale dependent output // after the main thread closes so we use direct console output. if (!_mi_preloading()) { // _cputs(msg); // _cputs cannot be used at is aborts if it fails to lock the console static HANDLE hcon = INVALID_HANDLE_VALUE; static bool hconIsConsole; if (hcon == INVALID_HANDLE_VALUE) { CONSOLE_SCREEN_BUFFER_INFO sbi; hcon = GetStdHandle(STD_ERROR_HANDLE); hconIsConsole = ((hcon != INVALID_HANDLE_VALUE) && GetConsoleScreenBufferInfo(hcon, &sbi)); } const size_t len = _mi_strlen(msg); if (len > 0 && len < UINT32_MAX) { DWORD written = 0; if (hconIsConsole) { WriteConsoleA(hcon, msg, (DWORD)len, &written, NULL); } else if (hcon != INVALID_HANDLE_VALUE) { // use direct write if stderr was redirected WriteFile(hcon, msg, (DWORD)len, &written, NULL); } else { // finally fall back to fputs after all fputs(msg, stderr); } } } } //---------------------------------------------------------------- // Environment //---------------------------------------------------------------- // On Windows use GetEnvironmentVariable instead of getenv to work // reliably even when this is invoked before the C runtime is initialized. // i.e. when `_mi_preloading() == true`. // Note: on windows, environment names are not case sensitive. 
bool _mi_prim_getenv(const char* name, char* result, size_t result_size) { result[0] = 0; size_t len = GetEnvironmentVariableA(name, result, (DWORD)result_size); return (len > 0 && len < result_size); } //---------------------------------------------------------------- // Random //---------------------------------------------------------------- #if defined(MI_USE_RTLGENRANDOM) // || defined(__cplusplus) // We prefer to use BCryptGenRandom instead of (the unofficial) RtlGenRandom but when using // dynamic overriding, we observed it can raise an exception when compiled with C++, and // sometimes deadlocks when also running under the VS debugger. // In contrast, issue #623 implies that on Windows Server 2019 we need to use BCryptGenRandom. // To be continued.. #pragma comment (lib,"advapi32.lib") #define RtlGenRandom SystemFunction036 mi_decl_externc BOOLEAN NTAPI RtlGenRandom(PVOID RandomBuffer, ULONG RandomBufferLength); bool _mi_prim_random_buf(void* buf, size_t buf_len) { return (RtlGenRandom(buf, (ULONG)buf_len) != 0); } #else #ifndef BCRYPT_USE_SYSTEM_PREFERRED_RNG #define BCRYPT_USE_SYSTEM_PREFERRED_RNG 0x00000002 #endif typedef LONG (NTAPI *PBCryptGenRandom)(HANDLE, PUCHAR, ULONG, ULONG); static PBCryptGenRandom pBCryptGenRandom = NULL; bool _mi_prim_random_buf(void* buf, size_t buf_len) { if (pBCryptGenRandom == NULL) { HINSTANCE hDll = LoadLibrary(TEXT("bcrypt.dll")); if (hDll != NULL) { pBCryptGenRandom = (PBCryptGenRandom)(void (*)(void))GetProcAddress(hDll, "BCryptGenRandom"); } if (pBCryptGenRandom == NULL) return false; } return (pBCryptGenRandom(NULL, (PUCHAR)buf, (ULONG)buf_len, BCRYPT_USE_SYSTEM_PREFERRED_RNG) >= 0); } #endif // MI_USE_RTLGENRANDOM //---------------------------------------------------------------- // Thread init/done //---------------------------------------------------------------- #if !defined(MI_SHARED_LIB) // use thread local storage keys to detect thread ending #include #if (_WIN32_WINNT < 0x600) // before Windows Vista 
WINBASEAPI DWORD WINAPI FlsAlloc( _In_opt_ PFLS_CALLBACK_FUNCTION lpCallback ); WINBASEAPI PVOID WINAPI FlsGetValue( _In_ DWORD dwFlsIndex ); WINBASEAPI BOOL WINAPI FlsSetValue( _In_ DWORD dwFlsIndex, _In_opt_ PVOID lpFlsData ); WINBASEAPI BOOL WINAPI FlsFree(_In_ DWORD dwFlsIndex); #endif static DWORD mi_fls_key = (DWORD)(-1); static void NTAPI mi_fls_done(PVOID value) { mi_heap_t* heap = (mi_heap_t*)value; if (heap != NULL) { _mi_thread_done(heap); FlsSetValue(mi_fls_key, NULL); // prevent recursion as _mi_thread_done may set it back to the main heap, issue #672 } } void _mi_prim_thread_init_auto_done(void) { mi_fls_key = FlsAlloc(&mi_fls_done); } void _mi_prim_thread_done_auto_done(void) { // call thread-done on all threads (except the main thread) to prevent // dangling callback pointer if statically linked with a DLL; Issue #208 FlsFree(mi_fls_key); } void _mi_prim_thread_associate_default_heap(mi_heap_t* heap) { mi_assert_internal(mi_fls_key != (DWORD)(-1)); FlsSetValue(mi_fls_key, heap); } #else // Dll; nothing to do as in that case thread_done is handled through the DLL_THREAD_DETACH event. void _mi_prim_thread_init_auto_done(void) { } void _mi_prim_thread_done_auto_done(void) { } void _mi_prim_thread_associate_default_heap(mi_heap_t* heap) { MI_UNUSED(heap); } #endif luametatex-2.10.08/source/libraries/mimalloc/src/prim/windows/readme.md000066400000000000000000000010441442250314700261140ustar00rootroot00000000000000## Primitives: - `prim.c` contains Windows primitives for OS allocation. ## Event Tracing for Windows (ETW) - `etw.h` is generated from `etw.man` which contains the manifest for mimalloc events. (100 is an allocation, 101 is for a free) - `etw-mimalloc.wprp` is a profile for the Windows Performance Recorder (WPR). 
In an admin prompt, you can use: ``` > wpr -start src\prim\windows\etw-mimalloc.wprp -filemode > > wpr -stop test.etl ``` and then open `test.etl` in the Windows Performance Analyzer (WPA).luametatex-2.10.08/source/libraries/mimalloc/src/random.c000066400000000000000000000214111442250314700233200ustar00rootroot00000000000000/* ---------------------------------------------------------------------------- Copyright (c) 2019-2021, Microsoft Research, Daan Leijen This is free software; you can redistribute it and/or modify it under the terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. -----------------------------------------------------------------------------*/ #include "mimalloc.h" #include "mimalloc/internal.h" #include "mimalloc/prim.h" // _mi_prim_random_buf #include // memset /* ---------------------------------------------------------------------------- We use our own PRNG to keep predictable performance of random number generation and to avoid implementations that use a lock. We only use the OS provided random source to initialize the initial seeds. Since we do not need ultimate performance but we do rely on the security (for secret cookies in secure mode) we use a cryptographically secure generator (chacha20). -----------------------------------------------------------------------------*/ #define MI_CHACHA_ROUNDS (20) // perhaps use 12 for better performance? /* ---------------------------------------------------------------------------- Chacha20 implementation as the original algorithm with a 64-bit nonce and counter: https://en.wikipedia.org/wiki/Salsa20 The input matrix has sixteen 32-bit values: Position 0 to 3: constant key Position 4 to 11: the key Position 12 to 13: the counter. Position 14 to 15: the nonce. The implementation uses regular C code which compiles very well on modern compilers. 
(gcc x64 has no register spills, and clang 6+ uses SSE instructions) -----------------------------------------------------------------------------*/ static inline uint32_t rotl(uint32_t x, uint32_t shift) { return (x << shift) | (x >> (32 - shift)); } static inline void qround(uint32_t x[16], size_t a, size_t b, size_t c, size_t d) { x[a] += x[b]; x[d] = rotl(x[d] ^ x[a], 16); x[c] += x[d]; x[b] = rotl(x[b] ^ x[c], 12); x[a] += x[b]; x[d] = rotl(x[d] ^ x[a], 8); x[c] += x[d]; x[b] = rotl(x[b] ^ x[c], 7); } static void chacha_block(mi_random_ctx_t* ctx) { // scramble into `x` uint32_t x[16]; for (size_t i = 0; i < 16; i++) { x[i] = ctx->input[i]; } for (size_t i = 0; i < MI_CHACHA_ROUNDS; i += 2) { qround(x, 0, 4, 8, 12); qround(x, 1, 5, 9, 13); qround(x, 2, 6, 10, 14); qround(x, 3, 7, 11, 15); qround(x, 0, 5, 10, 15); qround(x, 1, 6, 11, 12); qround(x, 2, 7, 8, 13); qround(x, 3, 4, 9, 14); } // add scrambled data to the initial state for (size_t i = 0; i < 16; i++) { ctx->output[i] = x[i] + ctx->input[i]; } ctx->output_available = 16; // increment the counter for the next round ctx->input[12] += 1; if (ctx->input[12] == 0) { ctx->input[13] += 1; if (ctx->input[13] == 0) { // and keep increasing into the nonce ctx->input[14] += 1; } } } static uint32_t chacha_next32(mi_random_ctx_t* ctx) { if (ctx->output_available <= 0) { chacha_block(ctx); ctx->output_available = 16; // (assign again to suppress static analysis warning) } const uint32_t x = ctx->output[16 - ctx->output_available]; ctx->output[16 - ctx->output_available] = 0; // reset once the data is handed out ctx->output_available--; return x; } static inline uint32_t read32(const uint8_t* p, size_t idx32) { const size_t i = 4*idx32; return ((uint32_t)p[i+0] | (uint32_t)p[i+1] << 8 | (uint32_t)p[i+2] << 16 | (uint32_t)p[i+3] << 24); } static void chacha_init(mi_random_ctx_t* ctx, const uint8_t key[32], uint64_t nonce) { // since we only use chacha for randomness (and not encryption) we // do not _need_ to read 
32-bit values as little endian but we do anyways // just for being compatible :-) memset(ctx, 0, sizeof(*ctx)); for (size_t i = 0; i < 4; i++) { const uint8_t* sigma = (uint8_t*)"expand 32-byte k"; ctx->input[i] = read32(sigma,i); } for (size_t i = 0; i < 8; i++) { ctx->input[i + 4] = read32(key,i); } ctx->input[12] = 0; ctx->input[13] = 0; ctx->input[14] = (uint32_t)nonce; ctx->input[15] = (uint32_t)(nonce >> 32); } static void chacha_split(mi_random_ctx_t* ctx, uint64_t nonce, mi_random_ctx_t* ctx_new) { memset(ctx_new, 0, sizeof(*ctx_new)); _mi_memcpy(ctx_new->input, ctx->input, sizeof(ctx_new->input)); ctx_new->input[12] = 0; ctx_new->input[13] = 0; ctx_new->input[14] = (uint32_t)nonce; ctx_new->input[15] = (uint32_t)(nonce >> 32); mi_assert_internal(ctx->input[14] != ctx_new->input[14] || ctx->input[15] != ctx_new->input[15]); // do not reuse nonces! chacha_block(ctx_new); } /* ---------------------------------------------------------------------------- Random interface -----------------------------------------------------------------------------*/ #if MI_DEBUG>1 static bool mi_random_is_initialized(mi_random_ctx_t* ctx) { return (ctx != NULL && ctx->input[0] != 0); } #endif void _mi_random_split(mi_random_ctx_t* ctx, mi_random_ctx_t* ctx_new) { mi_assert_internal(mi_random_is_initialized(ctx)); mi_assert_internal(ctx != ctx_new); chacha_split(ctx, (uintptr_t)ctx_new /*nonce*/, ctx_new); } uintptr_t _mi_random_next(mi_random_ctx_t* ctx) { mi_assert_internal(mi_random_is_initialized(ctx)); #if MI_INTPTR_SIZE <= 4 return chacha_next32(ctx); #elif MI_INTPTR_SIZE == 8 return (((uintptr_t)chacha_next32(ctx) << 32) | chacha_next32(ctx)); #else # error "define mi_random_next for this platform" #endif } /* ---------------------------------------------------------------------------- To initialize a fresh random context. If we cannot get good randomness, we fall back to weak randomness based on a timer and ASLR. 
-----------------------------------------------------------------------------*/ uintptr_t _mi_os_random_weak(uintptr_t extra_seed) { uintptr_t x = (uintptr_t)&_mi_os_random_weak ^ extra_seed; // ASLR makes the address random x ^= _mi_prim_clock_now(); // and do a few randomization steps uintptr_t max = ((x ^ (x >> 17)) & 0x0F) + 1; for (uintptr_t i = 0; i < max; i++) { x = _mi_random_shuffle(x); } mi_assert_internal(x != 0); return x; } static void mi_random_init_ex(mi_random_ctx_t* ctx, bool use_weak) { uint8_t key[32]; if (use_weak || !_mi_prim_random_buf(key, sizeof(key))) { // if we fail to get random data from the OS, we fall back to a // weak random source based on the current time #if !defined(__wasi__) if (!use_weak) { _mi_warning_message("unable to use secure randomness\n"); } #endif uintptr_t x = _mi_os_random_weak(0); for (size_t i = 0; i < 8; i++) { // key is eight 32-bit words. x = _mi_random_shuffle(x); ((uint32_t*)key)[i] = (uint32_t)x; } ctx->weak = true; } else { ctx->weak = false; } chacha_init(ctx, key, (uintptr_t)ctx /*nonce*/ ); } void _mi_random_init(mi_random_ctx_t* ctx) { mi_random_init_ex(ctx, false); } void _mi_random_init_weak(mi_random_ctx_t * ctx) { mi_random_init_ex(ctx, true); } void _mi_random_reinit_if_weak(mi_random_ctx_t * ctx) { if (ctx->weak) { _mi_random_init(ctx); } } /* -------------------------------------------------------- test vectors from ----------------------------------------------------------- */ /* static bool array_equals(uint32_t* x, uint32_t* y, size_t n) { for (size_t i = 0; i < n; i++) { if (x[i] != y[i]) return false; } return true; } static void chacha_test(void) { uint32_t x[4] = { 0x11111111, 0x01020304, 0x9b8d6f43, 0x01234567 }; uint32_t x_out[4] = { 0xea2a92f4, 0xcb1cf8ce, 0x4581472e, 0x5881c4bb }; qround(x, 0, 1, 2, 3); mi_assert_internal(array_equals(x, x_out, 4)); uint32_t y[16] = { 0x879531e0, 0xc5ecf37d, 0x516461b1, 0xc9a62f8a, 0x44c20ef3, 0x3390af7f, 0xd9fc690b, 0x2a5f714c, 0x53372767, 0xb00a5631, 
0x974c541a, 0x359e9963, 0x5c971061, 0x3d631689, 0x2098d9d6, 0x91dbd320 }; uint32_t y_out[16] = { 0x879531e0, 0xc5ecf37d, 0xbdb886dc, 0xc9a62f8a, 0x44c20ef3, 0x3390af7f, 0xd9fc690b, 0xcfacafd2, 0xe46bea80, 0xb00a5631, 0x974c541a, 0x359e9963, 0x5c971061, 0xccc07c79, 0x2098d9d6, 0x91dbd320 }; qround(y, 2, 7, 8, 13); mi_assert_internal(array_equals(y, y_out, 16)); mi_random_ctx_t r = { { 0x61707865, 0x3320646e, 0x79622d32, 0x6b206574, 0x03020100, 0x07060504, 0x0b0a0908, 0x0f0e0d0c, 0x13121110, 0x17161514, 0x1b1a1918, 0x1f1e1d1c, 0x00000001, 0x09000000, 0x4a000000, 0x00000000 }, {0}, 0 }; uint32_t r_out[16] = { 0xe4e7f110, 0x15593bd1, 0x1fdd0f50, 0xc47120a3, 0xc7f4d1c7, 0x0368c033, 0x9aaa2204, 0x4e6cd4c3, 0x466482d2, 0x09aa9f07, 0x05d7c214, 0xa2028bd9, 0xd19c12b5, 0xb94e16de, 0xe883d0cb, 0x4e3c50a2 }; chacha_block(&r); mi_assert_internal(array_equals(r.output, r_out, 16)); } */ luametatex-2.10.08/source/libraries/mimalloc/src/region.c000066400000000000000000000522261442250314700233330ustar00rootroot00000000000000/* ---------------------------------------------------------------------------- Copyright (c) 2019-2020, Microsoft Research, Daan Leijen This is free software; you can redistribute it and/or modify it under the terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. -----------------------------------------------------------------------------*/ /* ---------------------------------------------------------------------------- This implements a layer between the raw OS memory (VirtualAlloc/mmap/sbrk/..) and the segment and huge object allocation by mimalloc. There may be multiple implementations of this (one could be the identity going directly to the OS, another could be a simple cache etc), but the current one uses large "regions". In contrast to the rest of mimalloc, the "regions" are shared between threads and need to be accessed using atomic operations. 
We need this memory layer between the raw OS calls because of: 1. on `sbrk` like systems (like WebAssembly) we need our own memory maps in order to reuse memory effectively. 2. It turns out that for large objects, between 1MiB and 32MiB (?), the cost of an OS allocation/free is still (much) too expensive relative to the accesses in that object :-( (`malloc-large` tests this). This means we need a cheaper way to reuse memory. 3. This layer allows for NUMA aware allocation. Possible issues: - (2) can potentially be addressed too with a small cache per thread which is much simpler. Generally though that requires shrinking of huge pages, and may overuse memory per thread. (and is not compatible with `sbrk`). - Since the current regions are per-process, we need atomic operations to claim blocks which may be contended - In the worst case, we need to search the whole region map (16KiB for 256GiB) linearly. At what point will direct OS calls be faster? Is there a way to do this better without adding too much complexity? 
-----------------------------------------------------------------------------*/ #include "mimalloc.h" #include "mimalloc-internal.h" #include "mimalloc-atomic.h" #include // memset #include "bitmap.h" // Internal raw OS interface size_t _mi_os_large_page_size(void); bool _mi_os_protect(void* addr, size_t size); bool _mi_os_unprotect(void* addr, size_t size); bool _mi_os_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats); bool _mi_os_decommit(void* p, size_t size, mi_stats_t* stats); bool _mi_os_reset(void* p, size_t size, mi_stats_t* stats); bool _mi_os_unreset(void* p, size_t size, bool* is_zero, mi_stats_t* stats); bool _mi_os_commit_unreset(void* addr, size_t size, bool* is_zero, mi_stats_t* stats); // arena.c mi_arena_id_t _mi_arena_id_none(void); void _mi_arena_free(void* p, size_t size, size_t alignment, size_t align_offset, size_t memid, bool all_committed, mi_stats_t* stats); void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld); void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset, bool* commit, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld); // Constants #if (MI_INTPTR_SIZE==8) #define MI_HEAP_REGION_MAX_SIZE (256 * MI_GiB) // 64KiB for the region map #elif (MI_INTPTR_SIZE==4) #define MI_HEAP_REGION_MAX_SIZE (3 * MI_GiB) // ~ KiB for the region map #else #error "define the maximum heap space allowed for regions on this platform" #endif #define MI_REGION_MAX_BLOCKS MI_BITMAP_FIELD_BITS #define MI_REGION_SIZE (MI_SEGMENT_SIZE * MI_BITMAP_FIELD_BITS) // 256MiB (64MiB on 32 bits) #define MI_REGION_MAX (MI_HEAP_REGION_MAX_SIZE / MI_REGION_SIZE) // 1024 (48 on 32 bits) #define MI_REGION_MAX_OBJ_BLOCKS (MI_REGION_MAX_BLOCKS/4) // 64MiB #define MI_REGION_MAX_OBJ_SIZE (MI_REGION_MAX_OBJ_BLOCKS*MI_SEGMENT_SIZE) // Region info typedef union mi_region_info_u { size_t 
value; struct { bool valid; // initialized? bool is_large:1; // allocated in fixed large/huge OS pages bool is_pinned:1; // pinned memory cannot be decommitted short numa_node; // the associated NUMA node (where -1 means no associated node) } x; } mi_region_info_t; // A region owns a chunk of REGION_SIZE (256MiB) (virtual) memory with // a bit map with one bit per MI_SEGMENT_SIZE (4MiB) block. typedef struct mem_region_s { _Atomic(size_t) info; // mi_region_info_t.value _Atomic(void*) start; // start of the memory area mi_bitmap_field_t in_use; // bit per in-use block mi_bitmap_field_t dirty; // track if non-zero per block mi_bitmap_field_t commit; // track if committed per block mi_bitmap_field_t reset; // track if reset per block _Atomic(size_t) arena_memid; // if allocated from a (huge page) arena _Atomic(size_t) padding; // round to 8 fields (needs to be atomic for msvc, see issue #508) } mem_region_t; // The region map static mem_region_t regions[MI_REGION_MAX]; // Allocated regions static _Atomic(size_t) regions_count; // = 0; /* ---------------------------------------------------------------------------- Utility functions -----------------------------------------------------------------------------*/ // Blocks (of 4MiB) needed for the given size. static size_t mi_region_block_count(size_t size) { return _mi_divide_up(size, MI_SEGMENT_SIZE); } /* // Return a rounded commit/reset size such that we don't fragment large OS pages into small ones. static size_t mi_good_commit_size(size_t size) { if (size > (SIZE_MAX - _mi_os_large_page_size())) return size; return _mi_align_up(size, _mi_os_large_page_size()); } */ // Return if a pointer points into a region reserved by us. 
mi_decl_nodiscard bool mi_is_in_heap_region(const void* p) mi_attr_noexcept { if (p==NULL) return false; size_t count = mi_atomic_load_relaxed(®ions_count); for (size_t i = 0; i < count; i++) { uint8_t* start = (uint8_t*)mi_atomic_load_ptr_relaxed(uint8_t, ®ions[i].start); if (start != NULL && (uint8_t*)p >= start && (uint8_t*)p < start + MI_REGION_SIZE) return true; } return false; } static void* mi_region_blocks_start(const mem_region_t* region, mi_bitmap_index_t bit_idx) { uint8_t* start = (uint8_t*)mi_atomic_load_ptr_acquire(uint8_t, &((mem_region_t*)region)->start); mi_assert_internal(start != NULL); return (start + (bit_idx * MI_SEGMENT_SIZE)); } static size_t mi_memid_create(mem_region_t* region, mi_bitmap_index_t bit_idx) { mi_assert_internal(bit_idx < MI_BITMAP_FIELD_BITS); size_t idx = region - regions; mi_assert_internal(®ions[idx] == region); return (idx*MI_BITMAP_FIELD_BITS + bit_idx)<<1; } static size_t mi_memid_create_from_arena(size_t arena_memid) { return (arena_memid << 1) | 1; } static bool mi_memid_is_arena(size_t id, mem_region_t** region, mi_bitmap_index_t* bit_idx, size_t* arena_memid) { if ((id&1)==1) { if (arena_memid != NULL) *arena_memid = (id>>1); return true; } else { size_t idx = (id >> 1) / MI_BITMAP_FIELD_BITS; *bit_idx = (mi_bitmap_index_t)(id>>1) % MI_BITMAP_FIELD_BITS; *region = ®ions[idx]; return false; } } /* ---------------------------------------------------------------------------- Allocate a region is allocated from the OS (or an arena) -----------------------------------------------------------------------------*/ static bool mi_region_try_alloc_os(size_t blocks, bool commit, bool allow_large, mem_region_t** region, mi_bitmap_index_t* bit_idx, mi_os_tld_t* tld) { // not out of regions yet? 
if (mi_atomic_load_relaxed(®ions_count) >= MI_REGION_MAX - 1) return false; // try to allocate a fresh region from the OS bool region_commit = (commit && mi_option_is_enabled(mi_option_eager_region_commit)); bool region_large = (commit && allow_large); bool is_zero = false; bool is_pinned = false; size_t arena_memid = 0; void* const start = _mi_arena_alloc_aligned(MI_REGION_SIZE, MI_SEGMENT_ALIGN, 0, ®ion_commit, ®ion_large, &is_pinned, &is_zero, _mi_arena_id_none(), & arena_memid, tld); if (start == NULL) return false; mi_assert_internal(!(region_large && !allow_large)); mi_assert_internal(!region_large || region_commit); // claim a fresh slot const size_t idx = mi_atomic_increment_acq_rel(®ions_count); if (idx >= MI_REGION_MAX) { mi_atomic_decrement_acq_rel(®ions_count); _mi_arena_free(start, MI_REGION_SIZE, MI_SEGMENT_ALIGN, 0, arena_memid, region_commit, tld->stats); _mi_warning_message("maximum regions used: %zu GiB (perhaps recompile with a larger setting for MI_HEAP_REGION_MAX_SIZE)", _mi_divide_up(MI_HEAP_REGION_MAX_SIZE, MI_GiB)); return false; } // allocated, initialize and claim the initial blocks mem_region_t* r = ®ions[idx]; r->arena_memid = arena_memid; mi_atomic_store_release(&r->in_use, (size_t)0); mi_atomic_store_release(&r->dirty, (is_zero ? 0 : MI_BITMAP_FIELD_FULL)); mi_atomic_store_release(&r->commit, (region_commit ? 
MI_BITMAP_FIELD_FULL : 0)); mi_atomic_store_release(&r->reset, (size_t)0); *bit_idx = 0; _mi_bitmap_claim(&r->in_use, 1, blocks, *bit_idx, NULL); mi_atomic_store_ptr_release(void,&r->start, start); // and share it mi_region_info_t info; info.value = 0; // initialize the full union to zero info.x.valid = true; info.x.is_large = region_large; info.x.is_pinned = is_pinned; info.x.numa_node = (short)_mi_os_numa_node(tld); mi_atomic_store_release(&r->info, info.value); // now make it available to others *region = r; return true; } /* ---------------------------------------------------------------------------- Try to claim blocks in suitable regions -----------------------------------------------------------------------------*/ static bool mi_region_is_suitable(const mem_region_t* region, int numa_node, bool allow_large ) { // initialized at all? mi_region_info_t info; info.value = mi_atomic_load_relaxed(&((mem_region_t*)region)->info); if (info.value==0) return false; // numa correct if (numa_node >= 0) { // use negative numa node to always succeed int rnode = info.x.numa_node; if (rnode >= 0 && rnode != numa_node) return false; } // check allow-large if (!allow_large && info.x.is_large) return false; return true; } static bool mi_region_try_claim(int numa_node, size_t blocks, bool allow_large, mem_region_t** region, mi_bitmap_index_t* bit_idx, mi_os_tld_t* tld) { // try all regions for a free slot const size_t count = mi_atomic_load_relaxed(®ions_count); // monotonic, so ok to be relaxed size_t idx = tld->region_idx; // Or start at 0 to reuse low addresses? 
Starting at 0 seems to increase latency though for (size_t visited = 0; visited < count; visited++, idx++) { if (idx >= count) idx = 0; // wrap around mem_region_t* r = ®ions[idx]; // if this region suits our demand (numa node matches, large OS page matches) if (mi_region_is_suitable(r, numa_node, allow_large)) { // then try to atomically claim a segment(s) in this region if (_mi_bitmap_try_find_claim_field(&r->in_use, 0, blocks, bit_idx)) { tld->region_idx = idx; // remember the last found position *region = r; return true; } } } return false; } static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld) { mi_assert_internal(blocks <= MI_BITMAP_FIELD_BITS); mem_region_t* region; mi_bitmap_index_t bit_idx; const int numa_node = (_mi_os_numa_node_count() <= 1 ? -1 : _mi_os_numa_node(tld)); // try to claim in existing regions if (!mi_region_try_claim(numa_node, blocks, *large, ®ion, &bit_idx, tld)) { // otherwise try to allocate a fresh region and claim in there if (!mi_region_try_alloc_os(blocks, *commit, *large, ®ion, &bit_idx, tld)) { // out of regions or memory return NULL; } } // ------------------------------------------------ // found a region and claimed `blocks` at `bit_idx`, initialize them now mi_assert_internal(region != NULL); mi_assert_internal(_mi_bitmap_is_claimed(®ion->in_use, 1, blocks, bit_idx)); mi_region_info_t info; info.value = mi_atomic_load_acquire(®ion->info); uint8_t* start = (uint8_t*)mi_atomic_load_ptr_acquire(uint8_t,®ion->start); mi_assert_internal(!(info.x.is_large && !*large)); mi_assert_internal(start != NULL); *is_zero = _mi_bitmap_claim(®ion->dirty, 1, blocks, bit_idx, NULL); *large = info.x.is_large; *is_pinned = info.x.is_pinned; *memid = mi_memid_create(region, bit_idx); void* p = start + (mi_bitmap_index_bit_in_field(bit_idx) * MI_SEGMENT_SIZE); // commit if (*commit) { // ensure commit bool any_uncommitted; _mi_bitmap_claim(®ion->commit, 1, blocks, 
bit_idx, &any_uncommitted); if (any_uncommitted) { mi_assert_internal(!info.x.is_large && !info.x.is_pinned); bool commit_zero = false; if (!_mi_mem_commit(p, blocks * MI_SEGMENT_SIZE, &commit_zero, tld)) { // failed to commit! unclaim and return mi_bitmap_unclaim(®ion->in_use, 1, blocks, bit_idx); return NULL; } if (commit_zero) *is_zero = true; } } else { // no need to commit, but check if already fully committed *commit = _mi_bitmap_is_claimed(®ion->commit, 1, blocks, bit_idx); } mi_assert_internal(!*commit || _mi_bitmap_is_claimed(®ion->commit, 1, blocks, bit_idx)); // unreset reset blocks if (_mi_bitmap_is_any_claimed(®ion->reset, 1, blocks, bit_idx)) { // some blocks are still reset mi_assert_internal(!info.x.is_large && !info.x.is_pinned); mi_assert_internal(!mi_option_is_enabled(mi_option_eager_commit) || *commit || mi_option_get(mi_option_eager_commit_delay) > 0); mi_bitmap_unclaim(®ion->reset, 1, blocks, bit_idx); if (*commit || !mi_option_is_enabled(mi_option_reset_decommits)) { // only if needed bool reset_zero = false; _mi_mem_unreset(p, blocks * MI_SEGMENT_SIZE, &reset_zero, tld); if (reset_zero) *is_zero = true; } } mi_assert_internal(!_mi_bitmap_is_any_claimed(®ion->reset, 1, blocks, bit_idx)); #if (MI_DEBUG>=2) && !MI_TRACK_ENABLED if (*commit) { ((uint8_t*)p)[0] = 0; } #endif // and return the allocation mi_assert_internal(p != NULL); return p; } /* ---------------------------------------------------------------------------- Allocation -----------------------------------------------------------------------------*/ // Allocate `size` memory aligned at `alignment`. Return non NULL on success, with a given memory `id`. 
// (`id` is abstract, but `id = idx*MI_REGION_MAP_BITS + bitidx`) void* _mi_mem_alloc_aligned(size_t size, size_t alignment, size_t align_offset, bool* commit, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld) { mi_assert_internal(memid != NULL && tld != NULL); mi_assert_internal(size > 0); *memid = 0; *is_zero = false; *is_pinned = false; bool default_large = false; if (large==NULL) large = &default_large; // ensure `large != NULL` if (size == 0) return NULL; size = _mi_align_up(size, _mi_os_page_size()); // allocate from regions if possible void* p = NULL; size_t arena_memid; const size_t blocks = mi_region_block_count(size); if (blocks <= MI_REGION_MAX_OBJ_BLOCKS && alignment <= MI_SEGMENT_ALIGN && align_offset == 0) { p = mi_region_try_alloc(blocks, commit, large, is_pinned, is_zero, memid, tld); if (p == NULL) { _mi_warning_message("unable to allocate from region: size %zu\n", size); } } if (p == NULL) { // and otherwise fall back to the OS p = _mi_arena_alloc_aligned(size, alignment, align_offset, commit, large, is_pinned, is_zero, _mi_arena_id_none(), & arena_memid, tld); *memid = mi_memid_create_from_arena(arena_memid); } if (p != NULL) { mi_assert_internal(((uintptr_t)p + align_offset) % alignment == 0); #if (MI_DEBUG>=2) && !MI_TRACK_ENABLED if (*commit) { ((uint8_t*)p)[0] = 0; } // ensure the memory is committed #endif } return p; } /* ---------------------------------------------------------------------------- Free -----------------------------------------------------------------------------*/ // Free previously allocated memory with a given id. 
void _mi_mem_free(void* p, size_t size, size_t alignment, size_t align_offset, size_t id, bool full_commit, bool any_reset, mi_os_tld_t* tld) { mi_assert_internal(size > 0 && tld != NULL); if (p==NULL) return; if (size==0) return; size = _mi_align_up(size, _mi_os_page_size()); size_t arena_memid = 0; mi_bitmap_index_t bit_idx; mem_region_t* region; if (mi_memid_is_arena(id,®ion,&bit_idx,&arena_memid)) { // was a direct arena allocation, pass through _mi_arena_free(p, size, alignment, align_offset, arena_memid, full_commit, tld->stats); } else { // allocated in a region mi_assert_internal(align_offset == 0); mi_assert_internal(size <= MI_REGION_MAX_OBJ_SIZE); if (size > MI_REGION_MAX_OBJ_SIZE) return; const size_t blocks = mi_region_block_count(size); mi_assert_internal(blocks + bit_idx <= MI_BITMAP_FIELD_BITS); mi_region_info_t info; info.value = mi_atomic_load_acquire(®ion->info); mi_assert_internal(info.value != 0); void* blocks_start = mi_region_blocks_start(region, bit_idx); mi_assert_internal(blocks_start == p); // not a pointer in our area? mi_assert_internal(bit_idx + blocks <= MI_BITMAP_FIELD_BITS); if (blocks_start != p || bit_idx + blocks > MI_BITMAP_FIELD_BITS) return; // or `abort`? // committed? if (full_commit && (size % MI_SEGMENT_SIZE) == 0) { _mi_bitmap_claim(®ion->commit, 1, blocks, bit_idx, NULL); } if (any_reset) { // set the is_reset bits if any pages were reset _mi_bitmap_claim(®ion->reset, 1, blocks, bit_idx, NULL); } // reset the blocks to reduce the working set. 
if (!info.x.is_large && !info.x.is_pinned && mi_option_is_enabled(mi_option_segment_reset) && (mi_option_is_enabled(mi_option_eager_commit) || mi_option_is_enabled(mi_option_reset_decommits))) // cannot reset halfway committed segments, use only `option_page_reset` instead { bool any_unreset; _mi_bitmap_claim(®ion->reset, 1, blocks, bit_idx, &any_unreset); if (any_unreset) { _mi_abandoned_await_readers(); // ensure no more pending write (in case reset = decommit) _mi_mem_reset(p, blocks * MI_SEGMENT_SIZE, tld); } } // and unclaim bool all_unclaimed = mi_bitmap_unclaim(®ion->in_use, 1, blocks, bit_idx); mi_assert_internal(all_unclaimed); MI_UNUSED(all_unclaimed); } } /* ---------------------------------------------------------------------------- collection -----------------------------------------------------------------------------*/ void _mi_mem_collect(mi_os_tld_t* tld) { // free every region that has no segments in use. size_t rcount = mi_atomic_load_relaxed(®ions_count); for (size_t i = 0; i < rcount; i++) { mem_region_t* region = ®ions[i]; if (mi_atomic_load_relaxed(®ion->info) != 0) { // if no segments used, try to claim the whole region size_t m = mi_atomic_load_relaxed(®ion->in_use); while (m == 0 && !mi_atomic_cas_weak_release(®ion->in_use, &m, MI_BITMAP_FIELD_FULL)) { /* nothing */ }; if (m == 0) { // on success, free the whole region uint8_t* start = (uint8_t*)mi_atomic_load_ptr_acquire(uint8_t,®ions[i].start); size_t arena_memid = mi_atomic_load_relaxed(®ions[i].arena_memid); size_t commit = mi_atomic_load_relaxed(®ions[i].commit); memset((void*)®ions[i], 0, sizeof(mem_region_t)); // cast to void* to avoid atomic warning // and release the whole region mi_atomic_store_release(®ion->info, (size_t)0); if (start != NULL) { // && !_mi_os_is_huge_reserved(start)) { _mi_abandoned_await_readers(); // ensure no pending reads _mi_arena_free(start, MI_REGION_SIZE, MI_SEGMENT_ALIGN, 0, arena_memid, (~commit == 0), tld->stats); } } } } } /* 
---------------------------------------------------------------------------- Other -----------------------------------------------------------------------------*/ bool _mi_mem_reset(void* p, size_t size, mi_os_tld_t* tld) { if (mi_option_is_enabled(mi_option_reset_decommits)) { return _mi_os_decommit(p, size, tld->stats); } else { return _mi_os_reset(p, size, tld->stats); } } bool _mi_mem_unreset(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld) { if (mi_option_is_enabled(mi_option_reset_decommits)) { return _mi_os_commit(p, size, is_zero, tld->stats); } else { return _mi_os_unreset(p, size, is_zero, tld->stats); } } bool _mi_mem_commit(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld) { return _mi_os_commit(p, size, is_zero, tld->stats); } bool _mi_mem_decommit(void* p, size_t size, mi_os_tld_t* tld) { return _mi_os_decommit(p, size, tld->stats); } bool _mi_mem_protect(void* p, size_t size) { return _mi_os_protect(p, size); } bool _mi_mem_unprotect(void* p, size_t size) { return _mi_os_unprotect(p, size); } luametatex-2.10.08/source/libraries/mimalloc/src/segment-cache.c000066400000000000000000000402061442250314700245460ustar00rootroot00000000000000/* ---------------------------------------------------------------------------- Copyright (c) 2020, Microsoft Research, Daan Leijen This is free software; you can redistribute it and/or modify it under the terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. -----------------------------------------------------------------------------*/ /* ---------------------------------------------------------------------------- Implements a cache of segments to avoid expensive OS calls and to reuse the commit_mask to optimize the commit/decommit calls. The full memory map of all segments is also implemented here. 
-----------------------------------------------------------------------------*/ #include "mimalloc.h" #include "mimalloc-internal.h" #include "mimalloc-atomic.h" #include "bitmap.h" // atomic bitmap //#define MI_CACHE_DISABLE 1 // define to completely disable the segment cache #define MI_CACHE_FIELDS (16) #define MI_CACHE_MAX (MI_BITMAP_FIELD_BITS*MI_CACHE_FIELDS) // 1024 on 64-bit #define BITS_SET() MI_ATOMIC_VAR_INIT(UINTPTR_MAX) #define MI_CACHE_BITS_SET MI_INIT16(BITS_SET) // note: update if MI_CACHE_FIELDS changes typedef struct mi_cache_slot_s { void* p; size_t memid; bool is_pinned; mi_commit_mask_t commit_mask; mi_commit_mask_t decommit_mask; _Atomic(mi_msecs_t) expire; } mi_cache_slot_t; static mi_decl_cache_align mi_cache_slot_t cache[MI_CACHE_MAX]; // = 0 static mi_decl_cache_align mi_bitmap_field_t cache_available[MI_CACHE_FIELDS] = { MI_CACHE_BITS_SET }; // zero bit = available! static mi_decl_cache_align mi_bitmap_field_t cache_available_large[MI_CACHE_FIELDS] = { MI_CACHE_BITS_SET }; static mi_decl_cache_align mi_bitmap_field_t cache_inuse[MI_CACHE_FIELDS]; // zero bit = free static bool mi_cdecl mi_segment_cache_is_suitable(mi_bitmap_index_t bitidx, void* arg) { mi_arena_id_t req_arena_id = *((mi_arena_id_t*)arg); mi_cache_slot_t* slot = &cache[mi_bitmap_index_bit(bitidx)]; return _mi_arena_memid_is_suitable(slot->memid, req_arena_id); } mi_decl_noinline static void* mi_segment_cache_pop_ex( bool all_suitable, size_t size, mi_commit_mask_t* commit_mask, mi_commit_mask_t* decommit_mask, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t _req_arena_id, size_t* memid, mi_os_tld_t* tld) { #ifdef MI_CACHE_DISABLE return NULL; #else // only segment blocks if (size != MI_SEGMENT_SIZE) return NULL; // numa node determines start field const int numa_node = _mi_os_numa_node(tld); size_t start_field = 0; if (numa_node > 0) { start_field = (MI_CACHE_FIELDS / _mi_os_numa_node_count())*numa_node; if (start_field >= MI_CACHE_FIELDS) start_field = 0; } // 
find an available slot mi_bitmap_index_t bitidx = 0; bool claimed = false; mi_arena_id_t req_arena_id = _req_arena_id; mi_bitmap_pred_fun_t pred_fun = (all_suitable ? NULL : &mi_segment_cache_is_suitable); // cannot pass NULL as the arena may be exclusive itself; todo: do not put exclusive arenas in the cache? if (*large) { // large allowed? claimed = _mi_bitmap_try_find_from_claim_pred(cache_available_large, MI_CACHE_FIELDS, start_field, 1, pred_fun, &req_arena_id, &bitidx); if (claimed) *large = true; } if (!claimed) { claimed = _mi_bitmap_try_find_from_claim_pred (cache_available, MI_CACHE_FIELDS, start_field, 1, pred_fun, &req_arena_id, &bitidx); if (claimed) *large = false; } if (!claimed) return NULL; // found a slot mi_cache_slot_t* slot = &cache[mi_bitmap_index_bit(bitidx)]; void* p = slot->p; *memid = slot->memid; *is_pinned = slot->is_pinned; *is_zero = false; *commit_mask = slot->commit_mask; *decommit_mask = slot->decommit_mask; slot->p = NULL; mi_atomic_storei64_release(&slot->expire,(mi_msecs_t)0); // mark the slot as free again mi_assert_internal(_mi_bitmap_is_claimed(cache_inuse, MI_CACHE_FIELDS, 1, bitidx)); _mi_bitmap_unclaim(cache_inuse, MI_CACHE_FIELDS, 1, bitidx); return p; #endif } mi_decl_noinline void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* commit_mask, mi_commit_mask_t* decommit_mask, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t _req_arena_id, size_t* memid, mi_os_tld_t* tld) { return mi_segment_cache_pop_ex(false, size, commit_mask, decommit_mask, large, is_pinned, is_zero, _req_arena_id, memid, tld); } static mi_decl_noinline void mi_commit_mask_decommit(mi_commit_mask_t* cmask, void* p, size_t total, mi_stats_t* stats) { if (mi_commit_mask_is_empty(cmask)) { // nothing } else if (mi_commit_mask_is_full(cmask)) { _mi_os_decommit(p, total, stats); } else { // todo: one call to decommit the whole at once? 
mi_assert_internal((total%MI_COMMIT_MASK_BITS)==0); size_t part = total/MI_COMMIT_MASK_BITS; size_t idx; size_t count; mi_commit_mask_foreach(cmask, idx, count) { void* start = (uint8_t*)p + (idx*part); size_t size = count*part; _mi_os_decommit(start, size, stats); } mi_commit_mask_foreach_end() } mi_commit_mask_create_empty(cmask); } #define MI_MAX_PURGE_PER_PUSH (4) static mi_decl_noinline void mi_segment_cache_purge(bool visit_all, bool force, mi_os_tld_t* tld) { MI_UNUSED(tld); if (!mi_option_is_enabled(mi_option_allow_decommit)) return; mi_msecs_t now = _mi_clock_now(); size_t purged = 0; const size_t max_visits = (visit_all ? MI_CACHE_MAX /* visit all */ : MI_CACHE_FIELDS /* probe at most N (=16) slots */); size_t idx = (visit_all ? 0 : _mi_random_shuffle((uintptr_t)now) % MI_CACHE_MAX /* random start */ ); for (size_t visited = 0; visited < max_visits; visited++,idx++) { // visit N slots if (idx >= MI_CACHE_MAX) idx = 0; // wrap mi_cache_slot_t* slot = &cache[idx]; mi_msecs_t expire = mi_atomic_loadi64_relaxed(&slot->expire); if (expire != 0 && (force || now >= expire)) { // racy read // seems expired, first claim it from available purged++; mi_bitmap_index_t bitidx = mi_bitmap_index_create_from_bit(idx); if (_mi_bitmap_claim(cache_available, MI_CACHE_FIELDS, 1, bitidx, NULL)) { // was available, we claimed it expire = mi_atomic_loadi64_acquire(&slot->expire); if (expire != 0 && (force || now >= expire)) { // safe read // still expired, decommit it mi_atomic_storei64_relaxed(&slot->expire,(mi_msecs_t)0); mi_assert_internal(!mi_commit_mask_is_empty(&slot->commit_mask) && _mi_bitmap_is_claimed(cache_available_large, MI_CACHE_FIELDS, 1, bitidx)); _mi_abandoned_await_readers(); // wait until safe to decommit // decommit committed parts // TODO: instead of decommit, we could also free to the OS? 
mi_commit_mask_decommit(&slot->commit_mask, slot->p, MI_SEGMENT_SIZE, tld->stats); mi_commit_mask_create_empty(&slot->decommit_mask); } _mi_bitmap_unclaim(cache_available, MI_CACHE_FIELDS, 1, bitidx); // make it available again for a pop } if (!visit_all && purged > MI_MAX_PURGE_PER_PUSH) break; // bound to no more than N purge tries per push } } } void _mi_segment_cache_collect(bool force, mi_os_tld_t* tld) { if (force) { // called on `mi_collect(true)` but not on thread termination _mi_segment_cache_free_all(tld); } else { mi_segment_cache_purge(true /* visit all */, false /* don't force unexpired */, tld); } } void _mi_segment_cache_free_all(mi_os_tld_t* tld) { mi_commit_mask_t commit_mask; mi_commit_mask_t decommit_mask; bool is_pinned; bool is_zero; size_t memid; const size_t size = MI_SEGMENT_SIZE; // iterate twice: first large pages, then regular memory for (int i = 0; i < 2; i++) { void* p; do { // keep popping and freeing the memory bool large = (i == 0); p = mi_segment_cache_pop_ex(true /* all */, size, &commit_mask, &decommit_mask, &large, &is_pinned, &is_zero, _mi_arena_id_none(), &memid, tld); if (p != NULL) { size_t csize = _mi_commit_mask_committed_size(&commit_mask, size); if (csize > 0 && !is_pinned) _mi_stat_decrease(&_mi_stats_main.committed, csize); _mi_arena_free(p, size, MI_SEGMENT_ALIGN, 0, memid, is_pinned /* pretend not committed to not double count decommits */, tld->stats); } } while (p != NULL); } } mi_decl_noinline bool _mi_segment_cache_push(void* start, size_t size, size_t memid, const mi_commit_mask_t* commit_mask, const mi_commit_mask_t* decommit_mask, bool is_large, bool is_pinned, mi_os_tld_t* tld) { #ifdef MI_CACHE_DISABLE return false; #else // only for normal segment blocks if (size != MI_SEGMENT_SIZE || ((uintptr_t)start % MI_SEGMENT_ALIGN) != 0) return false; // numa node determines start field int numa_node = _mi_os_numa_node(NULL); size_t start_field = 0; if (numa_node > 0) { start_field = (MI_CACHE_FIELDS / 
_mi_os_numa_node_count())*numa_node; if (start_field >= MI_CACHE_FIELDS) start_field = 0; } // purge expired entries mi_segment_cache_purge(false /* limit purges to a constant N */, false /* don't force unexpired */, tld); // find an available slot mi_bitmap_index_t bitidx; bool claimed = _mi_bitmap_try_find_from_claim(cache_inuse, MI_CACHE_FIELDS, start_field, 1, &bitidx); if (!claimed) return false; mi_assert_internal(_mi_bitmap_is_claimed(cache_available, MI_CACHE_FIELDS, 1, bitidx)); mi_assert_internal(_mi_bitmap_is_claimed(cache_available_large, MI_CACHE_FIELDS, 1, bitidx)); #if MI_DEBUG>1 if (is_pinned || is_large) { mi_assert_internal(mi_commit_mask_is_full(commit_mask)); } #endif // set the slot mi_cache_slot_t* slot = &cache[mi_bitmap_index_bit(bitidx)]; slot->p = start; slot->memid = memid; slot->is_pinned = is_pinned; mi_atomic_storei64_relaxed(&slot->expire,(mi_msecs_t)0); slot->commit_mask = *commit_mask; slot->decommit_mask = *decommit_mask; if (!mi_commit_mask_is_empty(commit_mask) && !is_large && !is_pinned && mi_option_is_enabled(mi_option_allow_decommit)) { long delay = mi_option_get(mi_option_segment_decommit_delay); if (delay == 0) { _mi_abandoned_await_readers(); // wait until safe to decommit mi_commit_mask_decommit(&slot->commit_mask, start, MI_SEGMENT_SIZE, tld->stats); mi_commit_mask_create_empty(&slot->decommit_mask); } else { mi_atomic_storei64_release(&slot->expire, _mi_clock_now() + delay); } } // make it available _mi_bitmap_unclaim((is_large ? cache_available_large : cache_available), MI_CACHE_FIELDS, 1, bitidx); return true; #endif } /* ----------------------------------------------------------- The following functions are to reliably find the segment or block that encompasses any pointer p (or NULL if it is not in any of our segments). We maintain a bitmap of all memory with 1 bit per MI_SEGMENT_SIZE (64MiB) set to 1 if it contains the segment meta data. 
----------------------------------------------------------- */ #if (MI_INTPTR_SIZE==8) #define MI_MAX_ADDRESS ((size_t)20 << 40) // 20TB #else #define MI_MAX_ADDRESS ((size_t)2 << 30) // 2Gb #endif #define MI_SEGMENT_MAP_BITS (MI_MAX_ADDRESS / MI_SEGMENT_SIZE) #define MI_SEGMENT_MAP_SIZE (MI_SEGMENT_MAP_BITS / 8) #define MI_SEGMENT_MAP_WSIZE (MI_SEGMENT_MAP_SIZE / MI_INTPTR_SIZE) static _Atomic(uintptr_t) mi_segment_map[MI_SEGMENT_MAP_WSIZE + 1]; // 2KiB per TB with 64MiB segments static size_t mi_segment_map_index_of(const mi_segment_t* segment, size_t* bitidx) { mi_assert_internal(_mi_ptr_segment(segment + 1) == segment); // is it aligned on MI_SEGMENT_SIZE? if ((uintptr_t)segment >= MI_MAX_ADDRESS) { *bitidx = 0; return MI_SEGMENT_MAP_WSIZE; } else { const uintptr_t segindex = ((uintptr_t)segment) / MI_SEGMENT_SIZE; *bitidx = segindex % MI_INTPTR_BITS; const size_t mapindex = segindex / MI_INTPTR_BITS; mi_assert_internal(mapindex < MI_SEGMENT_MAP_WSIZE); return mapindex; } } void _mi_segment_map_allocated_at(const mi_segment_t* segment) { size_t bitidx; size_t index = mi_segment_map_index_of(segment, &bitidx); mi_assert_internal(index <= MI_SEGMENT_MAP_WSIZE); if (index==MI_SEGMENT_MAP_WSIZE) return; uintptr_t mask = mi_atomic_load_relaxed(&mi_segment_map[index]); uintptr_t newmask; do { newmask = (mask | ((uintptr_t)1 << bitidx)); } while (!mi_atomic_cas_weak_release(&mi_segment_map[index], &mask, newmask)); } void _mi_segment_map_freed_at(const mi_segment_t* segment) { size_t bitidx; size_t index = mi_segment_map_index_of(segment, &bitidx); mi_assert_internal(index <= MI_SEGMENT_MAP_WSIZE); if (index == MI_SEGMENT_MAP_WSIZE) return; uintptr_t mask = mi_atomic_load_relaxed(&mi_segment_map[index]); uintptr_t newmask; do { newmask = (mask & ~((uintptr_t)1 << bitidx)); } while (!mi_atomic_cas_weak_release(&mi_segment_map[index], &mask, newmask)); } // Determine the segment belonging to a pointer or NULL if it is not in a valid segment. 
static mi_segment_t* _mi_segment_of(const void* p) { if (p == NULL) return NULL; mi_segment_t* segment = _mi_ptr_segment(p); mi_assert_internal(segment != NULL); size_t bitidx; size_t index = mi_segment_map_index_of(segment, &bitidx); // fast path: for any pointer to valid small/medium/large object or first MI_SEGMENT_SIZE in huge const uintptr_t mask = mi_atomic_load_relaxed(&mi_segment_map[index]); if mi_likely((mask & ((uintptr_t)1 << bitidx)) != 0) { return segment; // yes, allocated by us } if (index==MI_SEGMENT_MAP_WSIZE) return NULL; // TODO: maintain max/min allocated range for efficiency for more efficient rejection of invalid pointers? // search downwards for the first segment in case it is an interior pointer // could be slow but searches in MI_INTPTR_SIZE * MI_SEGMENT_SIZE (512MiB) steps trough // valid huge objects // note: we could maintain a lowest index to speed up the path for invalid pointers? size_t lobitidx; size_t loindex; uintptr_t lobits = mask & (((uintptr_t)1 << bitidx) - 1); if (lobits != 0) { loindex = index; lobitidx = mi_bsr(lobits); // lobits != 0 } else if (index == 0) { return NULL; } else { mi_assert_internal(index > 0); uintptr_t lomask = mask; loindex = index; do { loindex--; lomask = mi_atomic_load_relaxed(&mi_segment_map[loindex]); } while (lomask != 0 && loindex > 0); if (lomask == 0) return NULL; lobitidx = mi_bsr(lomask); // lomask != 0 } mi_assert_internal(loindex < MI_SEGMENT_MAP_WSIZE); // take difference as the addresses could be larger than the MAX_ADDRESS space. 
size_t diff = (((index - loindex) * (8*MI_INTPTR_SIZE)) + bitidx - lobitidx) * MI_SEGMENT_SIZE; segment = (mi_segment_t*)((uint8_t*)segment - diff); if (segment == NULL) return NULL; mi_assert_internal((void*)segment < p); bool cookie_ok = (_mi_ptr_cookie(segment) == segment->cookie); mi_assert_internal(cookie_ok); if mi_unlikely(!cookie_ok) return NULL; if (((uint8_t*)segment + mi_segment_size(segment)) <= (uint8_t*)p) return NULL; // outside the range mi_assert_internal(p >= (void*)segment && (uint8_t*)p < (uint8_t*)segment + mi_segment_size(segment)); return segment; } // Is this a valid pointer in our heap? static bool mi_is_valid_pointer(const void* p) { return (_mi_segment_of(p) != NULL); } mi_decl_nodiscard mi_decl_export bool mi_is_in_heap_region(const void* p) mi_attr_noexcept { return mi_is_valid_pointer(p); } /* // Return the full segment range belonging to a pointer static void* mi_segment_range_of(const void* p, size_t* size) { mi_segment_t* segment = _mi_segment_of(p); if (segment == NULL) { if (size != NULL) *size = 0; return NULL; } else { if (size != NULL) *size = segment->segment_size; return segment; } mi_assert_expensive(page == NULL || mi_segment_is_valid(_mi_page_segment(page),tld)); mi_assert_internal(page == NULL || (mi_segment_page_size(_mi_page_segment(page)) - (MI_SECURE == 0 ? 0 : _mi_os_page_size())) >= block_size); mi_reset_delayed(tld); mi_assert_internal(page == NULL || mi_page_not_in_queue(page, tld)); return page; } */ luametatex-2.10.08/source/libraries/mimalloc/src/segment-map.c000066400000000000000000000140121442250314700242540ustar00rootroot00000000000000/* ---------------------------------------------------------------------------- Copyright (c) 2019-2023, Microsoft Research, Daan Leijen This is free software; you can redistribute it and/or modify it under the terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. 
-----------------------------------------------------------------------------*/ /* ----------------------------------------------------------- The following functions are to reliably find the segment or block that encompasses any pointer p (or NULL if it is not in any of our segments). We maintain a bitmap of all memory with 1 bit per MI_SEGMENT_SIZE (64MiB) set to 1 if it contains the segment meta data. ----------------------------------------------------------- */ #include "mimalloc.h" #include "mimalloc/internal.h" #include "mimalloc/atomic.h" #if (MI_INTPTR_SIZE==8) #define MI_MAX_ADDRESS ((size_t)40 << 40) // 40TB (to include huge page areas) #else #define MI_MAX_ADDRESS ((size_t)2 << 30) // 2Gb #endif #define MI_SEGMENT_MAP_BITS (MI_MAX_ADDRESS / MI_SEGMENT_SIZE) #define MI_SEGMENT_MAP_SIZE (MI_SEGMENT_MAP_BITS / 8) #define MI_SEGMENT_MAP_WSIZE (MI_SEGMENT_MAP_SIZE / MI_INTPTR_SIZE) static _Atomic(uintptr_t) mi_segment_map[MI_SEGMENT_MAP_WSIZE + 1]; // 2KiB per TB with 64MiB segments static size_t mi_segment_map_index_of(const mi_segment_t* segment, size_t* bitidx) { mi_assert_internal(_mi_ptr_segment(segment + 1) == segment); // is it aligned on MI_SEGMENT_SIZE? 
if ((uintptr_t)segment >= MI_MAX_ADDRESS) { *bitidx = 0; return MI_SEGMENT_MAP_WSIZE; } else { const uintptr_t segindex = ((uintptr_t)segment) / MI_SEGMENT_SIZE; *bitidx = segindex % MI_INTPTR_BITS; const size_t mapindex = segindex / MI_INTPTR_BITS; mi_assert_internal(mapindex < MI_SEGMENT_MAP_WSIZE); return mapindex; } } void _mi_segment_map_allocated_at(const mi_segment_t* segment) { size_t bitidx; size_t index = mi_segment_map_index_of(segment, &bitidx); mi_assert_internal(index <= MI_SEGMENT_MAP_WSIZE); if (index==MI_SEGMENT_MAP_WSIZE) return; uintptr_t mask = mi_atomic_load_relaxed(&mi_segment_map[index]); uintptr_t newmask; do { newmask = (mask | ((uintptr_t)1 << bitidx)); } while (!mi_atomic_cas_weak_release(&mi_segment_map[index], &mask, newmask)); } void _mi_segment_map_freed_at(const mi_segment_t* segment) { size_t bitidx; size_t index = mi_segment_map_index_of(segment, &bitidx); mi_assert_internal(index <= MI_SEGMENT_MAP_WSIZE); if (index == MI_SEGMENT_MAP_WSIZE) return; uintptr_t mask = mi_atomic_load_relaxed(&mi_segment_map[index]); uintptr_t newmask; do { newmask = (mask & ~((uintptr_t)1 << bitidx)); } while (!mi_atomic_cas_weak_release(&mi_segment_map[index], &mask, newmask)); } // Determine the segment belonging to a pointer or NULL if it is not in a valid segment. static mi_segment_t* _mi_segment_of(const void* p) { if (p == NULL) return NULL; mi_segment_t* segment = _mi_ptr_segment(p); mi_assert_internal(segment != NULL); size_t bitidx; size_t index = mi_segment_map_index_of(segment, &bitidx); // fast path: for any pointer to valid small/medium/large object or first MI_SEGMENT_SIZE in huge const uintptr_t mask = mi_atomic_load_relaxed(&mi_segment_map[index]); if mi_likely((mask & ((uintptr_t)1 << bitidx)) != 0) { return segment; // yes, allocated by us } if (index==MI_SEGMENT_MAP_WSIZE) return NULL; // TODO: maintain max/min allocated range for efficiency for more efficient rejection of invalid pointers? 
// search downwards for the first segment in case it is an interior pointer // could be slow but searches in MI_INTPTR_SIZE * MI_SEGMENT_SIZE (512MiB) steps trough // valid huge objects // note: we could maintain a lowest index to speed up the path for invalid pointers? size_t lobitidx; size_t loindex; uintptr_t lobits = mask & (((uintptr_t)1 << bitidx) - 1); if (lobits != 0) { loindex = index; lobitidx = mi_bsr(lobits); // lobits != 0 } else if (index == 0) { return NULL; } else { mi_assert_internal(index > 0); uintptr_t lomask = mask; loindex = index; do { loindex--; lomask = mi_atomic_load_relaxed(&mi_segment_map[loindex]); } while (lomask != 0 && loindex > 0); if (lomask == 0) return NULL; lobitidx = mi_bsr(lomask); // lomask != 0 } mi_assert_internal(loindex < MI_SEGMENT_MAP_WSIZE); // take difference as the addresses could be larger than the MAX_ADDRESS space. size_t diff = (((index - loindex) * (8*MI_INTPTR_SIZE)) + bitidx - lobitidx) * MI_SEGMENT_SIZE; segment = (mi_segment_t*)((uint8_t*)segment - diff); if (segment == NULL) return NULL; mi_assert_internal((void*)segment < p); bool cookie_ok = (_mi_ptr_cookie(segment) == segment->cookie); mi_assert_internal(cookie_ok); if mi_unlikely(!cookie_ok) return NULL; if (((uint8_t*)segment + mi_segment_size(segment)) <= (uint8_t*)p) return NULL; // outside the range mi_assert_internal(p >= (void*)segment && (uint8_t*)p < (uint8_t*)segment + mi_segment_size(segment)); return segment; } // Is this a valid pointer in our heap? 
static bool mi_is_valid_pointer(const void* p) { return ((_mi_segment_of(p) != NULL) || (_mi_arena_contains(p))); } mi_decl_nodiscard mi_decl_export bool mi_is_in_heap_region(const void* p) mi_attr_noexcept { return mi_is_valid_pointer(p); } /* // Return the full segment range belonging to a pointer static void* mi_segment_range_of(const void* p, size_t* size) { mi_segment_t* segment = _mi_segment_of(p); if (segment == NULL) { if (size != NULL) *size = 0; return NULL; } else { if (size != NULL) *size = segment->segment_size; return segment; } mi_assert_expensive(page == NULL || mi_segment_is_valid(_mi_page_segment(page),tld)); mi_assert_internal(page == NULL || (mi_segment_page_size(_mi_page_segment(page)) - (MI_SECURE == 0 ? 0 : _mi_os_page_size())) >= block_size); mi_reset_delayed(tld); mi_assert_internal(page == NULL || mi_page_not_in_queue(page, tld)); return page; } */ luametatex-2.10.08/source/libraries/mimalloc/src/segment.c000066400000000000000000002060421442250314700235070ustar00rootroot00000000000000/* ---------------------------------------------------------------------------- Copyright (c) 2018-2020, Microsoft Research, Daan Leijen This is free software; you can redistribute it and/or modify it under the terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. 
-----------------------------------------------------------------------------*/ #include "mimalloc.h" #include "mimalloc/internal.h" #include "mimalloc/atomic.h" #include // memset #include #define MI_PAGE_HUGE_ALIGN (256*1024) static void mi_segment_try_purge(mi_segment_t* segment, bool force, mi_stats_t* stats); // ------------------------------------------------------------------- // commit mask // ------------------------------------------------------------------- static bool mi_commit_mask_all_set(const mi_commit_mask_t* commit, const mi_commit_mask_t* cm) { for (size_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) { if ((commit->mask[i] & cm->mask[i]) != cm->mask[i]) return false; } return true; } static bool mi_commit_mask_any_set(const mi_commit_mask_t* commit, const mi_commit_mask_t* cm) { for (size_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) { if ((commit->mask[i] & cm->mask[i]) != 0) return true; } return false; } static void mi_commit_mask_create_intersect(const mi_commit_mask_t* commit, const mi_commit_mask_t* cm, mi_commit_mask_t* res) { for (size_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) { res->mask[i] = (commit->mask[i] & cm->mask[i]); } } static void mi_commit_mask_clear(mi_commit_mask_t* res, const mi_commit_mask_t* cm) { for (size_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) { res->mask[i] &= ~(cm->mask[i]); } } static void mi_commit_mask_set(mi_commit_mask_t* res, const mi_commit_mask_t* cm) { for (size_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) { res->mask[i] |= cm->mask[i]; } } static void mi_commit_mask_create(size_t bitidx, size_t bitcount, mi_commit_mask_t* cm) { mi_assert_internal(bitidx < MI_COMMIT_MASK_BITS); mi_assert_internal((bitidx + bitcount) <= MI_COMMIT_MASK_BITS); if (bitcount == MI_COMMIT_MASK_BITS) { mi_assert_internal(bitidx==0); mi_commit_mask_create_full(cm); } else if (bitcount == 0) { mi_commit_mask_create_empty(cm); } else { mi_commit_mask_create_empty(cm); size_t i = bitidx / MI_COMMIT_MASK_FIELD_BITS; size_t ofs 
= bitidx % MI_COMMIT_MASK_FIELD_BITS; while (bitcount > 0) { mi_assert_internal(i < MI_COMMIT_MASK_FIELD_COUNT); size_t avail = MI_COMMIT_MASK_FIELD_BITS - ofs; size_t count = (bitcount > avail ? avail : bitcount); size_t mask = (count >= MI_COMMIT_MASK_FIELD_BITS ? ~((size_t)0) : (((size_t)1 << count) - 1) << ofs); cm->mask[i] = mask; bitcount -= count; ofs = 0; i++; } } } size_t _mi_commit_mask_committed_size(const mi_commit_mask_t* cm, size_t total) { mi_assert_internal((total%MI_COMMIT_MASK_BITS)==0); size_t count = 0; for (size_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) { size_t mask = cm->mask[i]; if (~mask == 0) { count += MI_COMMIT_MASK_FIELD_BITS; } else { for (; mask != 0; mask >>= 1) { // todo: use popcount if ((mask&1)!=0) count++; } } } // we use total since for huge segments each commit bit may represent a larger size return ((total / MI_COMMIT_MASK_BITS) * count); } size_t _mi_commit_mask_next_run(const mi_commit_mask_t* cm, size_t* idx) { size_t i = (*idx) / MI_COMMIT_MASK_FIELD_BITS; size_t ofs = (*idx) % MI_COMMIT_MASK_FIELD_BITS; size_t mask = 0; // find first ones while (i < MI_COMMIT_MASK_FIELD_COUNT) { mask = cm->mask[i]; mask >>= ofs; if (mask != 0) { while ((mask&1) == 0) { mask >>= 1; ofs++; } break; } i++; ofs = 0; } if (i >= MI_COMMIT_MASK_FIELD_COUNT) { // not found *idx = MI_COMMIT_MASK_BITS; return 0; } else { // found, count ones size_t count = 0; *idx = (i*MI_COMMIT_MASK_FIELD_BITS) + ofs; do { mi_assert_internal(ofs < MI_COMMIT_MASK_FIELD_BITS && (mask&1) == 1); do { count++; mask >>= 1; } while ((mask&1) == 1); if ((((*idx + count) % MI_COMMIT_MASK_FIELD_BITS) == 0)) { i++; if (i >= MI_COMMIT_MASK_FIELD_COUNT) break; mask = cm->mask[i]; ofs = 0; } } while ((mask&1) == 1); mi_assert_internal(count > 0); return count; } } /* -------------------------------------------------------------------------------- Segment allocation If a thread ends, it "abandons" pages with used blocks and there is an abandoned segment list whose segments 
can be reclaimed by still running threads, much like work-stealing. -------------------------------------------------------------------------------- */ /* ----------------------------------------------------------- Slices ----------------------------------------------------------- */ static const mi_slice_t* mi_segment_slices_end(const mi_segment_t* segment) { return &segment->slices[segment->slice_entries]; } static uint8_t* mi_slice_start(const mi_slice_t* slice) { mi_segment_t* segment = _mi_ptr_segment(slice); mi_assert_internal(slice >= segment->slices && slice < mi_segment_slices_end(segment)); return ((uint8_t*)segment + ((slice - segment->slices)*MI_SEGMENT_SLICE_SIZE)); } /* ----------------------------------------------------------- Bins ----------------------------------------------------------- */ // Use bit scan forward to quickly find the first zero bit if it is available static inline size_t mi_slice_bin8(size_t slice_count) { if (slice_count<=1) return slice_count; mi_assert_internal(slice_count <= MI_SLICES_PER_SEGMENT); slice_count--; size_t s = mi_bsr(slice_count); // slice_count > 1 if (s <= 2) return slice_count + 1; size_t bin = ((s << 2) | ((slice_count >> (s - 2))&0x03)) - 4; return bin; } static inline size_t mi_slice_bin(size_t slice_count) { mi_assert_internal(slice_count*MI_SEGMENT_SLICE_SIZE <= MI_SEGMENT_SIZE); mi_assert_internal(mi_slice_bin8(MI_SLICES_PER_SEGMENT) <= MI_SEGMENT_BIN_MAX); size_t bin = mi_slice_bin8(slice_count); mi_assert_internal(bin <= MI_SEGMENT_BIN_MAX); return bin; } static inline size_t mi_slice_index(const mi_slice_t* slice) { mi_segment_t* segment = _mi_ptr_segment(slice); ptrdiff_t index = slice - segment->slices; mi_assert_internal(index >= 0 && index < (ptrdiff_t)segment->slice_entries); return index; } /* ----------------------------------------------------------- Slice span queues ----------------------------------------------------------- */ static void mi_span_queue_push(mi_span_queue_t* sq, mi_slice_t* 
slice) { // todo: or push to the end? mi_assert_internal(slice->prev == NULL && slice->next==NULL); slice->prev = NULL; // paranoia slice->next = sq->first; sq->first = slice; if (slice->next != NULL) slice->next->prev = slice; else sq->last = slice; slice->xblock_size = 0; // free } static mi_span_queue_t* mi_span_queue_for(size_t slice_count, mi_segments_tld_t* tld) { size_t bin = mi_slice_bin(slice_count); mi_span_queue_t* sq = &tld->spans[bin]; mi_assert_internal(sq->slice_count >= slice_count); return sq; } static void mi_span_queue_delete(mi_span_queue_t* sq, mi_slice_t* slice) { mi_assert_internal(slice->xblock_size==0 && slice->slice_count>0 && slice->slice_offset==0); // should work too if the queue does not contain slice (which can happen during reclaim) if (slice->prev != NULL) slice->prev->next = slice->next; if (slice == sq->first) sq->first = slice->next; if (slice->next != NULL) slice->next->prev = slice->prev; if (slice == sq->last) sq->last = slice->prev; slice->prev = NULL; slice->next = NULL; slice->xblock_size = 1; // no more free } /* ----------------------------------------------------------- Invariant checking ----------------------------------------------------------- */ static bool mi_slice_is_used(const mi_slice_t* slice) { return (slice->xblock_size > 0); } #if (MI_DEBUG>=3) static bool mi_span_queue_contains(mi_span_queue_t* sq, mi_slice_t* slice) { for (mi_slice_t* s = sq->first; s != NULL; s = s->next) { if (s==slice) return true; } return false; } static bool mi_segment_is_valid(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_assert_internal(segment != NULL); mi_assert_internal(_mi_ptr_cookie(segment) == segment->cookie); mi_assert_internal(segment->abandoned <= segment->used); mi_assert_internal(segment->thread_id == 0 || segment->thread_id == _mi_thread_id()); mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &segment->purge_mask)); // can only decommit committed blocks 
//mi_assert_internal(segment->segment_info_size % MI_SEGMENT_SLICE_SIZE == 0); mi_slice_t* slice = &segment->slices[0]; const mi_slice_t* end = mi_segment_slices_end(segment); size_t used_count = 0; mi_span_queue_t* sq; while(slice < end) { mi_assert_internal(slice->slice_count > 0); mi_assert_internal(slice->slice_offset == 0); size_t index = mi_slice_index(slice); size_t maxindex = (index + slice->slice_count >= segment->slice_entries ? segment->slice_entries : index + slice->slice_count) - 1; if (mi_slice_is_used(slice)) { // a page in use, we need at least MAX_SLICE_OFFSET valid back offsets used_count++; for (size_t i = 0; i <= MI_MAX_SLICE_OFFSET && index + i <= maxindex; i++) { mi_assert_internal(segment->slices[index + i].slice_offset == i*sizeof(mi_slice_t)); mi_assert_internal(i==0 || segment->slices[index + i].slice_count == 0); mi_assert_internal(i==0 || segment->slices[index + i].xblock_size == 1); } // and the last entry as well (for coalescing) const mi_slice_t* last = slice + slice->slice_count - 1; if (last > slice && last < mi_segment_slices_end(segment)) { mi_assert_internal(last->slice_offset == (slice->slice_count-1)*sizeof(mi_slice_t)); mi_assert_internal(last->slice_count == 0); mi_assert_internal(last->xblock_size == 1); } } else { // free range of slices; only last slice needs a valid back offset mi_slice_t* last = &segment->slices[maxindex]; if (segment->kind != MI_SEGMENT_HUGE || slice->slice_count <= (segment->slice_entries - segment->segment_info_slices)) { mi_assert_internal((uint8_t*)slice == (uint8_t*)last - last->slice_offset); } mi_assert_internal(slice == last || last->slice_count == 0 ); mi_assert_internal(last->xblock_size == 0 || (segment->kind==MI_SEGMENT_HUGE && last->xblock_size==1)); if (segment->kind != MI_SEGMENT_HUGE && segment->thread_id != 0) { // segment is not huge or abandoned sq = mi_span_queue_for(slice->slice_count,tld); mi_assert_internal(mi_span_queue_contains(sq,slice)); } } slice = 
&segment->slices[maxindex+1]; } mi_assert_internal(slice == end); mi_assert_internal(used_count == segment->used + 1); return true; } #endif /* ----------------------------------------------------------- Segment size calculations ----------------------------------------------------------- */ static size_t mi_segment_info_size(mi_segment_t* segment) { return segment->segment_info_slices * MI_SEGMENT_SLICE_SIZE; } static uint8_t* _mi_segment_page_start_from_slice(const mi_segment_t* segment, const mi_slice_t* slice, size_t xblock_size, size_t* page_size) { ptrdiff_t idx = slice - segment->slices; size_t psize = (size_t)slice->slice_count * MI_SEGMENT_SLICE_SIZE; // make the start not OS page aligned for smaller blocks to avoid page/cache effects // note: the offset must always be an xblock_size multiple since we assume small allocations // are aligned (see `mi_heap_malloc_aligned`). size_t start_offset = 0; if (xblock_size >= MI_INTPTR_SIZE) { if (xblock_size <= 64) { start_offset = 3*xblock_size; } else if (xblock_size <= 512) { start_offset = xblock_size; } } if (page_size != NULL) { *page_size = psize - start_offset; } return (uint8_t*)segment + ((idx*MI_SEGMENT_SLICE_SIZE) + start_offset); } // Start of the page available memory; can be used on uninitialized pages uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size) { const mi_slice_t* slice = mi_page_to_slice((mi_page_t*)page); uint8_t* p = _mi_segment_page_start_from_slice(segment, slice, page->xblock_size, page_size); mi_assert_internal(page->xblock_size > 0 || _mi_ptr_page(p) == page); mi_assert_internal(_mi_ptr_segment(p) == segment); return p; } static size_t mi_segment_calculate_slices(size_t required, size_t* pre_size, size_t* info_slices) { size_t page_size = _mi_os_page_size(); size_t isize = _mi_align_up(sizeof(mi_segment_t), page_size); size_t guardsize = 0; if (MI_SECURE>0) { // in secure mode, we set up a protected page in between the segment info // 
and the page data (and one at the end of the segment) guardsize = page_size; if (required > 0) { required = _mi_align_up(required, MI_SEGMENT_SLICE_SIZE) + page_size; } } if (pre_size != NULL) *pre_size = isize; isize = _mi_align_up(isize + guardsize, MI_SEGMENT_SLICE_SIZE); if (info_slices != NULL) *info_slices = isize / MI_SEGMENT_SLICE_SIZE; size_t segment_size = (required==0 ? MI_SEGMENT_SIZE : _mi_align_up( required + isize + guardsize, MI_SEGMENT_SLICE_SIZE) ); mi_assert_internal(segment_size % MI_SEGMENT_SLICE_SIZE == 0); return (segment_size / MI_SEGMENT_SLICE_SIZE); } /* ---------------------------------------------------------------------------- Segment caches We keep a small segment cache per thread to increase local reuse and avoid setting/clearing guard pages in secure mode. ------------------------------------------------------------------------------- */ static void mi_segments_track_size(long segment_size, mi_segments_tld_t* tld) { if (segment_size>=0) _mi_stat_increase(&tld->stats->segments,1); else _mi_stat_decrease(&tld->stats->segments,1); tld->count += (segment_size >= 0 ? 1 : -1); if (tld->count > tld->peak_count) tld->peak_count = tld->count; tld->current_size += segment_size; if (tld->current_size > tld->peak_size) tld->peak_size = tld->current_size; } static void mi_segment_os_free(mi_segment_t* segment, mi_segments_tld_t* tld) { segment->thread_id = 0; _mi_segment_map_freed_at(segment); mi_segments_track_size(-((long)mi_segment_size(segment)),tld); if (MI_SECURE>0) { // _mi_os_unprotect(segment, mi_segment_size(segment)); // ensure no more guard pages are set // unprotect the guard pages; we cannot just unprotect the whole segment size as part may be decommitted size_t os_pagesize = _mi_os_page_size(); _mi_os_unprotect((uint8_t*)segment + mi_segment_info_size(segment) - os_pagesize, os_pagesize); uint8_t* end = (uint8_t*)segment + mi_segment_size(segment) - os_pagesize; _mi_os_unprotect(end, os_pagesize); } // purge delayed decommits now? 
(no, leave it to the arena) // mi_segment_try_purge(segment,true,tld->stats); const size_t size = mi_segment_size(segment); const size_t csize = _mi_commit_mask_committed_size(&segment->commit_mask, size); _mi_abandoned_await_readers(); // wait until safe to free _mi_arena_free(segment, mi_segment_size(segment), csize, segment->memid, tld->stats); } // called by threads that are terminating void _mi_segment_thread_collect(mi_segments_tld_t* tld) { MI_UNUSED(tld); // nothing to do } /* ----------------------------------------------------------- Commit/Decommit ranges ----------------------------------------------------------- */ static void mi_segment_commit_mask(mi_segment_t* segment, bool conservative, uint8_t* p, size_t size, uint8_t** start_p, size_t* full_size, mi_commit_mask_t* cm) { mi_assert_internal(_mi_ptr_segment(p + 1) == segment); mi_assert_internal(segment->kind != MI_SEGMENT_HUGE); mi_commit_mask_create_empty(cm); if (size == 0 || size > MI_SEGMENT_SIZE || segment->kind == MI_SEGMENT_HUGE) return; const size_t segstart = mi_segment_info_size(segment); const size_t segsize = mi_segment_size(segment); if (p >= (uint8_t*)segment + segsize) return; size_t pstart = (p - (uint8_t*)segment); mi_assert_internal(pstart + size <= segsize); size_t start; size_t end; if (conservative) { // decommit conservative start = _mi_align_up(pstart, MI_COMMIT_SIZE); end = _mi_align_down(pstart + size, MI_COMMIT_SIZE); mi_assert_internal(start >= segstart); mi_assert_internal(end <= segsize); } else { // commit liberal start = _mi_align_down(pstart, MI_MINIMAL_COMMIT_SIZE); end = _mi_align_up(pstart + size, MI_MINIMAL_COMMIT_SIZE); } if (pstart >= segstart && start < segstart) { // note: the mask is also calculated for an initial commit of the info area start = segstart; } if (end > segsize) { end = segsize; } mi_assert_internal(start <= pstart && (pstart + size) <= end); mi_assert_internal(start % MI_COMMIT_SIZE==0 && end % MI_COMMIT_SIZE == 0); *start_p = 
(uint8_t*)segment + start; *full_size = (end > start ? end - start : 0); if (*full_size == 0) return; size_t bitidx = start / MI_COMMIT_SIZE; mi_assert_internal(bitidx < MI_COMMIT_MASK_BITS); size_t bitcount = *full_size / MI_COMMIT_SIZE; // can be 0 if (bitidx + bitcount > MI_COMMIT_MASK_BITS) { _mi_warning_message("commit mask overflow: idx=%zu count=%zu start=%zx end=%zx p=0x%p size=%zu fullsize=%zu\n", bitidx, bitcount, start, end, p, size, *full_size); } mi_assert_internal((bitidx + bitcount) <= MI_COMMIT_MASK_BITS); mi_commit_mask_create(bitidx, bitcount, cm); } static bool mi_segment_commit(mi_segment_t* segment, uint8_t* p, size_t size, mi_stats_t* stats) { mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &segment->purge_mask)); // commit liberal uint8_t* start = NULL; size_t full_size = 0; mi_commit_mask_t mask; mi_segment_commit_mask(segment, false /* conservative? */, p, size, &start, &full_size, &mask); if (mi_commit_mask_is_empty(&mask) || full_size == 0) return true; if (!mi_commit_mask_all_set(&segment->commit_mask, &mask)) { // committing bool is_zero = false; mi_commit_mask_t cmask; mi_commit_mask_create_intersect(&segment->commit_mask, &mask, &cmask); _mi_stat_decrease(&_mi_stats_main.committed, _mi_commit_mask_committed_size(&cmask, MI_SEGMENT_SIZE)); // adjust for overlap if (!_mi_os_commit(start, full_size, &is_zero, stats)) return false; mi_commit_mask_set(&segment->commit_mask, &mask); } // increase purge expiration when using part of delayed purges -- we assume more allocations are coming soon. 
if (mi_commit_mask_any_set(&segment->purge_mask, &mask)) { segment->purge_expire = _mi_clock_now() + mi_option_get(mi_option_purge_delay); } // always clear any delayed purges in our range (as they are either committed now) mi_commit_mask_clear(&segment->purge_mask, &mask); return true; } static bool mi_segment_ensure_committed(mi_segment_t* segment, uint8_t* p, size_t size, mi_stats_t* stats) { mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &segment->purge_mask)); // note: assumes commit_mask is always full for huge segments as otherwise the commit mask bits can overflow if (mi_commit_mask_is_full(&segment->commit_mask) && mi_commit_mask_is_empty(&segment->purge_mask)) return true; // fully committed mi_assert_internal(segment->kind != MI_SEGMENT_HUGE); return mi_segment_commit(segment, p, size, stats); } static bool mi_segment_purge(mi_segment_t* segment, uint8_t* p, size_t size, mi_stats_t* stats) { mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &segment->purge_mask)); if (!segment->allow_purge) return true; // purge conservative uint8_t* start = NULL; size_t full_size = 0; mi_commit_mask_t mask; mi_segment_commit_mask(segment, true /* conservative? 
*/, p, size, &start, &full_size, &mask); if (mi_commit_mask_is_empty(&mask) || full_size==0) return true; if (mi_commit_mask_any_set(&segment->commit_mask, &mask)) { // purging mi_assert_internal((void*)start != (void*)segment); mi_assert_internal(segment->allow_decommit); const bool decommitted = _mi_os_purge(start, full_size, stats); // reset or decommit if (decommitted) { mi_commit_mask_t cmask; mi_commit_mask_create_intersect(&segment->commit_mask, &mask, &cmask); _mi_stat_increase(&_mi_stats_main.committed, full_size - _mi_commit_mask_committed_size(&cmask, MI_SEGMENT_SIZE)); // adjust for double counting mi_commit_mask_clear(&segment->commit_mask, &mask); } } // always clear any scheduled purges in our range mi_commit_mask_clear(&segment->purge_mask, &mask); return true; } static void mi_segment_schedule_purge(mi_segment_t* segment, uint8_t* p, size_t size, mi_stats_t* stats) { if (!segment->allow_purge) return; if (mi_option_get(mi_option_purge_delay) == 0) { mi_segment_purge(segment, p, size, stats); } else { // register for future purge in the purge mask uint8_t* start = NULL; size_t full_size = 0; mi_commit_mask_t mask; mi_segment_commit_mask(segment, true /*conservative*/, p, size, &start, &full_size, &mask); if (mi_commit_mask_is_empty(&mask) || full_size==0) return; // update delayed commit mi_assert_internal(segment->purge_expire > 0 || mi_commit_mask_is_empty(&segment->purge_mask)); mi_commit_mask_t cmask; mi_commit_mask_create_intersect(&segment->commit_mask, &mask, &cmask); // only purge what is committed; span_free may try to decommit more mi_commit_mask_set(&segment->purge_mask, &cmask); mi_msecs_t now = _mi_clock_now(); if (segment->purge_expire == 0) { // no previous purgess, initialize now segment->purge_expire = now + mi_option_get(mi_option_purge_delay); } else if (segment->purge_expire <= now) { // previous purge mask already expired if (segment->purge_expire + mi_option_get(mi_option_purge_extend_delay) <= now) { 
mi_segment_try_purge(segment, true, stats); } else { segment->purge_expire = now + mi_option_get(mi_option_purge_extend_delay); // (mi_option_get(mi_option_purge_delay) / 8); // wait a tiny bit longer in case there is a series of free's } } else { // previous purge mask is not yet expired, increase the expiration by a bit. segment->purge_expire += mi_option_get(mi_option_purge_extend_delay); } } } static void mi_segment_try_purge(mi_segment_t* segment, bool force, mi_stats_t* stats) { if (!segment->allow_purge || mi_commit_mask_is_empty(&segment->purge_mask)) return; mi_msecs_t now = _mi_clock_now(); if (!force && now < segment->purge_expire) return; mi_commit_mask_t mask = segment->purge_mask; segment->purge_expire = 0; mi_commit_mask_create_empty(&segment->purge_mask); size_t idx; size_t count; mi_commit_mask_foreach(&mask, idx, count) { // if found, decommit that sequence if (count > 0) { uint8_t* p = (uint8_t*)segment + (idx*MI_COMMIT_SIZE); size_t size = count * MI_COMMIT_SIZE; mi_segment_purge(segment, p, size, stats); } } mi_commit_mask_foreach_end() mi_assert_internal(mi_commit_mask_is_empty(&segment->purge_mask)); } /* ----------------------------------------------------------- Span free ----------------------------------------------------------- */ static bool mi_segment_is_abandoned(mi_segment_t* segment) { return (segment->thread_id == 0); } // note: can be called on abandoned segments static void mi_segment_span_free(mi_segment_t* segment, size_t slice_index, size_t slice_count, bool allow_purge, mi_segments_tld_t* tld) { mi_assert_internal(slice_index < segment->slice_entries); mi_span_queue_t* sq = (segment->kind == MI_SEGMENT_HUGE || mi_segment_is_abandoned(segment) ? 
NULL : mi_span_queue_for(slice_count,tld)); if (slice_count==0) slice_count = 1; mi_assert_internal(slice_index + slice_count - 1 < segment->slice_entries); // set first and last slice (the intermediates can be undetermined) mi_slice_t* slice = &segment->slices[slice_index]; slice->slice_count = (uint32_t)slice_count; mi_assert_internal(slice->slice_count == slice_count); // no overflow? slice->slice_offset = 0; if (slice_count > 1) { mi_slice_t* last = &segment->slices[slice_index + slice_count - 1]; last->slice_count = 0; last->slice_offset = (uint32_t)(sizeof(mi_page_t)*(slice_count - 1)); last->xblock_size = 0; } // perhaps decommit if (allow_purge) { mi_segment_schedule_purge(segment, mi_slice_start(slice), slice_count * MI_SEGMENT_SLICE_SIZE, tld->stats); } // and push it on the free page queue (if it was not a huge page) if (sq != NULL) mi_span_queue_push( sq, slice ); else slice->xblock_size = 0; // mark huge page as free anyways } /* // called from reclaim to add existing free spans static void mi_segment_span_add_free(mi_slice_t* slice, mi_segments_tld_t* tld) { mi_segment_t* segment = _mi_ptr_segment(slice); mi_assert_internal(slice->xblock_size==0 && slice->slice_count>0 && slice->slice_offset==0); size_t slice_index = mi_slice_index(slice); mi_segment_span_free(segment,slice_index,slice->slice_count,tld); } */ static void mi_segment_span_remove_from_queue(mi_slice_t* slice, mi_segments_tld_t* tld) { mi_assert_internal(slice->slice_count > 0 && slice->slice_offset==0 && slice->xblock_size==0); mi_assert_internal(_mi_ptr_segment(slice)->kind != MI_SEGMENT_HUGE); mi_span_queue_t* sq = mi_span_queue_for(slice->slice_count, tld); mi_span_queue_delete(sq, slice); } // note: can be called on abandoned segments static mi_slice_t* mi_segment_span_free_coalesce(mi_slice_t* slice, mi_segments_tld_t* tld) { mi_assert_internal(slice != NULL && slice->slice_count > 0 && slice->slice_offset == 0); mi_segment_t* segment = _mi_ptr_segment(slice); bool is_abandoned = 
mi_segment_is_abandoned(segment);

  // for huge pages, just mark as free but don't add to the queues
  if (segment->kind == MI_SEGMENT_HUGE) {
    // issue #691: segment->used can be 0 if the huge page block was freed while abandoned (reclaim will get here in that case)
    mi_assert_internal((segment->used==0 && slice->xblock_size==0) || segment->used == 1); // decreased right after this call in `mi_segment_page_clear`
    slice->xblock_size = 0;  // mark as free anyways
    // we should mark the last slice `xblock_size=0` now to maintain invariants but we skip it to
    // avoid a possible cache miss (and the segment is about to be freed)
    return slice;
  }

  // otherwise coalesce the span and add to the free span queues
  size_t slice_count = slice->slice_count;
  mi_slice_t* next = slice + slice->slice_count;
  mi_assert_internal(next <= mi_segment_slices_end(segment));
  if (next < mi_segment_slices_end(segment) && next->xblock_size==0) {
    // free next block -- remove it from free and merge
    mi_assert_internal(next->slice_count > 0 && next->slice_offset==0);
    slice_count += next->slice_count; // extend
    if (!is_abandoned) { mi_segment_span_remove_from_queue(next, tld); }
  }
  if (slice > segment->slices) {
    mi_slice_t* prev = mi_slice_first(slice - 1);
    mi_assert_internal(prev >= segment->slices);
    if (prev->xblock_size==0) {
      // free previous slice -- remove it from free and merge
      mi_assert_internal(prev->slice_count > 0 && prev->slice_offset==0);
      slice_count += prev->slice_count;
      if (!is_abandoned) { mi_segment_span_remove_from_queue(prev, tld); }
      slice = prev;
    }
  }

  // and add the new free page
  mi_segment_span_free(segment, mi_slice_index(slice), slice_count, true, tld);
  return slice;
}

/* -----------------------------------------------------------
   Page allocation
----------------------------------------------------------- */

// Turn a free span into a page of `slice_count` slices: commit the memory,
// initialize the slice meta data (including back pointers), and return the page.
// Note: may still return NULL if committing the memory failed
static mi_page_t* mi_segment_span_allocate(mi_segment_t* segment, size_t slice_index, size_t slice_count,
                                           mi_segments_tld_t* tld) {
  mi_assert_internal(slice_index < segment->slice_entries);
  mi_slice_t* const slice = &segment->slices[slice_index];
  mi_assert_internal(slice->xblock_size==0 || slice->xblock_size==1);

  // commit before changing the slice data
  if (!mi_segment_ensure_committed(segment, _mi_segment_page_start_from_slice(segment, slice, 0, NULL), slice_count * MI_SEGMENT_SLICE_SIZE, tld->stats)) {
    return NULL; // commit failed!
  }

  // convert the slices to a page
  slice->slice_offset = 0;
  slice->slice_count = (uint32_t)slice_count;
  mi_assert_internal(slice->slice_count == slice_count);
  const size_t bsize = slice_count * MI_SEGMENT_SLICE_SIZE;
  slice->xblock_size = (uint32_t)(bsize >= MI_HUGE_BLOCK_SIZE ? MI_HUGE_BLOCK_SIZE : bsize);
  mi_page_t* page = mi_slice_to_page(slice);
  mi_assert_internal(mi_page_block_size(page) == bsize);

  // set slice back pointers for the first MI_MAX_SLICE_OFFSET entries
  size_t extra = slice_count-1;
  if (extra > MI_MAX_SLICE_OFFSET) extra = MI_MAX_SLICE_OFFSET;
  if (slice_index + extra >= segment->slice_entries) extra = segment->slice_entries - slice_index - 1; // huge objects may have more slices than available entries in the segment->slices
  mi_slice_t* slice_next = slice + 1;
  for (size_t i = 1; i <= extra; i++, slice_next++) {
    slice_next->slice_offset = (uint32_t)(sizeof(mi_slice_t)*i);
    slice_next->slice_count = 0;
    slice_next->xblock_size = 1;
  }

  // and also for the last one (if not set already) (the last one is needed for coalescing and for large alignments)
  // note: the cast is needed for ubsan since the index can be larger than MI_SLICES_PER_SEGMENT for huge allocations (see #543)
  mi_slice_t* last = slice + slice_count - 1;
  mi_slice_t* end = (mi_slice_t*)mi_segment_slices_end(segment);
  if (last > end) last = end;
  if (last > slice) {
    last->slice_offset = (uint32_t)(sizeof(mi_slice_t) * (last - slice));
    last->slice_count = 0;
    last->xblock_size = 1;
  }

  // and initialize the page
  page->is_committed = true;
  segment->used++;
  return page;
}
// Split a span: keep the first `slice_count` slices in `slice` and free the remainder.
static void mi_segment_slice_split(mi_segment_t* segment, mi_slice_t* slice, size_t slice_count, mi_segments_tld_t* tld) {
  mi_assert_internal(_mi_ptr_segment(slice) == segment);
  mi_assert_internal(slice->slice_count >= slice_count);
  mi_assert_internal(slice->xblock_size > 0); // no more in free queue
  if (slice->slice_count <= slice_count) return;
  mi_assert_internal(segment->kind != MI_SEGMENT_HUGE);
  size_t next_index = mi_slice_index(slice) + slice_count;
  size_t next_count = slice->slice_count - slice_count;
  mi_segment_span_free(segment, next_index, next_count, false /* don't purge left-over part */, tld);
  slice->slice_count = (uint32_t)slice_count;
}

// Find a free span of at least `slice_count` slices in the span queues,
// searching from the best-fit bin upwards, and allocate a page in it.
// Returns NULL if no suitable span was found or committing it failed.
static mi_page_t* mi_segments_page_find_and_allocate(size_t slice_count, mi_arena_id_t req_arena_id, mi_segments_tld_t* tld) {
  mi_assert_internal(slice_count*MI_SEGMENT_SLICE_SIZE <= MI_LARGE_OBJ_SIZE_MAX);
  // search from best fit up
  mi_span_queue_t* sq = mi_span_queue_for(slice_count, tld);
  if (slice_count == 0) slice_count = 1;
  while (sq <= &tld->spans[MI_SEGMENT_BIN_MAX]) {
    for (mi_slice_t* slice = sq->first; slice != NULL; slice = slice->next) {
      if (slice->slice_count >= slice_count) {
        // found one
        mi_segment_t* segment = _mi_ptr_segment(slice);
        if (_mi_arena_memid_is_suitable(segment->memid, req_arena_id)) {
          // found a suitable page span
          mi_span_queue_delete(sq, slice);
          if (slice->slice_count > slice_count) {
            mi_segment_slice_split(segment, slice, slice_count, tld);
          }
          mi_assert_internal(slice != NULL && slice->slice_count == slice_count && slice->xblock_size > 0);
          mi_page_t* page = mi_segment_span_allocate(segment, mi_slice_index(slice), slice->slice_count, tld);
          if (page == NULL) {
            // commit failed; return NULL but first restore the slice
            mi_segment_span_free_coalesce(slice, tld);
            return NULL;
          }
          return page;
        }
      }
    }
    sq++;
  }
  // could not find a page..
  return NULL;
}

/* -----------------------------------------------------------
   Segment allocation
----------------------------------------------------------- */

// Allocate the segment memory itself (from an arena or the OS) and ensure the
// meta data (info slices) part is committed; for `page_alignment > 0` the slice
// counts are recalculated to accommodate the alignment offset.
static mi_segment_t* mi_segment_os_alloc( size_t required, size_t page_alignment, bool eager_delayed, mi_arena_id_t req_arena_id,
                                          size_t* psegment_slices, size_t* ppre_size, size_t* pinfo_slices,
                                          bool commit, mi_segments_tld_t* tld, mi_os_tld_t* os_tld)
{
  mi_memid_t memid;
  bool allow_large = (!eager_delayed && (MI_SECURE == 0)); // only allow large OS pages once we are no longer lazy
  size_t align_offset = 0;
  size_t alignment = MI_SEGMENT_ALIGN;

  if (page_alignment > 0) {
    // mi_assert_internal(huge_page != NULL);
    mi_assert_internal(page_alignment >= MI_SEGMENT_ALIGN);
    alignment = page_alignment;
    const size_t info_size = (*pinfo_slices) * MI_SEGMENT_SLICE_SIZE;
    align_offset = _mi_align_up( info_size, MI_SEGMENT_ALIGN );
    const size_t extra = align_offset - info_size;
    // recalculate due to potential guard pages
    *psegment_slices = mi_segment_calculate_slices(required + extra, ppre_size, pinfo_slices);
  }

  const size_t segment_size = (*psegment_slices) * MI_SEGMENT_SLICE_SIZE;
  mi_segment_t* segment = (mi_segment_t*)_mi_arena_alloc_aligned(segment_size, alignment, align_offset, commit, allow_large, req_arena_id, &memid, os_tld);
  if (segment == NULL) {
    return NULL; // failed to allocate
  }

  // ensure metadata part of the segment is committed
  mi_commit_mask_t commit_mask;
  if (memid.initially_committed) {
    mi_commit_mask_create_full(&commit_mask);
  }
  else {
    // at least commit the info slices
    const size_t commit_needed = _mi_divide_up((*pinfo_slices)*MI_SEGMENT_SLICE_SIZE, MI_COMMIT_SIZE);
    mi_assert_internal(commit_needed>0);
    mi_commit_mask_create(0, commit_needed, &commit_mask);
    mi_assert_internal(commit_needed*MI_COMMIT_SIZE >= (*pinfo_slices)*MI_SEGMENT_SLICE_SIZE);
    if (!_mi_os_commit(segment, commit_needed*MI_COMMIT_SIZE, NULL, tld->stats)) {
      // commit failed: release the segment memory again
      _mi_arena_free(segment,segment_size,0,memid,tld->stats);
      return NULL;
    }
  }
mi_assert_internal(segment != NULL && (uintptr_t)segment % MI_SEGMENT_SIZE == 0);

  // initialize commit/purge book keeping
  segment->memid = memid;
  segment->allow_decommit = !memid.is_pinned;
  segment->allow_purge = segment->allow_decommit && (mi_option_get(mi_option_purge_delay) >= 0);
  segment->segment_size = segment_size;
  segment->commit_mask = commit_mask;
  segment->purge_expire = 0;
  mi_commit_mask_create_empty(&segment->purge_mask);
  mi_atomic_store_ptr_release(mi_segment_t, &segment->abandoned_next, NULL); // tsan

  mi_segments_track_size((long)(segment_size), tld);
  _mi_segment_map_allocated_at(segment);
  return segment;
}

// Allocate a segment from the OS aligned to `MI_SEGMENT_SIZE` .
// For `required == 0` a normal segment is allocated; otherwise a huge segment
// is allocated and `*huge_page` is set to its single huge page.
static mi_segment_t* mi_segment_alloc(size_t required, size_t page_alignment, mi_arena_id_t req_arena_id, mi_segments_tld_t* tld, mi_os_tld_t* os_tld, mi_page_t** huge_page)
{
  mi_assert_internal((required==0 && huge_page==NULL) || (required>0 && huge_page != NULL));

  // calculate needed sizes first
  size_t info_slices;
  size_t pre_size;
  size_t segment_slices = mi_segment_calculate_slices(required, &pre_size, &info_slices);

  // Commit eagerly only if not the first N lazy segments (to reduce impact of many threads that allocate just a little)
  const bool eager_delay = (// !_mi_os_has_overcommit() &&          // never delay on overcommit systems
                            _mi_current_thread_count() > 1 &&       // do not delay for the first N threads
                            tld->count < (size_t)mi_option_get(mi_option_eager_commit_delay));
  const bool eager = !eager_delay && mi_option_is_enabled(mi_option_eager_commit);
  bool commit = eager || (required > 0);

  // Allocate the segment from the OS
  mi_segment_t* segment = mi_segment_os_alloc(required, page_alignment, eager_delay, req_arena_id,
                                              &segment_slices, &pre_size, &info_slices, commit, tld, os_tld);
  if (segment == NULL) return NULL;

  // zero the segment info? -- not always needed as it may be zero initialized from the OS
  if (!segment->memid.initially_zero) {
    ptrdiff_t ofs    = offsetof(mi_segment_t, next);
    size_t    prefix = offsetof(mi_segment_t, slices) - ofs;
    size_t    zsize  = prefix + (sizeof(mi_slice_t) * (segment_slices + 1)); // one more
    _mi_memzero((uint8_t*)segment + ofs, zsize);
  }

  // initialize the rest of the segment info
  const size_t slice_entries = (segment_slices > MI_SLICES_PER_SEGMENT ? MI_SLICES_PER_SEGMENT : segment_slices);
  segment->segment_slices = segment_slices;
  segment->segment_info_slices = info_slices;
  segment->thread_id = _mi_thread_id();
  segment->cookie = _mi_ptr_cookie(segment);
  segment->slice_entries = slice_entries;
  segment->kind = (required == 0 ? MI_SEGMENT_NORMAL : MI_SEGMENT_HUGE);

  // _mi_memzero(segment->slices, sizeof(mi_slice_t)*(info_slices+1));
  _mi_stat_increase(&tld->stats->page_committed, mi_segment_info_size(segment));

  // set up guard pages
  size_t guard_slices = 0;
  if (MI_SECURE>0) {
    // in secure mode, we set up a protected page in between the segment info
    // and the page data, and at the end of the segment.
    size_t os_pagesize = _mi_os_page_size();
    mi_assert_internal(mi_segment_info_size(segment) - os_pagesize >= pre_size);
    _mi_os_protect((uint8_t*)segment + mi_segment_info_size(segment) - os_pagesize, os_pagesize);
    uint8_t* end = (uint8_t*)segment + mi_segment_size(segment) - os_pagesize;
    mi_segment_ensure_committed(segment, end, os_pagesize, tld->stats);
    _mi_os_protect(end, os_pagesize);
    if (slice_entries == segment_slices) segment->slice_entries--; // don't use the last slice :-(
    guard_slices = 1;
  }

  // reserve first slices for segment info
  mi_page_t* page0 = mi_segment_span_allocate(segment, 0, info_slices, tld);
  mi_assert_internal(page0!=NULL); if (page0==NULL) return NULL; // cannot fail as we always commit in advance
  mi_assert_internal(segment->used == 1);
  segment->used = 0; // don't count our internal slices towards usage

  // initialize initial free pages
  if (segment->kind == MI_SEGMENT_NORMAL) { // not a huge page
    mi_assert_internal(huge_page==NULL);
    mi_segment_span_free(segment, info_slices, segment->slice_entries - info_slices, false /* don't purge */, tld);
  }
  else {
    mi_assert_internal(huge_page!=NULL);
    mi_assert_internal(mi_commit_mask_is_empty(&segment->purge_mask));
    mi_assert_internal(mi_commit_mask_is_full(&segment->commit_mask));
    *huge_page = mi_segment_span_allocate(segment, info_slices, segment_slices - info_slices - guard_slices, tld);
    mi_assert_internal(*huge_page != NULL); // cannot fail as we commit in advance
  }

  mi_assert_expensive(mi_segment_is_valid(segment,tld));
  return segment;
}

// Free a segment: remove its free spans from the span queues and return
// the memory to the arena/OS.
static void mi_segment_free(mi_segment_t* segment, bool force, mi_segments_tld_t* tld) {
  MI_UNUSED(force);
  mi_assert_internal(segment != NULL);
  mi_assert_internal(segment->next == NULL);
  mi_assert_internal(segment->used == 0);

  // Remove the free pages
  mi_slice_t* slice = &segment->slices[0];
  const mi_slice_t* end = mi_segment_slices_end(segment);
  #if MI_DEBUG>1
  size_t page_count = 0;
  #endif
  while (slice < end) {
    mi_assert_internal(slice->slice_count > 0);
mi_assert_internal(slice->slice_offset == 0);
    mi_assert_internal(mi_slice_index(slice)==0 || slice->xblock_size == 0); // no more used pages ..
    if (slice->xblock_size == 0 && segment->kind != MI_SEGMENT_HUGE) {
      mi_segment_span_remove_from_queue(slice, tld);
    }
    #if MI_DEBUG>1
    page_count++;
    #endif
    slice = slice + slice->slice_count;
  }
  mi_assert_internal(page_count == 2); // first page is allocated by the segment itself

  // stats
  _mi_stat_decrease(&tld->stats->page_committed, mi_segment_info_size(segment));

  // return it to the OS
  mi_segment_os_free(segment, tld);
}

/* -----------------------------------------------------------
   Page Free
----------------------------------------------------------- */

static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld);

// Clear a fully-free page and give its span back to the segment,
// coalescing with neighbouring free spans; returns the resulting slice.
// note: can be called on abandoned pages
static mi_slice_t* mi_segment_page_clear(mi_page_t* page, mi_segments_tld_t* tld) {
  mi_assert_internal(page->xblock_size > 0);
  mi_assert_internal(mi_page_all_free(page));
  mi_segment_t* segment = _mi_ptr_segment(page);
  mi_assert_internal(segment->used > 0);

  size_t inuse = page->capacity * mi_page_block_size(page);
  _mi_stat_decrease(&tld->stats->page_committed, inuse);
  _mi_stat_decrease(&tld->stats->pages, 1);

  // reset the page memory to reduce memory pressure?
  if (segment->allow_decommit && mi_option_is_enabled(mi_option_deprecated_page_reset)) {
    size_t psize;
    uint8_t* start = _mi_page_start(segment, page, &psize);
    _mi_os_reset(start, psize, tld->stats);
  }

  // zero the page data, but not the segment fields
  page->is_zero_init = false;
  ptrdiff_t ofs = offsetof(mi_page_t, capacity);
  _mi_memzero((uint8_t*)page + ofs, sizeof(*page) - ofs);
  page->xblock_size = 1;

  // and free it
  mi_slice_t* slice = mi_segment_span_free_coalesce(mi_page_to_slice(page), tld);
  segment->used--;
  // cannot assert segment valid as it is called during reclaim
  // mi_assert_expensive(mi_segment_is_valid(segment, tld));
  return slice;
}

// Free a page; when the segment has no more used pages it is freed,
// and when all remaining used pages are abandoned the segment is abandoned.
void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld)
{
  mi_assert(page != NULL);

  mi_segment_t* segment = _mi_page_segment(page);
  mi_assert_expensive(mi_segment_is_valid(segment,tld));

  // mark it as free now
  mi_segment_page_clear(page, tld);
  mi_assert_expensive(mi_segment_is_valid(segment, tld));

  if (segment->used == 0) {
    // no more used pages; remove from the free list and free the segment
    mi_segment_free(segment, force, tld);
  }
  else if (segment->used == segment->abandoned) {
    // only abandoned pages; remove from free list and abandon
    mi_segment_abandon(segment,tld);
  }
}

/* -----------------------------------------------------------
Abandonment

When threads terminate, they can leave segments with
live blocks (reachable through other threads). Such segments
are "abandoned" and will be reclaimed by other threads to
reuse their pages and/or free them eventually.

We maintain a global list of abandoned segments that are
reclaimed on demand. Since this is shared among threads
the implementation needs to avoid the A-B-A problem on
popping abandoned segments: We use tagged pointers to
avoid accidentally identifying reused segments, much
like stamped references in Java. Secondly, we maintain
a reader counter to avoid resetting or decommitting
segments that have a pending read operation.

Note: the current implementation is one possible design;
another way might be to keep track of abandoned segments
in the arenas/segment_cache's. This would have the advantage
of keeping all concurrent code in one place and not needing
to deal with ABA issues. The drawback is that it is unclear
how to scan abandoned segments efficiently in that case as
they would be spread among all other segments in the arenas.
----------------------------------------------------------- */

// Use the bottom 20-bits (on 64-bit) of the aligned segment pointers
// to put in a tag that increments on update to avoid the A-B-A problem.
#define MI_TAGGED_MASK   MI_SEGMENT_MASK
typedef uintptr_t        mi_tagged_segment_t;

// Extract the segment pointer from a tagged pointer (mask off the tag bits).
static mi_segment_t* mi_tagged_segment_ptr(mi_tagged_segment_t ts) {
  return (mi_segment_t*)(ts & ~MI_TAGGED_MASK);
}

// Create a tagged pointer for `segment` whose tag is one more than
// the tag of the previous value `ts` (wrapping within MI_TAGGED_MASK).
static mi_tagged_segment_t mi_tagged_segment(mi_segment_t* segment, mi_tagged_segment_t ts) {
  mi_assert_internal(((uintptr_t)segment & MI_TAGGED_MASK) == 0);
  uintptr_t tag = ((ts & MI_TAGGED_MASK) + 1) & MI_TAGGED_MASK;
  return ((uintptr_t)segment | tag);
}

// This is a list of visited abandoned pages that were full at the time.
// this list migrates to `abandoned` when that becomes NULL. The use of
// this list reduces contention and the rate at which segments are visited.
static mi_decl_cache_align _Atomic(mi_segment_t*)       abandoned_visited; // = NULL

// The abandoned page list (tagged as it supports pop)
static mi_decl_cache_align _Atomic(mi_tagged_segment_t) abandoned; // = NULL

// Maintain these for debug purposes (these counts may be a bit off)
static mi_decl_cache_align _Atomic(size_t)              abandoned_count;
static mi_decl_cache_align _Atomic(size_t)              abandoned_visited_count;

// We also maintain a count of current readers of the abandoned list
// in order to prevent resetting/decommitting segment memory if it might
// still be read.
static mi_decl_cache_align _Atomic(size_t)              abandoned_readers; // = 0

// Push a segment on the visited list (CAS loop on the list head).
static void mi_abandoned_visited_push(mi_segment_t* segment) {
  mi_assert_internal(segment->thread_id == 0);
  mi_assert_internal(mi_atomic_load_ptr_relaxed(mi_segment_t,&segment->abandoned_next) == NULL);
  mi_assert_internal(segment->next == NULL);
  mi_assert_internal(segment->used > 0);
  mi_segment_t* anext = mi_atomic_load_ptr_relaxed(mi_segment_t, &abandoned_visited);
  do {
    mi_atomic_store_ptr_release(mi_segment_t, &segment->abandoned_next, anext);
  } while (!mi_atomic_cas_ptr_weak_release(mi_segment_t, &abandoned_visited, &anext, segment));
  mi_atomic_increment_relaxed(&abandoned_visited_count);
}

// Move the visited list to the abandoned list.
static bool mi_abandoned_visited_revisit(void)
{
  // quick check if the visited list is empty
  if (mi_atomic_load_ptr_relaxed(mi_segment_t, &abandoned_visited) == NULL) return false;

  // grab the whole visited list
  mi_segment_t* first = mi_atomic_exchange_ptr_acq_rel(mi_segment_t, &abandoned_visited, NULL);
  if (first == NULL) return false;

  // first try to swap directly if the abandoned list happens to be NULL
  mi_tagged_segment_t afirst;
  mi_tagged_segment_t ts = mi_atomic_load_relaxed(&abandoned);
  if (mi_tagged_segment_ptr(ts)==NULL) {
    size_t count = mi_atomic_load_relaxed(&abandoned_visited_count);
    afirst = mi_tagged_segment(first, ts);
    if (mi_atomic_cas_strong_acq_rel(&abandoned, &ts, afirst)) {
      mi_atomic_add_relaxed(&abandoned_count, count);
      mi_atomic_sub_relaxed(&abandoned_visited_count, count);
      return true;
    }
  }

  // find the last element of the visited list: O(n)
  mi_segment_t* last = first;
  mi_segment_t* next;
  while ((next = mi_atomic_load_ptr_relaxed(mi_segment_t, &last->abandoned_next)) != NULL) {
    last = next;
  }

  // and atomically prepend to the abandoned list
  // (no need to increase the readers as we don't access the abandoned segments)
  mi_tagged_segment_t anext = mi_atomic_load_relaxed(&abandoned);
  size_t count;
  do {
    count = mi_atomic_load_relaxed(&abandoned_visited_count);
    mi_atomic_store_ptr_release(mi_segment_t, &last->abandoned_next, mi_tagged_segment_ptr(anext));
    afirst = mi_tagged_segment(first, anext);
  } while (!mi_atomic_cas_weak_release(&abandoned, &anext, afirst));
  mi_atomic_add_relaxed(&abandoned_count, count);
  mi_atomic_sub_relaxed(&abandoned_visited_count, count);
  return true;
}

// Push on the abandoned list.
static void mi_abandoned_push(mi_segment_t* segment) {
  mi_assert_internal(segment->thread_id == 0);
  mi_assert_internal(mi_atomic_load_ptr_relaxed(mi_segment_t, &segment->abandoned_next) == NULL);
  mi_assert_internal(segment->next == NULL);
  mi_assert_internal(segment->used > 0);
  mi_tagged_segment_t next;
  mi_tagged_segment_t ts = mi_atomic_load_relaxed(&abandoned);
  do {
    mi_atomic_store_ptr_release(mi_segment_t, &segment->abandoned_next, mi_tagged_segment_ptr(ts));
    next = mi_tagged_segment(segment, ts);
  } while (!mi_atomic_cas_weak_release(&abandoned, &ts, next));
  mi_atomic_increment_relaxed(&abandoned_count);
}

// Wait until there are no more pending reads on segments that used to be in the abandoned list
// called for example from `arena.c` before decommitting
void _mi_abandoned_await_readers(void) {
  size_t n;
  do {
    n = mi_atomic_load_acquire(&abandoned_readers);
    if (n != 0) mi_atomic_yield();
  } while (n != 0);
}

// Pop from the abandoned list
static mi_segment_t* mi_abandoned_pop(void) {
  mi_segment_t* segment;
  // Check efficiently if it is empty (or if the visited list needs to be moved)
  mi_tagged_segment_t ts = mi_atomic_load_relaxed(&abandoned);
  segment = mi_tagged_segment_ptr(ts);
  if mi_likely(segment == NULL) {
    if mi_likely(!mi_abandoned_visited_revisit()) { // try to swap in the visited list on NULL
      return NULL;
    }
  }
  // Do a pop. We use a reader count to prevent
  // a segment to be decommitted while a read is still pending,
  // and a tagged pointer to prevent A-B-A link corruption.
// (this is called from `region.c:_mi_mem_free` for example)
  mi_atomic_increment_relaxed(&abandoned_readers); // ensure no segment gets decommitted
  mi_tagged_segment_t next = 0;
  ts = mi_atomic_load_acquire(&abandoned);
  do {
    segment = mi_tagged_segment_ptr(ts);
    if (segment != NULL) {
      mi_segment_t* anext = mi_atomic_load_ptr_relaxed(mi_segment_t, &segment->abandoned_next);
      next = mi_tagged_segment(anext, ts); // note: reads the segment's `abandoned_next` field so should not be decommitted
    }
  } while (segment != NULL && !mi_atomic_cas_weak_acq_rel(&abandoned, &ts, next));
  mi_atomic_decrement_relaxed(&abandoned_readers); // release reader lock
  if (segment != NULL) {
    mi_atomic_store_ptr_release(mi_segment_t, &segment->abandoned_next, NULL);
    mi_atomic_decrement_relaxed(&abandoned_count);
  }
  return segment;
}

/* -----------------------------------------------------------
   Abandon segment/page
----------------------------------------------------------- */

// Abandon a whole segment: remove its free spans from the span queues,
// perform pending purges, and push it on the global abandoned list.
static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) {
  mi_assert_internal(segment->used == segment->abandoned);
  mi_assert_internal(segment->used > 0);
  mi_assert_internal(mi_atomic_load_ptr_relaxed(mi_segment_t, &segment->abandoned_next) == NULL);
  mi_assert_internal(segment->abandoned_visits == 0);
  mi_assert_expensive(mi_segment_is_valid(segment,tld));

  // remove the free pages from the free page queues
  mi_slice_t* slice = &segment->slices[0];
  const mi_slice_t* end = mi_segment_slices_end(segment);
  while (slice < end) {
    mi_assert_internal(slice->slice_count > 0);
    mi_assert_internal(slice->slice_offset == 0);
    if (slice->xblock_size == 0) { // a free page
      mi_segment_span_remove_from_queue(slice,tld);
      slice->xblock_size = 0; // but keep it free
    }
    slice = slice + slice->slice_count;
  }

  // perform delayed decommits (forcing is much slower on mstress)
  mi_segment_try_purge(segment, mi_option_is_enabled(mi_option_abandoned_page_purge) /* force? */, tld->stats);

  // all pages in the segment are abandoned; add it to the abandoned list
  _mi_stat_increase(&tld->stats->segments_abandoned, 1);
  mi_segments_track_size(-((long)mi_segment_size(segment)), tld);
  segment->thread_id = 0;
  mi_atomic_store_ptr_release(mi_segment_t, &segment->abandoned_next, NULL);
  segment->abandoned_visits = 1; // from 0 to 1 to signify it is abandoned
  mi_abandoned_push(segment);
}

// Abandon a single page; when all used pages in the segment are
// abandoned, the segment itself is abandoned.
void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld) {
  mi_assert(page != NULL);
  mi_assert_internal(mi_page_thread_free_flag(page)==MI_NEVER_DELAYED_FREE);
  mi_assert_internal(mi_page_heap(page) == NULL);
  mi_segment_t* segment = _mi_page_segment(page);

  mi_assert_expensive(mi_segment_is_valid(segment,tld));
  segment->abandoned++;

  _mi_stat_increase(&tld->stats->pages_abandoned, 1);
  mi_assert_internal(segment->abandoned <= segment->used);
  if (segment->used == segment->abandoned) {
    // all pages are abandoned, abandon the entire segment
    mi_segment_abandon(segment, tld);
  }
}

/* -----------------------------------------------------------
  Reclaim abandoned pages
----------------------------------------------------------- */

// Start iterating the slices just after the initial segment-info page;
// sets `*end` to the end of the slice array.
static mi_slice_t* mi_slices_start_iterate(mi_segment_t* segment, const mi_slice_t** end) {
  mi_slice_t* slice = &segment->slices[0];
  *end = mi_segment_slices_end(segment);
  mi_assert_internal(slice->slice_count>0 && slice->xblock_size>0); // segment allocated page
  slice = slice + slice->slice_count; // skip the first segment allocated page
  return slice;
}

// Possibly free pages and check if free space is available
// (either a span of at least `slices_needed` slices, or a page of
// `block_size` with available blocks).
static bool mi_segment_check_free(mi_segment_t* segment, size_t slices_needed, size_t block_size, mi_segments_tld_t* tld)
{
  mi_assert_internal(block_size < MI_HUGE_BLOCK_SIZE);
  mi_assert_internal(mi_segment_is_abandoned(segment));
  bool has_page = false;

  // for all slices
  const mi_slice_t* end;
  mi_slice_t* slice = mi_slices_start_iterate(segment, &end);
  while (slice < end) {
    mi_assert_internal(slice->slice_count > 0);
    mi_assert_internal(slice->slice_offset == 0);
    if (mi_slice_is_used(slice)) { // used page
      // ensure used count is up to date and collect potential concurrent frees
      mi_page_t* const page = mi_slice_to_page(slice);
      _mi_page_free_collect(page, false);
      if (mi_page_all_free(page)) {
        // if this page is all free now, free it without adding to any queues (yet)
        mi_assert_internal(page->next == NULL && page->prev==NULL);
        _mi_stat_decrease(&tld->stats->pages_abandoned, 1);
        segment->abandoned--;
        slice = mi_segment_page_clear(page, tld); // re-assign slice due to coalesce!
        mi_assert_internal(!mi_slice_is_used(slice));
        if (slice->slice_count >= slices_needed) {
          has_page = true;
        }
      }
      else {
        if (page->xblock_size == block_size && mi_page_has_any_available(page)) {
          // a page has available free blocks of the right size
          has_page = true;
        }
      }
    }
    else {
      // empty span
      if (slice->slice_count >= slices_needed) {
        has_page = true;
      }
    }
    slice = slice + slice->slice_count;
  }
  return has_page;
}

// Reclaim an abandoned segment; returns NULL if the segment was freed
// set `right_page_reclaimed` to `true` if it reclaimed a page of the right `block_size` that was not full.
static mi_segment_t* mi_segment_reclaim(mi_segment_t* segment, mi_heap_t* heap, size_t requested_block_size, bool* right_page_reclaimed, mi_segments_tld_t* tld) {
  mi_assert_internal(mi_atomic_load_ptr_relaxed(mi_segment_t, &segment->abandoned_next) == NULL);
  mi_assert_expensive(mi_segment_is_valid(segment, tld));
  if (right_page_reclaimed != NULL) { *right_page_reclaimed = false; }

  // take ownership of the segment for the current thread
  segment->thread_id = _mi_thread_id();
  segment->abandoned_visits = 0;
  mi_segments_track_size((long)mi_segment_size(segment), tld);
  mi_assert_internal(segment->next == NULL);
  _mi_stat_decrease(&tld->stats->segments_abandoned, 1);

  // for all slices
  const mi_slice_t* end;
  mi_slice_t* slice = mi_slices_start_iterate(segment, &end);
  while (slice < end) {
    mi_assert_internal(slice->slice_count > 0);
    mi_assert_internal(slice->slice_offset == 0);
    if (mi_slice_is_used(slice)) {
      // in use: reclaim the page in our heap
      mi_page_t* page = mi_slice_to_page(slice);
      mi_assert_internal(page->is_committed);
      mi_assert_internal(mi_page_thread_free_flag(page)==MI_NEVER_DELAYED_FREE);
      mi_assert_internal(mi_page_heap(page) == NULL);
      mi_assert_internal(page->next == NULL && page->prev==NULL);
      _mi_stat_decrease(&tld->stats->pages_abandoned, 1);
      segment->abandoned--;
      // set the heap again and allow delayed free again
      mi_page_set_heap(page, heap);
      _mi_page_use_delayed_free(page, MI_USE_DELAYED_FREE, true); // override never (after heap is set)
      _mi_page_free_collect(page, false); // ensure used count is up to date
      if (mi_page_all_free(page)) {
        // if everything free by now, free the page
        slice = mi_segment_page_clear(page, tld); // set slice again due to coalescing
      }
      else {
        // otherwise reclaim it into the heap
        _mi_page_reclaim(heap, page);
        if (requested_block_size == page->xblock_size && mi_page_has_any_available(page)) {
          if (right_page_reclaimed != NULL) { *right_page_reclaimed = true; }
        }
      }
    }
    else {
      // the span is free, add it to our page queues
      slice = mi_segment_span_free_coalesce(slice, tld); // set slice again due to coalescing
    }
    mi_assert_internal(slice->slice_count>0 && slice->slice_offset==0);
    slice = slice + slice->slice_count;
  }

  mi_assert(segment->abandoned == 0);
  if (segment->used == 0) { // due to page_clear
    mi_assert_internal(right_page_reclaimed == NULL || !(*right_page_reclaimed));
    mi_segment_free(segment, false, tld);
    return NULL;
  }
  else {
    return segment;
  }
}

// Reclaim every abandoned segment into `heap`.
void _mi_abandoned_reclaim_all(mi_heap_t* heap, mi_segments_tld_t* tld) {
  mi_segment_t* segment;
  while ((segment = mi_abandoned_pop()) != NULL) {
    mi_segment_reclaim(segment, heap, 0, NULL, tld);
  }
}

// Try a bounded number of abandoned segments and reclaim one with a large
// enough free span or a usable page of `block_size`; sets `*reclaimed` when
// a page of the right size was reclaimed directly into the heap.
static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t needed_slices, size_t block_size, bool* reclaimed, mi_segments_tld_t* tld)
{
  *reclaimed = false;
  mi_segment_t* segment;
  long max_tries = mi_option_get_clamp(mi_option_max_segment_reclaim, 8, 1024); // limit the work to bound allocation times
  while ((max_tries-- > 0) && ((segment = mi_abandoned_pop()) != NULL)) {
    segment->abandoned_visits++;
    // todo: an arena exclusive heap will potentially visit many abandoned unsuitable segments
    // and push them into the visited list and use many tries. Perhaps we can skip non-suitable ones in a better way?
    bool is_suitable = _mi_heap_memid_is_suitable(heap, segment->memid);
    bool has_page = mi_segment_check_free(segment,needed_slices,block_size,tld); // try to free up pages (due to concurrent frees)
    if (segment->used == 0) {
      // free the segment (by forced reclaim) to make it available to other threads.
      // note1: we prefer to free a segment as that might lead to reclaiming another
      // segment that is still partially used.
// note2: we could in principle optimize this by skipping reclaim and directly // freeing but that would violate some invariants temporarily) mi_segment_reclaim(segment, heap, 0, NULL, tld); } else if (has_page && is_suitable) { // found a large enough free span, or a page of the right block_size with free space // we return the result of reclaim (which is usually `segment`) as it might free // the segment due to concurrent frees (in which case `NULL` is returned). return mi_segment_reclaim(segment, heap, block_size, reclaimed, tld); } else if (segment->abandoned_visits > 3 && is_suitable) { // always reclaim on 3rd visit to limit the abandoned queue length. mi_segment_reclaim(segment, heap, 0, NULL, tld); } else { // otherwise, push on the visited list so it gets not looked at too quickly again mi_segment_try_purge(segment, true /* force? */, tld->stats); // force purge if needed as we may not visit soon again mi_abandoned_visited_push(segment); } } return NULL; } void _mi_abandoned_collect(mi_heap_t* heap, bool force, mi_segments_tld_t* tld) { mi_segment_t* segment; int max_tries = (force ? 16*1024 : 1024); // limit latency if (force) { mi_abandoned_visited_revisit(); } while ((max_tries-- > 0) && ((segment = mi_abandoned_pop()) != NULL)) { mi_segment_check_free(segment,0,0,tld); // try to free up pages (due to concurrent frees) if (segment->used == 0) { // free the segment (by forced reclaim) to make it available to other threads. // note: we could in principle optimize this by skipping reclaim and directly // freeing but that would violate some invariants temporarily) mi_segment_reclaim(segment, heap, 0, NULL, tld); } else { // otherwise, purge if needed and push on the visited list // note: forced purge can be expensive if many threads are destroyed/created as in mstress. 
mi_segment_try_purge(segment, force, tld->stats); mi_abandoned_visited_push(segment); } } } /* ----------------------------------------------------------- Reclaim or allocate ----------------------------------------------------------- */ static mi_segment_t* mi_segment_reclaim_or_alloc(mi_heap_t* heap, size_t needed_slices, size_t block_size, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { mi_assert_internal(block_size < MI_HUGE_BLOCK_SIZE); mi_assert_internal(block_size <= MI_LARGE_OBJ_SIZE_MAX); // 1. try to reclaim an abandoned segment bool reclaimed; mi_segment_t* segment = mi_segment_try_reclaim(heap, needed_slices, block_size, &reclaimed, tld); if (reclaimed) { // reclaimed the right page right into the heap mi_assert_internal(segment != NULL); return NULL; // pretend out-of-memory as the page will be in the page queue of the heap with available blocks } else if (segment != NULL) { // reclaimed a segment with a large enough empty span in it return segment; } // 2. otherwise allocate a fresh segment return mi_segment_alloc(0, 0, heap->arena_id, tld, os_tld, NULL); } /* ----------------------------------------------------------- Page allocation ----------------------------------------------------------- */ static mi_page_t* mi_segments_page_alloc(mi_heap_t* heap, mi_page_kind_t page_kind, size_t required, size_t block_size, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { mi_assert_internal(required <= MI_LARGE_OBJ_SIZE_MAX && page_kind <= MI_PAGE_LARGE); // find a free page size_t page_size = _mi_align_up(required, (required > MI_MEDIUM_PAGE_SIZE ? MI_MEDIUM_PAGE_SIZE : MI_SEGMENT_SLICE_SIZE)); size_t slices_needed = page_size / MI_SEGMENT_SLICE_SIZE; mi_assert_internal(slices_needed * MI_SEGMENT_SLICE_SIZE == page_size); mi_page_t* page = mi_segments_page_find_and_allocate(slices_needed, heap->arena_id, tld); //(required <= MI_SMALL_SIZE_MAX ? 
0 : slices_needed), tld); if (page==NULL) { // no free page, allocate a new segment and try again if (mi_segment_reclaim_or_alloc(heap, slices_needed, block_size, tld, os_tld) == NULL) { // OOM or reclaimed a good page in the heap return NULL; } else { // otherwise try again return mi_segments_page_alloc(heap, page_kind, required, block_size, tld, os_tld); } } mi_assert_internal(page != NULL && page->slice_count*MI_SEGMENT_SLICE_SIZE == page_size); mi_assert_internal(_mi_ptr_segment(page)->thread_id == _mi_thread_id()); mi_segment_try_purge(_mi_ptr_segment(page), false, tld->stats); return page; } /* ----------------------------------------------------------- Huge page allocation ----------------------------------------------------------- */ static mi_page_t* mi_segment_huge_page_alloc(size_t size, size_t page_alignment, mi_arena_id_t req_arena_id, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { mi_page_t* page = NULL; mi_segment_t* segment = mi_segment_alloc(size,page_alignment,req_arena_id,tld,os_tld,&page); if (segment == NULL || page==NULL) return NULL; mi_assert_internal(segment->used==1); mi_assert_internal(mi_page_block_size(page) >= size); #if MI_HUGE_PAGE_ABANDON segment->thread_id = 0; // huge segments are immediately abandoned #endif // for huge pages we initialize the xblock_size as we may // overallocate to accommodate large alignments. size_t psize; uint8_t* start = _mi_segment_page_start(segment, page, &psize); page->xblock_size = (psize > MI_HUGE_BLOCK_SIZE ? 
MI_HUGE_BLOCK_SIZE : (uint32_t)psize);

  // decommit the part of the prefix of a page that will not be used; this can
  // be quite large (close to MI_SEGMENT_SIZE)
  if (page_alignment > 0 && segment->allow_decommit) {
    uint8_t* aligned_p = (uint8_t*)_mi_align_up((uintptr_t)start, page_alignment);
    mi_assert_internal(_mi_is_aligned(aligned_p, page_alignment));
    mi_assert_internal(psize - (aligned_p - start) >= size);
    uint8_t* decommit_start = start + sizeof(mi_block_t); // for the free list
    ptrdiff_t decommit_size = aligned_p - decommit_start;
    _mi_os_reset(decommit_start, decommit_size, &_mi_stats_main); // note: cannot use segment_decommit on huge segments
  }
  return page;
}

#if MI_HUGE_PAGE_ABANDON
// Free a huge block from another thread: huge page segments are always
// abandoned (thread_id == 0) and can be freed immediately by any thread that
// successfully claims them.
void _mi_segment_huge_page_free(mi_segment_t* segment, mi_page_t* page, mi_block_t* block)
{
  // huge page segments are always abandoned and can be freed immediately by any thread
  mi_assert_internal(segment->kind==MI_SEGMENT_HUGE);
  mi_assert_internal(segment == _mi_page_segment(page));
  mi_assert_internal(mi_atomic_load_relaxed(&segment->thread_id)==0);
  // claim it and free
  mi_heap_t* heap = mi_heap_get_default(); // issue #221; don't use the internal get_default_heap as we need to ensure the thread is initialized.
// paranoia: if this it the last reference, the cas should always succeed size_t expected_tid = 0; if (mi_atomic_cas_strong_acq_rel(&segment->thread_id, &expected_tid, heap->thread_id)) { mi_block_set_next(page, block, page->free); page->free = block; page->used--; page->is_zero = false; mi_assert(page->used == 0); mi_tld_t* tld = heap->tld; _mi_segment_page_free(page, true, &tld->segments); } #if (MI_DEBUG!=0) else { mi_assert_internal(false); } #endif } #else // reset memory of a huge block from another thread void _mi_segment_huge_page_reset(mi_segment_t* segment, mi_page_t* page, mi_block_t* block) { MI_UNUSED(page); mi_assert_internal(segment->kind == MI_SEGMENT_HUGE); mi_assert_internal(segment == _mi_page_segment(page)); mi_assert_internal(page->used == 1); // this is called just before the free mi_assert_internal(page->free == NULL); if (segment->allow_decommit) { size_t csize = mi_usable_size(block); if (csize > sizeof(mi_block_t)) { csize = csize - sizeof(mi_block_t); uint8_t* p = (uint8_t*)block + sizeof(mi_block_t); _mi_os_reset(p, csize, &_mi_stats_main); // note: cannot use segment_decommit on huge segments } } } #endif /* ----------------------------------------------------------- Page allocation and free ----------------------------------------------------------- */ mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_size, size_t page_alignment, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { mi_page_t* page; if mi_unlikely(page_alignment > MI_ALIGNMENT_MAX) { mi_assert_internal(_mi_is_power_of_two(page_alignment)); mi_assert_internal(page_alignment >= MI_SEGMENT_SIZE); if (page_alignment < MI_SEGMENT_SIZE) { page_alignment = MI_SEGMENT_SIZE; } page = mi_segment_huge_page_alloc(block_size,page_alignment,heap->arena_id,tld,os_tld); } else if (block_size <= MI_SMALL_OBJ_SIZE_MAX) { page = mi_segments_page_alloc(heap,MI_PAGE_SMALL,block_size,block_size,tld,os_tld); } else if (block_size <= MI_MEDIUM_OBJ_SIZE_MAX) { page = 
mi_segments_page_alloc(heap,MI_PAGE_MEDIUM,MI_MEDIUM_PAGE_SIZE,block_size,tld, os_tld); } else if (block_size <= MI_LARGE_OBJ_SIZE_MAX) { page = mi_segments_page_alloc(heap,MI_PAGE_LARGE,block_size,block_size,tld, os_tld); } else { page = mi_segment_huge_page_alloc(block_size,page_alignment,heap->arena_id,tld,os_tld); } mi_assert_internal(page == NULL || _mi_heap_memid_is_suitable(heap, _mi_page_segment(page)->memid)); mi_assert_expensive(page == NULL || mi_segment_is_valid(_mi_page_segment(page),tld)); return page; } luametatex-2.10.08/source/libraries/mimalloc/src/static.c000066400000000000000000000023771442250314700233410ustar00rootroot00000000000000/* ---------------------------------------------------------------------------- Copyright (c) 2018-2020, Microsoft Research, Daan Leijen This is free software; you can redistribute it and/or modify it under the terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. -----------------------------------------------------------------------------*/ #ifndef _DEFAULT_SOURCE #define _DEFAULT_SOURCE #endif #if defined(__sun) // same remarks as os.c for the static's context. #undef _XOPEN_SOURCE #undef _POSIX_C_SOURCE #endif #include "mimalloc.h" #include "mimalloc/internal.h" // For a static override we create a single object file // containing the whole library. If it is linked first // it will override all the standard library allocation // functions (on Unix's). 
#include "alloc.c" // includes alloc-override.c #include "alloc-aligned.c" #include "alloc-posix.c" #include "arena.c" #include "bitmap.c" #include "heap.c" #include "init.c" #include "options.c" #include "os.c" #include "page.c" // includes page-queue.c #include "random.c" #include "segment.c" #include "segment-map.c" #include "stats.c" #include "prim/prim.c" #if MI_OSX_ZONE #include "prim/osx/alloc-override-zone.c" #endif luametatex-2.10.08/source/libraries/mimalloc/src/stats.c000066400000000000000000000431011442250314700231760ustar00rootroot00000000000000/* ---------------------------------------------------------------------------- Copyright (c) 2018-2021, Microsoft Research, Daan Leijen This is free software; you can redistribute it and/or modify it under the terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. -----------------------------------------------------------------------------*/ #include "mimalloc.h" #include "mimalloc/internal.h" #include "mimalloc/atomic.h" #include "mimalloc/prim.h" #include // snprintf #include // memset #if defined(_MSC_VER) && (_MSC_VER < 1920) #pragma warning(disable:4204) // non-constant aggregate initializer #endif /* ----------------------------------------------------------- Statistics operations ----------------------------------------------------------- */ static bool mi_is_in_main(void* stat) { return ((uint8_t*)stat >= (uint8_t*)&_mi_stats_main && (uint8_t*)stat < ((uint8_t*)&_mi_stats_main + sizeof(mi_stats_t))); } static void mi_stat_update(mi_stat_count_t* stat, int64_t amount) { if (amount == 0) return; if (mi_is_in_main(stat)) { // add atomically (for abandoned pages) int64_t current = mi_atomic_addi64_relaxed(&stat->current, amount); mi_atomic_maxi64_relaxed(&stat->peak, current + amount); if (amount > 0) { mi_atomic_addi64_relaxed(&stat->allocated,amount); } else { mi_atomic_addi64_relaxed(&stat->freed, -amount); } } else { // add thread local 
stat->current += amount; if (stat->current > stat->peak) stat->peak = stat->current; if (amount > 0) { stat->allocated += amount; } else { stat->freed += -amount; } } } void _mi_stat_counter_increase(mi_stat_counter_t* stat, size_t amount) { if (mi_is_in_main(stat)) { mi_atomic_addi64_relaxed( &stat->count, 1 ); mi_atomic_addi64_relaxed( &stat->total, (int64_t)amount ); } else { stat->count++; stat->total += amount; } } void _mi_stat_increase(mi_stat_count_t* stat, size_t amount) { mi_stat_update(stat, (int64_t)amount); } void _mi_stat_decrease(mi_stat_count_t* stat, size_t amount) { mi_stat_update(stat, -((int64_t)amount)); } // must be thread safe as it is called from stats_merge static void mi_stat_add(mi_stat_count_t* stat, const mi_stat_count_t* src, int64_t unit) { if (stat==src) return; if (src->allocated==0 && src->freed==0) return; mi_atomic_addi64_relaxed( &stat->allocated, src->allocated * unit); mi_atomic_addi64_relaxed( &stat->current, src->current * unit); mi_atomic_addi64_relaxed( &stat->freed, src->freed * unit); // peak scores do not work across threads.. 
mi_atomic_addi64_relaxed( &stat->peak, src->peak * unit); } static void mi_stat_counter_add(mi_stat_counter_t* stat, const mi_stat_counter_t* src, int64_t unit) { if (stat==src) return; mi_atomic_addi64_relaxed( &stat->total, src->total * unit); mi_atomic_addi64_relaxed( &stat->count, src->count * unit); } // must be thread safe as it is called from stats_merge static void mi_stats_add(mi_stats_t* stats, const mi_stats_t* src) { if (stats==src) return; mi_stat_add(&stats->segments, &src->segments,1); mi_stat_add(&stats->pages, &src->pages,1); mi_stat_add(&stats->reserved, &src->reserved, 1); mi_stat_add(&stats->committed, &src->committed, 1); mi_stat_add(&stats->reset, &src->reset, 1); mi_stat_add(&stats->purged, &src->purged, 1); mi_stat_add(&stats->page_committed, &src->page_committed, 1); mi_stat_add(&stats->pages_abandoned, &src->pages_abandoned, 1); mi_stat_add(&stats->segments_abandoned, &src->segments_abandoned, 1); mi_stat_add(&stats->threads, &src->threads, 1); mi_stat_add(&stats->malloc, &src->malloc, 1); mi_stat_add(&stats->segments_cache, &src->segments_cache, 1); mi_stat_add(&stats->normal, &src->normal, 1); mi_stat_add(&stats->huge, &src->huge, 1); mi_stat_add(&stats->large, &src->large, 1); mi_stat_counter_add(&stats->pages_extended, &src->pages_extended, 1); mi_stat_counter_add(&stats->mmap_calls, &src->mmap_calls, 1); mi_stat_counter_add(&stats->commit_calls, &src->commit_calls, 1); mi_stat_counter_add(&stats->reset_calls, &src->reset_calls, 1); mi_stat_counter_add(&stats->purge_calls, &src->purge_calls, 1); mi_stat_counter_add(&stats->page_no_retire, &src->page_no_retire, 1); mi_stat_counter_add(&stats->searches, &src->searches, 1); mi_stat_counter_add(&stats->normal_count, &src->normal_count, 1); mi_stat_counter_add(&stats->huge_count, &src->huge_count, 1); mi_stat_counter_add(&stats->large_count, &src->large_count, 1); #if MI_STAT>1 for (size_t i = 0; i <= MI_BIN_HUGE; i++) { if (src->normal_bins[i].allocated > 0 || src->normal_bins[i].freed > 
0) { mi_stat_add(&stats->normal_bins[i], &src->normal_bins[i], 1); } } #endif } /* ----------------------------------------------------------- Display statistics ----------------------------------------------------------- */ // unit > 0 : size in binary bytes // unit == 0: count as decimal // unit < 0 : count in binary static void mi_printf_amount(int64_t n, int64_t unit, mi_output_fun* out, void* arg, const char* fmt) { char buf[32]; buf[0] = 0; int len = 32; const char* suffix = (unit <= 0 ? " " : "B"); const int64_t base = (unit == 0 ? 1000 : 1024); if (unit>0) n *= unit; const int64_t pos = (n < 0 ? -n : n); if (pos < base) { if (n!=1 || suffix[0] != 'B') { // skip printing 1 B for the unit column snprintf(buf, len, "%d %-3s", (int)n, (n==0 ? "" : suffix)); } } else { int64_t divider = base; const char* magnitude = "K"; if (pos >= divider*base) { divider *= base; magnitude = "M"; } if (pos >= divider*base) { divider *= base; magnitude = "G"; } const int64_t tens = (n / (divider/10)); const long whole = (long)(tens/10); const long frac1 = (long)(tens%10); char unitdesc[8]; snprintf(unitdesc, 8, "%s%s%s", magnitude, (base==1024 ? "i" : ""), suffix); snprintf(buf, len, "%ld.%ld %-3s", whole, (frac1 < 0 ? -frac1 : frac1), unitdesc); } _mi_fprintf(out, arg, (fmt==NULL ? 
"%12s" : fmt), buf); } static void mi_print_amount(int64_t n, int64_t unit, mi_output_fun* out, void* arg) { mi_printf_amount(n,unit,out,arg,NULL); } static void mi_print_count(int64_t n, int64_t unit, mi_output_fun* out, void* arg) { if (unit==1) _mi_fprintf(out, arg, "%12s"," "); else mi_print_amount(n,0,out,arg); } static void mi_stat_print_ex(const mi_stat_count_t* stat, const char* msg, int64_t unit, mi_output_fun* out, void* arg, const char* notok ) { _mi_fprintf(out, arg,"%10s:", msg); if (unit > 0) { mi_print_amount(stat->peak, unit, out, arg); mi_print_amount(stat->allocated, unit, out, arg); mi_print_amount(stat->freed, unit, out, arg); mi_print_amount(stat->current, unit, out, arg); mi_print_amount(unit, 1, out, arg); mi_print_count(stat->allocated, unit, out, arg); if (stat->allocated > stat->freed) { _mi_fprintf(out, arg, " "); _mi_fprintf(out, arg, (notok == NULL ? "not all freed" : notok)); _mi_fprintf(out, arg, "\n"); } else { _mi_fprintf(out, arg, " ok\n"); } } else if (unit<0) { mi_print_amount(stat->peak, -1, out, arg); mi_print_amount(stat->allocated, -1, out, arg); mi_print_amount(stat->freed, -1, out, arg); mi_print_amount(stat->current, -1, out, arg); if (unit==-1) { _mi_fprintf(out, arg, "%24s", ""); } else { mi_print_amount(-unit, 1, out, arg); mi_print_count((stat->allocated / -unit), 0, out, arg); } if (stat->allocated > stat->freed) _mi_fprintf(out, arg, " not all freed!\n"); else _mi_fprintf(out, arg, " ok\n"); } else { mi_print_amount(stat->peak, 1, out, arg); mi_print_amount(stat->allocated, 1, out, arg); _mi_fprintf(out, arg, "%11s", " "); // no freed mi_print_amount(stat->current, 1, out, arg); _mi_fprintf(out, arg, "\n"); } } static void mi_stat_print(const mi_stat_count_t* stat, const char* msg, int64_t unit, mi_output_fun* out, void* arg) { mi_stat_print_ex(stat, msg, unit, out, arg, NULL); } static void mi_stat_peak_print(const mi_stat_count_t* stat, const char* msg, int64_t unit, mi_output_fun* out, void* arg) { 
_mi_fprintf(out, arg, "%10s:", msg); mi_print_amount(stat->peak, unit, out, arg); _mi_fprintf(out, arg, "\n"); } static void mi_stat_counter_print(const mi_stat_counter_t* stat, const char* msg, mi_output_fun* out, void* arg ) { _mi_fprintf(out, arg, "%10s:", msg); mi_print_amount(stat->total, -1, out, arg); _mi_fprintf(out, arg, "\n"); } static void mi_stat_counter_print_avg(const mi_stat_counter_t* stat, const char* msg, mi_output_fun* out, void* arg) { const int64_t avg_tens = (stat->count == 0 ? 0 : (stat->total*10 / stat->count)); const long avg_whole = (long)(avg_tens/10); const long avg_frac1 = (long)(avg_tens%10); _mi_fprintf(out, arg, "%10s: %5ld.%ld avg\n", msg, avg_whole, avg_frac1); } static void mi_print_header(mi_output_fun* out, void* arg ) { _mi_fprintf(out, arg, "%10s: %11s %11s %11s %11s %11s %11s\n", "heap stats", "peak ", "total ", "freed ", "current ", "unit ", "count "); } #if MI_STAT>1 static void mi_stats_print_bins(const mi_stat_count_t* bins, size_t max, const char* fmt, mi_output_fun* out, void* arg) { bool found = false; char buf[64]; for (size_t i = 0; i <= max; i++) { if (bins[i].allocated > 0) { found = true; int64_t unit = _mi_bin_size((uint8_t)i); snprintf(buf, 64, "%s %3lu", fmt, (long)i); mi_stat_print(&bins[i], buf, unit, out, arg); } } if (found) { _mi_fprintf(out, arg, "\n"); mi_print_header(out, arg); } } #endif //------------------------------------------------------------ // Use an output wrapper for line-buffered output // (which is nice when using loggers etc.) 
//------------------------------------------------------------ typedef struct buffered_s { mi_output_fun* out; // original output function void* arg; // and state char* buf; // local buffer of at least size `count+1` size_t used; // currently used chars `used <= count` size_t count; // total chars available for output } buffered_t; static void mi_buffered_flush(buffered_t* buf) { buf->buf[buf->used] = 0; _mi_fputs(buf->out, buf->arg, NULL, buf->buf); buf->used = 0; } static void mi_cdecl mi_buffered_out(const char* msg, void* arg) { buffered_t* buf = (buffered_t*)arg; if (msg==NULL || buf==NULL) return; for (const char* src = msg; *src != 0; src++) { char c = *src; if (buf->used >= buf->count) mi_buffered_flush(buf); mi_assert_internal(buf->used < buf->count); buf->buf[buf->used++] = c; if (c == '\n') mi_buffered_flush(buf); } } //------------------------------------------------------------ // Print statistics //------------------------------------------------------------ static void _mi_stats_print(mi_stats_t* stats, mi_output_fun* out0, void* arg0) mi_attr_noexcept { // wrap the output function to be line buffered char buf[256]; buffered_t buffer = { out0, arg0, NULL, 0, 255 }; buffer.buf = buf; mi_output_fun* out = &mi_buffered_out; void* arg = &buffer; // and print using that mi_print_header(out,arg); #if MI_STAT>1 mi_stats_print_bins(stats->normal_bins, MI_BIN_HUGE, "normal",out,arg); #endif #if MI_STAT mi_stat_print(&stats->normal, "normal", (stats->normal_count.count == 0 ? 1 : -(stats->normal.allocated / stats->normal_count.count)), out, arg); mi_stat_print(&stats->large, "large", (stats->large_count.count == 0 ? 1 : -(stats->large.allocated / stats->large_count.count)), out, arg); mi_stat_print(&stats->huge, "huge", (stats->huge_count.count == 0 ? 
1 : -(stats->huge.allocated / stats->huge_count.count)), out, arg); mi_stat_count_t total = { 0,0,0,0 }; mi_stat_add(&total, &stats->normal, 1); mi_stat_add(&total, &stats->large, 1); mi_stat_add(&total, &stats->huge, 1); mi_stat_print(&total, "total", 1, out, arg); #endif #if MI_STAT>1 mi_stat_print(&stats->malloc, "malloc req", 1, out, arg); _mi_fprintf(out, arg, "\n"); #endif mi_stat_print_ex(&stats->reserved, "reserved", 1, out, arg, ""); mi_stat_print_ex(&stats->committed, "committed", 1, out, arg, ""); mi_stat_peak_print(&stats->reset, "reset", 1, out, arg ); mi_stat_peak_print(&stats->purged, "purged", 1, out, arg ); mi_stat_print(&stats->page_committed, "touched", 1, out, arg); mi_stat_print(&stats->segments, "segments", -1, out, arg); mi_stat_print(&stats->segments_abandoned, "-abandoned", -1, out, arg); mi_stat_print(&stats->segments_cache, "-cached", -1, out, arg); mi_stat_print(&stats->pages, "pages", -1, out, arg); mi_stat_print(&stats->pages_abandoned, "-abandoned", -1, out, arg); mi_stat_counter_print(&stats->pages_extended, "-extended", out, arg); mi_stat_counter_print(&stats->page_no_retire, "-noretire", out, arg); mi_stat_counter_print(&stats->mmap_calls, "mmaps", out, arg); mi_stat_counter_print(&stats->commit_calls, "commits", out, arg); mi_stat_counter_print(&stats->reset_calls, "resets", out, arg); mi_stat_counter_print(&stats->purge_calls, "purges", out, arg); mi_stat_print(&stats->threads, "threads", -1, out, arg); mi_stat_counter_print_avg(&stats->searches, "searches", out, arg); _mi_fprintf(out, arg, "%10s: %5zu\n", "numa nodes", _mi_os_numa_node_count()); size_t elapsed; size_t user_time; size_t sys_time; size_t current_rss; size_t peak_rss; size_t current_commit; size_t peak_commit; size_t page_faults; mi_process_info(&elapsed, &user_time, &sys_time, ¤t_rss, &peak_rss, ¤t_commit, &peak_commit, &page_faults); _mi_fprintf(out, arg, "%10s: %5ld.%03ld s\n", "elapsed", elapsed/1000, elapsed%1000); _mi_fprintf(out, arg, "%10s: user: %ld.%03ld 
s, system: %ld.%03ld s, faults: %lu, rss: ", "process", user_time/1000, user_time%1000, sys_time/1000, sys_time%1000, (unsigned long)page_faults ); mi_printf_amount((int64_t)peak_rss, 1, out, arg, "%s"); if (peak_commit > 0) { _mi_fprintf(out, arg, ", commit: "); mi_printf_amount((int64_t)peak_commit, 1, out, arg, "%s"); } _mi_fprintf(out, arg, "\n"); } static mi_msecs_t mi_process_start; // = 0 static mi_stats_t* mi_stats_get_default(void) { mi_heap_t* heap = mi_heap_get_default(); return &heap->tld->stats; } static void mi_stats_merge_from(mi_stats_t* stats) { if (stats != &_mi_stats_main) { mi_stats_add(&_mi_stats_main, stats); memset(stats, 0, sizeof(mi_stats_t)); } } void mi_stats_reset(void) mi_attr_noexcept { mi_stats_t* stats = mi_stats_get_default(); if (stats != &_mi_stats_main) { memset(stats, 0, sizeof(mi_stats_t)); } memset(&_mi_stats_main, 0, sizeof(mi_stats_t)); if (mi_process_start == 0) { mi_process_start = _mi_clock_start(); }; } void mi_stats_merge(void) mi_attr_noexcept { mi_stats_merge_from( mi_stats_get_default() ); } void _mi_stats_done(mi_stats_t* stats) { // called from `mi_thread_done` mi_stats_merge_from(stats); } void mi_stats_print_out(mi_output_fun* out, void* arg) mi_attr_noexcept { mi_stats_merge_from(mi_stats_get_default()); _mi_stats_print(&_mi_stats_main, out, arg); } void mi_stats_print(void* out) mi_attr_noexcept { // for compatibility there is an `out` parameter (which can be `stdout` or `stderr`) mi_stats_print_out((mi_output_fun*)out, NULL); } void mi_thread_stats_print_out(mi_output_fun* out, void* arg) mi_attr_noexcept { _mi_stats_print(mi_stats_get_default(), out, arg); } // ---------------------------------------------------------------- // Basic timer for convenience; use milli-seconds to avoid doubles // ---------------------------------------------------------------- static mi_msecs_t mi_clock_diff; mi_msecs_t _mi_clock_now(void) { return _mi_prim_clock_now(); } mi_msecs_t _mi_clock_start(void) { if (mi_clock_diff == 
0.0) { mi_msecs_t t0 = _mi_clock_now(); mi_clock_diff = _mi_clock_now() - t0; } return _mi_clock_now(); } mi_msecs_t _mi_clock_end(mi_msecs_t start) { mi_msecs_t end = _mi_clock_now(); return (end - start - mi_clock_diff); } // -------------------------------------------------------- // Basic process statistics // -------------------------------------------------------- mi_decl_export void mi_process_info(size_t* elapsed_msecs, size_t* user_msecs, size_t* system_msecs, size_t* current_rss, size_t* peak_rss, size_t* current_commit, size_t* peak_commit, size_t* page_faults) mi_attr_noexcept { mi_process_info_t pinfo; _mi_memzero_var(pinfo); pinfo.elapsed = _mi_clock_end(mi_process_start); pinfo.current_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)&_mi_stats_main.committed.current)); pinfo.peak_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)&_mi_stats_main.committed.peak)); pinfo.current_rss = pinfo.current_commit; pinfo.peak_rss = pinfo.peak_commit; pinfo.utime = 0; pinfo.stime = 0; pinfo.page_faults = 0; _mi_prim_process_info(&pinfo); if (elapsed_msecs!=NULL) *elapsed_msecs = (pinfo.elapsed < 0 ? 0 : (pinfo.elapsed < (mi_msecs_t)PTRDIFF_MAX ? (size_t)pinfo.elapsed : PTRDIFF_MAX)); if (user_msecs!=NULL) *user_msecs = (pinfo.utime < 0 ? 0 : (pinfo.utime < (mi_msecs_t)PTRDIFF_MAX ? (size_t)pinfo.utime : PTRDIFF_MAX)); if (system_msecs!=NULL) *system_msecs = (pinfo.stime < 0 ? 0 : (pinfo.stime < (mi_msecs_t)PTRDIFF_MAX ? 
(size_t)pinfo.stime : PTRDIFF_MAX)); if (current_rss!=NULL) *current_rss = pinfo.current_rss; if (peak_rss!=NULL) *peak_rss = pinfo.peak_rss; if (current_commit!=NULL) *current_commit = pinfo.current_commit; if (peak_commit!=NULL) *peak_commit = pinfo.peak_commit; if (page_faults!=NULL) *page_faults = pinfo.page_faults; } luametatex-2.10.08/source/libraries/miniz/000077500000000000000000000000001442250314700204375ustar00rootroot00000000000000luametatex-2.10.08/source/libraries/miniz/ChangeLog.md000066400000000000000000000440651442250314700226210ustar00rootroot00000000000000## Changelog ### 3.0.1 - Fix compilation error with MINIZ_USE_UNALIGNED_LOADS_AND_STORES=1 ### 3.0.0 - Reduce memory usage for inflate. This changes `struct tinfl_decompressor_tag` and therefore requires a major version bump (breaks ABI compatibility) - Add padding to structures so it continues to work if features differ. This also changes some structures - Use _ftelli64, _fseeki64 and stat with MinGW32 and OpenWatcom - Fix varios warnings with OpenWatcom compiler - Avoid using unaligned memory access in UBSan builds - Set MINIZ_LITTLE_ENDIAN only if not set - Add MINIZ_NO_DEFLATE_APIS and MINIZ_NO_INFLATE_APIS - Fix use of uninitialized memory in tinfl_decompress_mem_to_callback() - Use wfopen on windows - Use _wstat64 instead _stat64 on windows - Use level_and_flags after MZ_DEFAULT_COMPRESSION has been handled - Improve endianess detection - Don't use unaligned stores and loads per default - Fix function declaration if MINIZ_NO_STDIO is used - Fix MZ_ZIP_GENERAL_PURPOSE_BIT_FLAG_UTF8 not being set - Remove total files check (its 32-bit uint) - tinfl_decompress: avoid NULL ptr arithmetic UB - miniz_zip: fix mz_zip_reader_extract_to_heap to read correct sizes - Eliminate 64-bit operations on 32-bit machines - Disable treating warnings as error with MSVC - Disable building shared lib via CMake by default - Fixed alignment problems on MacOS - Fixed get error string for MZ_ZIP_TOTAL_ERRORS - Write 
correct FLEVEL 2-bit value in zlib header - miniz.pc.in: fix include path not containing the "miniz" suffix - Fix compatibility with FreeBSD - pkg-config tweaks - Fix integer overflow in header corruption check - Fix some warnings - tdefl_compress_normal: Avoid NULL ptr arithmetic UB - replace use of stdint.h types with mz_ variants ### 2.2.0 - Fix examples with amalgamation - Modified cmake script to support shared library mode and find_package - Fix for misleading doc comment on `mz_zip_reader_init_cfile` function - Add include location tolerance and stop forcing `_GNU_SOURCE` - Fix: mz_zip_reader_locate_file_v2 returns an mz_bool - Fix large file system checks - Add #elif to enable an external mz_crc32() to be linked in - Write with dynamic size (size of file/data to be added not known before adding) - Added uncompress2 for zlib compatibility - Add support for building as a Meson subproject - Added OSSFuzz support; Integrate with CIFuzz - Add pkg-config file - Fixed use-of-uninitialized value msan error when copying dist bytes with no output bytes written. - mz_zip_validate_file(): fix memory leak on errors - Fixed MSAN use-of-uninitialized in tinfl_decompress when invalid dist is decoded. 
In this instance dist was 31 which s_dist_base translates as 0 - Add flag to set (compressed) size in local file header - avoid use of uninitialized value in tdefl_record_literal ### 2.1.0 - More instances of memcpy instead of cast and use memcpy per default - Remove inline for c90 support - New function to read files via callback functions when adding them - Fix out of bounds read while reading Zip64 extended information - guard memcpy when n == 0 because buffer may be NULL - Implement inflateReset() function - Move comp/decomp alloc/free prototypes under guarding #ifndef MZ_NO_MALLOC - Fix large file support under Windows - Don't warn if _LARGEFILE64_SOURCE is not defined to 1 - Fixes for MSVC warnings - Remove check that path of file added to archive contains ':' or '\' - Add !defined check on MINIZ_USE_ALIGNED_LOADS_AND_STORES ### 2.0.8 - Remove unimplemented functions (mz_zip_locate_file and mz_zip_locate_file_v2) - Add license, changelog, readme and example files to release zip - Fix heap overflow to user buffer in tinfl_status tinfl_decompress - Fix corrupt archive if uncompressed file smaller than 4 byte and the file is added by mz_zip_writer_add_mem* ### 2.0.7 - Removed need in C++ compiler in cmake build - Fixed a lot of uninitialized value errors found with Valgrind by memsetting m_dict to 0 in tdefl_init - Fix resource leak in mz_zip_reader_init_file_v2 - Fix assert with mz_zip_writer_add_mem* w/MZ_DEFAULT_COMPRESSION - cmake build: install library and headers - Remove _LARGEFILE64_SOURCE requirement from apple defines for large files ### 2.0.6 - Improve MZ_ZIP_FLAG_WRITE_ZIP64 documentation - Remove check for cur_archive_file_ofs > UINT_MAX because cur_archive_file_ofs is not used after this point - Add cmake debug configuration - Fix PNG height when creating png files - Add "iterative" file extraction method based on mz_zip_reader_extract_to_callback. 
- Option to use memcpy for unaligned data access - Define processor/arch macros as zero if not set to one ### 2.0.4/2.0.5 - Fix compilation with the various omission compile definitions ### 2.0.3 - Fix GCC/clang compile warnings - Added callback for periodic flushes (for ZIP file streaming) - Use UTF-8 for file names in ZIP files per default ### 2.0.2 - Fix source backwards compatibility with 1.x - Fix a ZIP bit not being set correctly ### 2.0.1 - Added some tests - Added CI - Make source code ANSI C compatible ### 2.0.0 beta - Matthew Sitton merged miniz 1.x to Rich Geldreich's vogl ZIP64 changes. Miniz is now licensed as MIT since the vogl code base is MIT licensed - Miniz is now split into several files - Miniz does now not seek backwards when creating ZIP files. That is the ZIP files can be streamed - Miniz automatically switches to the ZIP64 format when the created ZIP files goes over ZIP file limits - Similar to [SQLite](https://www.sqlite.org/amalgamation.html) the Miniz source code is amalgamated into one miniz.c/miniz.h pair in a build step (amalgamate.sh). Please use miniz.c/miniz.h in your projects - Miniz 2 is only source back-compatible with miniz 1.x. It breaks binary compatibility because structures changed ### v1.16 BETA Oct 19, 2013 Still testing, this release is downloadable from [here](http://www.tenacioussoftware.com/miniz_v116_beta_r1.7z). Two key inflator-only robustness and streaming related changes. Also merged in tdefl_compressor_alloc(), tdefl_compressor_free() helpers to make script bindings easier for rustyzip. I would greatly appreciate any help with testing or any feedback. The inflator in raw (non-zlib) mode is now usable on gzip or similar streams that have a bunch of bytes following the raw deflate data (problem discovered by rustyzip author williamw520). This version should never read beyond the last byte of the raw deflate data independent of how many bytes you pass into the input buffer. 
The inflator now has a new failure status TINFL_STATUS_FAILED_CANNOT_MAKE_PROGRESS (-4). Previously, if the inflator was starved of bytes and could not make progress (because the input buffer was empty and the caller did not set the TINFL_FLAG_HAS_MORE_INPUT flag - say on truncated or corrupted compressed data stream) it would append all 0's to the input and try to soldier on. This is scary behavior if the caller didn't know when to stop accepting output (because it didn't know how much uncompressed data was expected, or didn't enforce a sane maximum). v1.16 will instead return TINFL_STATUS_FAILED_CANNOT_MAKE_PROGRESS immediately if it needs 1 or more bytes to make progress, the input buf is empty, and the caller has indicated that no more input is available. This is a "soft" failure, so you can call the inflator again with more input and it will try to continue, or you can give up and fail. This could be very useful in network streaming scenarios. - The inflator coroutine func. is subtle and complex so I'm being cautious about this release. I would greatly appreciate any help with testing or any feedback. I feel good about these changes, and they've been through several hours of automated testing, but they will probably not fix anything for the majority of prev. users so I'm going to mark this release as beta for a few weeks and continue testing it at work/home on various things. - The inflator in raw (non-zlib) mode is now usable on gzip or similar data streams that have a bunch of bytes following the raw deflate data (problem discovered by rustyzip author williamw520). This version should *never* read beyond the last byte of the raw deflate data independent of how many bytes you pass into the input buffer. This issue was caused by the various Huffman bitbuffer lookahead optimizations, and would not be an issue if the caller knew and enforced the precise size of the raw compressed data *or* if the compressed data was in zlib format (i.e. 
always followed by the byte aligned zlib adler32). So in other words, you can now call the inflator on deflate streams that are followed by arbitrary amounts of data and it's guaranteed that decompression will stop exactly on the last byte. - The inflator now has a new failure status: TINFL_STATUS_FAILED_CANNOT_MAKE_PROGRESS (-4). Previously, if the inflator was starved of bytes and could not make progress (because the input buffer was empty and the caller did not set the TINFL_FLAG_HAS_MORE_INPUT flag - say on truncated or corrupted compressed data stream) it would append all 0's to the input and try to soldier on. This is scary, because in the worst case, I believe it was possible for the prev. inflator to start outputting large amounts of literal data. If the caller didn't know when to stop accepting output (because it didn't know how much uncompressed data was expected, or didn't enforce a sane maximum) it could continue forever. v1.16 cannot fall into this failure mode, instead it'll return TINFL_STATUS_FAILED_CANNOT_MAKE_PROGRESS immediately if it needs 1 or more bytes to make progress, the input buf is empty, and the caller has indicated that no more input is available. This is a "soft" failure, so you can call the inflator again with more input and it will try to continue, or you can give up and fail. This could be very useful in network streaming scenarios. - Added documentation to all the tinfl return status codes, fixed miniz_tester so it accepts double minus params for Linux, tweaked example1.c, added a simple "follower bytes" test to miniz_tester.cpp. ### v1.15 r4 STABLE - Oct 13, 2013 Merged over a few very minor bug fixes that I fixed in the zip64 branch. This is downloadable from [here](http://code.google.com/p/miniz/downloads/list) and also in SVN head (as of 10/19/13). ### v1.15 - Oct. 13, 2013 Interim bugfix release while I work on the next major release with zip64 and streaming compression/decompression support. 
Fixed the MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY bug (thanks kahmyong.moon@hp.com), which could cause the locate files func to not find files when this flag was specified. Also fixed a bug in mz_zip_reader_extract_to_mem_no_alloc() with user provided read buffers (thanks kymoon). I also merged lots of compiler fixes from various github repo branches and Google Code issue reports. I finally added cmake support (only tested under for Linux so far), compiled and tested with clang v3.3 and gcc 4.6 (under Linux), added defl_write_image_to_png_file_in_memory_ex() (supports Y flipping for OpenGL use, real-time compression), added a new PNG example (example6.c - Mandelbrot), and I added 64-bit file I/O support (stat64(), etc.) for glibc. - Critical fix for the MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY bug (thanks kahmyong.moon@hp.com) which could cause locate files to not find files. This bug would only have occured in earlier versions if you explicitly used this flag, OR if you used mz_zip_extract_archive_file_to_heap() or mz_zip_add_mem_to_archive_file_in_place() (which used this flag). If you can't switch to v1.15 but want to fix this bug, just remove the uses of this flag from both helper funcs (and of course don't use the flag). - Bugfix in mz_zip_reader_extract_to_mem_no_alloc() from kymoon when pUser_read_buf is not NULL and compressed size is > uncompressed size - Fixing mz_zip_reader_extract_*() funcs so they don't try to extract compressed data from directory entries, to account for weird zipfiles which contain zero-size compressed data on dir entries. Hopefully this fix won't cause any issues on weird zip archives, because it assumes the low 16-bits of zip external attributes are DOS attributes (which I believe they always are in practice). 
- Fixing mz_zip_reader_is_file_a_directory() so it doesn't check the internal attributes, just the filename and external attributes - mz_zip_reader_init_file() - missing MZ_FCLOSE() call if the seek failed - Added cmake support for Linux builds which builds all the examples, tested with clang v3.3 and gcc v4.6. - Clang fix for tdefl_write_image_to_png_file_in_memory() from toffaletti - Merged MZ_FORCEINLINE fix from hdeanclark - Fix include before config #ifdef, thanks emil.brink - Added tdefl_write_image_to_png_file_in_memory_ex(): supports Y flipping (super useful for OpenGL apps), and explicit control over the compression level (so you can set it to 1 for real-time compression). - Merged in some compiler fixes from paulharris's github repro. - Retested this build under Windows (VS 2010, including static analysis), tcc 0.9.26, gcc v4.6 and clang v3.3. - Added example6.c, which dumps an image of the mandelbrot set to a PNG file. - Modified example2 to help test the MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY flag more. - In r3: Bugfix to mz_zip_writer_add_file() found during merge: Fix possible src file fclose() leak if alignment bytes+local header file write faiiled - In r4: Minor bugfix to mz_zip_writer_add_from_zip_reader(): Was pushing the wrong central dir header offset, appears harmless in this release, but it became a problem in the zip64 branch ### v1.14 - May 20, 2012 (SVN Only) Minor tweaks to get miniz.c compiling with the Tiny C Compiler, added #ifndef MINIZ_NO_TIME guards around utime.h includes. Adding mz_free() function, so the caller can free heap blocks returned by miniz using whatever heap functions it has been configured to use, MSVC specific fixes to use "safe" variants of several functions (localtime_s, fopen_s, freopen_s). MinGW32/64 GCC 4.6.1 compiler fixes: added MZ_FORCEINLINE, #include (thanks fermtect). Compiler specific fixes, some from fermtect. 
I upgraded to TDM GCC 4.6.1 and now static __forceinline is giving it fits, so I'm changing all usage of __forceinline to MZ_FORCEINLINE and forcing gcc to use __attribute__((__always_inline__)) (and MSVC to use __forceinline). Also various fixes from fermtect for MinGW32: added #include , 64-bit ftell/fseek fixes. ### v1.13 - May 19, 2012 From jason@cornsyrup.org and kelwert@mtu.edu - Most importantly, fixed mz_crc32() so it doesn't compute the wrong CRC-32's when mz_ulong is 64-bits. Temporarily/locally slammed in "typedef unsigned long mz_ulong" and re-ran a randomized regression test on ~500k files. Other stuff: Eliminated a bunch of warnings when compiling with GCC 32-bit/64. Ran all examples, miniz.c, and tinfl.c through MSVC 2008's /analyze (static analysis) option and fixed all warnings (except for the silly "Use of the comma-operator in a tested expression.." analysis warning, which I purposely use to work around a MSVC compiler warning). Created 32-bit and 64-bit Codeblocks projects/workspace. Built and tested Linux executables. The codeblocks workspace is compatible with Linux+Win32/x64. Added miniz_tester solution/project, which is a useful little app derived from LZHAM's tester app that I use as part of the regression test. Ran miniz.c and tinfl.c through another series of regression testing on ~500,000 files and archives. Modified example5.c so it purposely disables a bunch of high-level functionality (MINIZ_NO_STDIO, etc.). (Thanks to corysama for the MINIZ_NO_STDIO bug report.) Fix ftell() usage in a few of the examples so they exit with an error on files which are too large (a limitation of the examples, not miniz itself). Fix fail logic handling in mz_zip_add_mem_to_archive_file_in_place() so it always calls mz_zip_writer_finalize_archive() and mz_zip_writer_end(), even if the file add fails. - From jason@cornsyrup.org and kelwert@mtu.edu - Fix mz_crc32() so it doesn't compute the wrong CRC-32's when mz_ulong is 64-bit. 
- Temporarily/locally slammed in "typedef unsigned long mz_ulong" and re-ran a randomized regression test on ~500k files. - Eliminated a bunch of warnings when compiling with GCC 32-bit/64. - Ran all examples, miniz.c, and tinfl.c through MSVC 2008's /analyze (static analysis) option and fixed all warnings (except for the silly "Use of the comma-operator in a tested expression.." analysis warning, which I purposely use to work around a MSVC compiler warning). - Created 32-bit and 64-bit Codeblocks projects/workspace. Built and tested Linux executables. The codeblocks workspace is compatible with Linux+Win32/x64. - Added miniz_tester solution/project, which is a useful little app derived from LZHAM's tester app that I use as part of the regression test. - Ran miniz.c and tinfl.c through another series of regression testing on ~500,000 files and archives. - Modified example5.c so it purposely disables a bunch of high-level functionality (MINIZ_NO_STDIO, etc.). (Thanks to corysama for the MINIZ_NO_STDIO bug report.) - Fix ftell() usage in examples so they exit with an error on files which are too large (a limitation of the examples, not miniz itself). ### v1.12 - 4/12/12 More comments, added low-level example5.c, fixed a couple minor level_and_flags issues in the archive API's. level_and_flags can now be set to MZ_DEFAULT_COMPRESSION. Thanks to Bruce Dawson for the feedback/bug report. ### v1.11 - 5/28/11 Added statement from unlicense.org ### v1.10 - 5/27/11 - Substantial compressor optimizations: - Level 1 is now ~4x faster than before. The L1 compressor's throughput now varies between 70-110MB/sec. on a Core i7 (actual throughput varies depending on the type of data, and x64 vs. x86). - Improved baseline L2-L9 compression perf. Also, greatly improved compression perf. issues on some file types. - Refactored the compression code for better readability and maintainability. 
- Added level 10 compression level (L10 has slightly better ratio than level 9, but could have a potentially large drop in throughput on some files). ### v1.09 - 5/15/11 Initial stable release. luametatex-2.10.08/source/libraries/miniz/LICENSE000066400000000000000000000022121442250314700214410ustar00rootroot00000000000000Copyright 2013-2014 RAD Game Tools and Valve Software Copyright 2010-2014 Rich Geldreich and Tenacious Software LLC All Rights Reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. luametatex-2.10.08/source/libraries/miniz/miniz.c000066400000000000000000011657651442250314700217560ustar00rootroot00000000000000#include "miniz.h" /************************************************************************** * * Copyright 2013-2014 RAD Game Tools and Valve Software * Copyright 2010-2014 Rich Geldreich and Tenacious Software LLC * All Rights Reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. * **************************************************************************/ typedef unsigned char mz_validate_uint16[sizeof(mz_uint16) == 2 ? 1 : -1]; typedef unsigned char mz_validate_uint32[sizeof(mz_uint32) == 4 ? 1 : -1]; typedef unsigned char mz_validate_uint64[sizeof(mz_uint64) == 8 ? 
1 : -1]; #ifdef __cplusplus extern "C" { #endif /* ------------------- zlib-style API's */ mz_ulong mz_adler32(mz_ulong adler, const unsigned char *ptr, size_t buf_len) { mz_uint32 i, s1 = (mz_uint32)(adler & 0xffff), s2 = (mz_uint32)(adler >> 16); size_t block_len = buf_len % 5552; if (!ptr) return MZ_ADLER32_INIT; while (buf_len) { for (i = 0; i + 7 < block_len; i += 8, ptr += 8) { s1 += ptr[0], s2 += s1; s1 += ptr[1], s2 += s1; s1 += ptr[2], s2 += s1; s1 += ptr[3], s2 += s1; s1 += ptr[4], s2 += s1; s1 += ptr[5], s2 += s1; s1 += ptr[6], s2 += s1; s1 += ptr[7], s2 += s1; } for (; i < block_len; ++i) s1 += *ptr++, s2 += s1; s1 %= 65521U, s2 %= 65521U; buf_len -= block_len; block_len = 5552; } return (s2 << 16) + s1; } /* Karl Malbrain's compact CRC-32. See "A compact CCITT crc16 and crc32 C implementation that balances processor cache usage against speed": http://www.geocities.com/malbrain/ */ #if 0 mz_ulong mz_crc32(mz_ulong crc, const mz_uint8 *ptr, size_t buf_len) { static const mz_uint32 s_crc32[16] = { 0, 0x1db71064, 0x3b6e20c8, 0x26d930ac, 0x76dc4190, 0x6b6b51f4, 0x4db26158, 0x5005713c, 0xedb88320, 0xf00f9344, 0xd6d6a3e8, 0xcb61b38c, 0x9b64c2b0, 0x86d3d2d4, 0xa00ae278, 0xbdbdf21c }; mz_uint32 crcu32 = (mz_uint32)crc; if (!ptr) return MZ_CRC32_INIT; crcu32 = ~crcu32; while (buf_len--) { mz_uint8 b = *ptr++; crcu32 = (crcu32 >> 4) ^ s_crc32[(crcu32 & 0xF) ^ (b & 0xF)]; crcu32 = (crcu32 >> 4) ^ s_crc32[(crcu32 & 0xF) ^ (b >> 4)]; } return ~crcu32; } #elif defined(USE_EXTERNAL_MZCRC) /* If USE_EXTERNAL_CRC is defined, an external module will export the * mz_crc32() symbol for us to use, e.g. an SSE-accelerated version. * Depending on the impl, it may be necessary to ~ the input/output crc values. */ mz_ulong mz_crc32(mz_ulong crc, const mz_uint8 *ptr, size_t buf_len); #else /* Faster, but larger CPU cache footprint. 
*/ mz_ulong mz_crc32(mz_ulong crc, const mz_uint8 *ptr, size_t buf_len) { static const mz_uint32 s_crc_table[256] = { 0x00000000, 0x77073096, 0xEE0E612C, 0x990951BA, 0x076DC419, 0x706AF48F, 0xE963A535, 0x9E6495A3, 0x0EDB8832, 0x79DCB8A4, 0xE0D5E91E, 0x97D2D988, 0x09B64C2B, 0x7EB17CBD, 0xE7B82D07, 0x90BF1D91, 0x1DB71064, 0x6AB020F2, 0xF3B97148, 0x84BE41DE, 0x1ADAD47D, 0x6DDDE4EB, 0xF4D4B551, 0x83D385C7, 0x136C9856, 0x646BA8C0, 0xFD62F97A, 0x8A65C9EC, 0x14015C4F, 0x63066CD9, 0xFA0F3D63, 0x8D080DF5, 0x3B6E20C8, 0x4C69105E, 0xD56041E4, 0xA2677172, 0x3C03E4D1, 0x4B04D447, 0xD20D85FD, 0xA50AB56B, 0x35B5A8FA, 0x42B2986C, 0xDBBBC9D6, 0xACBCF940, 0x32D86CE3, 0x45DF5C75, 0xDCD60DCF, 0xABD13D59, 0x26D930AC, 0x51DE003A, 0xC8D75180, 0xBFD06116, 0x21B4F4B5, 0x56B3C423, 0xCFBA9599, 0xB8BDA50F, 0x2802B89E, 0x5F058808, 0xC60CD9B2, 0xB10BE924, 0x2F6F7C87, 0x58684C11, 0xC1611DAB, 0xB6662D3D, 0x76DC4190, 0x01DB7106, 0x98D220BC, 0xEFD5102A, 0x71B18589, 0x06B6B51F, 0x9FBFE4A5, 0xE8B8D433, 0x7807C9A2, 0x0F00F934, 0x9609A88E, 0xE10E9818, 0x7F6A0DBB, 0x086D3D2D, 0x91646C97, 0xE6635C01, 0x6B6B51F4, 0x1C6C6162, 0x856530D8, 0xF262004E, 0x6C0695ED, 0x1B01A57B, 0x8208F4C1, 0xF50FC457, 0x65B0D9C6, 0x12B7E950, 0x8BBEB8EA, 0xFCB9887C, 0x62DD1DDF, 0x15DA2D49, 0x8CD37CF3, 0xFBD44C65, 0x4DB26158, 0x3AB551CE, 0xA3BC0074, 0xD4BB30E2, 0x4ADFA541, 0x3DD895D7, 0xA4D1C46D, 0xD3D6F4FB, 0x4369E96A, 0x346ED9FC, 0xAD678846, 0xDA60B8D0, 0x44042D73, 0x33031DE5, 0xAA0A4C5F, 0xDD0D7CC9, 0x5005713C, 0x270241AA, 0xBE0B1010, 0xC90C2086, 0x5768B525, 0x206F85B3, 0xB966D409, 0xCE61E49F, 0x5EDEF90E, 0x29D9C998, 0xB0D09822, 0xC7D7A8B4, 0x59B33D17, 0x2EB40D81, 0xB7BD5C3B, 0xC0BA6CAD, 0xEDB88320, 0x9ABFB3B6, 0x03B6E20C, 0x74B1D29A, 0xEAD54739, 0x9DD277AF, 0x04DB2615, 0x73DC1683, 0xE3630B12, 0x94643B84, 0x0D6D6A3E, 0x7A6A5AA8, 0xE40ECF0B, 0x9309FF9D, 0x0A00AE27, 0x7D079EB1, 0xF00F9344, 0x8708A3D2, 0x1E01F268, 0x6906C2FE, 0xF762575D, 0x806567CB, 0x196C3671, 0x6E6B06E7, 0xFED41B76, 0x89D32BE0, 0x10DA7A5A, 0x67DD4ACC, 
0xF9B9DF6F, 0x8EBEEFF9, 0x17B7BE43, 0x60B08ED5, 0xD6D6A3E8, 0xA1D1937E, 0x38D8C2C4, 0x4FDFF252, 0xD1BB67F1, 0xA6BC5767, 0x3FB506DD, 0x48B2364B, 0xD80D2BDA, 0xAF0A1B4C, 0x36034AF6, 0x41047A60, 0xDF60EFC3, 0xA867DF55, 0x316E8EEF, 0x4669BE79, 0xCB61B38C, 0xBC66831A, 0x256FD2A0, 0x5268E236, 0xCC0C7795, 0xBB0B4703, 0x220216B9, 0x5505262F, 0xC5BA3BBE, 0xB2BD0B28, 0x2BB45A92, 0x5CB36A04, 0xC2D7FFA7, 0xB5D0CF31, 0x2CD99E8B, 0x5BDEAE1D, 0x9B64C2B0, 0xEC63F226, 0x756AA39C, 0x026D930A, 0x9C0906A9, 0xEB0E363F, 0x72076785, 0x05005713, 0x95BF4A82, 0xE2B87A14, 0x7BB12BAE, 0x0CB61B38, 0x92D28E9B, 0xE5D5BE0D, 0x7CDCEFB7, 0x0BDBDF21, 0x86D3D2D4, 0xF1D4E242, 0x68DDB3F8, 0x1FDA836E, 0x81BE16CD, 0xF6B9265B, 0x6FB077E1, 0x18B74777, 0x88085AE6, 0xFF0F6A70, 0x66063BCA, 0x11010B5C, 0x8F659EFF, 0xF862AE69, 0x616BFFD3, 0x166CCF45, 0xA00AE278, 0xD70DD2EE, 0x4E048354, 0x3903B3C2, 0xA7672661, 0xD06016F7, 0x4969474D, 0x3E6E77DB, 0xAED16A4A, 0xD9D65ADC, 0x40DF0B66, 0x37D83BF0, 0xA9BCAE53, 0xDEBB9EC5, 0x47B2CF7F, 0x30B5FFE9, 0xBDBDF21C, 0xCABAC28A, 0x53B39330, 0x24B4A3A6, 0xBAD03605, 0xCDD70693, 0x54DE5729, 0x23D967BF, 0xB3667A2E, 0xC4614AB8, 0x5D681B02, 0x2A6F2B94, 0xB40BBE37, 0xC30C8EA1, 0x5A05DF1B, 0x2D02EF8D }; mz_uint32 crc32 = (mz_uint32)crc ^ 0xFFFFFFFF; const mz_uint8 *pByte_buf = (const mz_uint8 *)ptr; while (buf_len >= 4) { crc32 = (crc32 >> 8) ^ s_crc_table[(crc32 ^ pByte_buf[0]) & 0xFF]; crc32 = (crc32 >> 8) ^ s_crc_table[(crc32 ^ pByte_buf[1]) & 0xFF]; crc32 = (crc32 >> 8) ^ s_crc_table[(crc32 ^ pByte_buf[2]) & 0xFF]; crc32 = (crc32 >> 8) ^ s_crc_table[(crc32 ^ pByte_buf[3]) & 0xFF]; pByte_buf += 4; buf_len -= 4; } while (buf_len) { crc32 = (crc32 >> 8) ^ s_crc_table[(crc32 ^ pByte_buf[0]) & 0xFF]; ++pByte_buf; --buf_len; } return ~crc32; } #endif void mz_free(void *p) { MZ_FREE(p); } MINIZ_EXPORT void *miniz_def_alloc_func(void *opaque, size_t items, size_t size) { (void)opaque, (void)items, (void)size; return MZ_MALLOC(items * size); } MINIZ_EXPORT void miniz_def_free_func(void 
*opaque, void *address) { (void)opaque, (void)address; MZ_FREE(address); } MINIZ_EXPORT void *miniz_def_realloc_func(void *opaque, void *address, size_t items, size_t size) { (void)opaque, (void)address, (void)items, (void)size; return MZ_REALLOC(address, items * size); } const char *mz_version(void) { return MZ_VERSION; } #ifndef MINIZ_NO_ZLIB_APIS #ifndef MINIZ_NO_DEFLATE_APIS int mz_deflateInit(mz_streamp pStream, int level) { return mz_deflateInit2(pStream, level, MZ_DEFLATED, MZ_DEFAULT_WINDOW_BITS, 9, MZ_DEFAULT_STRATEGY); } int mz_deflateInit2(mz_streamp pStream, int level, int method, int window_bits, int mem_level, int strategy) { tdefl_compressor *pComp; mz_uint comp_flags = TDEFL_COMPUTE_ADLER32 | tdefl_create_comp_flags_from_zip_params(level, window_bits, strategy); if (!pStream) return MZ_STREAM_ERROR; if ((method != MZ_DEFLATED) || ((mem_level < 1) || (mem_level > 9)) || ((window_bits != MZ_DEFAULT_WINDOW_BITS) && (-window_bits != MZ_DEFAULT_WINDOW_BITS))) return MZ_PARAM_ERROR; pStream->data_type = 0; pStream->adler = MZ_ADLER32_INIT; pStream->msg = NULL; pStream->reserved = 0; pStream->total_in = 0; pStream->total_out = 0; if (!pStream->zalloc) pStream->zalloc = miniz_def_alloc_func; if (!pStream->zfree) pStream->zfree = miniz_def_free_func; pComp = (tdefl_compressor *)pStream->zalloc(pStream->opaque, 1, sizeof(tdefl_compressor)); if (!pComp) return MZ_MEM_ERROR; pStream->state = (struct mz_internal_state *)pComp; if (tdefl_init(pComp, NULL, NULL, comp_flags) != TDEFL_STATUS_OKAY) { mz_deflateEnd(pStream); return MZ_PARAM_ERROR; } return MZ_OK; } int mz_deflateReset(mz_streamp pStream) { if ((!pStream) || (!pStream->state) || (!pStream->zalloc) || (!pStream->zfree)) return MZ_STREAM_ERROR; pStream->total_in = pStream->total_out = 0; tdefl_init((tdefl_compressor *)pStream->state, NULL, NULL, ((tdefl_compressor *)pStream->state)->m_flags); return MZ_OK; } int mz_deflate(mz_streamp pStream, int flush) { size_t in_bytes, out_bytes; mz_ulong 
orig_total_in, orig_total_out; int mz_status = MZ_OK; if ((!pStream) || (!pStream->state) || (flush < 0) || (flush > MZ_FINISH) || (!pStream->next_out)) return MZ_STREAM_ERROR; if (!pStream->avail_out) return MZ_BUF_ERROR; if (flush == MZ_PARTIAL_FLUSH) flush = MZ_SYNC_FLUSH; if (((tdefl_compressor *)pStream->state)->m_prev_return_status == TDEFL_STATUS_DONE) return (flush == MZ_FINISH) ? MZ_STREAM_END : MZ_BUF_ERROR; orig_total_in = pStream->total_in; orig_total_out = pStream->total_out; for (;;) { tdefl_status defl_status; in_bytes = pStream->avail_in; out_bytes = pStream->avail_out; defl_status = tdefl_compress((tdefl_compressor *)pStream->state, pStream->next_in, &in_bytes, pStream->next_out, &out_bytes, (tdefl_flush)flush); pStream->next_in += (mz_uint)in_bytes; pStream->avail_in -= (mz_uint)in_bytes; pStream->total_in += (mz_uint)in_bytes; pStream->adler = tdefl_get_adler32((tdefl_compressor *)pStream->state); pStream->next_out += (mz_uint)out_bytes; pStream->avail_out -= (mz_uint)out_bytes; pStream->total_out += (mz_uint)out_bytes; if (defl_status < 0) { mz_status = MZ_STREAM_ERROR; break; } else if (defl_status == TDEFL_STATUS_DONE) { mz_status = MZ_STREAM_END; break; } else if (!pStream->avail_out) break; else if ((!pStream->avail_in) && (flush != MZ_FINISH)) { if ((flush) || (pStream->total_in != orig_total_in) || (pStream->total_out != orig_total_out)) break; return MZ_BUF_ERROR; /* Can't make forward progress without some input. */ } } return mz_status; } int mz_deflateEnd(mz_streamp pStream) { if (!pStream) return MZ_STREAM_ERROR; if (pStream->state) { pStream->zfree(pStream->opaque, pStream->state); pStream->state = NULL; } return MZ_OK; } mz_ulong mz_deflateBound(mz_streamp pStream, mz_ulong source_len) { (void)pStream; /* This is really over conservative. (And lame, but it's actually pretty tricky to compute a true upper bound given the way tdefl's blocking works.) 
*/ return MZ_MAX(128 + (source_len * 110) / 100, 128 + source_len + ((source_len / (31 * 1024)) + 1) * 5); } int mz_compress2(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len, int level) { int status; mz_stream stream; memset(&stream, 0, sizeof(stream)); /* In case mz_ulong is 64-bits (argh I hate longs). */ if ((mz_uint64)(source_len | *pDest_len) > 0xFFFFFFFFU) return MZ_PARAM_ERROR; stream.next_in = pSource; stream.avail_in = (mz_uint32)source_len; stream.next_out = pDest; stream.avail_out = (mz_uint32)*pDest_len; status = mz_deflateInit(&stream, level); if (status != MZ_OK) return status; status = mz_deflate(&stream, MZ_FINISH); if (status != MZ_STREAM_END) { mz_deflateEnd(&stream); return (status == MZ_OK) ? MZ_BUF_ERROR : status; } *pDest_len = stream.total_out; return mz_deflateEnd(&stream); } int mz_compress(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len) { return mz_compress2(pDest, pDest_len, pSource, source_len, MZ_DEFAULT_COMPRESSION); } mz_ulong mz_compressBound(mz_ulong source_len) { return mz_deflateBound(NULL, source_len); } #endif /*#ifndef MINIZ_NO_DEFLATE_APIS*/ #ifndef MINIZ_NO_INFLATE_APIS typedef struct { tinfl_decompressor m_decomp; mz_uint m_dict_ofs, m_dict_avail, m_first_call, m_has_flushed; int m_window_bits; mz_uint8 m_dict[TINFL_LZ_DICT_SIZE]; tinfl_status m_last_status; } inflate_state; int mz_inflateInit2(mz_streamp pStream, int window_bits) { inflate_state *pDecomp; if (!pStream) return MZ_STREAM_ERROR; if ((window_bits != MZ_DEFAULT_WINDOW_BITS) && (-window_bits != MZ_DEFAULT_WINDOW_BITS)) return MZ_PARAM_ERROR; pStream->data_type = 0; pStream->adler = 0; pStream->msg = NULL; pStream->total_in = 0; pStream->total_out = 0; pStream->reserved = 0; if (!pStream->zalloc) pStream->zalloc = miniz_def_alloc_func; if (!pStream->zfree) pStream->zfree = miniz_def_free_func; pDecomp = (inflate_state *)pStream->zalloc(pStream->opaque, 1, 
sizeof(inflate_state)); if (!pDecomp) return MZ_MEM_ERROR; pStream->state = (struct mz_internal_state *)pDecomp; tinfl_init(&pDecomp->m_decomp); pDecomp->m_dict_ofs = 0; pDecomp->m_dict_avail = 0; pDecomp->m_last_status = TINFL_STATUS_NEEDS_MORE_INPUT; pDecomp->m_first_call = 1; pDecomp->m_has_flushed = 0; pDecomp->m_window_bits = window_bits; return MZ_OK; } int mz_inflateInit(mz_streamp pStream) { return mz_inflateInit2(pStream, MZ_DEFAULT_WINDOW_BITS); } int mz_inflateReset(mz_streamp pStream) { inflate_state *pDecomp; if (!pStream) return MZ_STREAM_ERROR; pStream->data_type = 0; pStream->adler = 0; pStream->msg = NULL; pStream->total_in = 0; pStream->total_out = 0; pStream->reserved = 0; pDecomp = (inflate_state *)pStream->state; tinfl_init(&pDecomp->m_decomp); pDecomp->m_dict_ofs = 0; pDecomp->m_dict_avail = 0; pDecomp->m_last_status = TINFL_STATUS_NEEDS_MORE_INPUT; pDecomp->m_first_call = 1; pDecomp->m_has_flushed = 0; /* pDecomp->m_window_bits = window_bits */; return MZ_OK; } int mz_inflate(mz_streamp pStream, int flush) { inflate_state *pState; mz_uint n, first_call, decomp_flags = TINFL_FLAG_COMPUTE_ADLER32; size_t in_bytes, out_bytes, orig_avail_in; tinfl_status status; if ((!pStream) || (!pStream->state)) return MZ_STREAM_ERROR; if (flush == MZ_PARTIAL_FLUSH) flush = MZ_SYNC_FLUSH; if ((flush) && (flush != MZ_SYNC_FLUSH) && (flush != MZ_FINISH)) return MZ_STREAM_ERROR; pState = (inflate_state *)pStream->state; if (pState->m_window_bits > 0) decomp_flags |= TINFL_FLAG_PARSE_ZLIB_HEADER; orig_avail_in = pStream->avail_in; first_call = pState->m_first_call; pState->m_first_call = 0; if (pState->m_last_status < 0) return MZ_DATA_ERROR; if (pState->m_has_flushed && (flush != MZ_FINISH)) return MZ_STREAM_ERROR; pState->m_has_flushed |= (flush == MZ_FINISH); if ((flush == MZ_FINISH) && (first_call)) { /* MZ_FINISH on the first call implies that the input and output buffers are large enough to hold the entire compressed/decompressed file. 
*/ decomp_flags |= TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF; in_bytes = pStream->avail_in; out_bytes = pStream->avail_out; status = tinfl_decompress(&pState->m_decomp, pStream->next_in, &in_bytes, pStream->next_out, pStream->next_out, &out_bytes, decomp_flags); pState->m_last_status = status; pStream->next_in += (mz_uint)in_bytes; pStream->avail_in -= (mz_uint)in_bytes; pStream->total_in += (mz_uint)in_bytes; pStream->adler = tinfl_get_adler32(&pState->m_decomp); pStream->next_out += (mz_uint)out_bytes; pStream->avail_out -= (mz_uint)out_bytes; pStream->total_out += (mz_uint)out_bytes; if (status < 0) return MZ_DATA_ERROR; else if (status != TINFL_STATUS_DONE) { pState->m_last_status = TINFL_STATUS_FAILED; return MZ_BUF_ERROR; } return MZ_STREAM_END; } /* flush != MZ_FINISH then we must assume there's more input. */ if (flush != MZ_FINISH) decomp_flags |= TINFL_FLAG_HAS_MORE_INPUT; if (pState->m_dict_avail) { n = MZ_MIN(pState->m_dict_avail, pStream->avail_out); memcpy(pStream->next_out, pState->m_dict + pState->m_dict_ofs, n); pStream->next_out += n; pStream->avail_out -= n; pStream->total_out += n; pState->m_dict_avail -= n; pState->m_dict_ofs = (pState->m_dict_ofs + n) & (TINFL_LZ_DICT_SIZE - 1); return ((pState->m_last_status == TINFL_STATUS_DONE) && (!pState->m_dict_avail)) ? 
MZ_STREAM_END : MZ_OK; } for (;;) { in_bytes = pStream->avail_in; out_bytes = TINFL_LZ_DICT_SIZE - pState->m_dict_ofs; status = tinfl_decompress(&pState->m_decomp, pStream->next_in, &in_bytes, pState->m_dict, pState->m_dict + pState->m_dict_ofs, &out_bytes, decomp_flags); pState->m_last_status = status; pStream->next_in += (mz_uint)in_bytes; pStream->avail_in -= (mz_uint)in_bytes; pStream->total_in += (mz_uint)in_bytes; pStream->adler = tinfl_get_adler32(&pState->m_decomp); pState->m_dict_avail = (mz_uint)out_bytes; n = MZ_MIN(pState->m_dict_avail, pStream->avail_out); memcpy(pStream->next_out, pState->m_dict + pState->m_dict_ofs, n); pStream->next_out += n; pStream->avail_out -= n; pStream->total_out += n; pState->m_dict_avail -= n; pState->m_dict_ofs = (pState->m_dict_ofs + n) & (TINFL_LZ_DICT_SIZE - 1); if (status < 0) return MZ_DATA_ERROR; /* Stream is corrupted (there could be some uncompressed data left in the output dictionary - oh well). */ else if ((status == TINFL_STATUS_NEEDS_MORE_INPUT) && (!orig_avail_in)) return MZ_BUF_ERROR; /* Signal caller that we can't make forward progress without supplying more input or by setting flush to MZ_FINISH. */ else if (flush == MZ_FINISH) { /* The output buffer MUST be large to hold the remaining uncompressed data when flush==MZ_FINISH. */ if (status == TINFL_STATUS_DONE) return pState->m_dict_avail ? MZ_BUF_ERROR : MZ_STREAM_END; /* status here must be TINFL_STATUS_HAS_MORE_OUTPUT, which means there's at least 1 more byte on the way. If there's no more room left in the output buffer then something is wrong. */ else if (!pStream->avail_out) return MZ_BUF_ERROR; } else if ((status == TINFL_STATUS_DONE) || (!pStream->avail_in) || (!pStream->avail_out) || (pState->m_dict_avail)) break; } return ((status == TINFL_STATUS_DONE) && (!pState->m_dict_avail)) ? 
MZ_STREAM_END : MZ_OK; } int mz_inflateEnd(mz_streamp pStream) { if (!pStream) return MZ_STREAM_ERROR; if (pStream->state) { pStream->zfree(pStream->opaque, pStream->state); pStream->state = NULL; } return MZ_OK; } int mz_uncompress2(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong *pSource_len) { mz_stream stream; int status; memset(&stream, 0, sizeof(stream)); /* In case mz_ulong is 64-bits (argh I hate longs). */ if ((mz_uint64)(*pSource_len | *pDest_len) > 0xFFFFFFFFU) return MZ_PARAM_ERROR; stream.next_in = pSource; stream.avail_in = (mz_uint32)*pSource_len; stream.next_out = pDest; stream.avail_out = (mz_uint32)*pDest_len; status = mz_inflateInit(&stream); if (status != MZ_OK) return status; status = mz_inflate(&stream, MZ_FINISH); *pSource_len = *pSource_len - stream.avail_in; if (status != MZ_STREAM_END) { mz_inflateEnd(&stream); return ((status == MZ_BUF_ERROR) && (!stream.avail_in)) ? MZ_DATA_ERROR : status; } *pDest_len = stream.total_out; return mz_inflateEnd(&stream); } int mz_uncompress(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len) { return mz_uncompress2(pDest, pDest_len, pSource, &source_len); } #endif /*#ifndef MINIZ_NO_INFLATE_APIS*/ const char *mz_error(int err) { static struct { int m_err; const char *m_pDesc; } s_error_descs[] = { { MZ_OK, "" }, { MZ_STREAM_END, "stream end" }, { MZ_NEED_DICT, "need dictionary" }, { MZ_ERRNO, "file error" }, { MZ_STREAM_ERROR, "stream error" }, { MZ_DATA_ERROR, "data error" }, { MZ_MEM_ERROR, "out of memory" }, { MZ_BUF_ERROR, "buf error" }, { MZ_VERSION_ERROR, "version error" }, { MZ_PARAM_ERROR, "parameter error" } }; mz_uint i; for (i = 0; i < sizeof(s_error_descs) / sizeof(s_error_descs[0]); ++i) if (s_error_descs[i].m_err == err) return s_error_descs[i].m_pDesc; return NULL; } #endif /*MINIZ_NO_ZLIB_APIS */ #ifdef __cplusplus } #endif /* This is free and unencumbered software released into the public domain. 
Anyone is free to copy, modify, publish, use, compile, sell, or distribute this software, either in source code form or as a compiled binary, for any purpose, commercial or non-commercial, and by any means. In jurisdictions that recognize copyright laws, the author or authors of this software dedicate any and all copyright interest in the software to the public domain. We make this dedication for the benefit of the public at large and to the detriment of our heirs and successors. We intend this dedication to be an overt act of relinquishment in perpetuity of all present and future rights to this software under copyright law. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. For more information, please refer to */ /************************************************************************** * * Copyright 2013-2014 RAD Game Tools and Valve Software * Copyright 2010-2014 Rich Geldreich and Tenacious Software LLC * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. * **************************************************************************/ #ifndef MINIZ_NO_DEFLATE_APIS #ifdef __cplusplus extern "C" { #endif /* ------------------- Low-level Compression (independent from all decompression API's) */ /* Purposely making these tables static for faster init and thread safety. */ static const mz_uint16 s_tdefl_len_sym[256] = { 257, 258, 259, 260, 261, 262, 263, 264, 265, 265, 266, 266, 267, 267, 268, 268, 269, 269, 269, 269, 270, 270, 270, 270, 271, 271, 271, 271, 272, 272, 272, 272, 273, 273, 273, 273, 273, 273, 273, 273, 274, 274, 274, 274, 274, 274, 274, 274, 275, 275, 275, 275, 275, 275, 275, 275, 276, 276, 276, 276, 276, 276, 276, 276, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 279, 279, 279, 279, 279, 279, 279, 279, 279, 279, 279, 279, 279, 279, 279, 279, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 284, 284, 284, 284, 284, 284, 284, 
284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 285 }; static const mz_uint8 s_tdefl_len_extra[256] = { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0 }; static const mz_uint8 s_tdefl_small_dist_sym[512] = { 0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 16, 16, 
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17 }; static const mz_uint8 s_tdefl_small_dist_extra[512] = { 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7 }; static const mz_uint8 s_tdefl_large_dist_sym[128] = { 0, 0, 18, 19, 20, 20, 21, 21, 22, 22, 22, 22, 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 25, 25, 25, 25, 25, 25, 25, 25, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29 }; static const mz_uint8 s_tdefl_large_dist_extra[128] = { 0, 0, 8, 8, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13 }; /* Radix sorts tdefl_sym_freq[] array by 16-bit key m_key. Returns ptr to sorted values. 
*/

/* A symbol index paired with its 16-bit frequency/key, used while building
   Huffman code sizes. */
typedef struct
{
    mz_uint16 m_key, m_sym_index;
} tdefl_sym_freq;

/* Radix sorts the tdefl_sym_freq array by the 16-bit m_key field, one byte per
   pass (low byte first). pSyms0/pSyms1 are ping-pong buffers; returns whichever
   buffer ends up holding the sorted result. The high-byte pass is skipped when
   every key already fits in 8 bits. */
static tdefl_sym_freq *tdefl_radix_sort_syms(mz_uint num_syms, tdefl_sym_freq *pSyms0, tdefl_sym_freq *pSyms1)
{
    mz_uint32 passes_needed = 2, shift, cur_pass, idx, hist[256 * 2];
    tdefl_sym_freq *pSrc = pSyms0, *pDst = pSyms1;
    MZ_CLEAR_ARR(hist);
    /* Build both byte histograms (low byte, high byte) in a single sweep. */
    for (idx = 0; idx < num_syms; idx++)
    {
        mz_uint key = pSyms0[idx].m_key;
        hist[key & 0xFF]++;
        hist[256 + ((key >> 8) & 0xFF)]++;
    }
    /* Drop the high-byte pass when all keys have high byte zero, i.e. every
       count landed in that histogram's bucket 0. */
    while ((passes_needed > 1) && (num_syms == hist[(passes_needed - 1) * 256]))
        passes_needed--;
    for (shift = 0, cur_pass = 0; cur_pass < passes_needed; cur_pass++, shift += 8)
    {
        const mz_uint32 *pCounts = &hist[cur_pass << 8];
        mz_uint offsets[256], running = 0;
        /* Prefix-sum the bucket counts into starting offsets. */
        for (idx = 0; idx < 256; idx++)
        {
            offsets[idx] = running;
            running += pCounts[idx];
        }
        /* Stable scatter into the other buffer, then swap the buffers. */
        for (idx = 0; idx < num_syms; idx++)
            pDst[offsets[(pSrc[idx].m_key >> shift) & 0xFF]++] = pSrc[idx];
        {
            tdefl_sym_freq *pTmp = pSrc;
            pSrc = pDst;
            pDst = pTmp;
        }
    }
    return pSrc;
}

/* tdefl_calculate_minimum_redundancy() originally written by: Alistair Moffat,
   alistair@cs.mu.oz.au, Jyrki Katajainen, jyrki@diku.dk, November 1996.
*/

/* Computes optimal Huffman code lengths in place (the Moffat/Katajainen
   in-place minimum-redundancy algorithm credited above). On entry
   A[0..n-1].m_key holds symbol frequencies sorted ascending (the caller radix
   sorts first); on exit A[i].m_key holds the code length for the i-th sorted
   symbol. No extra memory is used: the array successively stores tree weights,
   then parent indices, then depths. */
static void tdefl_calculate_minimum_redundancy(tdefl_sym_freq *A, int n)
{
    int root, leaf, next, avbl, used, dpth;
    /* Trivial cases: no symbols, or one symbol which gets a 1-bit code. */
    if (n == 0)
        return;
    else if (n == 1)
    {
        A[0].m_key = 1;
        return;
    }
    /* Phase 1: build the Huffman tree. A[next].m_key becomes the weight of
       internal node 'next'; consumed entries are overwritten with their
       parent's index. 'root' scans internal nodes, 'leaf' scans leaves. */
    A[0].m_key += A[1].m_key;
    root = 0;
    leaf = 2;
    for (next = 1; next < n - 1; next++)
    {
        /* Twice, pick the smaller of the next internal node and the next leaf
           so the two smallest weights are merged into one parent. */
        if (leaf >= n || A[root].m_key < A[leaf].m_key)
        {
            A[next].m_key = A[root].m_key;
            A[root++].m_key = (mz_uint16)next;
        }
        else
            A[next].m_key = A[leaf++].m_key;
        if (leaf >= n || (root < next && A[root].m_key < A[leaf].m_key))
        {
            A[next].m_key = (mz_uint16)(A[next].m_key + A[root].m_key);
            A[root++].m_key = (mz_uint16)next;
        }
        else
            A[next].m_key = (mz_uint16)(A[next].m_key + A[leaf++].m_key);
    }
    /* Phase 2: convert parent links into node depths, walking down from the
       root (which sits at index n-2). */
    A[n - 2].m_key = 0;
    for (next = n - 3; next >= 0; next--)
        A[next].m_key = A[A[next].m_key].m_key + 1;
    /* Phase 3: assign leaf code lengths from the internal-node depths. 'avbl'
       is the number of tree slots available at depth 'dpth'; 'used' counts how
       many of them are taken by internal nodes. */
    avbl = 1;
    used = dpth = 0;
    root = n - 2;
    next = n - 1;
    while (avbl > 0)
    {
        while (root >= 0 && (int)A[root].m_key == dpth)
        {
            used++;
            root--;
        }
        while (avbl > used)
        {
            A[next--].m_key = (mz_uint16)(dpth);
            avbl--;
        }
        avbl = 2 * used;
        dpth++;
        used = 0;
    }
}

/* Limits canonical Huffman code table's max code size.
*/
/* Hard upper bound on any Huffman code length this implementation handles. */
enum
{
    TDEFL_MAX_SUPPORTED_HUFF_CODESIZE = 32
};

/* Adjusts the histogram of code lengths (pNum_codes[1..32]) so that no length
   exceeds max_code_size: over-long codes are folded down to max_code_size and
   the histogram is then rebalanced until the scaled Kraft sum equals exactly
   1 << max_code_size. */
static void tdefl_huffman_enforce_max_code_size(int *pNum_codes, int code_list_len, int max_code_size)
{
    int i;
    mz_uint32 total = 0;
    if (code_list_len <= 1)
        return;
    /* Fold every over-limit length into the maximum allowed length. */
    for (i = max_code_size + 1; i <= TDEFL_MAX_SUPPORTED_HUFF_CODESIZE; i++)
        pNum_codes[max_code_size] += pNum_codes[i];
    /* Kraft-style sum, scaled by 1 << max_code_size. */
    for (i = max_code_size; i > 0; i--)
        total += (((mz_uint32)pNum_codes[i]) << (max_code_size - i));
    /* While oversubscribed: remove one max-length code and lengthen one
       shorter code into a pair one level deeper; each pass reduces the scaled
       sum by exactly one. */
    while (total != (1UL << max_code_size))
    {
        pNum_codes[max_code_size]--;
        for (i = max_code_size - 1; i > 0; i--)
            if (pNum_codes[i])
            {
                pNum_codes[i]--;
                pNum_codes[i + 1] += 2;
                break;
            }
        total--;
    }
}

/* Builds canonical Huffman table 'table_num' of the compressor. Unless
   static_table is set, code sizes are computed from the symbol frequencies in
   d->m_huff_count (radix sort + minimum redundancy + length limiting to
   code_size_limit bits) and stored in d->m_huff_code_sizes; then the
   bit-reversed canonical codes are derived into d->m_huff_codes. With
   static_table set, the pre-assigned sizes in d->m_huff_code_sizes are kept
   and only the codes are (re)derived. */
static void tdefl_optimize_huffman_table(tdefl_compressor *d, int table_num, int table_len, int code_size_limit, int static_table)
{
    int i, j, l, num_codes[1 + TDEFL_MAX_SUPPORTED_HUFF_CODESIZE];
    mz_uint next_code[TDEFL_MAX_SUPPORTED_HUFF_CODESIZE + 1];
    MZ_CLEAR_ARR(num_codes);
    if (static_table)
    {
        /* Histogram the fixed code sizes so canonical codes can be assigned. */
        for (i = 0; i < table_len; i++)
            num_codes[d->m_huff_code_sizes[table_num][i]]++;
    }
    else
    {
        tdefl_sym_freq syms0[TDEFL_MAX_HUFF_SYMBOLS], syms1[TDEFL_MAX_HUFF_SYMBOLS], *pSyms;
        int num_used_syms = 0;
        const mz_uint16 *pSym_count = &d->m_huff_count[table_num][0];
        /* Collect only the symbols that actually occur. */
        for (i = 0; i < table_len; i++)
            if (pSym_count[i])
            {
                syms0[num_used_syms].m_key = (mz_uint16)pSym_count[i];
                syms0[num_used_syms++].m_sym_index = (mz_uint16)i;
            }
        pSyms = tdefl_radix_sort_syms(num_used_syms, syms0, syms1);
        /* After this call each pSyms[i].m_key holds a code length. */
        tdefl_calculate_minimum_redundancy(pSyms, num_used_syms);
        for (i = 0; i < num_used_syms; i++)
            num_codes[pSyms[i].m_key]++;
        tdefl_huffman_enforce_max_code_size(num_codes, num_used_syms, code_size_limit);
        MZ_CLEAR_ARR(d->m_huff_code_sizes[table_num]);
        MZ_CLEAR_ARR(d->m_huff_codes[table_num]);
        /* Reassign final (possibly limited) sizes: longest codes go to the
           least frequent symbols, i.e. the front of the sorted array. */
        for (i = 1, j = num_used_syms; i <= code_size_limit; i++)
            for (l = num_codes[i]; l > 0; l--)
                d->m_huff_code_sizes[table_num][pSyms[--j].m_sym_index] = (mz_uint8)(i);
    }
    /* Canonical code assignment: first code of each length follows from the
       previous length's codes. */
    next_code[1] = 0;
    for (j = 0, i = 2; i <= code_size_limit; i++)
        next_code[i] = j = ((j + num_codes[i - 1]) << 1);
    for (i = 0; i < table_len; i++)
    {
        mz_uint rev_code = 0, code, code_size;
        if ((code_size = d->m_huff_code_sizes[table_num][i]) == 0)
            continue;
        code = next_code[code_size]++;
        /* Store the code bit-reversed so it can be emitted LSB-first. */
        for (l = code_size; l > 0; l--, code >>= 1)
            rev_code = (rev_code << 1) | (code & 1);
        d->m_huff_codes[table_num][i] = (mz_uint16)rev_code;
    }
}

/* Appends 'l' bits of 'b' to the compressor's bit buffer, flushing whole bytes
   to the output buffer as they accumulate. Relies on a tdefl_compressor
   pointer named 'd' being in scope at the expansion site. */
#define TDEFL_PUT_BITS(b, l)                                       \
    do                                                             \
    {                                                              \
        mz_uint bits = b;                                          \
        mz_uint len = l;                                           \
        MZ_ASSERT(bits <= ((1U << len) - 1U));                     \
        d->m_bit_buffer |= (bits << d->m_bits_in);                 \
        d->m_bits_in += len;                                       \
        while (d->m_bits_in >= 8)                                  \
        {                                                          \
            if (d->m_pOutput_buf < d->m_pOutput_buf_end)           \
                *d->m_pOutput_buf++ = (mz_uint8)(d->m_bit_buffer); \
            d->m_bit_buffer >>= 8;                                 \
            d->m_bits_in -= 8;                                     \
        }                                                          \
    }                                                              \
    MZ_MACRO_END

/* RLE helper used while packing code sizes for the dynamic block header:
   flushes a pending run of repeated non-zero code sizes, emitting either the
   literal sizes (run < 3) or code-size symbol 16 followed by run-3. Uses the
   rle_repeat_count / prev_code_size / packed_code_sizes locals of the
   expanding function. */
#define TDEFL_RLE_PREV_CODE_SIZE()                                                                                       \
    {                                                                                                                    \
        if (rle_repeat_count)                                                                                            \
        {                                                                                                                \
            if (rle_repeat_count < 3)                                                                                    \
            {                                                                                                            \
                d->m_huff_count[2][prev_code_size] = (mz_uint16)(d->m_huff_count[2][prev_code_size] + rle_repeat_count); \
                while (rle_repeat_count--)                                                                               \
                    packed_code_sizes[num_packed_code_sizes++] = prev_code_size;                                         \
            }                                                                                                            \
            else                                                                                                         \
            {                                                                                                            \
                d->m_huff_count[2][16] = (mz_uint16)(d->m_huff_count[2][16] + 1);                                        \
                packed_code_sizes[num_packed_code_sizes++] = 16;                                                         \
                packed_code_sizes[num_packed_code_sizes++] = (mz_uint8)(rle_repeat_count - 3);                           \
            }                                                                                                            \
            rle_repeat_count = 0;                                                                                        \
        }                                                                                                                \
    }

/* RLE helper: flushes a pending run of zero code sizes, emitting literal zeros
   (run < 3), code-size symbol 17 with run-3 (runs of 3..10), or symbol 18
   with run-11 (longer runs). */
#define TDEFL_RLE_ZERO_CODE_SIZE()                                                         \
    {                                                                                      \
        if (rle_z_count)                                                                   \
        {                                                                                  \
            if (rle_z_count < 3)                                                           \
            {                                                                              \
                d->m_huff_count[2][0] = (mz_uint16)(d->m_huff_count[2][0] + rle_z_count);  \
                while (rle_z_count--)                                                      \
                    packed_code_sizes[num_packed_code_sizes++] = 0;                        \
            }                                                                              \
            else if (rle_z_count <= 10)                                                    \
            {                                                                              \
                d->m_huff_count[2][17] = (mz_uint16)(d->m_huff_count[2][17] + 1);          \
                packed_code_sizes[num_packed_code_sizes++] = 17;                           \
                packed_code_sizes[num_packed_code_sizes++] = (mz_uint8)(rle_z_count - 3);  \
            }                                                                              \
            else                                                                           \
            {                                                                              \
                d->m_huff_count[2][18] = (mz_uint16)(d->m_huff_count[2][18] + 1);          \
                packed_code_sizes[num_packed_code_sizes++] = 18;                           \
                packed_code_sizes[num_packed_code_sizes++] = (mz_uint8)(rle_z_count - 11); \
            }                                                                              \
            rle_z_count = 0;                                                               \
        }                                                                                  \
    }

/* Transmission order of the code-size code lengths in a dynamic block header;
   the declaration continues on the following line. */
static const mz_uint8
s_tdefl_packed_code_size_syms_swizzle[] = { 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 }; static void tdefl_start_dynamic_block(tdefl_compressor *d) { int num_lit_codes, num_dist_codes, num_bit_lengths; mz_uint i, total_code_sizes_to_pack, num_packed_code_sizes, rle_z_count, rle_repeat_count, packed_code_sizes_index; mz_uint8 code_sizes_to_pack[TDEFL_MAX_HUFF_SYMBOLS_0 + TDEFL_MAX_HUFF_SYMBOLS_1], packed_code_sizes[TDEFL_MAX_HUFF_SYMBOLS_0 + TDEFL_MAX_HUFF_SYMBOLS_1], prev_code_size = 0xFF; d->m_huff_count[0][256] = 1; tdefl_optimize_huffman_table(d, 0, TDEFL_MAX_HUFF_SYMBOLS_0, 15, MZ_FALSE); tdefl_optimize_huffman_table(d, 1, TDEFL_MAX_HUFF_SYMBOLS_1, 15, MZ_FALSE); for (num_lit_codes = 286; num_lit_codes > 257; num_lit_codes--) if (d->m_huff_code_sizes[0][num_lit_codes - 1]) break; for (num_dist_codes = 30; num_dist_codes > 1; num_dist_codes--) if (d->m_huff_code_sizes[1][num_dist_codes - 1]) break; memcpy(code_sizes_to_pack, &d->m_huff_code_sizes[0][0], num_lit_codes); memcpy(code_sizes_to_pack + num_lit_codes, &d->m_huff_code_sizes[1][0], num_dist_codes); total_code_sizes_to_pack = num_lit_codes + num_dist_codes; num_packed_code_sizes = 0; rle_z_count = 0; rle_repeat_count = 0; memset(&d->m_huff_count[2][0], 0, sizeof(d->m_huff_count[2][0]) * TDEFL_MAX_HUFF_SYMBOLS_2); for (i = 0; i < total_code_sizes_to_pack; i++) { mz_uint8 code_size = code_sizes_to_pack[i]; if (!code_size) { TDEFL_RLE_PREV_CODE_SIZE(); if (++rle_z_count == 138) { TDEFL_RLE_ZERO_CODE_SIZE(); } } else { TDEFL_RLE_ZERO_CODE_SIZE(); if (code_size != prev_code_size) { TDEFL_RLE_PREV_CODE_SIZE(); d->m_huff_count[2][code_size] = (mz_uint16)(d->m_huff_count[2][code_size] + 1); packed_code_sizes[num_packed_code_sizes++] = code_size; } else if (++rle_repeat_count == 6) { TDEFL_RLE_PREV_CODE_SIZE(); } } prev_code_size = code_size; } if (rle_repeat_count) { TDEFL_RLE_PREV_CODE_SIZE(); } else { TDEFL_RLE_ZERO_CODE_SIZE(); } tdefl_optimize_huffman_table(d, 2, 
TDEFL_MAX_HUFF_SYMBOLS_2, 7, MZ_FALSE); TDEFL_PUT_BITS(2, 2); TDEFL_PUT_BITS(num_lit_codes - 257, 5); TDEFL_PUT_BITS(num_dist_codes - 1, 5); for (num_bit_lengths = 18; num_bit_lengths >= 0; num_bit_lengths--) if (d->m_huff_code_sizes[2][s_tdefl_packed_code_size_syms_swizzle[num_bit_lengths]]) break; num_bit_lengths = MZ_MAX(4, (num_bit_lengths + 1)); TDEFL_PUT_BITS(num_bit_lengths - 4, 4); for (i = 0; (int)i < num_bit_lengths; i++) TDEFL_PUT_BITS(d->m_huff_code_sizes[2][s_tdefl_packed_code_size_syms_swizzle[i]], 3); for (packed_code_sizes_index = 0; packed_code_sizes_index < num_packed_code_sizes;) { mz_uint code = packed_code_sizes[packed_code_sizes_index++]; MZ_ASSERT(code < TDEFL_MAX_HUFF_SYMBOLS_2); TDEFL_PUT_BITS(d->m_huff_codes[2][code], d->m_huff_code_sizes[2][code]); if (code >= 16) TDEFL_PUT_BITS(packed_code_sizes[packed_code_sizes_index++], "\02\03\07"[code - 16]); } } static void tdefl_start_static_block(tdefl_compressor *d) { mz_uint i; mz_uint8 *p = &d->m_huff_code_sizes[0][0]; for (i = 0; i <= 143; ++i) *p++ = 8; for (; i <= 255; ++i) *p++ = 9; for (; i <= 279; ++i) *p++ = 7; for (; i <= 287; ++i) *p++ = 8; memset(d->m_huff_code_sizes[1], 5, 32); tdefl_optimize_huffman_table(d, 0, 288, 15, MZ_TRUE); tdefl_optimize_huffman_table(d, 1, 32, 15, MZ_TRUE); TDEFL_PUT_BITS(1, 2); } static const mz_uint mz_bitmasks[17] = { 0x0000, 0x0001, 0x0003, 0x0007, 0x000F, 0x001F, 0x003F, 0x007F, 0x00FF, 0x01FF, 0x03FF, 0x07FF, 0x0FFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF }; #if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN && MINIZ_HAS_64BIT_REGISTERS static mz_bool tdefl_compress_lz_codes(tdefl_compressor *d) { mz_uint flags; mz_uint8 *pLZ_codes; mz_uint8 *pOutput_buf = d->m_pOutput_buf; mz_uint8 *pLZ_code_buf_end = d->m_pLZ_code_buf; mz_uint64 bit_buffer = d->m_bit_buffer; mz_uint bits_in = d->m_bits_in; #define TDEFL_PUT_BITS_FAST(b, l) \ { \ bit_buffer |= (((mz_uint64)(b)) << bits_in); \ bits_in += (l); \ } flags = 1; for (pLZ_codes = d->m_lz_code_buf; 
pLZ_codes < pLZ_code_buf_end; flags >>= 1) { if (flags == 1) flags = *pLZ_codes++ | 0x100; if (flags & 1) { mz_uint s0, s1, n0, n1, sym, num_extra_bits; mz_uint match_len = pLZ_codes[0]; mz_uint match_dist = (pLZ_codes[1] | (pLZ_codes[2] << 8)); pLZ_codes += 3; MZ_ASSERT(d->m_huff_code_sizes[0][s_tdefl_len_sym[match_len]]); TDEFL_PUT_BITS_FAST(d->m_huff_codes[0][s_tdefl_len_sym[match_len]], d->m_huff_code_sizes[0][s_tdefl_len_sym[match_len]]); TDEFL_PUT_BITS_FAST(match_len & mz_bitmasks[s_tdefl_len_extra[match_len]], s_tdefl_len_extra[match_len]); /* This sequence coaxes MSVC into using cmov's vs. jmp's. */ s0 = s_tdefl_small_dist_sym[match_dist & 511]; n0 = s_tdefl_small_dist_extra[match_dist & 511]; s1 = s_tdefl_large_dist_sym[match_dist >> 8]; n1 = s_tdefl_large_dist_extra[match_dist >> 8]; sym = (match_dist < 512) ? s0 : s1; num_extra_bits = (match_dist < 512) ? n0 : n1; MZ_ASSERT(d->m_huff_code_sizes[1][sym]); TDEFL_PUT_BITS_FAST(d->m_huff_codes[1][sym], d->m_huff_code_sizes[1][sym]); TDEFL_PUT_BITS_FAST(match_dist & mz_bitmasks[num_extra_bits], num_extra_bits); } else { mz_uint lit = *pLZ_codes++; MZ_ASSERT(d->m_huff_code_sizes[0][lit]); TDEFL_PUT_BITS_FAST(d->m_huff_codes[0][lit], d->m_huff_code_sizes[0][lit]); if (((flags & 2) == 0) && (pLZ_codes < pLZ_code_buf_end)) { flags >>= 1; lit = *pLZ_codes++; MZ_ASSERT(d->m_huff_code_sizes[0][lit]); TDEFL_PUT_BITS_FAST(d->m_huff_codes[0][lit], d->m_huff_code_sizes[0][lit]); if (((flags & 2) == 0) && (pLZ_codes < pLZ_code_buf_end)) { flags >>= 1; lit = *pLZ_codes++; MZ_ASSERT(d->m_huff_code_sizes[0][lit]); TDEFL_PUT_BITS_FAST(d->m_huff_codes[0][lit], d->m_huff_code_sizes[0][lit]); } } } if (pOutput_buf >= d->m_pOutput_buf_end) return MZ_FALSE; memcpy(pOutput_buf, &bit_buffer, sizeof(mz_uint64)); pOutput_buf += (bits_in >> 3); bit_buffer >>= (bits_in & ~7); bits_in &= 7; } #undef TDEFL_PUT_BITS_FAST d->m_pOutput_buf = pOutput_buf; d->m_bits_in = 0; d->m_bit_buffer = 0; while (bits_in) { mz_uint32 n = MZ_MIN(bits_in, 
16); TDEFL_PUT_BITS((mz_uint)bit_buffer & mz_bitmasks[n], n); bit_buffer >>= n; bits_in -= n; } TDEFL_PUT_BITS(d->m_huff_codes[0][256], d->m_huff_code_sizes[0][256]); return (d->m_pOutput_buf < d->m_pOutput_buf_end); } #else static mz_bool tdefl_compress_lz_codes(tdefl_compressor *d) { mz_uint flags; mz_uint8 *pLZ_codes; flags = 1; for (pLZ_codes = d->m_lz_code_buf; pLZ_codes < d->m_pLZ_code_buf; flags >>= 1) { if (flags == 1) flags = *pLZ_codes++ | 0x100; if (flags & 1) { mz_uint sym, num_extra_bits; mz_uint match_len = pLZ_codes[0], match_dist = (pLZ_codes[1] | (pLZ_codes[2] << 8)); pLZ_codes += 3; MZ_ASSERT(d->m_huff_code_sizes[0][s_tdefl_len_sym[match_len]]); TDEFL_PUT_BITS(d->m_huff_codes[0][s_tdefl_len_sym[match_len]], d->m_huff_code_sizes[0][s_tdefl_len_sym[match_len]]); TDEFL_PUT_BITS(match_len & mz_bitmasks[s_tdefl_len_extra[match_len]], s_tdefl_len_extra[match_len]); if (match_dist < 512) { sym = s_tdefl_small_dist_sym[match_dist]; num_extra_bits = s_tdefl_small_dist_extra[match_dist]; } else { sym = s_tdefl_large_dist_sym[match_dist >> 8]; num_extra_bits = s_tdefl_large_dist_extra[match_dist >> 8]; } MZ_ASSERT(d->m_huff_code_sizes[1][sym]); TDEFL_PUT_BITS(d->m_huff_codes[1][sym], d->m_huff_code_sizes[1][sym]); TDEFL_PUT_BITS(match_dist & mz_bitmasks[num_extra_bits], num_extra_bits); } else { mz_uint lit = *pLZ_codes++; MZ_ASSERT(d->m_huff_code_sizes[0][lit]); TDEFL_PUT_BITS(d->m_huff_codes[0][lit], d->m_huff_code_sizes[0][lit]); } } TDEFL_PUT_BITS(d->m_huff_codes[0][256], d->m_huff_code_sizes[0][256]); return (d->m_pOutput_buf < d->m_pOutput_buf_end); } #endif /* MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN && MINIZ_HAS_64BIT_REGISTERS */ static mz_bool tdefl_compress_block(tdefl_compressor *d, mz_bool static_block) { if (static_block) tdefl_start_static_block(d); else tdefl_start_dynamic_block(d); return tdefl_compress_lz_codes(d); } static const mz_uint s_tdefl_num_probes[11]; static int tdefl_flush_block(tdefl_compressor *d, int flush) 
{ mz_uint saved_bit_buf, saved_bits_in; mz_uint8 *pSaved_output_buf; mz_bool comp_block_succeeded = MZ_FALSE; int n, use_raw_block = ((d->m_flags & TDEFL_FORCE_ALL_RAW_BLOCKS) != 0) && (d->m_lookahead_pos - d->m_lz_code_buf_dict_pos) <= d->m_dict_size; mz_uint8 *pOutput_buf_start = ((d->m_pPut_buf_func == NULL) && ((*d->m_pOut_buf_size - d->m_out_buf_ofs) >= TDEFL_OUT_BUF_SIZE)) ? ((mz_uint8 *)d->m_pOut_buf + d->m_out_buf_ofs) : d->m_output_buf; d->m_pOutput_buf = pOutput_buf_start; d->m_pOutput_buf_end = d->m_pOutput_buf + TDEFL_OUT_BUF_SIZE - 16; MZ_ASSERT(!d->m_output_flush_remaining); d->m_output_flush_ofs = 0; d->m_output_flush_remaining = 0; *d->m_pLZ_flags = (mz_uint8)(*d->m_pLZ_flags >> d->m_num_flags_left); d->m_pLZ_code_buf -= (d->m_num_flags_left == 8); if ((d->m_flags & TDEFL_WRITE_ZLIB_HEADER) && (!d->m_block_index)) { const mz_uint8 cmf = 0x78; mz_uint8 flg, flevel = 3; mz_uint header, i, mz_un = sizeof(s_tdefl_num_probes) / sizeof(mz_uint); /* Determine compression level by reversing the process in tdefl_create_comp_flags_from_zip_params() */ for (i = 0; i < mz_un; i++) if (s_tdefl_num_probes[i] == (d->m_flags & 0xFFF)) break; if (i < 2) flevel = 0; else if (i < 6) flevel = 1; else if (i == 6) flevel = 2; header = cmf << 8 | (flevel << 6); header += 31 - (header % 31); flg = header & 0xFF; TDEFL_PUT_BITS(cmf, 8); TDEFL_PUT_BITS(flg, 8); } TDEFL_PUT_BITS(flush == TDEFL_FINISH, 1); pSaved_output_buf = d->m_pOutput_buf; saved_bit_buf = d->m_bit_buffer; saved_bits_in = d->m_bits_in; if (!use_raw_block) comp_block_succeeded = tdefl_compress_block(d, (d->m_flags & TDEFL_FORCE_ALL_STATIC_BLOCKS) || (d->m_total_lz_bytes < 48)); /* If the block gets expanded, forget the current contents of the output buffer and send a raw block instead. 
*/ if (((use_raw_block) || ((d->m_total_lz_bytes) && ((d->m_pOutput_buf - pSaved_output_buf + 1U) >= d->m_total_lz_bytes))) && ((d->m_lookahead_pos - d->m_lz_code_buf_dict_pos) <= d->m_dict_size)) { mz_uint i; d->m_pOutput_buf = pSaved_output_buf; d->m_bit_buffer = saved_bit_buf, d->m_bits_in = saved_bits_in; TDEFL_PUT_BITS(0, 2); if (d->m_bits_in) { TDEFL_PUT_BITS(0, 8 - d->m_bits_in); } for (i = 2; i; --i, d->m_total_lz_bytes ^= 0xFFFF) { TDEFL_PUT_BITS(d->m_total_lz_bytes & 0xFFFF, 16); } for (i = 0; i < d->m_total_lz_bytes; ++i) { TDEFL_PUT_BITS(d->m_dict[(d->m_lz_code_buf_dict_pos + i) & TDEFL_LZ_DICT_SIZE_MASK], 8); } } /* Check for the extremely unlikely (if not impossible) case of the compressed block not fitting into the output buffer when using dynamic codes. */ else if (!comp_block_succeeded) { d->m_pOutput_buf = pSaved_output_buf; d->m_bit_buffer = saved_bit_buf, d->m_bits_in = saved_bits_in; tdefl_compress_block(d, MZ_TRUE); } if (flush) { if (flush == TDEFL_FINISH) { if (d->m_bits_in) { TDEFL_PUT_BITS(0, 8 - d->m_bits_in); } if (d->m_flags & TDEFL_WRITE_ZLIB_HEADER) { mz_uint i, a = d->m_adler32; for (i = 0; i < 4; i++) { TDEFL_PUT_BITS((a >> 24) & 0xFF, 8); a <<= 8; } } } else { mz_uint i, z = 0; TDEFL_PUT_BITS(0, 3); if (d->m_bits_in) { TDEFL_PUT_BITS(0, 8 - d->m_bits_in); } for (i = 2; i; --i, z ^= 0xFFFF) { TDEFL_PUT_BITS(z & 0xFFFF, 16); } } } MZ_ASSERT(d->m_pOutput_buf < d->m_pOutput_buf_end); memset(&d->m_huff_count[0][0], 0, sizeof(d->m_huff_count[0][0]) * TDEFL_MAX_HUFF_SYMBOLS_0); memset(&d->m_huff_count[1][0], 0, sizeof(d->m_huff_count[1][0]) * TDEFL_MAX_HUFF_SYMBOLS_1); d->m_pLZ_code_buf = d->m_lz_code_buf + 1; d->m_pLZ_flags = d->m_lz_code_buf; d->m_num_flags_left = 8; d->m_lz_code_buf_dict_pos += d->m_total_lz_bytes; d->m_total_lz_bytes = 0; d->m_block_index++; if ((n = (int)(d->m_pOutput_buf - pOutput_buf_start)) != 0) { if (d->m_pPut_buf_func) { *d->m_pIn_buf_size = d->m_pSrc - (const mz_uint8 *)d->m_pIn_buf; if 
(!(*d->m_pPut_buf_func)(d->m_output_buf, n, d->m_pPut_buf_user)) return (d->m_prev_return_status = TDEFL_STATUS_PUT_BUF_FAILED); } else if (pOutput_buf_start == d->m_output_buf) { int bytes_to_copy = (int)MZ_MIN((size_t)n, (size_t)(*d->m_pOut_buf_size - d->m_out_buf_ofs)); memcpy((mz_uint8 *)d->m_pOut_buf + d->m_out_buf_ofs, d->m_output_buf, bytes_to_copy); d->m_out_buf_ofs += bytes_to_copy; if ((n -= bytes_to_copy) != 0) { d->m_output_flush_ofs = bytes_to_copy; d->m_output_flush_remaining = n; } } else { d->m_out_buf_ofs += n; } } return d->m_output_flush_remaining; } #if MINIZ_USE_UNALIGNED_LOADS_AND_STORES #ifdef MINIZ_UNALIGNED_USE_MEMCPY static mz_uint16 TDEFL_READ_UNALIGNED_WORD(const mz_uint8* p) { mz_uint16 ret; memcpy(&ret, p, sizeof(mz_uint16)); return ret; } static mz_uint16 TDEFL_READ_UNALIGNED_WORD2(const mz_uint16* p) { mz_uint16 ret; memcpy(&ret, p, sizeof(mz_uint16)); return ret; } #else #define TDEFL_READ_UNALIGNED_WORD(p) *(const mz_uint16 *)(p) #define TDEFL_READ_UNALIGNED_WORD2(p) *(const mz_uint16 *)(p) #endif static MZ_FORCEINLINE void tdefl_find_match(tdefl_compressor *d, mz_uint lookahead_pos, mz_uint max_dist, mz_uint max_match_len, mz_uint *pMatch_dist, mz_uint *pMatch_len) { mz_uint dist, pos = lookahead_pos & TDEFL_LZ_DICT_SIZE_MASK, match_len = *pMatch_len, probe_pos = pos, next_probe_pos, probe_len; mz_uint num_probes_left = d->m_max_probes[match_len >= 32]; const mz_uint16 *s = (const mz_uint16 *)(d->m_dict + pos), *p, *q; mz_uint16 c01 = TDEFL_READ_UNALIGNED_WORD(&d->m_dict[pos + match_len - 1]), s01 = TDEFL_READ_UNALIGNED_WORD2(s); MZ_ASSERT(max_match_len <= TDEFL_MAX_MATCH_LEN); if (max_match_len <= match_len) return; for (;;) { for (;;) { if (--num_probes_left == 0) return; #define TDEFL_PROBE \ next_probe_pos = d->m_next[probe_pos]; \ if ((!next_probe_pos) || ((dist = (mz_uint16)(lookahead_pos - next_probe_pos)) > max_dist)) \ return; \ probe_pos = next_probe_pos & TDEFL_LZ_DICT_SIZE_MASK; \ if 
(TDEFL_READ_UNALIGNED_WORD(&d->m_dict[probe_pos + match_len - 1]) == c01) \ break; TDEFL_PROBE; TDEFL_PROBE; TDEFL_PROBE; } if (!dist) break; q = (const mz_uint16 *)(d->m_dict + probe_pos); if (TDEFL_READ_UNALIGNED_WORD2(q) != s01) continue; p = s; probe_len = 32; do { } while ((TDEFL_READ_UNALIGNED_WORD2(++p) == TDEFL_READ_UNALIGNED_WORD2(++q)) && (TDEFL_READ_UNALIGNED_WORD2(++p) == TDEFL_READ_UNALIGNED_WORD2(++q)) && (TDEFL_READ_UNALIGNED_WORD2(++p) == TDEFL_READ_UNALIGNED_WORD2(++q)) && (TDEFL_READ_UNALIGNED_WORD2(++p) == TDEFL_READ_UNALIGNED_WORD2(++q)) && (--probe_len > 0)); if (!probe_len) { *pMatch_dist = dist; *pMatch_len = MZ_MIN(max_match_len, (mz_uint)TDEFL_MAX_MATCH_LEN); break; } else if ((probe_len = ((mz_uint)(p - s) * 2) + (mz_uint)(*(const mz_uint8 *)p == *(const mz_uint8 *)q)) > match_len) { *pMatch_dist = dist; if ((*pMatch_len = match_len = MZ_MIN(max_match_len, probe_len)) == max_match_len) break; c01 = TDEFL_READ_UNALIGNED_WORD(&d->m_dict[pos + match_len - 1]); } } } #else static MZ_FORCEINLINE void tdefl_find_match(tdefl_compressor *d, mz_uint lookahead_pos, mz_uint max_dist, mz_uint max_match_len, mz_uint *pMatch_dist, mz_uint *pMatch_len) { mz_uint dist, pos = lookahead_pos & TDEFL_LZ_DICT_SIZE_MASK, match_len = *pMatch_len, probe_pos = pos, next_probe_pos, probe_len; mz_uint num_probes_left = d->m_max_probes[match_len >= 32]; const mz_uint8 *s = d->m_dict + pos, *p, *q; mz_uint8 c0 = d->m_dict[pos + match_len], c1 = d->m_dict[pos + match_len - 1]; MZ_ASSERT(max_match_len <= TDEFL_MAX_MATCH_LEN); if (max_match_len <= match_len) return; for (;;) { for (;;) { if (--num_probes_left == 0) return; #define TDEFL_PROBE \ next_probe_pos = d->m_next[probe_pos]; \ if ((!next_probe_pos) || ((dist = (mz_uint16)(lookahead_pos - next_probe_pos)) > max_dist)) \ return; \ probe_pos = next_probe_pos & TDEFL_LZ_DICT_SIZE_MASK; \ if ((d->m_dict[probe_pos + match_len] == c0) && (d->m_dict[probe_pos + match_len - 1] == c1)) \ break; TDEFL_PROBE; TDEFL_PROBE; 
TDEFL_PROBE; } if (!dist) break; p = s; q = d->m_dict + probe_pos; for (probe_len = 0; probe_len < max_match_len; probe_len++) if (*p++ != *q++) break; if (probe_len > match_len) { *pMatch_dist = dist; if ((*pMatch_len = match_len = probe_len) == max_match_len) return; c0 = d->m_dict[pos + match_len]; c1 = d->m_dict[pos + match_len - 1]; } } } #endif /* #if MINIZ_USE_UNALIGNED_LOADS_AND_STORES */ #if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN #ifdef MINIZ_UNALIGNED_USE_MEMCPY static mz_uint32 TDEFL_READ_UNALIGNED_WORD32(const mz_uint8* p) { mz_uint32 ret; memcpy(&ret, p, sizeof(mz_uint32)); return ret; } #else #define TDEFL_READ_UNALIGNED_WORD32(p) *(const mz_uint32 *)(p) #endif static mz_bool tdefl_compress_fast(tdefl_compressor *d) { /* Faster, minimally featured LZRW1-style match+parse loop with better register utilization. Intended for applications where raw throughput is valued more highly than ratio. */ mz_uint lookahead_pos = d->m_lookahead_pos, lookahead_size = d->m_lookahead_size, dict_size = d->m_dict_size, total_lz_bytes = d->m_total_lz_bytes, num_flags_left = d->m_num_flags_left; mz_uint8 *pLZ_code_buf = d->m_pLZ_code_buf, *pLZ_flags = d->m_pLZ_flags; mz_uint cur_pos = lookahead_pos & TDEFL_LZ_DICT_SIZE_MASK; while ((d->m_src_buf_left) || ((d->m_flush) && (lookahead_size))) { const mz_uint TDEFL_COMP_FAST_LOOKAHEAD_SIZE = 4096; mz_uint dst_pos = (lookahead_pos + lookahead_size) & TDEFL_LZ_DICT_SIZE_MASK; mz_uint num_bytes_to_process = (mz_uint)MZ_MIN(d->m_src_buf_left, TDEFL_COMP_FAST_LOOKAHEAD_SIZE - lookahead_size); d->m_src_buf_left -= num_bytes_to_process; lookahead_size += num_bytes_to_process; while (num_bytes_to_process) { mz_uint32 n = MZ_MIN(TDEFL_LZ_DICT_SIZE - dst_pos, num_bytes_to_process); memcpy(d->m_dict + dst_pos, d->m_pSrc, n); if (dst_pos < (TDEFL_MAX_MATCH_LEN - 1)) memcpy(d->m_dict + TDEFL_LZ_DICT_SIZE + dst_pos, d->m_pSrc, MZ_MIN(n, (TDEFL_MAX_MATCH_LEN - 1) - dst_pos)); d->m_pSrc += n; dst_pos = (dst_pos + n) & 
TDEFL_LZ_DICT_SIZE_MASK; num_bytes_to_process -= n; } dict_size = MZ_MIN(TDEFL_LZ_DICT_SIZE - lookahead_size, dict_size); if ((!d->m_flush) && (lookahead_size < TDEFL_COMP_FAST_LOOKAHEAD_SIZE)) break; while (lookahead_size >= 4) { mz_uint cur_match_dist, cur_match_len = 1; mz_uint8 *pCur_dict = d->m_dict + cur_pos; mz_uint first_trigram = TDEFL_READ_UNALIGNED_WORD32(pCur_dict) & 0xFFFFFF; mz_uint hash = (first_trigram ^ (first_trigram >> (24 - (TDEFL_LZ_HASH_BITS - 8)))) & TDEFL_LEVEL1_HASH_SIZE_MASK; mz_uint probe_pos = d->m_hash[hash]; d->m_hash[hash] = (mz_uint16)lookahead_pos; if (((cur_match_dist = (mz_uint16)(lookahead_pos - probe_pos)) <= dict_size) && ((TDEFL_READ_UNALIGNED_WORD32(d->m_dict + (probe_pos &= TDEFL_LZ_DICT_SIZE_MASK)) & 0xFFFFFF) == first_trigram)) { const mz_uint16 *p = (const mz_uint16 *)pCur_dict; const mz_uint16 *q = (const mz_uint16 *)(d->m_dict + probe_pos); mz_uint32 probe_len = 32; do { } while ((TDEFL_READ_UNALIGNED_WORD2(++p) == TDEFL_READ_UNALIGNED_WORD2(++q)) && (TDEFL_READ_UNALIGNED_WORD2(++p) == TDEFL_READ_UNALIGNED_WORD2(++q)) && (TDEFL_READ_UNALIGNED_WORD2(++p) == TDEFL_READ_UNALIGNED_WORD2(++q)) && (TDEFL_READ_UNALIGNED_WORD2(++p) == TDEFL_READ_UNALIGNED_WORD2(++q)) && (--probe_len > 0)); cur_match_len = ((mz_uint)(p - (const mz_uint16 *)pCur_dict) * 2) + (mz_uint)(*(const mz_uint8 *)p == *(const mz_uint8 *)q); if (!probe_len) cur_match_len = cur_match_dist ? 
TDEFL_MAX_MATCH_LEN : 0; if ((cur_match_len < TDEFL_MIN_MATCH_LEN) || ((cur_match_len == TDEFL_MIN_MATCH_LEN) && (cur_match_dist >= 8U * 1024U))) { cur_match_len = 1; *pLZ_code_buf++ = (mz_uint8)first_trigram; *pLZ_flags = (mz_uint8)(*pLZ_flags >> 1); d->m_huff_count[0][(mz_uint8)first_trigram]++; } else { mz_uint32 s0, s1; cur_match_len = MZ_MIN(cur_match_len, lookahead_size); MZ_ASSERT((cur_match_len >= TDEFL_MIN_MATCH_LEN) && (cur_match_dist >= 1) && (cur_match_dist <= TDEFL_LZ_DICT_SIZE)); cur_match_dist--; pLZ_code_buf[0] = (mz_uint8)(cur_match_len - TDEFL_MIN_MATCH_LEN); #ifdef MINIZ_UNALIGNED_USE_MEMCPY memcpy(&pLZ_code_buf[1], &cur_match_dist, sizeof(cur_match_dist)); #else *(mz_uint16 *)(&pLZ_code_buf[1]) = (mz_uint16)cur_match_dist; #endif pLZ_code_buf += 3; *pLZ_flags = (mz_uint8)((*pLZ_flags >> 1) | 0x80); s0 = s_tdefl_small_dist_sym[cur_match_dist & 511]; s1 = s_tdefl_large_dist_sym[cur_match_dist >> 8]; d->m_huff_count[1][(cur_match_dist < 512) ? s0 : s1]++; d->m_huff_count[0][s_tdefl_len_sym[cur_match_len - TDEFL_MIN_MATCH_LEN]]++; } } else { *pLZ_code_buf++ = (mz_uint8)first_trigram; *pLZ_flags = (mz_uint8)(*pLZ_flags >> 1); d->m_huff_count[0][(mz_uint8)first_trigram]++; } if (--num_flags_left == 0) { num_flags_left = 8; pLZ_flags = pLZ_code_buf++; } total_lz_bytes += cur_match_len; lookahead_pos += cur_match_len; dict_size = MZ_MIN(dict_size + cur_match_len, (mz_uint)TDEFL_LZ_DICT_SIZE); cur_pos = (cur_pos + cur_match_len) & TDEFL_LZ_DICT_SIZE_MASK; MZ_ASSERT(lookahead_size >= cur_match_len); lookahead_size -= cur_match_len; if (pLZ_code_buf > &d->m_lz_code_buf[TDEFL_LZ_CODE_BUF_SIZE - 8]) { int n; d->m_lookahead_pos = lookahead_pos; d->m_lookahead_size = lookahead_size; d->m_dict_size = dict_size; d->m_total_lz_bytes = total_lz_bytes; d->m_pLZ_code_buf = pLZ_code_buf; d->m_pLZ_flags = pLZ_flags; d->m_num_flags_left = num_flags_left; if ((n = tdefl_flush_block(d, 0)) != 0) return (n < 0) ? 
MZ_FALSE : MZ_TRUE; total_lz_bytes = d->m_total_lz_bytes; pLZ_code_buf = d->m_pLZ_code_buf; pLZ_flags = d->m_pLZ_flags; num_flags_left = d->m_num_flags_left; } } while (lookahead_size) { mz_uint8 lit = d->m_dict[cur_pos]; total_lz_bytes++; *pLZ_code_buf++ = lit; *pLZ_flags = (mz_uint8)(*pLZ_flags >> 1); if (--num_flags_left == 0) { num_flags_left = 8; pLZ_flags = pLZ_code_buf++; } d->m_huff_count[0][lit]++; lookahead_pos++; dict_size = MZ_MIN(dict_size + 1, (mz_uint)TDEFL_LZ_DICT_SIZE); cur_pos = (cur_pos + 1) & TDEFL_LZ_DICT_SIZE_MASK; lookahead_size--; if (pLZ_code_buf > &d->m_lz_code_buf[TDEFL_LZ_CODE_BUF_SIZE - 8]) { int n; d->m_lookahead_pos = lookahead_pos; d->m_lookahead_size = lookahead_size; d->m_dict_size = dict_size; d->m_total_lz_bytes = total_lz_bytes; d->m_pLZ_code_buf = pLZ_code_buf; d->m_pLZ_flags = pLZ_flags; d->m_num_flags_left = num_flags_left; if ((n = tdefl_flush_block(d, 0)) != 0) return (n < 0) ? MZ_FALSE : MZ_TRUE; total_lz_bytes = d->m_total_lz_bytes; pLZ_code_buf = d->m_pLZ_code_buf; pLZ_flags = d->m_pLZ_flags; num_flags_left = d->m_num_flags_left; } } } d->m_lookahead_pos = lookahead_pos; d->m_lookahead_size = lookahead_size; d->m_dict_size = dict_size; d->m_total_lz_bytes = total_lz_bytes; d->m_pLZ_code_buf = pLZ_code_buf; d->m_pLZ_flags = pLZ_flags; d->m_num_flags_left = num_flags_left; return MZ_TRUE; } #endif /* MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN */ static MZ_FORCEINLINE void tdefl_record_literal(tdefl_compressor *d, mz_uint8 lit) { d->m_total_lz_bytes++; *d->m_pLZ_code_buf++ = lit; *d->m_pLZ_flags = (mz_uint8)(*d->m_pLZ_flags >> 1); if (--d->m_num_flags_left == 0) { d->m_num_flags_left = 8; d->m_pLZ_flags = d->m_pLZ_code_buf++; } d->m_huff_count[0][lit]++; } static MZ_FORCEINLINE void tdefl_record_match(tdefl_compressor *d, mz_uint match_len, mz_uint match_dist) { mz_uint32 s0, s1; MZ_ASSERT((match_len >= TDEFL_MIN_MATCH_LEN) && (match_dist >= 1) && (match_dist <= TDEFL_LZ_DICT_SIZE)); d->m_total_lz_bytes += 
match_len; d->m_pLZ_code_buf[0] = (mz_uint8)(match_len - TDEFL_MIN_MATCH_LEN); match_dist -= 1; d->m_pLZ_code_buf[1] = (mz_uint8)(match_dist & 0xFF); d->m_pLZ_code_buf[2] = (mz_uint8)(match_dist >> 8); d->m_pLZ_code_buf += 3; *d->m_pLZ_flags = (mz_uint8)((*d->m_pLZ_flags >> 1) | 0x80); if (--d->m_num_flags_left == 0) { d->m_num_flags_left = 8; d->m_pLZ_flags = d->m_pLZ_code_buf++; } s0 = s_tdefl_small_dist_sym[match_dist & 511]; s1 = s_tdefl_large_dist_sym[(match_dist >> 8) & 127]; d->m_huff_count[1][(match_dist < 512) ? s0 : s1]++; d->m_huff_count[0][s_tdefl_len_sym[match_len - TDEFL_MIN_MATCH_LEN]]++; } static mz_bool tdefl_compress_normal(tdefl_compressor *d) { const mz_uint8 *pSrc = d->m_pSrc; size_t src_buf_left = d->m_src_buf_left; tdefl_flush flush = d->m_flush; while ((src_buf_left) || ((flush) && (d->m_lookahead_size))) { mz_uint len_to_move, cur_match_dist, cur_match_len, cur_pos; /* Update dictionary and hash chains. Keeps the lookahead size equal to TDEFL_MAX_MATCH_LEN. */ if ((d->m_lookahead_size + d->m_dict_size) >= (TDEFL_MIN_MATCH_LEN - 1)) { mz_uint dst_pos = (d->m_lookahead_pos + d->m_lookahead_size) & TDEFL_LZ_DICT_SIZE_MASK, ins_pos = d->m_lookahead_pos + d->m_lookahead_size - 2; mz_uint hash = (d->m_dict[ins_pos & TDEFL_LZ_DICT_SIZE_MASK] << TDEFL_LZ_HASH_SHIFT) ^ d->m_dict[(ins_pos + 1) & TDEFL_LZ_DICT_SIZE_MASK]; mz_uint num_bytes_to_process = (mz_uint)MZ_MIN(src_buf_left, TDEFL_MAX_MATCH_LEN - d->m_lookahead_size); const mz_uint8 *pSrc_end = pSrc ? 
pSrc + num_bytes_to_process : NULL; src_buf_left -= num_bytes_to_process; d->m_lookahead_size += num_bytes_to_process; while (pSrc != pSrc_end) { mz_uint8 c = *pSrc++; d->m_dict[dst_pos] = c; if (dst_pos < (TDEFL_MAX_MATCH_LEN - 1)) d->m_dict[TDEFL_LZ_DICT_SIZE + dst_pos] = c; hash = ((hash << TDEFL_LZ_HASH_SHIFT) ^ c) & (TDEFL_LZ_HASH_SIZE - 1); d->m_next[ins_pos & TDEFL_LZ_DICT_SIZE_MASK] = d->m_hash[hash]; d->m_hash[hash] = (mz_uint16)(ins_pos); dst_pos = (dst_pos + 1) & TDEFL_LZ_DICT_SIZE_MASK; ins_pos++; } } else { while ((src_buf_left) && (d->m_lookahead_size < TDEFL_MAX_MATCH_LEN)) { mz_uint8 c = *pSrc++; mz_uint dst_pos = (d->m_lookahead_pos + d->m_lookahead_size) & TDEFL_LZ_DICT_SIZE_MASK; src_buf_left--; d->m_dict[dst_pos] = c; if (dst_pos < (TDEFL_MAX_MATCH_LEN - 1)) d->m_dict[TDEFL_LZ_DICT_SIZE + dst_pos] = c; if ((++d->m_lookahead_size + d->m_dict_size) >= TDEFL_MIN_MATCH_LEN) { mz_uint ins_pos = d->m_lookahead_pos + (d->m_lookahead_size - 1) - 2; mz_uint hash = ((d->m_dict[ins_pos & TDEFL_LZ_DICT_SIZE_MASK] << (TDEFL_LZ_HASH_SHIFT * 2)) ^ (d->m_dict[(ins_pos + 1) & TDEFL_LZ_DICT_SIZE_MASK] << TDEFL_LZ_HASH_SHIFT) ^ c) & (TDEFL_LZ_HASH_SIZE - 1); d->m_next[ins_pos & TDEFL_LZ_DICT_SIZE_MASK] = d->m_hash[hash]; d->m_hash[hash] = (mz_uint16)(ins_pos); } } } d->m_dict_size = MZ_MIN(TDEFL_LZ_DICT_SIZE - d->m_lookahead_size, d->m_dict_size); if ((!flush) && (d->m_lookahead_size < TDEFL_MAX_MATCH_LEN)) break; /* Simple lazy/greedy parsing state machine. */ len_to_move = 1; cur_match_dist = 0; cur_match_len = d->m_saved_match_len ? 
d->m_saved_match_len : (TDEFL_MIN_MATCH_LEN - 1); cur_pos = d->m_lookahead_pos & TDEFL_LZ_DICT_SIZE_MASK; if (d->m_flags & (TDEFL_RLE_MATCHES | TDEFL_FORCE_ALL_RAW_BLOCKS)) { if ((d->m_dict_size) && (!(d->m_flags & TDEFL_FORCE_ALL_RAW_BLOCKS))) { mz_uint8 c = d->m_dict[(cur_pos - 1) & TDEFL_LZ_DICT_SIZE_MASK]; cur_match_len = 0; while (cur_match_len < d->m_lookahead_size) { if (d->m_dict[cur_pos + cur_match_len] != c) break; cur_match_len++; } if (cur_match_len < TDEFL_MIN_MATCH_LEN) cur_match_len = 0; else cur_match_dist = 1; } } else { tdefl_find_match(d, d->m_lookahead_pos, d->m_dict_size, d->m_lookahead_size, &cur_match_dist, &cur_match_len); } if (((cur_match_len == TDEFL_MIN_MATCH_LEN) && (cur_match_dist >= 8U * 1024U)) || (cur_pos == cur_match_dist) || ((d->m_flags & TDEFL_FILTER_MATCHES) && (cur_match_len <= 5))) { cur_match_dist = cur_match_len = 0; } if (d->m_saved_match_len) { if (cur_match_len > d->m_saved_match_len) { tdefl_record_literal(d, (mz_uint8)d->m_saved_lit); if (cur_match_len >= 128) { tdefl_record_match(d, cur_match_len, cur_match_dist); d->m_saved_match_len = 0; len_to_move = cur_match_len; } else { d->m_saved_lit = d->m_dict[cur_pos]; d->m_saved_match_dist = cur_match_dist; d->m_saved_match_len = cur_match_len; } } else { tdefl_record_match(d, d->m_saved_match_len, d->m_saved_match_dist); len_to_move = d->m_saved_match_len - 1; d->m_saved_match_len = 0; } } else if (!cur_match_dist) tdefl_record_literal(d, d->m_dict[MZ_MIN(cur_pos, sizeof(d->m_dict) - 1)]); else if ((d->m_greedy_parsing) || (d->m_flags & TDEFL_RLE_MATCHES) || (cur_match_len >= 128)) { tdefl_record_match(d, cur_match_len, cur_match_dist); len_to_move = cur_match_len; } else { d->m_saved_lit = d->m_dict[MZ_MIN(cur_pos, sizeof(d->m_dict) - 1)]; d->m_saved_match_dist = cur_match_dist; d->m_saved_match_len = cur_match_len; } /* Move the lookahead forward by len_to_move bytes. 
*/ d->m_lookahead_pos += len_to_move; MZ_ASSERT(d->m_lookahead_size >= len_to_move); d->m_lookahead_size -= len_to_move; d->m_dict_size = MZ_MIN(d->m_dict_size + len_to_move, (mz_uint)TDEFL_LZ_DICT_SIZE); /* Check if it's time to flush the current LZ codes to the internal output buffer. */ if ((d->m_pLZ_code_buf > &d->m_lz_code_buf[TDEFL_LZ_CODE_BUF_SIZE - 8]) || ((d->m_total_lz_bytes > 31 * 1024) && (((((mz_uint)(d->m_pLZ_code_buf - d->m_lz_code_buf) * 115) >> 7) >= d->m_total_lz_bytes) || (d->m_flags & TDEFL_FORCE_ALL_RAW_BLOCKS)))) { int n; d->m_pSrc = pSrc; d->m_src_buf_left = src_buf_left; if ((n = tdefl_flush_block(d, 0)) != 0) return (n < 0) ? MZ_FALSE : MZ_TRUE; } } d->m_pSrc = pSrc; d->m_src_buf_left = src_buf_left; return MZ_TRUE; } static tdefl_status tdefl_flush_output_buffer(tdefl_compressor *d) { if (d->m_pIn_buf_size) { *d->m_pIn_buf_size = d->m_pSrc - (const mz_uint8 *)d->m_pIn_buf; } if (d->m_pOut_buf_size) { size_t n = MZ_MIN(*d->m_pOut_buf_size - d->m_out_buf_ofs, d->m_output_flush_remaining); memcpy((mz_uint8 *)d->m_pOut_buf + d->m_out_buf_ofs, d->m_output_buf + d->m_output_flush_ofs, n); d->m_output_flush_ofs += (mz_uint)n; d->m_output_flush_remaining -= (mz_uint)n; d->m_out_buf_ofs += n; *d->m_pOut_buf_size = d->m_out_buf_ofs; } return (d->m_finished && !d->m_output_flush_remaining) ? TDEFL_STATUS_DONE : TDEFL_STATUS_OKAY; } tdefl_status tdefl_compress(tdefl_compressor *d, const void *pIn_buf, size_t *pIn_buf_size, void *pOut_buf, size_t *pOut_buf_size, tdefl_flush flush) { if (!d) { if (pIn_buf_size) *pIn_buf_size = 0; if (pOut_buf_size) *pOut_buf_size = 0; return TDEFL_STATUS_BAD_PARAM; } d->m_pIn_buf = pIn_buf; d->m_pIn_buf_size = pIn_buf_size; d->m_pOut_buf = pOut_buf; d->m_pOut_buf_size = pOut_buf_size; d->m_pSrc = (const mz_uint8 *)(pIn_buf); d->m_src_buf_left = pIn_buf_size ? 
*pIn_buf_size : 0; d->m_out_buf_ofs = 0; d->m_flush = flush; if (((d->m_pPut_buf_func != NULL) == ((pOut_buf != NULL) || (pOut_buf_size != NULL))) || (d->m_prev_return_status != TDEFL_STATUS_OKAY) || (d->m_wants_to_finish && (flush != TDEFL_FINISH)) || (pIn_buf_size && *pIn_buf_size && !pIn_buf) || (pOut_buf_size && *pOut_buf_size && !pOut_buf)) { if (pIn_buf_size) *pIn_buf_size = 0; if (pOut_buf_size) *pOut_buf_size = 0; return (d->m_prev_return_status = TDEFL_STATUS_BAD_PARAM); } d->m_wants_to_finish |= (flush == TDEFL_FINISH); if ((d->m_output_flush_remaining) || (d->m_finished)) return (d->m_prev_return_status = tdefl_flush_output_buffer(d)); #if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN if (((d->m_flags & TDEFL_MAX_PROBES_MASK) == 1) && ((d->m_flags & TDEFL_GREEDY_PARSING_FLAG) != 0) && ((d->m_flags & (TDEFL_FILTER_MATCHES | TDEFL_FORCE_ALL_RAW_BLOCKS | TDEFL_RLE_MATCHES)) == 0)) { if (!tdefl_compress_fast(d)) return d->m_prev_return_status; } else #endif /* #if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN */ { if (!tdefl_compress_normal(d)) return d->m_prev_return_status; } if ((d->m_flags & (TDEFL_WRITE_ZLIB_HEADER | TDEFL_COMPUTE_ADLER32)) && (pIn_buf)) d->m_adler32 = (mz_uint32)mz_adler32(d->m_adler32, (const mz_uint8 *)pIn_buf, d->m_pSrc - (const mz_uint8 *)pIn_buf); if ((flush) && (!d->m_lookahead_size) && (!d->m_src_buf_left) && (!d->m_output_flush_remaining)) { if (tdefl_flush_block(d, flush) < 0) return d->m_prev_return_status; d->m_finished = (flush == TDEFL_FINISH); if (flush == TDEFL_FULL_FLUSH) { MZ_CLEAR_ARR(d->m_hash); MZ_CLEAR_ARR(d->m_next); d->m_dict_size = 0; } } return (d->m_prev_return_status = tdefl_flush_output_buffer(d)); } tdefl_status tdefl_compress_buffer(tdefl_compressor *d, const void *pIn_buf, size_t in_buf_size, tdefl_flush flush) { MZ_ASSERT(d->m_pPut_buf_func); return tdefl_compress(d, pIn_buf, &in_buf_size, NULL, NULL, flush); } tdefl_status tdefl_init(tdefl_compressor *d, 
tdefl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags) { d->m_pPut_buf_func = pPut_buf_func; d->m_pPut_buf_user = pPut_buf_user; d->m_flags = (mz_uint)(flags); d->m_max_probes[0] = 1 + ((flags & 0xFFF) + 2) / 3; d->m_greedy_parsing = (flags & TDEFL_GREEDY_PARSING_FLAG) != 0; d->m_max_probes[1] = 1 + (((flags & 0xFFF) >> 2) + 2) / 3; if (!(flags & TDEFL_NONDETERMINISTIC_PARSING_FLAG)) MZ_CLEAR_ARR(d->m_hash); d->m_lookahead_pos = d->m_lookahead_size = d->m_dict_size = d->m_total_lz_bytes = d->m_lz_code_buf_dict_pos = d->m_bits_in = 0; d->m_output_flush_ofs = d->m_output_flush_remaining = d->m_finished = d->m_block_index = d->m_bit_buffer = d->m_wants_to_finish = 0; d->m_pLZ_code_buf = d->m_lz_code_buf + 1; d->m_pLZ_flags = d->m_lz_code_buf; *d->m_pLZ_flags = 0; d->m_num_flags_left = 8; d->m_pOutput_buf = d->m_output_buf; d->m_pOutput_buf_end = d->m_output_buf; d->m_prev_return_status = TDEFL_STATUS_OKAY; d->m_saved_match_dist = d->m_saved_match_len = d->m_saved_lit = 0; d->m_adler32 = 1; d->m_pIn_buf = NULL; d->m_pOut_buf = NULL; d->m_pIn_buf_size = NULL; d->m_pOut_buf_size = NULL; d->m_flush = TDEFL_NO_FLUSH; d->m_pSrc = NULL; d->m_src_buf_left = 0; d->m_out_buf_ofs = 0; if (!(flags & TDEFL_NONDETERMINISTIC_PARSING_FLAG)) MZ_CLEAR_ARR(d->m_dict); memset(&d->m_huff_count[0][0], 0, sizeof(d->m_huff_count[0][0]) * TDEFL_MAX_HUFF_SYMBOLS_0); memset(&d->m_huff_count[1][0], 0, sizeof(d->m_huff_count[1][0]) * TDEFL_MAX_HUFF_SYMBOLS_1); return TDEFL_STATUS_OKAY; } tdefl_status tdefl_get_prev_return_status(tdefl_compressor *d) { return d->m_prev_return_status; } mz_uint32 tdefl_get_adler32(tdefl_compressor *d) { return d->m_adler32; } mz_bool tdefl_compress_mem_to_output(const void *pBuf, size_t buf_len, tdefl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags) { tdefl_compressor *pComp; mz_bool succeeded; if (((buf_len) && (!pBuf)) || (!pPut_buf_func)) return MZ_FALSE; pComp = (tdefl_compressor *)MZ_MALLOC(sizeof(tdefl_compressor)); if (!pComp) 
return MZ_FALSE; succeeded = (tdefl_init(pComp, pPut_buf_func, pPut_buf_user, flags) == TDEFL_STATUS_OKAY); succeeded = succeeded && (tdefl_compress_buffer(pComp, pBuf, buf_len, TDEFL_FINISH) == TDEFL_STATUS_DONE); MZ_FREE(pComp); return succeeded; } typedef struct { size_t m_size, m_capacity; mz_uint8 *m_pBuf; mz_bool m_expandable; } tdefl_output_buffer; static mz_bool tdefl_output_buffer_putter(const void *pBuf, int len, void *pUser) { tdefl_output_buffer *p = (tdefl_output_buffer *)pUser; size_t new_size = p->m_size + len; if (new_size > p->m_capacity) { size_t new_capacity = p->m_capacity; mz_uint8 *pNew_buf; if (!p->m_expandable) return MZ_FALSE; do { new_capacity = MZ_MAX(128U, new_capacity << 1U); } while (new_size > new_capacity); pNew_buf = (mz_uint8 *)MZ_REALLOC(p->m_pBuf, new_capacity); if (!pNew_buf) return MZ_FALSE; p->m_pBuf = pNew_buf; p->m_capacity = new_capacity; } memcpy((mz_uint8 *)p->m_pBuf + p->m_size, pBuf, len); p->m_size = new_size; return MZ_TRUE; } void *tdefl_compress_mem_to_heap(const void *pSrc_buf, size_t src_buf_len, size_t *pOut_len, int flags) { tdefl_output_buffer out_buf; MZ_CLEAR_OBJ(out_buf); if (!pOut_len) return MZ_FALSE; else *pOut_len = 0; out_buf.m_expandable = MZ_TRUE; if (!tdefl_compress_mem_to_output(pSrc_buf, src_buf_len, tdefl_output_buffer_putter, &out_buf, flags)) return NULL; *pOut_len = out_buf.m_size; return out_buf.m_pBuf; } size_t tdefl_compress_mem_to_mem(void *pOut_buf, size_t out_buf_len, const void *pSrc_buf, size_t src_buf_len, int flags) { tdefl_output_buffer out_buf; MZ_CLEAR_OBJ(out_buf); if (!pOut_buf) return 0; out_buf.m_pBuf = (mz_uint8 *)pOut_buf; out_buf.m_capacity = out_buf_len; if (!tdefl_compress_mem_to_output(pSrc_buf, src_buf_len, tdefl_output_buffer_putter, &out_buf, flags)) return 0; return out_buf.m_size; } static const mz_uint s_tdefl_num_probes[11] = { 0, 1, 6, 32, 16, 32, 128, 256, 512, 768, 1500 }; /* level may actually range from [0,10] (10 is a "hidden" max level, where we want a bit 
more compression and it's fine if throughput to fall off a cliff on some files). */ mz_uint tdefl_create_comp_flags_from_zip_params(int level, int window_bits, int strategy) { mz_uint comp_flags = s_tdefl_num_probes[(level >= 0) ? MZ_MIN(10, level) : MZ_DEFAULT_LEVEL] | ((level <= 3) ? TDEFL_GREEDY_PARSING_FLAG : 0); if (window_bits > 0) comp_flags |= TDEFL_WRITE_ZLIB_HEADER; if (!level) comp_flags |= TDEFL_FORCE_ALL_RAW_BLOCKS; else if (strategy == MZ_FILTERED) comp_flags |= TDEFL_FILTER_MATCHES; else if (strategy == MZ_HUFFMAN_ONLY) comp_flags &= ~TDEFL_MAX_PROBES_MASK; else if (strategy == MZ_FIXED) comp_flags |= TDEFL_FORCE_ALL_STATIC_BLOCKS; else if (strategy == MZ_RLE) comp_flags |= TDEFL_RLE_MATCHES; return comp_flags; } #ifdef _MSC_VER #pragma warning(push) #pragma warning(disable : 4204) /* nonstandard extension used : non-constant aggregate initializer (also supported by GNU C and C99, so no big deal) */ #endif /* Simple PNG writer function by Alex Evans, 2011. Released into the public domain: https://gist.github.com/908299, more context at http://altdevblogaday.org/2011/04/06/a-smaller-jpg-encoder/. This is actually a modification of Alex's original code so PNG files generated by this function pass pngcheck. */ void *tdefl_write_image_to_png_file_in_memory_ex(const void *pImage, int w, int h, int num_chans, size_t *pLen_out, mz_uint level, mz_bool flip) { /* Using a local copy of this array here in case MINIZ_NO_ZLIB_APIS was defined. 
*/ static const mz_uint s_tdefl_png_num_probes[11] = { 0, 1, 6, 32, 16, 32, 128, 256, 512, 768, 1500 }; tdefl_compressor *pComp = (tdefl_compressor *)MZ_MALLOC(sizeof(tdefl_compressor)); tdefl_output_buffer out_buf; int i, bpl = w * num_chans, y, z; mz_uint32 c; *pLen_out = 0; if (!pComp) return NULL; MZ_CLEAR_OBJ(out_buf); out_buf.m_expandable = MZ_TRUE; out_buf.m_capacity = 57 + MZ_MAX(64, (1 + bpl) * h); if (NULL == (out_buf.m_pBuf = (mz_uint8 *)MZ_MALLOC(out_buf.m_capacity))) { MZ_FREE(pComp); return NULL; } /* write dummy header */ for (z = 41; z; --z) tdefl_output_buffer_putter(&z, 1, &out_buf); /* compress image data */ tdefl_init(pComp, tdefl_output_buffer_putter, &out_buf, s_tdefl_png_num_probes[MZ_MIN(10, level)] | TDEFL_WRITE_ZLIB_HEADER); for (y = 0; y < h; ++y) { tdefl_compress_buffer(pComp, &z, 1, TDEFL_NO_FLUSH); tdefl_compress_buffer(pComp, (mz_uint8 *)pImage + (flip ? (h - 1 - y) : y) * bpl, bpl, TDEFL_NO_FLUSH); } if (tdefl_compress_buffer(pComp, NULL, 0, TDEFL_FINISH) != TDEFL_STATUS_DONE) { MZ_FREE(pComp); MZ_FREE(out_buf.m_pBuf); return NULL; } /* write real header */ *pLen_out = out_buf.m_size - 41; { static const mz_uint8 chans[] = { 0x00, 0x00, 0x04, 0x02, 0x06 }; mz_uint8 pnghdr[41] = { 0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a, 0x00, 0x00, 0x00, 0x0d, 0x49, 0x48, 0x44, 0x52, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x49, 0x44, 0x41, 0x54 }; pnghdr[18] = (mz_uint8)(w >> 8); pnghdr[19] = (mz_uint8)w; pnghdr[22] = (mz_uint8)(h >> 8); pnghdr[23] = (mz_uint8)h; pnghdr[25] = chans[num_chans]; pnghdr[33] = (mz_uint8)(*pLen_out >> 24); pnghdr[34] = (mz_uint8)(*pLen_out >> 16); pnghdr[35] = (mz_uint8)(*pLen_out >> 8); pnghdr[36] = (mz_uint8)*pLen_out; c = (mz_uint32)mz_crc32(MZ_CRC32_INIT, pnghdr + 12, 17); for (i = 0; i < 4; ++i, c <<= 8) ((mz_uint8 *)(pnghdr + 29))[i] = (mz_uint8)(c >> 24); memcpy(out_buf.m_pBuf, pnghdr, 41); } /* write footer (IDAT CRC-32, 
followed by IEND chunk) */ if (!tdefl_output_buffer_putter("\0\0\0\0\0\0\0\0\x49\x45\x4e\x44\xae\x42\x60\x82", 16, &out_buf)) { *pLen_out = 0; MZ_FREE(pComp); MZ_FREE(out_buf.m_pBuf); return NULL; } c = (mz_uint32)mz_crc32(MZ_CRC32_INIT, out_buf.m_pBuf + 41 - 4, *pLen_out + 4); for (i = 0; i < 4; ++i, c <<= 8) (out_buf.m_pBuf + out_buf.m_size - 16)[i] = (mz_uint8)(c >> 24); /* compute final size of file, grab compressed data buffer and return */ *pLen_out += 57; MZ_FREE(pComp); return out_buf.m_pBuf; } void *tdefl_write_image_to_png_file_in_memory(const void *pImage, int w, int h, int num_chans, size_t *pLen_out) { /* Level 6 corresponds to TDEFL_DEFAULT_MAX_PROBES or MZ_DEFAULT_LEVEL (but we can't depend on MZ_DEFAULT_LEVEL being available in case the zlib API's where #defined out) */ return tdefl_write_image_to_png_file_in_memory_ex(pImage, w, h, num_chans, pLen_out, 6, MZ_FALSE); } #ifndef MINIZ_NO_MALLOC /* Allocate the tdefl_compressor and tinfl_decompressor structures in C so that */ /* non-C language bindings to tdefL_ and tinfl_ API don't need to worry about */ /* structure size and allocation mechanism. */ tdefl_compressor *tdefl_compressor_alloc(void) { return (tdefl_compressor *)MZ_MALLOC(sizeof(tdefl_compressor)); } void tdefl_compressor_free(tdefl_compressor *pComp) { MZ_FREE(pComp); } #endif #ifdef _MSC_VER #pragma warning(pop) #endif #ifdef __cplusplus } #endif #endif /*#ifndef MINIZ_NO_DEFLATE_APIS*/ /************************************************************************** * * Copyright 2013-2014 RAD Game Tools and Valve Software * Copyright 2010-2014 Rich Geldreich and Tenacious Software LLC * All Rights Reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. * **************************************************************************/ #ifndef MINIZ_NO_INFLATE_APIS #ifdef __cplusplus extern "C" { #endif /* ------------------- Low-level Decompression (completely independent from all compression API's) */ #define TINFL_MEMCPY(d, s, l) memcpy(d, s, l) #define TINFL_MEMSET(p, c, l) memset(p, c, l) #define TINFL_CR_BEGIN \ switch (r->m_state) \ { \ case 0: #define TINFL_CR_RETURN(state_index, result) \ do \ { \ status = result; \ r->m_state = state_index; \ goto common_exit; \ case state_index:; \ } \ MZ_MACRO_END #define TINFL_CR_RETURN_FOREVER(state_index, result) \ do \ { \ for (;;) \ { \ TINFL_CR_RETURN(state_index, result); \ } \ } \ MZ_MACRO_END #define TINFL_CR_FINISH } #define TINFL_GET_BYTE(state_index, c) \ do \ { \ while (pIn_buf_cur >= pIn_buf_end) \ { \ TINFL_CR_RETURN(state_index, (decomp_flags & TINFL_FLAG_HAS_MORE_INPUT) ? 
TINFL_STATUS_NEEDS_MORE_INPUT : TINFL_STATUS_FAILED_CANNOT_MAKE_PROGRESS); \ } \ c = *pIn_buf_cur++; \ } \ MZ_MACRO_END #define TINFL_NEED_BITS(state_index, n) \ do \ { \ mz_uint c; \ TINFL_GET_BYTE(state_index, c); \ bit_buf |= (((tinfl_bit_buf_t)c) << num_bits); \ num_bits += 8; \ } while (num_bits < (mz_uint)(n)) #define TINFL_SKIP_BITS(state_index, n) \ do \ { \ if (num_bits < (mz_uint)(n)) \ { \ TINFL_NEED_BITS(state_index, n); \ } \ bit_buf >>= (n); \ num_bits -= (n); \ } \ MZ_MACRO_END #define TINFL_GET_BITS(state_index, b, n) \ do \ { \ if (num_bits < (mz_uint)(n)) \ { \ TINFL_NEED_BITS(state_index, n); \ } \ b = bit_buf & ((1 << (n)) - 1); \ bit_buf >>= (n); \ num_bits -= (n); \ } \ MZ_MACRO_END /* TINFL_HUFF_BITBUF_FILL() is only used rarely, when the number of bytes remaining in the input buffer falls below 2. */ /* It reads just enough bytes from the input stream that are needed to decode the next Huffman code (and absolutely no more). It works by trying to fully decode a */ /* Huffman code by using whatever bits are currently present in the bit buffer. If this fails, it reads another byte, and tries again until it succeeds or until the */ /* bit buffer contains >=15 bits (deflate's max. Huffman code size). */ #define TINFL_HUFF_BITBUF_FILL(state_index, pLookUp, pTree) \ do \ { \ temp = pLookUp[bit_buf & (TINFL_FAST_LOOKUP_SIZE - 1)]; \ if (temp >= 0) \ { \ code_len = temp >> 9; \ if ((code_len) && (num_bits >= code_len)) \ break; \ } \ else if (num_bits > TINFL_FAST_LOOKUP_BITS) \ { \ code_len = TINFL_FAST_LOOKUP_BITS; \ do \ { \ temp = pTree[~temp + ((bit_buf >> code_len++) & 1)]; \ } while ((temp < 0) && (num_bits >= (code_len + 1))); \ if (temp >= 0) \ break; \ } \ TINFL_GET_BYTE(state_index, c); \ bit_buf |= (((tinfl_bit_buf_t)c) << num_bits); \ num_bits += 8; \ } while (num_bits < 15); /* TINFL_HUFF_DECODE() decodes the next Huffman coded symbol. 
It's more complex than you would initially expect because the zlib API expects the decompressor to never read */ /* beyond the final byte of the deflate stream. (In other words, when this macro wants to read another byte from the input, it REALLY needs another byte in order to fully */ /* decode the next Huffman code.) Handling this properly is particularly important on raw deflate (non-zlib) streams, which aren't followed by a byte aligned adler-32. */ /* The slow path is only executed at the very end of the input buffer. */ /* v1.16: The original macro handled the case at the very end of the passed-in input buffer, but we also need to handle the case where the user passes in 1+zillion bytes */ /* following the deflate data and our non-conservative read-ahead path won't kick in here on this code. This is much trickier. */ #define TINFL_HUFF_DECODE(state_index, sym, pLookUp, pTree) \ do \ { \ int temp; \ mz_uint code_len, c; \ if (num_bits < 15) \ { \ if ((pIn_buf_end - pIn_buf_cur) < 2) \ { \ TINFL_HUFF_BITBUF_FILL(state_index, pLookUp, pTree); \ } \ else \ { \ bit_buf |= (((tinfl_bit_buf_t)pIn_buf_cur[0]) << num_bits) | (((tinfl_bit_buf_t)pIn_buf_cur[1]) << (num_bits + 8)); \ pIn_buf_cur += 2; \ num_bits += 16; \ } \ } \ if ((temp = pLookUp[bit_buf & (TINFL_FAST_LOOKUP_SIZE - 1)]) >= 0) \ code_len = temp >> 9, temp &= 511; \ else \ { \ code_len = TINFL_FAST_LOOKUP_BITS; \ do \ { \ temp = pTree[~temp + ((bit_buf >> code_len++) & 1)]; \ } while (temp < 0); \ } \ sym = temp; \ bit_buf >>= code_len; \ num_bits -= code_len; \ } \ MZ_MACRO_END static void tinfl_clear_tree(tinfl_decompressor *r) { if (r->m_type == 0) MZ_CLEAR_ARR(r->m_tree_0); else if (r->m_type == 1) MZ_CLEAR_ARR(r->m_tree_1); else MZ_CLEAR_ARR(r->m_tree_2); } tinfl_status tinfl_decompress(tinfl_decompressor *r, const mz_uint8 *pIn_buf_next, size_t *pIn_buf_size, mz_uint8 *pOut_buf_start, mz_uint8 *pOut_buf_next, size_t *pOut_buf_size, const mz_uint32 decomp_flags) { static const mz_uint16
/* DEFLATE length/distance base values and extra-bit counts, the code-length "dezigzag" order, and the minimum table sizes for the three Huffman tables (lit/len, dist, code-length) -- values match RFC 1951 section 3.2. */
s_length_base[31] = { 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0 }; static const mz_uint8 s_length_extra[31] = { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0, 0, 0 }; static const mz_uint16 s_dist_base[32] = { 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, 257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577, 0, 0 }; static const mz_uint8 s_dist_extra[32] = { 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13 }; static const mz_uint8 s_length_dezigzag[19] = { 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 }; static const mz_uint16 s_min_table_sizes[3] = { 257, 1, 4 }; mz_int16 *pTrees[3]; mz_uint8 *pCode_sizes[3]; tinfl_status status = TINFL_STATUS_FAILED; mz_uint32 num_bits, dist, counter, num_extra; tinfl_bit_buf_t bit_buf; const mz_uint8 *pIn_buf_cur = pIn_buf_next, *const pIn_buf_end = pIn_buf_next + *pIn_buf_size; mz_uint8 *pOut_buf_cur = pOut_buf_next, *const pOut_buf_end = pOut_buf_next ? pOut_buf_next + *pOut_buf_size : NULL; size_t out_buf_size_mask = (decomp_flags & TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF) ? (size_t)-1 : ((pOut_buf_next - pOut_buf_start) + *pOut_buf_size) - 1, dist_from_out_buf_start; /* Ensure the output buffer's size is a power of 2, unless the output buffer is large enough to hold the entire output file (in which case it doesn't matter).
*/ if (((out_buf_size_mask + 1) & out_buf_size_mask) || (pOut_buf_next < pOut_buf_start)) { *pIn_buf_size = *pOut_buf_size = 0; return TINFL_STATUS_BAD_PARAM; } pTrees[0] = r->m_tree_0; pTrees[1] = r->m_tree_1; pTrees[2] = r->m_tree_2; pCode_sizes[0] = r->m_code_size_0; pCode_sizes[1] = r->m_code_size_1; pCode_sizes[2] = r->m_code_size_2; num_bits = r->m_num_bits; bit_buf = r->m_bit_buf; dist = r->m_dist; counter = r->m_counter; num_extra = r->m_num_extra; dist_from_out_buf_start = r->m_dist_from_out_buf_start; TINFL_CR_BEGIN bit_buf = num_bits = dist = counter = num_extra = r->m_zhdr0 = r->m_zhdr1 = 0; r->m_z_adler32 = r->m_check_adler32 = 1; if (decomp_flags & TINFL_FLAG_PARSE_ZLIB_HEADER) { TINFL_GET_BYTE(1, r->m_zhdr0); TINFL_GET_BYTE(2, r->m_zhdr1); counter = (((r->m_zhdr0 * 256 + r->m_zhdr1) % 31 != 0) || (r->m_zhdr1 & 32) || ((r->m_zhdr0 & 15) != 8)); if (!(decomp_flags & TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF)) counter |= (((1U << (8U + (r->m_zhdr0 >> 4))) > 32768U) || ((out_buf_size_mask + 1) < (size_t)((size_t)1 << (8U + (r->m_zhdr0 >> 4))))); if (counter) { TINFL_CR_RETURN_FOREVER(36, TINFL_STATUS_FAILED); } } do { TINFL_GET_BITS(3, r->m_final, 3); r->m_type = r->m_final >> 1; if (r->m_type == 0) { TINFL_SKIP_BITS(5, num_bits & 7); for (counter = 0; counter < 4; ++counter) { if (num_bits) TINFL_GET_BITS(6, r->m_raw_header[counter], 8); else TINFL_GET_BYTE(7, r->m_raw_header[counter]); } if ((counter = (r->m_raw_header[0] | (r->m_raw_header[1] << 8))) != (mz_uint)(0xFFFF ^ (r->m_raw_header[2] | (r->m_raw_header[3] << 8)))) { TINFL_CR_RETURN_FOREVER(39, TINFL_STATUS_FAILED); } while ((counter) && (num_bits)) { TINFL_GET_BITS(51, dist, 8); while (pOut_buf_cur >= pOut_buf_end) { TINFL_CR_RETURN(52, TINFL_STATUS_HAS_MORE_OUTPUT); } *pOut_buf_cur++ = (mz_uint8)dist; counter--; } while (counter) { size_t n; while (pOut_buf_cur >= pOut_buf_end) { TINFL_CR_RETURN(9, TINFL_STATUS_HAS_MORE_OUTPUT); } while (pIn_buf_cur >= pIn_buf_end) { TINFL_CR_RETURN(38,
(decomp_flags & TINFL_FLAG_HAS_MORE_INPUT) ? TINFL_STATUS_NEEDS_MORE_INPUT : TINFL_STATUS_FAILED_CANNOT_MAKE_PROGRESS); } n = MZ_MIN(MZ_MIN((size_t)(pOut_buf_end - pOut_buf_cur), (size_t)(pIn_buf_end - pIn_buf_cur)), counter); TINFL_MEMCPY(pOut_buf_cur, pIn_buf_cur, n); pIn_buf_cur += n; pOut_buf_cur += n; counter -= (mz_uint)n; } } else if (r->m_type == 3) { TINFL_CR_RETURN_FOREVER(10, TINFL_STATUS_FAILED); } else { if (r->m_type == 1) { mz_uint8 *p = r->m_code_size_0; mz_uint i; r->m_table_sizes[0] = 288; r->m_table_sizes[1] = 32; TINFL_MEMSET(r->m_code_size_1, 5, 32); for (i = 0; i <= 143; ++i) *p++ = 8; for (; i <= 255; ++i) *p++ = 9; for (; i <= 279; ++i) *p++ = 7; for (; i <= 287; ++i) *p++ = 8; } else { for (counter = 0; counter < 3; counter++) { TINFL_GET_BITS(11, r->m_table_sizes[counter], "\05\05\04"[counter]); r->m_table_sizes[counter] += s_min_table_sizes[counter]; } MZ_CLEAR_ARR(r->m_code_size_2); for (counter = 0; counter < r->m_table_sizes[2]; counter++) { mz_uint s; TINFL_GET_BITS(14, s, 3); r->m_code_size_2[s_length_dezigzag[counter]] = (mz_uint8)s; } r->m_table_sizes[2] = 19; } for (; (int)r->m_type >= 0; r->m_type--) { int tree_next, tree_cur; mz_int16 *pLookUp; mz_int16 *pTree; mz_uint8 *pCode_size; mz_uint i, j, used_syms, total, sym_index, next_code[17], total_syms[16]; pLookUp = r->m_look_up[r->m_type]; pTree = pTrees[r->m_type]; pCode_size = pCode_sizes[r->m_type]; MZ_CLEAR_ARR(total_syms); TINFL_MEMSET(pLookUp, 0, sizeof(r->m_look_up[0])); tinfl_clear_tree(r); for (i = 0; i < r->m_table_sizes[r->m_type]; ++i) total_syms[pCode_size[i]]++; used_syms = 0, total = 0; next_code[0] = next_code[1] = 0; for (i = 1; i <= 15; ++i) { used_syms += total_syms[i]; next_code[i + 1] = (total = ((total + total_syms[i]) << 1)); } if ((65536 != total) && (used_syms > 1)) { TINFL_CR_RETURN_FOREVER(35, TINFL_STATUS_FAILED); } for (tree_next = -1, sym_index = 0; sym_index < r->m_table_sizes[r->m_type]; ++sym_index) { mz_uint rev_code = 0, l, cur_code, code_size
/* Huffman table construction: canonical codes are bit-reversed; codes of length <= TINFL_FAST_LOOKUP_BITS are replicated into the direct lookup table, longer codes are chained into the binary tree (negative indices). */
= pCode_size[sym_index]; if (!code_size) continue; cur_code = next_code[code_size]++; for (l = code_size; l > 0; l--, cur_code >>= 1) rev_code = (rev_code << 1) | (cur_code & 1); if (code_size <= TINFL_FAST_LOOKUP_BITS) { mz_int16 k = (mz_int16)((code_size << 9) | sym_index); while (rev_code < TINFL_FAST_LOOKUP_SIZE) { pLookUp[rev_code] = k; rev_code += (1 << code_size); } continue; } if (0 == (tree_cur = pLookUp[rev_code & (TINFL_FAST_LOOKUP_SIZE - 1)])) { pLookUp[rev_code & (TINFL_FAST_LOOKUP_SIZE - 1)] = (mz_int16)tree_next; tree_cur = tree_next; tree_next -= 2; } rev_code >>= (TINFL_FAST_LOOKUP_BITS - 1); for (j = code_size; j > (TINFL_FAST_LOOKUP_BITS + 1); j--) { tree_cur -= ((rev_code >>= 1) & 1); if (!pTree[-tree_cur - 1]) { pTree[-tree_cur - 1] = (mz_int16)tree_next; tree_cur = tree_next; tree_next -= 2; } else tree_cur = pTree[-tree_cur - 1]; } tree_cur -= ((rev_code >>= 1) & 1); pTree[-tree_cur - 1] = (mz_int16)sym_index; } if (r->m_type == 2) { for (counter = 0; counter < (r->m_table_sizes[0] + r->m_table_sizes[1]);) { mz_uint s; TINFL_HUFF_DECODE(16, dist, r->m_look_up[2], r->m_tree_2); if (dist < 16) { r->m_len_codes[counter++] = (mz_uint8)dist; continue; } if ((dist == 16) && (!counter)) { TINFL_CR_RETURN_FOREVER(17, TINFL_STATUS_FAILED); } num_extra = "\02\03\07"[dist - 16]; TINFL_GET_BITS(18, s, num_extra); s += "\03\03\013"[dist - 16]; TINFL_MEMSET(r->m_len_codes + counter, (dist == 16) ?
r->m_len_codes[counter - 1] : 0, s); counter += s; } if ((r->m_table_sizes[0] + r->m_table_sizes[1]) != counter) { TINFL_CR_RETURN_FOREVER(21, TINFL_STATUS_FAILED); } TINFL_MEMCPY(r->m_code_size_0, r->m_len_codes, r->m_table_sizes[0]); TINFL_MEMCPY(r->m_code_size_1, r->m_len_codes + r->m_table_sizes[0], r->m_table_sizes[1]); } } for (;;) { mz_uint8 *pSrc; for (;;) { if (((pIn_buf_end - pIn_buf_cur) < 4) || ((pOut_buf_end - pOut_buf_cur) < 2)) { TINFL_HUFF_DECODE(23, counter, r->m_look_up[0], r->m_tree_0); if (counter >= 256) break; while (pOut_buf_cur >= pOut_buf_end) { TINFL_CR_RETURN(24, TINFL_STATUS_HAS_MORE_OUTPUT); } *pOut_buf_cur++ = (mz_uint8)counter; } else { int sym2; mz_uint code_len; #if TINFL_USE_64BIT_BITBUF if (num_bits < 30) { bit_buf |= (((tinfl_bit_buf_t)MZ_READ_LE32(pIn_buf_cur)) << num_bits); pIn_buf_cur += 4; num_bits += 32; } #else if (num_bits < 15) { bit_buf |= (((tinfl_bit_buf_t)MZ_READ_LE16(pIn_buf_cur)) << num_bits); pIn_buf_cur += 2; num_bits += 16; } #endif if ((sym2 = r->m_look_up[0][bit_buf & (TINFL_FAST_LOOKUP_SIZE - 1)]) >= 0) code_len = sym2 >> 9; else { code_len = TINFL_FAST_LOOKUP_BITS; do { sym2 = r->m_tree_0[~sym2 + ((bit_buf >> code_len++) & 1)]; } while (sym2 < 0); } counter = sym2; bit_buf >>= code_len; num_bits -= code_len; if (counter & 256) break; #if !TINFL_USE_64BIT_BITBUF if (num_bits < 15) { bit_buf |= (((tinfl_bit_buf_t)MZ_READ_LE16(pIn_buf_cur)) << num_bits); pIn_buf_cur += 2; num_bits += 16; } #endif if ((sym2 = r->m_look_up[0][bit_buf & (TINFL_FAST_LOOKUP_SIZE - 1)]) >= 0) code_len = sym2 >> 9; else { code_len = TINFL_FAST_LOOKUP_BITS; do { sym2 = r->m_tree_0[~sym2 + ((bit_buf >> code_len++) & 1)]; } while (sym2 < 0); } bit_buf >>= code_len; num_bits -= code_len; pOut_buf_cur[0] = (mz_uint8)counter; if (sym2 & 256) { pOut_buf_cur++; counter = sym2; break; } pOut_buf_cur[1] = (mz_uint8)sym2; pOut_buf_cur += 2; } } if ((counter &= 511) == 256) break; num_extra = s_length_extra[counter - 257]; counter =
/* Match copy: decode extra length bits, then the distance symbol and its extra bits; the (dist, counter) pair drives an LZ77 back-copy below, byte-wise near the buffer edge, wider copies otherwise. */
s_length_base[counter - 257]; if (num_extra) { mz_uint extra_bits; TINFL_GET_BITS(25, extra_bits, num_extra); counter += extra_bits; } TINFL_HUFF_DECODE(26, dist, r->m_look_up[1], r->m_tree_1); num_extra = s_dist_extra[dist]; dist = s_dist_base[dist]; if (num_extra) { mz_uint extra_bits; TINFL_GET_BITS(27, extra_bits, num_extra); dist += extra_bits; } dist_from_out_buf_start = pOut_buf_cur - pOut_buf_start; if ((dist == 0 || dist > dist_from_out_buf_start || dist_from_out_buf_start == 0) && (decomp_flags & TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF)) { TINFL_CR_RETURN_FOREVER(37, TINFL_STATUS_FAILED); } pSrc = pOut_buf_start + ((dist_from_out_buf_start - dist) & out_buf_size_mask); if ((MZ_MAX(pOut_buf_cur, pSrc) + counter) > pOut_buf_end) { while (counter--) { while (pOut_buf_cur >= pOut_buf_end) { TINFL_CR_RETURN(53, TINFL_STATUS_HAS_MORE_OUTPUT); } *pOut_buf_cur++ = pOut_buf_start[(dist_from_out_buf_start++ - dist) & out_buf_size_mask]; } continue; } #if MINIZ_USE_UNALIGNED_LOADS_AND_STORES else if ((counter >= 9) && (counter <= dist)) { const mz_uint8 *pSrc_end = pSrc + (counter & ~7); do { #ifdef MINIZ_UNALIGNED_USE_MEMCPY memcpy(pOut_buf_cur, pSrc, sizeof(mz_uint32)*2); #else ((mz_uint32 *)pOut_buf_cur)[0] = ((const mz_uint32 *)pSrc)[0]; ((mz_uint32 *)pOut_buf_cur)[1] = ((const mz_uint32 *)pSrc)[1]; #endif pOut_buf_cur += 8; } while ((pSrc += 8) < pSrc_end); if ((counter &= 7) < 3) { if (counter) { pOut_buf_cur[0] = pSrc[0]; if (counter > 1) pOut_buf_cur[1] = pSrc[1]; pOut_buf_cur += counter; } continue; } } #endif while(counter>2) { pOut_buf_cur[0] = pSrc[0]; pOut_buf_cur[1] = pSrc[1]; pOut_buf_cur[2] = pSrc[2]; pOut_buf_cur += 3; pSrc += 3; counter -= 3; } if (counter > 0) { pOut_buf_cur[0] = pSrc[0]; if (counter > 1) pOut_buf_cur[1] = pSrc[1]; pOut_buf_cur += counter; } } } } while (!(r->m_final & 1)); /* Ensure byte alignment and put back any bytes from the bitbuf if we've looked ahead too far on gzip, or other Deflate streams followed by arbitrary data.
*/ /* I'm being super conservative here. A number of simplifications can be made to the byte alignment part, and the Adler32 check shouldn't ever need to worry about reading from the bitbuf now. */ TINFL_SKIP_BITS(32, num_bits & 7); while ((pIn_buf_cur > pIn_buf_next) && (num_bits >= 8)) { --pIn_buf_cur; num_bits -= 8; } bit_buf &= ~(~(tinfl_bit_buf_t)0 << num_bits); MZ_ASSERT(!num_bits); /* if this assert fires then we've read beyond the end of non-deflate/zlib streams with following data (such as gzip streams). */ if (decomp_flags & TINFL_FLAG_PARSE_ZLIB_HEADER) { for (counter = 0; counter < 4; ++counter) { mz_uint s; if (num_bits) TINFL_GET_BITS(41, s, 8); else TINFL_GET_BYTE(42, s); r->m_z_adler32 = (r->m_z_adler32 << 8) | s; } } TINFL_CR_RETURN_FOREVER(34, TINFL_STATUS_DONE); TINFL_CR_FINISH common_exit: /* As long as we aren't telling the caller that we NEED more input to make forward progress: */ /* Put back any bytes from the bitbuf in case we've looked ahead too far on gzip, or other Deflate streams followed by arbitrary data. */ /* We need to be very careful here to NOT push back any bytes we definitely know we need to make forward progress, though, or we'll lock the caller up into an inf loop.
*/ if ((status != TINFL_STATUS_NEEDS_MORE_INPUT) && (status != TINFL_STATUS_FAILED_CANNOT_MAKE_PROGRESS)) { while ((pIn_buf_cur > pIn_buf_next) && (num_bits >= 8)) { --pIn_buf_cur; num_bits -= 8; } } r->m_num_bits = num_bits; r->m_bit_buf = bit_buf & ~(~(tinfl_bit_buf_t)0 << num_bits); r->m_dist = dist; r->m_counter = counter; r->m_num_extra = num_extra; r->m_dist_from_out_buf_start = dist_from_out_buf_start; *pIn_buf_size = pIn_buf_cur - pIn_buf_next; *pOut_buf_size = pOut_buf_cur - pOut_buf_next; if ((decomp_flags & (TINFL_FLAG_PARSE_ZLIB_HEADER | TINFL_FLAG_COMPUTE_ADLER32)) && (status >= 0)) { const mz_uint8 *ptr = pOut_buf_next; size_t buf_len = *pOut_buf_size; mz_uint32 i, s1 = r->m_check_adler32 & 0xffff, s2 = r->m_check_adler32 >> 16; size_t block_len = buf_len % 5552; while (buf_len) { for (i = 0; i + 7 < block_len; i += 8, ptr += 8) { s1 += ptr[0], s2 += s1; s1 += ptr[1], s2 += s1; s1 += ptr[2], s2 += s1; s1 += ptr[3], s2 += s1; s1 += ptr[4], s2 += s1; s1 += ptr[5], s2 += s1; s1 += ptr[6], s2 += s1; s1 += ptr[7], s2 += s1; } for (; i < block_len; ++i) s1 += *ptr++, s2 += s1; s1 %= 65521U, s2 %= 65521U; buf_len -= block_len; block_len = 5552; } r->m_check_adler32 = (s2 << 16) + s1; if ((status == TINFL_STATUS_DONE) && (decomp_flags & TINFL_FLAG_PARSE_ZLIB_HEADER) && (r->m_check_adler32 != r->m_z_adler32)) status = TINFL_STATUS_ADLER32_MISMATCH; } return status; } /* Higher level helper functions. */ void *tinfl_decompress_mem_to_heap(const void *pSrc_buf, size_t src_buf_len, size_t *pOut_len, int flags) { tinfl_decompressor decomp; void *pBuf = NULL, *pNew_buf; size_t src_buf_ofs = 0, out_buf_capacity = 0; *pOut_len = 0; tinfl_init(&decomp); for (;;) { size_t src_buf_size = src_buf_len - src_buf_ofs, dst_buf_size = out_buf_capacity - *pOut_len, new_out_buf_capacity; tinfl_status status = tinfl_decompress(&decomp, (const mz_uint8 *)pSrc_buf + src_buf_ofs, &src_buf_size, (mz_uint8 *)pBuf, pBuf ?
/* Continuation of tinfl_decompress_mem_to_heap's grow-and-retry loop: on TINFL_STATUS_HAS_MORE_OUTPUT the heap buffer doubles (minimum 128 bytes) until TINFL_STATUS_DONE; any error or NEEDS_MORE_INPUT frees the buffer and returns NULL with *pOut_len = 0. */
(mz_uint8 *)pBuf + *pOut_len : NULL, &dst_buf_size, (flags & ~TINFL_FLAG_HAS_MORE_INPUT) | TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF); if ((status < 0) || (status == TINFL_STATUS_NEEDS_MORE_INPUT)) { MZ_FREE(pBuf); *pOut_len = 0; return NULL; } src_buf_ofs += src_buf_size; *pOut_len += dst_buf_size; if (status == TINFL_STATUS_DONE) break; new_out_buf_capacity = out_buf_capacity * 2; if (new_out_buf_capacity < 128) new_out_buf_capacity = 128; pNew_buf = MZ_REALLOC(pBuf, new_out_buf_capacity); if (!pNew_buf) { MZ_FREE(pBuf); *pOut_len = 0; return NULL; } pBuf = pNew_buf; out_buf_capacity = new_out_buf_capacity; } return pBuf; } size_t tinfl_decompress_mem_to_mem(void *pOut_buf, size_t out_buf_len, const void *pSrc_buf, size_t src_buf_len, int flags) { tinfl_decompressor decomp; tinfl_status status; tinfl_init(&decomp); status = tinfl_decompress(&decomp, (const mz_uint8 *)pSrc_buf, &src_buf_len, (mz_uint8 *)pOut_buf, (mz_uint8 *)pOut_buf, &out_buf_len, (flags & ~TINFL_FLAG_HAS_MORE_INPUT) | TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF); return (status != TINFL_STATUS_DONE) ?
/* Below: single-shot mem-to-mem result, then tinfl_decompress_mem_to_callback which streams decompressed data through a zero-initialized TINFL_LZ_DICT_SIZE ring buffer to a user callback, followed by the optional heap alloc/free wrappers for the decompressor state. */
TINFL_DECOMPRESS_MEM_TO_MEM_FAILED : out_buf_len; } int tinfl_decompress_mem_to_callback(const void *pIn_buf, size_t *pIn_buf_size, tinfl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags) { int result = 0; tinfl_decompressor decomp; mz_uint8 *pDict = (mz_uint8 *)MZ_MALLOC(TINFL_LZ_DICT_SIZE); size_t in_buf_ofs = 0, dict_ofs = 0; if (!pDict) return TINFL_STATUS_FAILED; memset(pDict,0,TINFL_LZ_DICT_SIZE); tinfl_init(&decomp); for (;;) { size_t in_buf_size = *pIn_buf_size - in_buf_ofs, dst_buf_size = TINFL_LZ_DICT_SIZE - dict_ofs; tinfl_status status = tinfl_decompress(&decomp, (const mz_uint8 *)pIn_buf + in_buf_ofs, &in_buf_size, pDict, pDict + dict_ofs, &dst_buf_size, (flags & ~(TINFL_FLAG_HAS_MORE_INPUT | TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF))); in_buf_ofs += in_buf_size; if ((dst_buf_size) && (!(*pPut_buf_func)(pDict + dict_ofs, (int)dst_buf_size, pPut_buf_user))) break; if (status != TINFL_STATUS_HAS_MORE_OUTPUT) { result = (status == TINFL_STATUS_DONE); break; } dict_ofs = (dict_ofs + dst_buf_size) & (TINFL_LZ_DICT_SIZE - 1); } MZ_FREE(pDict); *pIn_buf_size = in_buf_ofs; return result; } #ifndef MINIZ_NO_MALLOC tinfl_decompressor *tinfl_decompressor_alloc(void) { tinfl_decompressor *pDecomp = (tinfl_decompressor *)MZ_MALLOC(sizeof(tinfl_decompressor)); if (pDecomp) tinfl_init(pDecomp); return pDecomp; } void tinfl_decompressor_free(tinfl_decompressor *pDecomp) { MZ_FREE(pDecomp); } #endif #ifdef __cplusplus } #endif #endif /*#ifndef MINIZ_NO_INFLATE_APIS*/ /************************************************************************** * * Copyright 2013-2014 RAD Game Tools and Valve Software * Copyright 2010-2014 Rich Geldreich and Tenacious Software LLC * Copyright 2016 Martin Raiber * All Rights Reserved.
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. * **************************************************************************/ #ifndef MINIZ_NO_ARCHIVE_APIS #ifdef __cplusplus extern "C" { #endif /* ------------------- .ZIP archive reading */ #ifdef MINIZ_NO_STDIO #define MZ_FILE void * #else #include #if defined(_MSC_VER) || defined(__MINGW64__) #define WIN32_LEAN_AND_MEAN #include static WCHAR* mz_utf8z_to_widechar(const char* str) { int reqChars = MultiByteToWideChar(CP_UTF8, 0, str, -1, NULL, 0); WCHAR* wStr = (WCHAR*)malloc(reqChars * sizeof(WCHAR)); MultiByteToWideChar(CP_UTF8, 0, str, -1, wStr, reqChars); return wStr; } static FILE *mz_fopen(const char *pFilename, const char *pMode) { WCHAR* wFilename = mz_utf8z_to_widechar(pFilename); WCHAR* wMode = mz_utf8z_to_widechar(pMode); FILE* pFile = NULL; errno_t err = _wfopen_s(&pFile, wFilename, wMode); free(wFilename); free(wMode); return err ?
/* NOTE(review): the bare '#include' directives in this region lost their header names during text extraction (the angle-bracketed targets were stripped; presumably <stdio.h>, <windows.h>, <sys/utime.h>, <time.h> etc. in upstream miniz) -- TODO restore against the upstream source. The MZ_F* macro ladders below select 64-bit-capable stdio (fopen/ftell/fseek/stat variants) per platform/compiler. */
NULL : pFile; } static FILE *mz_freopen(const char *pPath, const char *pMode, FILE *pStream) { WCHAR* wPath = mz_utf8z_to_widechar(pPath); WCHAR* wMode = mz_utf8z_to_widechar(pMode); FILE* pFile = NULL; errno_t err = _wfreopen_s(&pFile, wPath, wMode, pStream); free(wPath); free(wMode); return err ? NULL : pFile; } static int mz_stat64(const char *path, struct __stat64 *buffer) { WCHAR* wPath = mz_utf8z_to_widechar(path); int res = _wstat64(wPath, buffer); free(wPath); return res; } #ifndef MINIZ_NO_TIME #include #endif #define MZ_FOPEN mz_fopen #define MZ_FCLOSE fclose #define MZ_FREAD fread #define MZ_FWRITE fwrite #define MZ_FTELL64 _ftelli64 #define MZ_FSEEK64 _fseeki64 #define MZ_FILE_STAT_STRUCT _stat64 #define MZ_FILE_STAT mz_stat64 #define MZ_FFLUSH fflush #define MZ_FREOPEN mz_freopen #define MZ_DELETE_FILE remove #elif defined(__MINGW32__) || defined(__WATCOMC__) #ifndef MINIZ_NO_TIME #include #endif #define MZ_FOPEN(f, m) fopen(f, m) #define MZ_FCLOSE fclose #define MZ_FREAD fread #define MZ_FWRITE fwrite #define MZ_FTELL64 _ftelli64 #define MZ_FSEEK64 _fseeki64 #define MZ_FILE_STAT_STRUCT stat #define MZ_FILE_STAT stat #define MZ_FFLUSH fflush #define MZ_FREOPEN(f, m, s) freopen(f, m, s) #define MZ_DELETE_FILE remove #elif defined(__TINYC__) #ifndef MINIZ_NO_TIME #include #endif #define MZ_FOPEN(f, m) fopen(f, m) #define MZ_FCLOSE fclose #define MZ_FREAD fread #define MZ_FWRITE fwrite #define MZ_FTELL64 ftell #define MZ_FSEEK64 fseek #define MZ_FILE_STAT_STRUCT stat #define MZ_FILE_STAT stat #define MZ_FFLUSH fflush #define MZ_FREOPEN(f, m, s) freopen(f, m, s) #define MZ_DELETE_FILE remove #elif defined(__USE_LARGEFILE64) /* gcc, clang */ #ifndef MINIZ_NO_TIME #include #endif #define MZ_FOPEN(f, m) fopen64(f, m) #define MZ_FCLOSE fclose #define MZ_FREAD fread #define MZ_FWRITE fwrite #define MZ_FTELL64 ftello64 #define MZ_FSEEK64 fseeko64 #define MZ_FILE_STAT_STRUCT stat64 #define MZ_FILE_STAT stat64 #define MZ_FFLUSH fflush #define MZ_FREOPEN(p, m, s)
freopen64(p, m, s) #define MZ_DELETE_FILE remove #elif defined(__APPLE__) || defined(__FreeBSD__) #ifndef MINIZ_NO_TIME #include #endif #define MZ_FOPEN(f, m) fopen(f, m) #define MZ_FCLOSE fclose #define MZ_FREAD fread #define MZ_FWRITE fwrite #define MZ_FTELL64 ftello #define MZ_FSEEK64 fseeko #define MZ_FILE_STAT_STRUCT stat #define MZ_FILE_STAT stat #define MZ_FFLUSH fflush #define MZ_FREOPEN(p, m, s) freopen(p, m, s) #define MZ_DELETE_FILE remove #else #pragma message("Using fopen, ftello, fseeko, stat() etc. path for file I/O - this path may not support large files.") #ifndef MINIZ_NO_TIME #include #endif #define MZ_FOPEN(f, m) fopen(f, m) #define MZ_FCLOSE fclose #define MZ_FREAD fread #define MZ_FWRITE fwrite #ifdef __STRICT_ANSI__ #define MZ_FTELL64 ftell #define MZ_FSEEK64 fseek #else #define MZ_FTELL64 ftello #define MZ_FSEEK64 fseeko #endif #define MZ_FILE_STAT_STRUCT stat #define MZ_FILE_STAT stat #define MZ_FFLUSH fflush #define MZ_FREOPEN(f, m, s) freopen(f, m, s) #define MZ_DELETE_FILE remove #endif /* #ifdef _MSC_VER */ #endif /* #ifdef MINIZ_NO_STDIO */ #define MZ_TOLOWER(c) ((((c) >= 'A') && ((c) <= 'Z')) ? ((c) - 'A' + 'a') : (c)) /* Various ZIP archive enums. To completely avoid cross platform compiler alignment and platform endian issues, miniz.c doesn't use structs for any of this stuff.
*/ enum { /* ZIP archive identifiers and record sizes */ MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIG = 0x06054b50, MZ_ZIP_CENTRAL_DIR_HEADER_SIG = 0x02014b50, MZ_ZIP_LOCAL_DIR_HEADER_SIG = 0x04034b50, MZ_ZIP_LOCAL_DIR_HEADER_SIZE = 30, MZ_ZIP_CENTRAL_DIR_HEADER_SIZE = 46, MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE = 22, /* ZIP64 archive identifier and record sizes */ MZ_ZIP64_END_OF_CENTRAL_DIR_HEADER_SIG = 0x06064b50, MZ_ZIP64_END_OF_CENTRAL_DIR_LOCATOR_SIG = 0x07064b50, MZ_ZIP64_END_OF_CENTRAL_DIR_HEADER_SIZE = 56, MZ_ZIP64_END_OF_CENTRAL_DIR_LOCATOR_SIZE = 20, MZ_ZIP64_EXTENDED_INFORMATION_FIELD_HEADER_ID = 0x0001, MZ_ZIP_DATA_DESCRIPTOR_ID = 0x08074b50, MZ_ZIP_DATA_DESCRIPTER_SIZE64 = 24, MZ_ZIP_DATA_DESCRIPTER_SIZE32 = 16, /* Central directory header record offsets */ MZ_ZIP_CDH_SIG_OFS = 0, MZ_ZIP_CDH_VERSION_MADE_BY_OFS = 4, MZ_ZIP_CDH_VERSION_NEEDED_OFS = 6, MZ_ZIP_CDH_BIT_FLAG_OFS = 8, MZ_ZIP_CDH_METHOD_OFS = 10, MZ_ZIP_CDH_FILE_TIME_OFS = 12, MZ_ZIP_CDH_FILE_DATE_OFS = 14, MZ_ZIP_CDH_CRC32_OFS = 16, MZ_ZIP_CDH_COMPRESSED_SIZE_OFS = 20, MZ_ZIP_CDH_DECOMPRESSED_SIZE_OFS = 24, MZ_ZIP_CDH_FILENAME_LEN_OFS = 28, MZ_ZIP_CDH_EXTRA_LEN_OFS = 30, MZ_ZIP_CDH_COMMENT_LEN_OFS = 32, MZ_ZIP_CDH_DISK_START_OFS = 34, MZ_ZIP_CDH_INTERNAL_ATTR_OFS = 36, MZ_ZIP_CDH_EXTERNAL_ATTR_OFS = 38, MZ_ZIP_CDH_LOCAL_HEADER_OFS = 42, /* Local directory header offsets */ MZ_ZIP_LDH_SIG_OFS = 0, MZ_ZIP_LDH_VERSION_NEEDED_OFS = 4, MZ_ZIP_LDH_BIT_FLAG_OFS = 6, MZ_ZIP_LDH_METHOD_OFS = 8, MZ_ZIP_LDH_FILE_TIME_OFS = 10, MZ_ZIP_LDH_FILE_DATE_OFS = 12, MZ_ZIP_LDH_CRC32_OFS = 14, MZ_ZIP_LDH_COMPRESSED_SIZE_OFS = 18, MZ_ZIP_LDH_DECOMPRESSED_SIZE_OFS = 22, MZ_ZIP_LDH_FILENAME_LEN_OFS = 26, MZ_ZIP_LDH_EXTRA_LEN_OFS = 28, MZ_ZIP_LDH_BIT_FLAG_HAS_LOCATOR = 1 << 3, /* End of central directory offsets */ MZ_ZIP_ECDH_SIG_OFS = 0, MZ_ZIP_ECDH_NUM_THIS_DISK_OFS = 4, MZ_ZIP_ECDH_NUM_DISK_CDIR_OFS = 6, MZ_ZIP_ECDH_CDIR_NUM_ENTRIES_ON_DISK_OFS = 8, MZ_ZIP_ECDH_CDIR_TOTAL_ENTRIES_OFS = 10, MZ_ZIP_ECDH_CDIR_SIZE_OFS = 12,
/* ZIP64 end-of-central-directory locator/header field offsets (byte widths noted inline) plus general-purpose bit-flag values; these raw offsets are read with MZ_READ_LE16/32 rather than structs to sidestep alignment and endianness issues (see the comment above the enum). */
MZ_ZIP_ECDH_CDIR_OFS_OFS = 16, MZ_ZIP_ECDH_COMMENT_SIZE_OFS = 20, /* ZIP64 End of central directory locator offsets */ MZ_ZIP64_ECDL_SIG_OFS = 0, /* 4 bytes */ MZ_ZIP64_ECDL_NUM_DISK_CDIR_OFS = 4, /* 4 bytes */ MZ_ZIP64_ECDL_REL_OFS_TO_ZIP64_ECDR_OFS = 8, /* 8 bytes */ MZ_ZIP64_ECDL_TOTAL_NUMBER_OF_DISKS_OFS = 16, /* 4 bytes */ /* ZIP64 End of central directory header offsets */ MZ_ZIP64_ECDH_SIG_OFS = 0, /* 4 bytes */ MZ_ZIP64_ECDH_SIZE_OF_RECORD_OFS = 4, /* 8 bytes */ MZ_ZIP64_ECDH_VERSION_MADE_BY_OFS = 12, /* 2 bytes */ MZ_ZIP64_ECDH_VERSION_NEEDED_OFS = 14, /* 2 bytes */ MZ_ZIP64_ECDH_NUM_THIS_DISK_OFS = 16, /* 4 bytes */ MZ_ZIP64_ECDH_NUM_DISK_CDIR_OFS = 20, /* 4 bytes */ MZ_ZIP64_ECDH_CDIR_NUM_ENTRIES_ON_DISK_OFS = 24, /* 8 bytes */ MZ_ZIP64_ECDH_CDIR_TOTAL_ENTRIES_OFS = 32, /* 8 bytes */ MZ_ZIP64_ECDH_CDIR_SIZE_OFS = 40, /* 8 bytes */ MZ_ZIP64_ECDH_CDIR_OFS_OFS = 48, /* 8 bytes */ MZ_ZIP_VERSION_MADE_BY_DOS_FILESYSTEM_ID = 0, MZ_ZIP_DOS_DIR_ATTRIBUTE_BITFLAG = 0x10, MZ_ZIP_GENERAL_PURPOSE_BIT_FLAG_IS_ENCRYPTED = 1, MZ_ZIP_GENERAL_PURPOSE_BIT_FLAG_COMPRESSED_PATCH_FLAG = 32, MZ_ZIP_GENERAL_PURPOSE_BIT_FLAG_USES_STRONG_ENCRYPTION = 64, MZ_ZIP_GENERAL_PURPOSE_BIT_FLAG_LOCAL_DIR_IS_MASKED = 8192, MZ_ZIP_GENERAL_PURPOSE_BIT_FLAG_UTF8 = 1 << 11 }; typedef struct { void *m_p; size_t m_size, m_capacity; mz_uint m_element_size; } mz_zip_array; struct mz_zip_internal_state_tag { mz_zip_array m_central_dir; mz_zip_array m_central_dir_offsets; mz_zip_array m_sorted_central_dir_offsets; /* The flags passed in when the archive is initially opened. */ mz_uint32 m_init_flags; /* MZ_TRUE if the archive has a zip64 end of central directory headers, etc. */ mz_bool m_zip64; /* MZ_TRUE if we found zip64 extended info in the central directory (m_zip64 will also be slammed to true too, even if we didn't find a zip64 end of central dir header, etc.) */ mz_bool m_zip64_has_extended_info_fields; /* These fields are used by the file, FILE, memory, and memory/heap read/write helpers.
*/ MZ_FILE *m_pFile; mz_uint64 m_file_archive_start_ofs; void *m_pMem; size_t m_mem_size; size_t m_mem_capacity; }; #define MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(array_ptr, element_size) (array_ptr)->m_element_size = element_size #if defined(DEBUG) || defined(_DEBUG) static MZ_FORCEINLINE mz_uint mz_zip_array_range_check(const mz_zip_array *pArray, mz_uint index) { MZ_ASSERT(index < pArray->m_size); return index; } #define MZ_ZIP_ARRAY_ELEMENT(array_ptr, element_type, index) ((element_type *)((array_ptr)->m_p))[mz_zip_array_range_check(array_ptr, index)] #else #define MZ_ZIP_ARRAY_ELEMENT(array_ptr, element_type, index) ((element_type *)((array_ptr)->m_p))[index] #endif static MZ_FORCEINLINE void mz_zip_array_init(mz_zip_array *pArray, mz_uint32 element_size) { memset(pArray, 0, sizeof(mz_zip_array)); pArray->m_element_size = element_size; } static MZ_FORCEINLINE void mz_zip_array_clear(mz_zip_archive *pZip, mz_zip_array *pArray) { pZip->m_pFree(pZip->m_pAlloc_opaque, pArray->m_p); memset(pArray, 0, sizeof(mz_zip_array)); } static mz_bool mz_zip_array_ensure_capacity(mz_zip_archive *pZip, mz_zip_array *pArray, size_t min_new_capacity, mz_uint growing) { void *pNew_p; size_t new_capacity = min_new_capacity; MZ_ASSERT(pArray->m_element_size); if (pArray->m_capacity >= min_new_capacity) return MZ_TRUE; if (growing) { new_capacity = MZ_MAX(1, pArray->m_capacity); while (new_capacity < min_new_capacity) new_capacity *= 2; } if (NULL == (pNew_p = pZip->m_pRealloc(pZip->m_pAlloc_opaque, pArray->m_p, pArray->m_element_size, new_capacity))) return MZ_FALSE; pArray->m_p = pNew_p; pArray->m_capacity = new_capacity; return MZ_TRUE; } static MZ_FORCEINLINE mz_bool mz_zip_array_reserve(mz_zip_archive *pZip, mz_zip_array *pArray, size_t new_capacity, mz_uint growing) { if (new_capacity > pArray->m_capacity) { if (!mz_zip_array_ensure_capacity(pZip, pArray, new_capacity, growing)) return MZ_FALSE; } return MZ_TRUE; } static MZ_FORCEINLINE mz_bool mz_zip_array_resize(mz_zip_archive *pZip,
/* resize grows capacity if needed (doubling, via mz_zip_array_ensure_capacity when 'growing' is set) and then sets m_size; push_back resizes then memcpy's the new elements onto the tail. The DOS<->time_t converters below pack/unpack the 2-second-resolution FAT date/time fields. */
mz_zip_array *pArray, size_t new_size, mz_uint growing) { if (new_size > pArray->m_capacity) { if (!mz_zip_array_ensure_capacity(pZip, pArray, new_size, growing)) return MZ_FALSE; } pArray->m_size = new_size; return MZ_TRUE; } static MZ_FORCEINLINE mz_bool mz_zip_array_ensure_room(mz_zip_archive *pZip, mz_zip_array *pArray, size_t n) { return mz_zip_array_reserve(pZip, pArray, pArray->m_size + n, MZ_TRUE); } static MZ_FORCEINLINE mz_bool mz_zip_array_push_back(mz_zip_archive *pZip, mz_zip_array *pArray, const void *pElements, size_t n) { size_t orig_size = pArray->m_size; if (!mz_zip_array_resize(pZip, pArray, orig_size + n, MZ_TRUE)) return MZ_FALSE; if (n > 0) memcpy((mz_uint8 *)pArray->m_p + orig_size * pArray->m_element_size, pElements, n * pArray->m_element_size); return MZ_TRUE; } #ifndef MINIZ_NO_TIME static MZ_TIME_T mz_zip_dos_to_time_t(int dos_time, int dos_date) { struct tm tm; memset(&tm, 0, sizeof(tm)); tm.tm_isdst = -1; tm.tm_year = ((dos_date >> 9) & 127) + 1980 - 1900; tm.tm_mon = ((dos_date >> 5) & 15) - 1; tm.tm_mday = dos_date & 31; tm.tm_hour = (dos_time >> 11) & 31; tm.tm_min = (dos_time >> 5) & 63; tm.tm_sec = (dos_time << 1) & 62; return mktime(&tm); } #ifndef MINIZ_NO_ARCHIVE_WRITING_APIS static void mz_zip_time_t_to_dos_time(MZ_TIME_T time, mz_uint16 *pDOS_time, mz_uint16 *pDOS_date) { #ifdef _MSC_VER struct tm tm_struct; struct tm *tm = &tm_struct; errno_t err = localtime_s(tm, &time); if (err) { *pDOS_date = 0; *pDOS_time = 0; return; } #else struct tm *tm = localtime(&time); #endif /* #ifdef _MSC_VER */ *pDOS_time = (mz_uint16)(((tm->tm_hour) << 11) + ((tm->tm_min) << 5) + ((tm->tm_sec) >> 1)); *pDOS_date = (mz_uint16)(((tm->tm_year + 1900 - 1980) << 9) + ((tm->tm_mon + 1) << 5) + tm->tm_mday); } #endif /* MINIZ_NO_ARCHIVE_WRITING_APIS */ #ifndef MINIZ_NO_STDIO #ifndef MINIZ_NO_ARCHIVE_WRITING_APIS static mz_bool mz_zip_get_file_modified_time(const char *pFilename, MZ_TIME_T *pTime) { struct MZ_FILE_STAT_STRUCT file_stat; /* On Linux
with x86 glibc, this call will fail on large files (I think >= 0x80000000 bytes) unless you compiled with _LARGEFILE64_SOURCE. Argh. */ if (MZ_FILE_STAT(pFilename, &file_stat) != 0) return MZ_FALSE; *pTime = file_stat.st_mtime; return MZ_TRUE; } #endif /* #ifndef MINIZ_NO_ARCHIVE_WRITING_APIS*/ static mz_bool mz_zip_set_file_times(const char *pFilename, MZ_TIME_T access_time, MZ_TIME_T modified_time) { struct utimbuf t; memset(&t, 0, sizeof(t)); t.actime = access_time; t.modtime = modified_time; return !utime(pFilename, &t); } #endif /* #ifndef MINIZ_NO_STDIO */ #endif /* #ifndef MINIZ_NO_TIME */ static MZ_FORCEINLINE mz_bool mz_zip_set_error(mz_zip_archive *pZip, mz_zip_error err_num) { if (pZip) pZip->m_last_error = err_num; return MZ_FALSE; } static mz_bool mz_zip_reader_init_internal(mz_zip_archive *pZip, mz_uint flags) { (void)flags; if ((!pZip) || (pZip->m_pState) || (pZip->m_zip_mode != MZ_ZIP_MODE_INVALID)) return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); if (!pZip->m_pAlloc) pZip->m_pAlloc = miniz_def_alloc_func; if (!pZip->m_pFree) pZip->m_pFree = miniz_def_free_func; if (!pZip->m_pRealloc) pZip->m_pRealloc = miniz_def_realloc_func; pZip->m_archive_size = 0; pZip->m_central_directory_file_ofs = 0; pZip->m_total_files = 0; pZip->m_last_error = MZ_ZIP_NO_ERROR; if (NULL == (pZip->m_pState = (mz_zip_internal_state *)pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, sizeof(mz_zip_internal_state)))) return mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED); memset(pZip->m_pState, 0, sizeof(mz_zip_internal_state)); MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(&pZip->m_pState->m_central_dir, sizeof(mz_uint8)); MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(&pZip->m_pState->m_central_dir_offsets, sizeof(mz_uint32)); MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(&pZip->m_pState->m_sorted_central_dir_offsets, sizeof(mz_uint32)); pZip->m_pState->m_init_flags = flags; pZip->m_pState->m_zip64 = MZ_FALSE; pZip->m_pState->m_zip64_has_extended_info_fields = MZ_FALSE; pZip->m_zip_mode = MZ_ZIP_MODE_READING; return MZ_TRUE; }
/* Case-insensitive "less than" comparison of two central-directory entries'
   filenames, used as the ordering predicate for the heap sort below. */
static MZ_FORCEINLINE mz_bool mz_zip_reader_filename_less(const mz_zip_array *pCentral_dir_array, const mz_zip_array *pCentral_dir_offsets, mz_uint l_index, mz_uint r_index)
{
    const mz_uint8 *pL = &MZ_ZIP_ARRAY_ELEMENT(pCentral_dir_array, mz_uint8, MZ_ZIP_ARRAY_ELEMENT(pCentral_dir_offsets, mz_uint32, l_index)), *pE;
    const mz_uint8 *pR = &MZ_ZIP_ARRAY_ELEMENT(pCentral_dir_array, mz_uint8, MZ_ZIP_ARRAY_ELEMENT(pCentral_dir_offsets, mz_uint32, r_index));
    mz_uint l_len = MZ_READ_LE16(pL + MZ_ZIP_CDH_FILENAME_LEN_OFS), r_len = MZ_READ_LE16(pR + MZ_ZIP_CDH_FILENAME_LEN_OFS);
    mz_uint8 l = 0, r = 0;
    /* The filename immediately follows the fixed-size central dir header. */
    pL += MZ_ZIP_CENTRAL_DIR_HEADER_SIZE;
    pR += MZ_ZIP_CENTRAL_DIR_HEADER_SIZE;
    pE = pL + MZ_MIN(l_len, r_len);
    while (pL < pE)
    {
        if ((l = MZ_TOLOWER(*pL)) != (r = MZ_TOLOWER(*pR)))
            break;
        pL++;
        pR++;
    }
    /* Equal common prefix: the shorter name sorts first. */
    return (pL == pE) ? (l_len < r_len) : (l < r);
}

#define MZ_SWAP_UINT32(a, b) \
    do                       \
    {                        \
        mz_uint32 t = a;     \
        a = b;               \
        b = t;               \
    }                        \
    MZ_MACRO_END

/* Heap sort of lowercased filenames, used to help accelerate plain central directory searches by mz_zip_reader_locate_file(). (Could also use qsort(), but it could allocate memory.) */
static void mz_zip_reader_sort_central_dir_offsets_by_filename(mz_zip_archive *pZip)
{
    mz_zip_internal_state *pState = pZip->m_pState;
    const mz_zip_array *pCentral_dir_offsets = &pState->m_central_dir_offsets;
    const mz_zip_array *pCentral_dir = &pState->m_central_dir;
    mz_uint32 *pIndices;
    mz_uint32 start, end;
    const mz_uint32 size = pZip->m_total_files;
    if (size <= 1U)
        return;
    pIndices = &MZ_ZIP_ARRAY_ELEMENT(&pState->m_sorted_central_dir_offsets, mz_uint32, 0);
    /* Phase 1: heapify the index array. */
    start = (size - 2U) >> 1U;
    for (;;)
    {
        mz_uint64 child, root = start;
        for (;;)
        {
            if ((child = (root << 1U) + 1U) >= size)
                break;
            /* Pick the larger of the two children (branchless bump). */
            child += (((child + 1U) < size) && (mz_zip_reader_filename_less(pCentral_dir, pCentral_dir_offsets, pIndices[child], pIndices[child + 1U])));
            if (!mz_zip_reader_filename_less(pCentral_dir, pCentral_dir_offsets, pIndices[root], pIndices[child]))
                break;
            MZ_SWAP_UINT32(pIndices[root], pIndices[child]);
            root = child;
        }
        if (!start)
            break;
        start--;
    }
    /* Phase 2: repeatedly move the max to the end of the heap and sift down. */
    end = size - 1;
    while (end > 0)
    {
        mz_uint64 child, root = 0;
        MZ_SWAP_UINT32(pIndices[end], pIndices[0]);
        for (;;)
        {
            if ((child = (root << 1U) + 1U) >= end)
                break;
            child += (((child + 1U) < end) && mz_zip_reader_filename_less(pCentral_dir, pCentral_dir_offsets, pIndices[child], pIndices[child + 1U]));
            if (!mz_zip_reader_filename_less(pCentral_dir, pCentral_dir_offsets, pIndices[root], pIndices[child]))
                break;
            MZ_SWAP_UINT32(pIndices[root], pIndices[child]);
            root = child;
        }
        end--;
    }
}

/* Scan backwards from the end of the archive for a record with the given
   signature and minimum size (e.g. the end-of-central-directory record),
   writing its file offset to *pOfs. */
static mz_bool mz_zip_reader_locate_header_sig(mz_zip_archive *pZip, mz_uint32 record_sig, mz_uint32 record_size, mz_int64 *pOfs)
{
    mz_int64 cur_file_ofs;
    mz_uint32 buf_u32[4096 / sizeof(mz_uint32)];
    mz_uint8 *pBuf = (mz_uint8 *)buf_u32;
    /* Basic sanity checks - reject files which are too small */
    if (pZip->m_archive_size < record_size)
        return MZ_FALSE;
    /* Find the record by scanning the file from the end towards the beginning.
*/
    cur_file_ofs = MZ_MAX((mz_int64)pZip->m_archive_size - (mz_int64)sizeof(buf_u32), 0);
    for (;;)
    {
        int i, n = (int)MZ_MIN(sizeof(buf_u32), pZip->m_archive_size - cur_file_ofs);
        if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pBuf, n) != (mz_uint)n)
            return MZ_FALSE;
        /* Search this buffer back-to-front for the signature. */
        for (i = n - 4; i >= 0; --i)
        {
            mz_uint s = MZ_READ_LE32(pBuf + i);
            if (s == record_sig)
            {
                if ((pZip->m_archive_size - (cur_file_ofs + i)) >= record_size)
                    break;
            }
        }
        if (i >= 0)
        {
            cur_file_ofs += i;
            break;
        }
        /* Give up if we've searched the entire file, or we've gone back "too far" (~64kb) */
        if ((!cur_file_ofs) || ((pZip->m_archive_size - cur_file_ofs) >= (MZ_UINT16_MAX + record_size)))
            return MZ_FALSE;
        /* Step back with a 3-byte overlap so a signature spanning two reads is still found. */
        cur_file_ofs = MZ_MAX(cur_file_ofs - (sizeof(buf_u32) - 3), 0);
    }
    *pOfs = cur_file_ofs;
    return MZ_TRUE;
}

/* Locate and parse the (possibly zip64) end-of-central-directory records, then
   read the whole central directory into memory and build the per-entry offset
   index (continued in the following chunk). */
static mz_bool mz_zip_reader_read_central_dir(mz_zip_archive *pZip, mz_uint flags)
{
    mz_uint cdir_size = 0, cdir_entries_on_this_disk = 0, num_this_disk = 0, cdir_disk_index = 0;
    mz_uint64 cdir_ofs = 0;
    mz_int64 cur_file_ofs = 0;
    const mz_uint8 *p;
    mz_uint32 buf_u32[4096 / sizeof(mz_uint32)];
    mz_uint8 *pBuf = (mz_uint8 *)buf_u32;
    mz_bool sort_central_dir = ((flags & MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY) == 0);
    mz_uint32 zip64_end_of_central_dir_locator_u32[(MZ_ZIP64_END_OF_CENTRAL_DIR_LOCATOR_SIZE + sizeof(mz_uint32) - 1) / sizeof(mz_uint32)];
    mz_uint8 *pZip64_locator = (mz_uint8 *)zip64_end_of_central_dir_locator_u32;
    mz_uint32 zip64_end_of_central_dir_header_u32[(MZ_ZIP64_END_OF_CENTRAL_DIR_HEADER_SIZE + sizeof(mz_uint32) - 1) / sizeof(mz_uint32)];
    mz_uint8 *pZip64_end_of_central_dir = (mz_uint8 *)zip64_end_of_central_dir_header_u32;
    mz_uint64 zip64_end_of_central_dir_ofs = 0;
    /* Basic sanity checks - reject files which are too small, and check the first 4 bytes of the file to make sure a local header is there. */
    if (pZip->m_archive_size < MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE)
        return mz_zip_set_error(pZip, MZ_ZIP_NOT_AN_ARCHIVE);
    if (!mz_zip_reader_locate_header_sig(pZip, MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIG, MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE, &cur_file_ofs))
        return mz_zip_set_error(pZip, MZ_ZIP_FAILED_FINDING_CENTRAL_DIR);
    /* Read and verify the end of central directory record. */
    if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pBuf, MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE) != MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE)
        return mz_zip_set_error(pZip, MZ_ZIP_FILE_READ_FAILED);
    if (MZ_READ_LE32(pBuf + MZ_ZIP_ECDH_SIG_OFS) != MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIG)
        return mz_zip_set_error(pZip, MZ_ZIP_NOT_AN_ARCHIVE);
    /* Look for the zip64 EOCD locator just before the classic EOCD record. */
    if (cur_file_ofs >= (MZ_ZIP64_END_OF_CENTRAL_DIR_LOCATOR_SIZE + MZ_ZIP64_END_OF_CENTRAL_DIR_HEADER_SIZE))
    {
        if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs - MZ_ZIP64_END_OF_CENTRAL_DIR_LOCATOR_SIZE, pZip64_locator, MZ_ZIP64_END_OF_CENTRAL_DIR_LOCATOR_SIZE) == MZ_ZIP64_END_OF_CENTRAL_DIR_LOCATOR_SIZE)
        {
            if (MZ_READ_LE32(pZip64_locator + MZ_ZIP64_ECDL_SIG_OFS) == MZ_ZIP64_END_OF_CENTRAL_DIR_LOCATOR_SIG)
            {
                zip64_end_of_central_dir_ofs = MZ_READ_LE64(pZip64_locator + MZ_ZIP64_ECDL_REL_OFS_TO_ZIP64_ECDR_OFS);
                if (zip64_end_of_central_dir_ofs > (pZip->m_archive_size - MZ_ZIP64_END_OF_CENTRAL_DIR_HEADER_SIZE))
                    return mz_zip_set_error(pZip, MZ_ZIP_NOT_AN_ARCHIVE);
                if (pZip->m_pRead(pZip->m_pIO_opaque, zip64_end_of_central_dir_ofs, pZip64_end_of_central_dir, MZ_ZIP64_END_OF_CENTRAL_DIR_HEADER_SIZE) == MZ_ZIP64_END_OF_CENTRAL_DIR_HEADER_SIZE)
                {
                    if (MZ_READ_LE32(pZip64_end_of_central_dir + MZ_ZIP64_ECDH_SIG_OFS) == MZ_ZIP64_END_OF_CENTRAL_DIR_HEADER_SIG)
                    {
                        pZip->m_pState->m_zip64 = MZ_TRUE;
                    }
                }
            }
        }
    }
    /* 16/32-bit values from the classic EOCD record; overridden below when zip64. */
    pZip->m_total_files = MZ_READ_LE16(pBuf + MZ_ZIP_ECDH_CDIR_TOTAL_ENTRIES_OFS);
    cdir_entries_on_this_disk = MZ_READ_LE16(pBuf + MZ_ZIP_ECDH_CDIR_NUM_ENTRIES_ON_DISK_OFS);
    num_this_disk = MZ_READ_LE16(pBuf + MZ_ZIP_ECDH_NUM_THIS_DISK_OFS);
    cdir_disk_index = MZ_READ_LE16(pBuf + MZ_ZIP_ECDH_NUM_DISK_CDIR_OFS);
    cdir_size = MZ_READ_LE32(pBuf + MZ_ZIP_ECDH_CDIR_SIZE_OFS);
    cdir_ofs = MZ_READ_LE32(pBuf + MZ_ZIP_ECDH_CDIR_OFS_OFS);
    if (pZip->m_pState->m_zip64)
    {
        /* Prefer the 64-bit values from the zip64 EOCD record. */
        mz_uint32 zip64_total_num_of_disks = MZ_READ_LE32(pZip64_locator + MZ_ZIP64_ECDL_TOTAL_NUMBER_OF_DISKS_OFS);
        mz_uint64 zip64_cdir_total_entries = MZ_READ_LE64(pZip64_end_of_central_dir + MZ_ZIP64_ECDH_CDIR_TOTAL_ENTRIES_OFS);
        mz_uint64 zip64_cdir_total_entries_on_this_disk = MZ_READ_LE64(pZip64_end_of_central_dir + MZ_ZIP64_ECDH_CDIR_NUM_ENTRIES_ON_DISK_OFS);
        mz_uint64 zip64_size_of_end_of_central_dir_record = MZ_READ_LE64(pZip64_end_of_central_dir + MZ_ZIP64_ECDH_SIZE_OF_RECORD_OFS);
        mz_uint64 zip64_size_of_central_directory = MZ_READ_LE64(pZip64_end_of_central_dir + MZ_ZIP64_ECDH_CDIR_SIZE_OFS);
        if (zip64_size_of_end_of_central_dir_record < (MZ_ZIP64_END_OF_CENTRAL_DIR_HEADER_SIZE - 12))
            return mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED);
        if (zip64_total_num_of_disks != 1U)
            return mz_zip_set_error(pZip, MZ_ZIP_UNSUPPORTED_MULTIDISK);
        /* Check for miniz's practical limits */
        if (zip64_cdir_total_entries > MZ_UINT32_MAX)
            return mz_zip_set_error(pZip, MZ_ZIP_TOO_MANY_FILES);
        pZip->m_total_files = (mz_uint32)zip64_cdir_total_entries;
        if (zip64_cdir_total_entries_on_this_disk > MZ_UINT32_MAX)
            return mz_zip_set_error(pZip, MZ_ZIP_TOO_MANY_FILES);
        cdir_entries_on_this_disk = (mz_uint32)zip64_cdir_total_entries_on_this_disk;
        /* Check for miniz's current practical limits (sorry, this should be enough for millions of files) */
        if (zip64_size_of_central_directory > MZ_UINT32_MAX)
            return mz_zip_set_error(pZip, MZ_ZIP_UNSUPPORTED_CDIR_SIZE);
        cdir_size = (mz_uint32)zip64_size_of_central_directory;
        num_this_disk = MZ_READ_LE32(pZip64_end_of_central_dir + MZ_ZIP64_ECDH_NUM_THIS_DISK_OFS);
        cdir_disk_index = MZ_READ_LE32(pZip64_end_of_central_dir + MZ_ZIP64_ECDH_NUM_DISK_CDIR_OFS);
        cdir_ofs = MZ_READ_LE64(pZip64_end_of_central_dir + MZ_ZIP64_ECDH_CDIR_OFS_OFS);
    }
    if
(pZip->m_total_files != cdir_entries_on_this_disk)
        return mz_zip_set_error(pZip, MZ_ZIP_UNSUPPORTED_MULTIDISK);
    if (((num_this_disk | cdir_disk_index) != 0) && ((num_this_disk != 1) || (cdir_disk_index != 1)))
        return mz_zip_set_error(pZip, MZ_ZIP_UNSUPPORTED_MULTIDISK);
    if (cdir_size < (mz_uint64)pZip->m_total_files * MZ_ZIP_CENTRAL_DIR_HEADER_SIZE)
        return mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED);
    if ((cdir_ofs + (mz_uint64)cdir_size) > pZip->m_archive_size)
        return mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED);
    pZip->m_central_directory_file_ofs = cdir_ofs;
    if (pZip->m_total_files)
    {
        mz_uint i, n;
        /* Read the entire central directory into a heap block, and allocate another heap block to hold the unsorted central dir file record offsets, and possibly another to hold the sorted indices. */
        if ((!mz_zip_array_resize(pZip, &pZip->m_pState->m_central_dir, cdir_size, MZ_FALSE)) || (!mz_zip_array_resize(pZip, &pZip->m_pState->m_central_dir_offsets, pZip->m_total_files, MZ_FALSE)))
            return mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED);
        if (sort_central_dir)
        {
            if (!mz_zip_array_resize(pZip, &pZip->m_pState->m_sorted_central_dir_offsets, pZip->m_total_files, MZ_FALSE))
                return mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED);
        }
        if (pZip->m_pRead(pZip->m_pIO_opaque, cdir_ofs, pZip->m_pState->m_central_dir.m_p, cdir_size) != cdir_size)
            return mz_zip_set_error(pZip, MZ_ZIP_FILE_READ_FAILED);
        /* Now create an index into the central directory file records, do some basic sanity checking on each record */
        p = (const mz_uint8 *)pZip->m_pState->m_central_dir.m_p;
        for (n = cdir_size, i = 0; i < pZip->m_total_files; ++i)
        {
            mz_uint total_header_size, disk_index, bit_flags, filename_size, ext_data_size;
            mz_uint64 comp_size, decomp_size, local_header_ofs;
            if ((n < MZ_ZIP_CENTRAL_DIR_HEADER_SIZE) || (MZ_READ_LE32(p) != MZ_ZIP_CENTRAL_DIR_HEADER_SIG))
                return mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED);
            MZ_ZIP_ARRAY_ELEMENT(&pZip->m_pState->m_central_dir_offsets, mz_uint32, i) = (mz_uint32)(p - (const mz_uint8 *)pZip->m_pState->m_central_dir.m_p);
            if (sort_central_dir)
                MZ_ZIP_ARRAY_ELEMENT(&pZip->m_pState->m_sorted_central_dir_offsets, mz_uint32, i) = i;
            comp_size = MZ_READ_LE32(p + MZ_ZIP_CDH_COMPRESSED_SIZE_OFS);
            decomp_size = MZ_READ_LE32(p + MZ_ZIP_CDH_DECOMPRESSED_SIZE_OFS);
            local_header_ofs = MZ_READ_LE32(p + MZ_ZIP_CDH_LOCAL_HEADER_OFS);
            filename_size = MZ_READ_LE16(p + MZ_ZIP_CDH_FILENAME_LEN_OFS);
            ext_data_size = MZ_READ_LE16(p + MZ_ZIP_CDH_EXTRA_LEN_OFS);
            /* Saturated 32-bit sizes/offset mean the real values live in a zip64 extra field. */
            if ((!pZip->m_pState->m_zip64_has_extended_info_fields) && (ext_data_size) && (MZ_MAX(MZ_MAX(comp_size, decomp_size), local_header_ofs) == MZ_UINT32_MAX))
            {
                /* Attempt to find zip64 extended information field in the entry's extra data */
                mz_uint32 extra_size_remaining = ext_data_size;
                if (extra_size_remaining)
                {
                    const mz_uint8 *pExtra_data;
                    void* buf = NULL;
                    if (MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + filename_size + ext_data_size > n)
                    {
                        /* Extra data spills past what we have in memory; fetch it separately. */
                        buf = MZ_MALLOC(ext_data_size);
                        if(buf==NULL)
                            return mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED);
                        if (pZip->m_pRead(pZip->m_pIO_opaque, cdir_ofs + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + filename_size, buf, ext_data_size) != ext_data_size)
                        {
                            MZ_FREE(buf);
                            return mz_zip_set_error(pZip, MZ_ZIP_FILE_READ_FAILED);
                        }
                        pExtra_data = (mz_uint8*)buf;
                    }
                    else
                    {
                        pExtra_data = p + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + filename_size;
                    }
                    /* Walk the (id, size, data) extra-field records. */
                    do
                    {
                        mz_uint32 field_id;
                        mz_uint32 field_data_size;
                        if (extra_size_remaining < (sizeof(mz_uint16) * 2))
                        {
                            MZ_FREE(buf);
                            return mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED);
                        }
                        field_id = MZ_READ_LE16(pExtra_data);
                        field_data_size = MZ_READ_LE16(pExtra_data + sizeof(mz_uint16));
                        if ((field_data_size + sizeof(mz_uint16) * 2) > extra_size_remaining)
                        {
                            MZ_FREE(buf);
                            return mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED);
                        }
                        if (field_id == MZ_ZIP64_EXTENDED_INFORMATION_FIELD_HEADER_ID)
                        {
                            /* Ok, the archive didn't have any zip64 headers but it uses a zip64 extended information field so mark it as zip64 anyway (this can occur with infozip's zip util when it reads compresses files from stdin). */
                            pZip->m_pState->m_zip64 = MZ_TRUE;
                            pZip->m_pState->m_zip64_has_extended_info_fields = MZ_TRUE;
                            break;
                        }
                        pExtra_data += sizeof(mz_uint16) * 2 + field_data_size;
                        extra_size_remaining = extra_size_remaining - sizeof(mz_uint16) * 2 - field_data_size;
                    } while (extra_size_remaining);
                    MZ_FREE(buf);
                }
            }
            /* I've seen archives that aren't marked as zip64 that uses zip64 ext data, argh */
            if ((comp_size != MZ_UINT32_MAX) && (decomp_size != MZ_UINT32_MAX))
            {
                if (((!MZ_READ_LE32(p + MZ_ZIP_CDH_METHOD_OFS)) && (decomp_size != comp_size)) || (decomp_size && !comp_size))
                    return mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED);
            }
            disk_index = MZ_READ_LE16(p + MZ_ZIP_CDH_DISK_START_OFS);
            if ((disk_index == MZ_UINT16_MAX) || ((disk_index != num_this_disk) && (disk_index != 1)))
                return mz_zip_set_error(pZip, MZ_ZIP_UNSUPPORTED_MULTIDISK);
            if (comp_size != MZ_UINT32_MAX)
            {
                if (((mz_uint64)MZ_READ_LE32(p + MZ_ZIP_CDH_LOCAL_HEADER_OFS) + MZ_ZIP_LOCAL_DIR_HEADER_SIZE + comp_size) > pZip->m_archive_size)
                    return mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED);
            }
            bit_flags = MZ_READ_LE16(p + MZ_ZIP_CDH_BIT_FLAG_OFS);
            if (bit_flags & MZ_ZIP_GENERAL_PURPOSE_BIT_FLAG_LOCAL_DIR_IS_MASKED)
                return mz_zip_set_error(pZip, MZ_ZIP_UNSUPPORTED_ENCRYPTION);
            if ((total_header_size = MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + MZ_READ_LE16(p + MZ_ZIP_CDH_FILENAME_LEN_OFS) + MZ_READ_LE16(p + MZ_ZIP_CDH_EXTRA_LEN_OFS) + MZ_READ_LE16(p + MZ_ZIP_CDH_COMMENT_LEN_OFS)) > n)
                return mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED);
            n -= total_header_size;
            p += total_header_size;
        }
    }
    if (sort_central_dir)
        mz_zip_reader_sort_central_dir_offsets_by_filename(pZip);
    return MZ_TRUE;
}

/* Zero-initialize an archive struct so it is safe to pass to the init functions. */
void mz_zip_zero_struct(mz_zip_archive *pZip)
{
    if (pZip)
        MZ_CLEAR_PTR(pZip);
}

/* Tear down a reader: free the central directory arrays and internal state,
   closing the FILE* only when this archive opened it. set_last_error controls
   whether failures are recorded on the archive (continued in the next chunk). */
static mz_bool mz_zip_reader_end_internal(mz_zip_archive *pZip, mz_bool set_last_error)
{
mz_bool status = MZ_TRUE;
    if (!pZip)
        return MZ_FALSE;
    if ((!pZip->m_pState) || (!pZip->m_pAlloc) || (!pZip->m_pFree) || (pZip->m_zip_mode != MZ_ZIP_MODE_READING))
    {
        if (set_last_error)
            pZip->m_last_error = MZ_ZIP_INVALID_PARAMETER;
        return MZ_FALSE;
    }
    if (pZip->m_pState)
    {
        mz_zip_internal_state *pState = pZip->m_pState;
        pZip->m_pState = NULL;
        mz_zip_array_clear(pZip, &pState->m_central_dir);
        mz_zip_array_clear(pZip, &pState->m_central_dir_offsets);
        mz_zip_array_clear(pZip, &pState->m_sorted_central_dir_offsets);
#ifndef MINIZ_NO_STDIO
        if (pState->m_pFile)
        {
            /* Only close the FILE* when this archive opened it itself (MZ_ZIP_TYPE_FILE). */
            if (pZip->m_zip_type == MZ_ZIP_TYPE_FILE)
            {
                if (MZ_FCLOSE(pState->m_pFile) == EOF)
                {
                    if (set_last_error)
                        pZip->m_last_error = MZ_ZIP_FILE_CLOSE_FAILED;
                    status = MZ_FALSE;
                }
            }
            pState->m_pFile = NULL;
        }
#endif /* #ifndef MINIZ_NO_STDIO */
        pZip->m_pFree(pZip->m_pAlloc_opaque, pState);
    }
    pZip->m_zip_mode = MZ_ZIP_MODE_INVALID;
    return status;
}

/* Public wrapper: end reading and record any error on the archive. */
mz_bool mz_zip_reader_end(mz_zip_archive *pZip)
{
    return mz_zip_reader_end_internal(pZip, MZ_TRUE);
}

/* Init a reader that pulls data through a caller-supplied m_pRead callback. */
mz_bool mz_zip_reader_init(mz_zip_archive *pZip, mz_uint64 size, mz_uint flags)
{
    if ((!pZip) || (!pZip->m_pRead))
        return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER);
    if (!mz_zip_reader_init_internal(pZip, flags))
        return MZ_FALSE;
    pZip->m_zip_type = MZ_ZIP_TYPE_USER;
    pZip->m_archive_size = size;
    if (!mz_zip_reader_read_central_dir(pZip, flags))
    {
        mz_zip_reader_end_internal(pZip, MZ_FALSE);
        return MZ_FALSE;
    }
    return MZ_TRUE;
}

/* Read callback for in-memory archives: clamped memcpy out of m_pMem. */
static size_t mz_zip_mem_read_func(void *pOpaque, mz_uint64 file_ofs, void *pBuf, size_t n)
{
    mz_zip_archive *pZip = (mz_zip_archive *)pOpaque;
    size_t s = (file_ofs >= pZip->m_archive_size) ? 0 : (size_t)MZ_MIN(pZip->m_archive_size - file_ofs, n);
    memcpy(pBuf, (const mz_uint8 *)pZip->m_pState->m_pMem + file_ofs, s);
    return s;
}

/* Init a reader over an archive that is entirely in memory (the buffer is
   referenced, not copied; it must outlive the reader). */
mz_bool mz_zip_reader_init_mem(mz_zip_archive *pZip, const void *pMem, size_t size, mz_uint flags)
{
    if (!pMem)
        return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER);
    if (size < MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE)
        return mz_zip_set_error(pZip, MZ_ZIP_NOT_AN_ARCHIVE);
    if (!mz_zip_reader_init_internal(pZip, flags))
        return MZ_FALSE;
    pZip->m_zip_type = MZ_ZIP_TYPE_MEMORY;
    pZip->m_archive_size = size;
    pZip->m_pRead = mz_zip_mem_read_func;
    pZip->m_pIO_opaque = pZip;
    pZip->m_pNeeds_keepalive = NULL;
#ifdef __cplusplus
    /* NOTE(review): this looks truncated from const_cast<void *>(pMem) — verify
       against upstream miniz. It is only compiled under __cplusplus, so a plain
       C build is unaffected. */
    pZip->m_pState->m_pMem = const_cast(pMem);
#else
    pZip->m_pState->m_pMem = (void *)pMem;
#endif
    pZip->m_pState->m_mem_size = size;
    if (!mz_zip_reader_read_central_dir(pZip, flags))
    {
        mz_zip_reader_end_internal(pZip, MZ_FALSE);
        return MZ_FALSE;
    }
    return MZ_TRUE;
}

#ifndef MINIZ_NO_STDIO
/* Read callback for stdio-backed archives; applies the archive start offset and
   seeks only when the current position differs from the requested one. */
static size_t mz_zip_file_read_func(void *pOpaque, mz_uint64 file_ofs, void *pBuf, size_t n)
{
    mz_zip_archive *pZip = (mz_zip_archive *)pOpaque;
    mz_int64 cur_ofs = MZ_FTELL64(pZip->m_pState->m_pFile);
    file_ofs += pZip->m_pState->m_file_archive_start_ofs;
    if (((mz_int64)file_ofs < 0) || (((cur_ofs != (mz_int64)file_ofs)) && (MZ_FSEEK64(pZip->m_pState->m_pFile, (mz_int64)file_ofs, SEEK_SET))))
        return 0;
    return MZ_FREAD(pBuf, 1, n, pZip->m_pState->m_pFile);
}

/* Convenience wrapper over mz_zip_reader_init_file_v2() with no start offset or explicit size. */
mz_bool mz_zip_reader_init_file(mz_zip_archive *pZip, const char *pFilename, mz_uint32 flags)
{
    return mz_zip_reader_init_file_v2(pZip, pFilename, flags, 0, 0);
}

/* Open pFilename and init a reader over (a region of) it.
   archive_size == 0 means "use the whole file size". */
mz_bool mz_zip_reader_init_file_v2(mz_zip_archive *pZip, const char *pFilename, mz_uint flags, mz_uint64 file_start_ofs, mz_uint64 archive_size)
{
    mz_uint64 file_size;
    MZ_FILE *pFile;
    if ((!pZip) || (!pFilename) || ((archive_size) && (archive_size < MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE)))
        return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER);
    pFile = MZ_FOPEN(pFilename, "rb");
    if (!pFile)
        return mz_zip_set_error(pZip, MZ_ZIP_FILE_OPEN_FAILED);
    file_size = archive_size;
    if (!file_size)
    {
        if (MZ_FSEEK64(pFile, 0, SEEK_END))
        {
            MZ_FCLOSE(pFile);
            return mz_zip_set_error(pZip, MZ_ZIP_FILE_SEEK_FAILED);
        }
        file_size = MZ_FTELL64(pFile);
    }
    /* TODO: Better sanity check archive_size and the # of actual remaining bytes */
    if (file_size < MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE)
    {
        MZ_FCLOSE(pFile);
        return mz_zip_set_error(pZip, MZ_ZIP_NOT_AN_ARCHIVE);
    }
    if (!mz_zip_reader_init_internal(pZip, flags))
    {
        MZ_FCLOSE(pFile);
        return MZ_FALSE;
    }
    pZip->m_zip_type = MZ_ZIP_TYPE_FILE;
    pZip->m_pRead = mz_zip_file_read_func;
    pZip->m_pIO_opaque = pZip;
    pZip->m_pState->m_pFile = pFile;
    pZip->m_archive_size = file_size;
    pZip->m_pState->m_file_archive_start_ofs = file_start_ofs;
    if (!mz_zip_reader_read_central_dir(pZip, flags))
    {
        mz_zip_reader_end_internal(pZip, MZ_FALSE);
        return MZ_FALSE;
    }
    return MZ_TRUE;
}

/* Init a reader over an already-open FILE*, starting at its current position;
   the caller keeps ownership of the FILE* (MZ_ZIP_TYPE_CFILE is never fclose'd here). */
mz_bool mz_zip_reader_init_cfile(mz_zip_archive *pZip, MZ_FILE *pFile, mz_uint64 archive_size, mz_uint flags)
{
    mz_uint64 cur_file_ofs;
    if ((!pZip) || (!pFile))
        return mz_zip_set_error(pZip, MZ_ZIP_FILE_OPEN_FAILED);
    cur_file_ofs = MZ_FTELL64(pFile);
    if (!archive_size)
    {
        if (MZ_FSEEK64(pFile, 0, SEEK_END))
            return mz_zip_set_error(pZip, MZ_ZIP_FILE_SEEK_FAILED);
        archive_size = MZ_FTELL64(pFile) - cur_file_ofs;
        if (archive_size < MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE)
            return mz_zip_set_error(pZip, MZ_ZIP_NOT_AN_ARCHIVE);
    }
    if (!mz_zip_reader_init_internal(pZip, flags))
        return MZ_FALSE;
    pZip->m_zip_type = MZ_ZIP_TYPE_CFILE;
    pZip->m_pRead = mz_zip_file_read_func;
    pZip->m_pIO_opaque = pZip;
    pZip->m_pState->m_pFile = pFile;
    pZip->m_archive_size = archive_size;
    pZip->m_pState->m_file_archive_start_ofs = cur_file_ofs;
    if (!mz_zip_reader_read_central_dir(pZip, flags))
    {
        mz_zip_reader_end_internal(pZip, MZ_FALSE);
        return MZ_FALSE;
    }
    return MZ_TRUE;
}
#endif /* #ifndef MINIZ_NO_STDIO */

/* Return a pointer to the central directory header for file_index, or NULL when
   the archive/state is missing or the index is out of range. */
static MZ_FORCEINLINE const mz_uint8 *mz_zip_get_cdh(mz_zip_archive *pZip, mz_uint file_index)
{
    if ((!pZip) || (!pZip->m_pState)
|| (file_index >= pZip->m_total_files))
        return NULL;
    return &MZ_ZIP_ARRAY_ELEMENT(&pZip->m_pState->m_central_dir, mz_uint8, MZ_ZIP_ARRAY_ELEMENT(&pZip->m_pState->m_central_dir_offsets, mz_uint32, file_index));
}

/* True if the entry's general-purpose bit flags mark it as encrypted. */
mz_bool mz_zip_reader_is_file_encrypted(mz_zip_archive *pZip, mz_uint file_index)
{
    mz_uint m_bit_flag;
    const mz_uint8 *p = mz_zip_get_cdh(pZip, file_index);
    if (!p)
    {
        mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER);
        return MZ_FALSE;
    }
    m_bit_flag = MZ_READ_LE16(p + MZ_ZIP_CDH_BIT_FLAG_OFS);
    return (m_bit_flag & (MZ_ZIP_GENERAL_PURPOSE_BIT_FLAG_IS_ENCRYPTED | MZ_ZIP_GENERAL_PURPOSE_BIT_FLAG_USES_STRONG_ENCRYPTION)) != 0;
}

/* True if the entry uses only features this reader handles: method 0 (stored)
   or deflate, no encryption, no compressed-patch data. */
mz_bool mz_zip_reader_is_file_supported(mz_zip_archive *pZip, mz_uint file_index)
{
    mz_uint bit_flag;
    mz_uint method;
    const mz_uint8 *p = mz_zip_get_cdh(pZip, file_index);
    if (!p)
    {
        mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER);
        return MZ_FALSE;
    }
    method = MZ_READ_LE16(p + MZ_ZIP_CDH_METHOD_OFS);
    bit_flag = MZ_READ_LE16(p + MZ_ZIP_CDH_BIT_FLAG_OFS);
    if ((method != 0) && (method != MZ_DEFLATED))
    {
        mz_zip_set_error(pZip, MZ_ZIP_UNSUPPORTED_METHOD);
        return MZ_FALSE;
    }
    if (bit_flag & (MZ_ZIP_GENERAL_PURPOSE_BIT_FLAG_IS_ENCRYPTED | MZ_ZIP_GENERAL_PURPOSE_BIT_FLAG_USES_STRONG_ENCRYPTION))
    {
        mz_zip_set_error(pZip, MZ_ZIP_UNSUPPORTED_ENCRYPTION);
        return MZ_FALSE;
    }
    if (bit_flag & MZ_ZIP_GENERAL_PURPOSE_BIT_FLAG_COMPRESSED_PATCH_FLAG)
    {
        mz_zip_set_error(pZip, MZ_ZIP_UNSUPPORTED_FEATURE);
        return MZ_FALSE;
    }
    return MZ_TRUE;
}

/* True if the entry is a directory: its name ends in '/' or the DOS directory
   attribute bit is set in the external attributes. */
mz_bool mz_zip_reader_is_file_a_directory(mz_zip_archive *pZip, mz_uint file_index)
{
    mz_uint filename_len, attribute_mapping_id, external_attr;
    const mz_uint8 *p = mz_zip_get_cdh(pZip, file_index);
    if (!p)
    {
        mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER);
        return MZ_FALSE;
    }
    filename_len = MZ_READ_LE16(p + MZ_ZIP_CDH_FILENAME_LEN_OFS);
    if (filename_len)
    {
        if (*(p + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + filename_len - 1) == '/')
            return MZ_TRUE;
    }
    /* Bugfix: This code was also checking if the internal attribute was non-zero, which wasn't correct. */
    /* Most/all zip writers (hopefully) set DOS file/directory attributes in the low 16-bits, so check for the DOS directory flag and ignore the source OS ID in the created by field. */
    /* FIXME: Remove this check? Is it necessary - we already check the filename. */
    attribute_mapping_id = MZ_READ_LE16(p + MZ_ZIP_CDH_VERSION_MADE_BY_OFS) >> 8;
    (void)attribute_mapping_id;
    external_attr = MZ_READ_LE32(p + MZ_ZIP_CDH_EXTERNAL_ATTR_OFS);
    if ((external_attr & MZ_ZIP_DOS_DIR_ATTRIBUTE_BITFLAG) != 0)
    {
        return MZ_TRUE;
    }
    return MZ_FALSE;
}

/* Decode one central directory header into a mz_zip_archive_file_stat,
   including zip64 extra-field handling (continued in the next chunk).
   pFound_zip64_extra_data (optional) reports whether a zip64 field was seen. */
static mz_bool mz_zip_file_stat_internal(mz_zip_archive *pZip, mz_uint file_index, const mz_uint8 *pCentral_dir_header, mz_zip_archive_file_stat *pStat, mz_bool *pFound_zip64_extra_data)
{
    mz_uint n;
    const mz_uint8 *p = pCentral_dir_header;
    if (pFound_zip64_extra_data)
        *pFound_zip64_extra_data = MZ_FALSE;
    if ((!p) || (!pStat))
        return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER);
    /* Extract fields from the central directory record. */
    pStat->m_file_index = file_index;
    pStat->m_central_dir_ofs = MZ_ZIP_ARRAY_ELEMENT(&pZip->m_pState->m_central_dir_offsets, mz_uint32, file_index);
    pStat->m_version_made_by = MZ_READ_LE16(p + MZ_ZIP_CDH_VERSION_MADE_BY_OFS);
    pStat->m_version_needed = MZ_READ_LE16(p + MZ_ZIP_CDH_VERSION_NEEDED_OFS);
    pStat->m_bit_flag = MZ_READ_LE16(p + MZ_ZIP_CDH_BIT_FLAG_OFS);
    pStat->m_method = MZ_READ_LE16(p + MZ_ZIP_CDH_METHOD_OFS);
#ifndef MINIZ_NO_TIME
    pStat->m_time = mz_zip_dos_to_time_t(MZ_READ_LE16(p + MZ_ZIP_CDH_FILE_TIME_OFS), MZ_READ_LE16(p + MZ_ZIP_CDH_FILE_DATE_OFS));
#endif
    pStat->m_crc32 = MZ_READ_LE32(p + MZ_ZIP_CDH_CRC32_OFS);
    pStat->m_comp_size = MZ_READ_LE32(p + MZ_ZIP_CDH_COMPRESSED_SIZE_OFS);
    pStat->m_uncomp_size = MZ_READ_LE32(p + MZ_ZIP_CDH_DECOMPRESSED_SIZE_OFS);
    pStat->m_internal_attr = MZ_READ_LE16(p + MZ_ZIP_CDH_INTERNAL_ATTR_OFS);
    pStat->m_external_attr = MZ_READ_LE32(p + MZ_ZIP_CDH_EXTERNAL_ATTR_OFS);
    pStat->m_local_header_ofs = MZ_READ_LE32(p + MZ_ZIP_CDH_LOCAL_HEADER_OFS);
    /* Copy as much of the filename and comment as possible. */
    n = MZ_READ_LE16(p + MZ_ZIP_CDH_FILENAME_LEN_OFS);
    n = MZ_MIN(n, MZ_ZIP_MAX_ARCHIVE_FILENAME_SIZE - 1);
    memcpy(pStat->m_filename, p + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE, n);
    pStat->m_filename[n] = '\0';
    n = MZ_READ_LE16(p + MZ_ZIP_CDH_COMMENT_LEN_OFS);
    n = MZ_MIN(n, MZ_ZIP_MAX_ARCHIVE_FILE_COMMENT_SIZE - 1);
    pStat->m_comment_size = n;
    /* The comment sits after the filename and the extra data. */
    memcpy(pStat->m_comment, p + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + MZ_READ_LE16(p + MZ_ZIP_CDH_FILENAME_LEN_OFS) + MZ_READ_LE16(p + MZ_ZIP_CDH_EXTRA_LEN_OFS), n);
    pStat->m_comment[n] = '\0';
    /* Set some flags for convienance */
    pStat->m_is_directory = mz_zip_reader_is_file_a_directory(pZip, file_index);
    pStat->m_is_encrypted = mz_zip_reader_is_file_encrypted(pZip, file_index);
    pStat->m_is_supported = mz_zip_reader_is_file_supported(pZip, file_index);
    /* See if we need to read any zip64 extended information fields. */
    /* Confusingly, these zip64 fields can be present even on non-zip64 archives (Debian zip on a huge files from stdin piped to stdout creates them).
*/
    if (MZ_MAX(MZ_MAX(pStat->m_comp_size, pStat->m_uncomp_size), pStat->m_local_header_ofs) == MZ_UINT32_MAX)
    {
        /* Attempt to find zip64 extended information field in the entry's extra data */
        mz_uint32 extra_size_remaining = MZ_READ_LE16(p + MZ_ZIP_CDH_EXTRA_LEN_OFS);
        if (extra_size_remaining)
        {
            const mz_uint8 *pExtra_data = p + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + MZ_READ_LE16(p + MZ_ZIP_CDH_FILENAME_LEN_OFS);
            do
            {
                mz_uint32 field_id;
                mz_uint32 field_data_size;
                if (extra_size_remaining < (sizeof(mz_uint16) * 2))
                    return mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED);
                field_id = MZ_READ_LE16(pExtra_data);
                field_data_size = MZ_READ_LE16(pExtra_data + sizeof(mz_uint16));
                if ((field_data_size + sizeof(mz_uint16) * 2) > extra_size_remaining)
                    return mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED);
                if (field_id == MZ_ZIP64_EXTENDED_INFORMATION_FIELD_HEADER_ID)
                {
                    /* The zip64 field stores 64-bit values only for the header
                       fields that were saturated, in this fixed order:
                       uncompressed size, compressed size, local header offset. */
                    const mz_uint8 *pField_data = pExtra_data + sizeof(mz_uint16) * 2;
                    mz_uint32 field_data_remaining = field_data_size;
                    if (pFound_zip64_extra_data)
                        *pFound_zip64_extra_data = MZ_TRUE;
                    if (pStat->m_uncomp_size == MZ_UINT32_MAX)
                    {
                        if (field_data_remaining < sizeof(mz_uint64))
                            return mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED);
                        pStat->m_uncomp_size = MZ_READ_LE64(pField_data);
                        pField_data += sizeof(mz_uint64);
                        field_data_remaining -= sizeof(mz_uint64);
                    }
                    if (pStat->m_comp_size == MZ_UINT32_MAX)
                    {
                        if (field_data_remaining < sizeof(mz_uint64))
                            return mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED);
                        pStat->m_comp_size = MZ_READ_LE64(pField_data);
                        pField_data += sizeof(mz_uint64);
                        field_data_remaining -= sizeof(mz_uint64);
                    }
                    if (pStat->m_local_header_ofs == MZ_UINT32_MAX)
                    {
                        if (field_data_remaining < sizeof(mz_uint64))
                            return mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED);
                        pStat->m_local_header_ofs = MZ_READ_LE64(pField_data);
                        pField_data += sizeof(mz_uint64);
                        field_data_remaining -= sizeof(mz_uint64);
                    }
                    break;
                }
                pExtra_data += sizeof(mz_uint16) * 2 + field_data_size;
                extra_size_remaining = extra_size_remaining - sizeof(mz_uint16) * 2 - field_data_size;
            } while (extra_size_remaining);
        }
    }
    return MZ_TRUE;
}

/* Compare two strings of known equal length, case sensitively or not depending on flags. */
static MZ_FORCEINLINE mz_bool mz_zip_string_equal(const char *pA, const char *pB, mz_uint len, mz_uint flags)
{
    mz_uint i;
    if (flags & MZ_ZIP_FLAG_CASE_SENSITIVE)
        return 0 == memcmp(pA, pB, len);
    for (i = 0; i < len; ++i)
        if (MZ_TOLOWER(pA[i]) != MZ_TOLOWER(pB[i]))
            return MZ_FALSE;
    return MZ_TRUE;
}

/* Three-way, case-insensitive comparison of a central-directory entry's
   filename (l_index) against an external name pR of length r_len. */
static MZ_FORCEINLINE int mz_zip_filename_compare(const mz_zip_array *pCentral_dir_array, const mz_zip_array *pCentral_dir_offsets, mz_uint l_index, const char *pR, mz_uint r_len)
{
    const mz_uint8 *pL = &MZ_ZIP_ARRAY_ELEMENT(pCentral_dir_array, mz_uint8, MZ_ZIP_ARRAY_ELEMENT(pCentral_dir_offsets, mz_uint32, l_index)), *pE;
    mz_uint l_len = MZ_READ_LE16(pL + MZ_ZIP_CDH_FILENAME_LEN_OFS);
    mz_uint8 l = 0, r = 0;
    pL += MZ_ZIP_CENTRAL_DIR_HEADER_SIZE;
    pE = pL + MZ_MIN(l_len, r_len);
    while (pL < pE)
    {
        if ((l = MZ_TOLOWER(*pL)) != (r = MZ_TOLOWER(*pR)))
            break;
        pL++;
        pR++;
    }
    return (pL == pE) ? (int)(l_len - r_len) : (l - r);
}

/* Binary search over the sorted filename index built at init time.
   Writes the matching file index to *pIndex (when non-NULL) and returns MZ_TRUE. */
static mz_bool mz_zip_locate_file_binary_search(mz_zip_archive *pZip, const char *pFilename, mz_uint32 *pIndex)
{
    mz_zip_internal_state *pState = pZip->m_pState;
    const mz_zip_array *pCentral_dir_offsets = &pState->m_central_dir_offsets;
    const mz_zip_array *pCentral_dir = &pState->m_central_dir;
    mz_uint32 *pIndices = &MZ_ZIP_ARRAY_ELEMENT(&pState->m_sorted_central_dir_offsets, mz_uint32, 0);
    const mz_uint32 size = pZip->m_total_files;
    const mz_uint filename_len = (mz_uint)strlen(pFilename);
    if (pIndex)
        *pIndex = 0;
    if (size)
    {
        /* yes I could use uint32_t's, but then we would have to add some special case checks in the loop, argh, and */
        /* honestly the major expense here on 32-bit CPU's will still be the filename compare */
        mz_int64 l = 0, h = (mz_int64)size - 1;
        while (l <= h)
        {
            mz_int64 m = l + ((h - l) >> 1);
            mz_uint32 file_index = pIndices[(mz_uint32)m];
            int comp = mz_zip_filename_compare(pCentral_dir, pCentral_dir_offsets, file_index, pFilename, filename_len);
            if (!comp)
            {
                if (pIndex)
                    *pIndex = file_index;
                return MZ_TRUE;
            }
            else if (comp < 0)
                l = m + 1;
            else
                h = m - 1;
        }
    }
    return mz_zip_set_error(pZip, MZ_ZIP_FILE_NOT_FOUND);
}

/* Legacy wrapper: returns the matching file index, or -1 when not found. */
int mz_zip_reader_locate_file(mz_zip_archive *pZip, const char *pName, const char *pComment, mz_uint flags)
{
    mz_uint32 index;
    if (!mz_zip_reader_locate_file_v2(pZip, pName, pComment, flags, &index))
        return -1;
    else
        return (int)index;
}

/* Find a file by name (and optional comment). Uses the sorted-index binary
   search when the query is case-insensitive, path-sensitive and comment-free;
   otherwise falls back to a linear scan (continued in the next chunk). */
mz_bool mz_zip_reader_locate_file_v2(mz_zip_archive *pZip, const char *pName, const char *pComment, mz_uint flags, mz_uint32 *pIndex)
{
    mz_uint file_index;
    size_t name_len, comment_len;
    if (pIndex)
        *pIndex = 0;
    if ((!pZip) || (!pZip->m_pState) || (!pName))
        return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER);
    /* See if we can use a binary search */
    if (((pZip->m_pState->m_init_flags & MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY) == 0) && (pZip->m_zip_mode == MZ_ZIP_MODE_READING) && ((flags & (MZ_ZIP_FLAG_IGNORE_PATH | MZ_ZIP_FLAG_CASE_SENSITIVE)) == 0) && (!pComment) &&
(pZip->m_pState->m_sorted_central_dir_offsets.m_size)) { return mz_zip_locate_file_binary_search(pZip, pName, pIndex); } /* Locate the entry by scanning the entire central directory */ name_len = strlen(pName); if (name_len > MZ_UINT16_MAX) return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); comment_len = pComment ? strlen(pComment) : 0; if (comment_len > MZ_UINT16_MAX) return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); for (file_index = 0; file_index < pZip->m_total_files; file_index++) { const mz_uint8 *pHeader = &MZ_ZIP_ARRAY_ELEMENT(&pZip->m_pState->m_central_dir, mz_uint8, MZ_ZIP_ARRAY_ELEMENT(&pZip->m_pState->m_central_dir_offsets, mz_uint32, file_index)); mz_uint filename_len = MZ_READ_LE16(pHeader + MZ_ZIP_CDH_FILENAME_LEN_OFS); const char *pFilename = (const char *)pHeader + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE; if (filename_len < name_len) continue; if (comment_len) { mz_uint file_extra_len = MZ_READ_LE16(pHeader + MZ_ZIP_CDH_EXTRA_LEN_OFS), file_comment_len = MZ_READ_LE16(pHeader + MZ_ZIP_CDH_COMMENT_LEN_OFS); const char *pFile_comment = pFilename + filename_len + file_extra_len; if ((file_comment_len != comment_len) || (!mz_zip_string_equal(pComment, pFile_comment, file_comment_len, flags))) continue; } if ((flags & MZ_ZIP_FLAG_IGNORE_PATH) && (filename_len)) { int ofs = filename_len - 1; do { if ((pFilename[ofs] == '/') || (pFilename[ofs] == '\\') || (pFilename[ofs] == ':')) break; } while (--ofs >= 0); ofs++; pFilename += ofs; filename_len -= ofs; } if ((filename_len == name_len) && (mz_zip_string_equal(pName, pFilename, filename_len, flags))) { if (pIndex) *pIndex = file_index; return MZ_TRUE; } } return mz_zip_set_error(pZip, MZ_ZIP_FILE_NOT_FOUND); } static mz_bool mz_zip_reader_extract_to_mem_no_alloc1(mz_zip_archive *pZip, mz_uint file_index, void *pBuf, size_t buf_size, mz_uint flags, void *pUser_read_buf, size_t user_read_buf_size, const mz_zip_archive_file_stat *st) { int status = TINFL_STATUS_DONE; mz_uint64 needed_size, cur_file_ofs, 
/* (continuation of mz_zip_reader_extract_to_mem_no_alloc1's locals) */
              comp_remaining, out_buf_ofs = 0, read_buf_size, read_buf_ofs = 0, read_buf_avail;
    mz_zip_archive_file_stat file_stat;
    void *pRead_buf;
    mz_uint32 local_header_u32[(MZ_ZIP_LOCAL_DIR_HEADER_SIZE + sizeof(mz_uint32) - 1) / sizeof(mz_uint32)];
    mz_uint8 *pLocal_header = (mz_uint8 *)local_header_u32;
    tinfl_decompressor inflator;
    /* Parameter sanity: a non-zero size requires a matching buffer pointer. */
    if ((!pZip) || (!pZip->m_pState) || ((buf_size) && (!pBuf)) || ((user_read_buf_size) && (!pUser_read_buf)) || (!pZip->m_pRead))
        return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER);
    if (st)
    {
        /* Caller already stat'ed the entry; reuse it. */
        file_stat = *st;
    }
    else if (!mz_zip_reader_file_stat(pZip, file_index, &file_stat))
        return MZ_FALSE;
    /* A directory or zero length file */
    if ((file_stat.m_is_directory) || (!file_stat.m_comp_size))
        return MZ_TRUE;
    /* Encryption and patch files are not supported. */
    if (file_stat.m_bit_flag & (MZ_ZIP_GENERAL_PURPOSE_BIT_FLAG_IS_ENCRYPTED | MZ_ZIP_GENERAL_PURPOSE_BIT_FLAG_USES_STRONG_ENCRYPTION | MZ_ZIP_GENERAL_PURPOSE_BIT_FLAG_COMPRESSED_PATCH_FLAG))
        return mz_zip_set_error(pZip, MZ_ZIP_UNSUPPORTED_ENCRYPTION);
    /* This function only supports decompressing stored and deflate. */
    if ((!(flags & MZ_ZIP_FLAG_COMPRESSED_DATA)) && (file_stat.m_method != 0) && (file_stat.m_method != MZ_DEFLATED))
        return mz_zip_set_error(pZip, MZ_ZIP_UNSUPPORTED_METHOD);
    /* Ensure supplied output buffer is large enough. */
    needed_size = (flags & MZ_ZIP_FLAG_COMPRESSED_DATA) ? file_stat.m_comp_size : file_stat.m_uncomp_size;
    if (buf_size < needed_size)
        return mz_zip_set_error(pZip, MZ_ZIP_BUF_TOO_SMALL);
    /* Read and parse the local directory entry. */
    cur_file_ofs = file_stat.m_local_header_ofs;
    if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pLocal_header, MZ_ZIP_LOCAL_DIR_HEADER_SIZE) != MZ_ZIP_LOCAL_DIR_HEADER_SIZE)
        return mz_zip_set_error(pZip, MZ_ZIP_FILE_READ_FAILED);
    if (MZ_READ_LE32(pLocal_header) != MZ_ZIP_LOCAL_DIR_HEADER_SIG)
        return mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED);
    /* Skip past the local header's filename and extra field to the file data. */
    cur_file_ofs += MZ_ZIP_LOCAL_DIR_HEADER_SIZE + MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_FILENAME_LEN_OFS) + MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_EXTRA_LEN_OFS);
    if ((cur_file_ofs + file_stat.m_comp_size) > pZip->m_archive_size)
        return mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED);
    if ((flags & MZ_ZIP_FLAG_COMPRESSED_DATA) || (!file_stat.m_method))
    {
        /* The file is stored or the caller has requested the compressed data. */
        if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pBuf, (size_t)needed_size) != needed_size)
            return mz_zip_set_error(pZip, MZ_ZIP_FILE_READ_FAILED);
#ifndef MINIZ_DISABLE_ZIP_READER_CRC32_CHECKS
        if ((flags & MZ_ZIP_FLAG_COMPRESSED_DATA) == 0)
        {
            if (mz_crc32(MZ_CRC32_INIT, (const mz_uint8 *)pBuf, (size_t)file_stat.m_uncomp_size) != file_stat.m_crc32)
                return mz_zip_set_error(pZip, MZ_ZIP_CRC_CHECK_FAILED);
        }
#endif
        return MZ_TRUE;
    }
    /* Decompress the file either directly from memory or from a file input buffer. */
    tinfl_init(&inflator);
    if (pZip->m_pState->m_pMem)
    {
        /* Read directly from the archive in memory. */
        pRead_buf = (mz_uint8 *)pZip->m_pState->m_pMem + cur_file_ofs;
        read_buf_size = read_buf_avail = file_stat.m_comp_size;
        comp_remaining = 0;
    }
    else if (pUser_read_buf)
    {
        /* Use a user provided read buffer. */
        if (!user_read_buf_size)
            return MZ_FALSE;
        pRead_buf = (mz_uint8 *)pUser_read_buf;
        read_buf_size = user_read_buf_size;
        read_buf_avail = 0;
        comp_remaining = file_stat.m_comp_size;
    }
    else
    {
        /* Temporarily allocate a read buffer. */
        read_buf_size = MZ_MIN(file_stat.m_comp_size, (mz_uint64)MZ_ZIP_MAX_IO_BUF_SIZE);
        /* Guard against size_t truncation on 32-bit targets. */
        if (((sizeof(size_t) == sizeof(mz_uint32))) && (read_buf_size > 0x7FFFFFFF))
            return mz_zip_set_error(pZip, MZ_ZIP_INTERNAL_ERROR);
        if (NULL == (pRead_buf = pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, (size_t)read_buf_size)))
            return mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED);
        read_buf_avail = 0;
        comp_remaining = file_stat.m_comp_size;
    }
    do
    {
        /* The size_t cast here should be OK because we've verified that the output buffer is >= file_stat.m_uncomp_size above */
        size_t in_buf_size, out_buf_size = (size_t)(file_stat.m_uncomp_size - out_buf_ofs);
        if ((!read_buf_avail) && (!pZip->m_pState->m_pMem))
        {
            /* Refill the read buffer from the archive I/O callback. */
            read_buf_avail = MZ_MIN(read_buf_size, comp_remaining);
            if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pRead_buf, (size_t)read_buf_avail) != read_buf_avail)
            {
                status = TINFL_STATUS_FAILED;
                mz_zip_set_error(pZip, MZ_ZIP_DECOMPRESSION_FAILED);
                break;
            }
            cur_file_ofs += read_buf_avail;
            comp_remaining -= read_buf_avail;
            read_buf_ofs = 0;
        }
        in_buf_size = (size_t)read_buf_avail;
        /* Inflate directly into the caller's buffer (non-wrapping output). */
        status = tinfl_decompress(&inflator, (mz_uint8 *)pRead_buf + read_buf_ofs, &in_buf_size, (mz_uint8 *)pBuf, (mz_uint8 *)pBuf + out_buf_ofs, &out_buf_size, TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF | (comp_remaining ? TINFL_FLAG_HAS_MORE_INPUT : 0));
        read_buf_avail -= in_buf_size;
        read_buf_ofs += in_buf_size;
        out_buf_ofs += out_buf_size;
    } while (status == TINFL_STATUS_NEEDS_MORE_INPUT);
    if (status == TINFL_STATUS_DONE)
    {
        /* Make sure the entire file was decompressed, and check its CRC. */
        if (out_buf_ofs != file_stat.m_uncomp_size)
        {
            mz_zip_set_error(pZip, MZ_ZIP_UNEXPECTED_DECOMPRESSED_SIZE);
            status = TINFL_STATUS_FAILED;
        }
#ifndef MINIZ_DISABLE_ZIP_READER_CRC32_CHECKS
        else if (mz_crc32(MZ_CRC32_INIT, (const mz_uint8 *)pBuf, (size_t)file_stat.m_uncomp_size) != file_stat.m_crc32)
        {
            mz_zip_set_error(pZip, MZ_ZIP_CRC_CHECK_FAILED);
            status = TINFL_STATUS_FAILED;
        }
#endif
    }
    /* Only free the read buffer when this function allocated it. */
    if ((!pZip->m_pState->m_pMem) && (!pUser_read_buf))
        pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf);
    return status == TINFL_STATUS_DONE;
}

/* Public wrapper: extract by index into a caller buffer, optional user read buffer. */
mz_bool mz_zip_reader_extract_to_mem_no_alloc(mz_zip_archive *pZip, mz_uint file_index, void *pBuf, size_t buf_size, mz_uint flags, void *pUser_read_buf, size_t user_read_buf_size)
{
    return mz_zip_reader_extract_to_mem_no_alloc1(pZip, file_index, pBuf, buf_size, flags, pUser_read_buf, user_read_buf_size, NULL);
}

/* Public wrapper: locate by filename, then extract into a caller buffer. */
mz_bool mz_zip_reader_extract_file_to_mem_no_alloc(mz_zip_archive *pZip, const char *pFilename, void *pBuf, size_t buf_size, mz_uint flags, void *pUser_read_buf, size_t user_read_buf_size)
{
    mz_uint32 file_index;
    if (!mz_zip_reader_locate_file_v2(pZip, pFilename, NULL, flags, &file_index))
        return MZ_FALSE;
    return mz_zip_reader_extract_to_mem_no_alloc1(pZip, file_index, pBuf, buf_size, flags, pUser_read_buf, user_read_buf_size, NULL);
}

/* Public wrapper: extract by index into a caller buffer (no user read buffer). */
mz_bool mz_zip_reader_extract_to_mem(mz_zip_archive *pZip, mz_uint file_index, void *pBuf, size_t buf_size, mz_uint flags)
{
    return mz_zip_reader_extract_to_mem_no_alloc1(pZip, file_index, pBuf, buf_size, flags, NULL, 0, NULL);
}

/* Public wrapper: locate by filename, then extract into a caller buffer. */
mz_bool mz_zip_reader_extract_file_to_mem(mz_zip_archive *pZip, const char *pFilename, void *pBuf, size_t buf_size, mz_uint flags)
{
    return mz_zip_reader_extract_file_to_mem_no_alloc(pZip, pFilename, pBuf, buf_size, flags, NULL, 0);
}

/* Extract an entry into a freshly allocated heap buffer (via m_pAlloc).
   Returns the buffer (caller frees with the archive's allocator) and writes
   its size to *pSize, or returns NULL on failure. */
void *mz_zip_reader_extract_to_heap(mz_zip_archive *pZip, mz_uint file_index, size_t *pSize, mz_uint flags)
{
    mz_zip_archive_file_stat file_stat;
    mz_uint64 alloc_size;
    void *pBuf;
    if (pSize)
        *pSize = 0;
    if (!mz_zip_reader_file_stat(pZip, file_index, &file_stat))
        return NULL;
    alloc_size = (flags & MZ_ZIP_FLAG_COMPRESSED_DATA) ? file_stat.m_comp_size : file_stat.m_uncomp_size;
    /* Guard against size_t truncation on 32-bit targets. */
    if (((sizeof(size_t) == sizeof(mz_uint32))) && (alloc_size > 0x7FFFFFFF))
    {
        mz_zip_set_error(pZip, MZ_ZIP_INTERNAL_ERROR);
        return NULL;
    }
    if (NULL == (pBuf = pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, (size_t)alloc_size)))
    {
        mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED);
        return NULL;
    }
    /* Pass the stat we already fetched so it is not re-read. */
    if (!mz_zip_reader_extract_to_mem_no_alloc1(pZip, file_index, pBuf, (size_t)alloc_size, flags, NULL, 0, &file_stat))
    {
        pZip->m_pFree(pZip->m_pAlloc_opaque, pBuf);
        return NULL;
    }
    if (pSize)
        *pSize = (size_t)alloc_size;
    return pBuf;
}

/* Locate by filename, then extract to a heap buffer (see above). */
void *mz_zip_reader_extract_file_to_heap(mz_zip_archive *pZip, const char *pFilename, size_t *pSize, mz_uint flags)
{
    mz_uint32 file_index;
    if (!mz_zip_reader_locate_file_v2(pZip, pFilename, NULL, flags, &file_index))
    {
        if (pSize)
            *pSize = 0;
        return MZ_FALSE;
    }
    return mz_zip_reader_extract_to_heap(pZip, file_index, pSize, flags);
}

/* Extract an entry by streaming it to pCallback(pOpaque, ofs, buf, n) in
   chunks.  Supports stored and deflate; computes the CRC32 on the fly unless
   compressed data was requested. */
mz_bool mz_zip_reader_extract_to_callback(mz_zip_archive *pZip, mz_uint file_index, mz_file_write_func pCallback, void *pOpaque, mz_uint flags)
{
    int status = TINFL_STATUS_DONE;
#ifndef MINIZ_DISABLE_ZIP_READER_CRC32_CHECKS
    mz_uint file_crc32 = MZ_CRC32_INIT;
#endif
    mz_uint64 read_buf_size, read_buf_ofs = 0, read_buf_avail, comp_remaining, out_buf_ofs = 0, cur_file_ofs;
    mz_zip_archive_file_stat file_stat;
    void *pRead_buf = NULL;
    void *pWrite_buf = NULL;
    mz_uint32 local_header_u32[(MZ_ZIP_LOCAL_DIR_HEADER_SIZE + sizeof(mz_uint32) - 1) / sizeof(mz_uint32)];
    mz_uint8 *pLocal_header = (mz_uint8 *)local_header_u32;
    if ((!pZip) || (!pZip->m_pState) || (!pCallback) || (!pZip->m_pRead))
        return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER);
    if (!mz_zip_reader_file_stat(pZip, file_index, &file_stat))
        return MZ_FALSE;
    /* A directory or zero length file */
    if ((file_stat.m_is_directory) || (!file_stat.m_comp_size))
        return MZ_TRUE;
    /* Encryption and patch files are not supported.
    */
    if (file_stat.m_bit_flag & (MZ_ZIP_GENERAL_PURPOSE_BIT_FLAG_IS_ENCRYPTED | MZ_ZIP_GENERAL_PURPOSE_BIT_FLAG_USES_STRONG_ENCRYPTION | MZ_ZIP_GENERAL_PURPOSE_BIT_FLAG_COMPRESSED_PATCH_FLAG))
        return mz_zip_set_error(pZip, MZ_ZIP_UNSUPPORTED_ENCRYPTION);
    /* This function only supports decompressing stored and deflate. */
    if ((!(flags & MZ_ZIP_FLAG_COMPRESSED_DATA)) && (file_stat.m_method != 0) && (file_stat.m_method != MZ_DEFLATED))
        return mz_zip_set_error(pZip, MZ_ZIP_UNSUPPORTED_METHOD);
    /* Read and do some minimal validation of the local directory entry (this doesn't crack the zip64 stuff, which we already have from the central dir) */
    cur_file_ofs = file_stat.m_local_header_ofs;
    if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pLocal_header, MZ_ZIP_LOCAL_DIR_HEADER_SIZE) != MZ_ZIP_LOCAL_DIR_HEADER_SIZE)
        return mz_zip_set_error(pZip, MZ_ZIP_FILE_READ_FAILED);
    if (MZ_READ_LE32(pLocal_header) != MZ_ZIP_LOCAL_DIR_HEADER_SIG)
        return mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED);
    /* Skip the local header's filename and extra field to reach the file data. */
    cur_file_ofs += MZ_ZIP_LOCAL_DIR_HEADER_SIZE + MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_FILENAME_LEN_OFS) + MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_EXTRA_LEN_OFS);
    if ((cur_file_ofs + file_stat.m_comp_size) > pZip->m_archive_size)
        return mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED);
    /* Decompress the file either directly from memory or from a file input buffer. */
    if (pZip->m_pState->m_pMem)
    {
        pRead_buf = (mz_uint8 *)pZip->m_pState->m_pMem + cur_file_ofs;
        read_buf_size = read_buf_avail = file_stat.m_comp_size;
        comp_remaining = 0;
    }
    else
    {
        read_buf_size = MZ_MIN(file_stat.m_comp_size, (mz_uint64)MZ_ZIP_MAX_IO_BUF_SIZE);
        if (NULL == (pRead_buf = pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, (size_t)read_buf_size)))
            return mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED);
        read_buf_avail = 0;
        comp_remaining = file_stat.m_comp_size;
    }
    if ((flags & MZ_ZIP_FLAG_COMPRESSED_DATA) || (!file_stat.m_method))
    {
        /* The file is stored or the caller has requested the compressed data. */
        if (pZip->m_pState->m_pMem)
        {
            /* In-memory archive: hand the whole entry to the callback in one call. */
            if (((sizeof(size_t) == sizeof(mz_uint32))) && (file_stat.m_comp_size > MZ_UINT32_MAX))
                return mz_zip_set_error(pZip, MZ_ZIP_INTERNAL_ERROR);
            if (pCallback(pOpaque, out_buf_ofs, pRead_buf, (size_t)file_stat.m_comp_size) != file_stat.m_comp_size)
            {
                mz_zip_set_error(pZip, MZ_ZIP_WRITE_CALLBACK_FAILED);
                status = TINFL_STATUS_FAILED;
            }
            else if (!(flags & MZ_ZIP_FLAG_COMPRESSED_DATA))
            {
#ifndef MINIZ_DISABLE_ZIP_READER_CRC32_CHECKS
                file_crc32 = (mz_uint32)mz_crc32(file_crc32, (const mz_uint8 *)pRead_buf, (size_t)file_stat.m_comp_size);
#endif
            }
            cur_file_ofs += file_stat.m_comp_size;
            out_buf_ofs += file_stat.m_comp_size;
            comp_remaining = 0;
        }
        else
        {
            /* File-backed archive: stream the entry through the read buffer. */
            while (comp_remaining)
            {
                read_buf_avail = MZ_MIN(read_buf_size, comp_remaining);
                if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pRead_buf, (size_t)read_buf_avail) != read_buf_avail)
                {
                    mz_zip_set_error(pZip, MZ_ZIP_FILE_READ_FAILED);
                    status = TINFL_STATUS_FAILED;
                    break;
                }
#ifndef MINIZ_DISABLE_ZIP_READER_CRC32_CHECKS
                if (!(flags & MZ_ZIP_FLAG_COMPRESSED_DATA))
                {
                    file_crc32 = (mz_uint32)mz_crc32(file_crc32, (const mz_uint8 *)pRead_buf, (size_t)read_buf_avail);
                }
#endif
                if (pCallback(pOpaque, out_buf_ofs, pRead_buf, (size_t)read_buf_avail) != read_buf_avail)
                {
                    mz_zip_set_error(pZip, MZ_ZIP_WRITE_CALLBACK_FAILED);
                    status = TINFL_STATUS_FAILED;
                    break;
                }
                cur_file_ofs += read_buf_avail;
                out_buf_ofs += read_buf_avail;
                comp_remaining -= read_buf_avail;
            }
        }
    }
    else
    {
        /* Deflate entry: inflate through a dictionary-sized circular write buffer. */
        tinfl_decompressor inflator;
        tinfl_init(&inflator);
        if (NULL == (pWrite_buf = pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, TINFL_LZ_DICT_SIZE)))
        {
            mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED);
            status = TINFL_STATUS_FAILED;
        }
        else
        {
            do
            {
                mz_uint8 *pWrite_buf_cur = (mz_uint8 *)pWrite_buf + (out_buf_ofs & (TINFL_LZ_DICT_SIZE - 1));
                size_t in_buf_size, out_buf_size = TINFL_LZ_DICT_SIZE - (out_buf_ofs & (TINFL_LZ_DICT_SIZE - 1));
                if ((!read_buf_avail) && (!pZip->m_pState->m_pMem))
                {
                    read_buf_avail = MZ_MIN(read_buf_size, comp_remaining);
                    if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pRead_buf, (size_t)read_buf_avail) != read_buf_avail)
                    {
                        mz_zip_set_error(pZip, MZ_ZIP_FILE_READ_FAILED);
                        status = TINFL_STATUS_FAILED;
                        break;
                    }
                    cur_file_ofs += read_buf_avail;
                    comp_remaining -= read_buf_avail;
                    read_buf_ofs = 0;
                }
                in_buf_size = (size_t)read_buf_avail;
                status = tinfl_decompress(&inflator, (const mz_uint8 *)pRead_buf + read_buf_ofs, &in_buf_size, (mz_uint8 *)pWrite_buf, pWrite_buf_cur, &out_buf_size, comp_remaining ? TINFL_FLAG_HAS_MORE_INPUT : 0);
                read_buf_avail -= in_buf_size;
                read_buf_ofs += in_buf_size;
                if (out_buf_size)
                {
                    /* Flush the newly produced bytes to the callback before the next round. */
                    if (pCallback(pOpaque, out_buf_ofs, pWrite_buf_cur, out_buf_size) != out_buf_size)
                    {
                        mz_zip_set_error(pZip, MZ_ZIP_WRITE_CALLBACK_FAILED);
                        status = TINFL_STATUS_FAILED;
                        break;
                    }
#ifndef MINIZ_DISABLE_ZIP_READER_CRC32_CHECKS
                    file_crc32 = (mz_uint32)mz_crc32(file_crc32, pWrite_buf_cur, out_buf_size);
#endif
                    /* Sanity check: never emit more than the declared uncompressed size. */
                    if ((out_buf_ofs += out_buf_size) > file_stat.m_uncomp_size)
                    {
                        mz_zip_set_error(pZip, MZ_ZIP_DECOMPRESSION_FAILED);
                        status = TINFL_STATUS_FAILED;
                        break;
                    }
                }
            } while ((status == TINFL_STATUS_NEEDS_MORE_INPUT) || (status == TINFL_STATUS_HAS_MORE_OUTPUT));
        }
    }
    if ((status == TINFL_STATUS_DONE) && (!(flags & MZ_ZIP_FLAG_COMPRESSED_DATA)))
    {
        /* Make sure the entire file was decompressed, and check its CRC.
    */
    if (pState->file_stat.m_bit_flag & (MZ_ZIP_GENERAL_PURPOSE_BIT_FLAG_IS_ENCRYPTED | MZ_ZIP_GENERAL_PURPOSE_BIT_FLAG_USES_STRONG_ENCRYPTION | MZ_ZIP_GENERAL_PURPOSE_BIT_FLAG_COMPRESSED_PATCH_FLAG))
    {
        mz_zip_set_error(pZip, MZ_ZIP_UNSUPPORTED_ENCRYPTION);
        pZip->m_pFree(pZip->m_pAlloc_opaque, pState);
        return NULL;
    }
    /* This function only supports decompressing stored and deflate. */
    if ((!(flags & MZ_ZIP_FLAG_COMPRESSED_DATA)) && (pState->file_stat.m_method != 0) && (pState->file_stat.m_method != MZ_DEFLATED))
    {
        mz_zip_set_error(pZip, MZ_ZIP_UNSUPPORTED_METHOD);
        pZip->m_pFree(pZip->m_pAlloc_opaque, pState);
        return NULL;
    }
    /* Init state - save args */
    pState->pZip = pZip;
    pState->flags = flags;
    /* Init state - reset variables to defaults */
    pState->status = TINFL_STATUS_DONE;
#ifndef MINIZ_DISABLE_ZIP_READER_CRC32_CHECKS
    pState->file_crc32 = MZ_CRC32_INIT;
#endif
    pState->read_buf_ofs = 0;
    pState->out_buf_ofs = 0;
    pState->pRead_buf = NULL;
    pState->pWrite_buf = NULL;
    pState->out_blk_remain = 0;
    /* Read and parse the local directory entry. */
    pState->cur_file_ofs = pState->file_stat.m_local_header_ofs;
    if (pZip->m_pRead(pZip->m_pIO_opaque, pState->cur_file_ofs, pLocal_header, MZ_ZIP_LOCAL_DIR_HEADER_SIZE) != MZ_ZIP_LOCAL_DIR_HEADER_SIZE)
    {
        mz_zip_set_error(pZip, MZ_ZIP_FILE_READ_FAILED);
        pZip->m_pFree(pZip->m_pAlloc_opaque, pState);
        return NULL;
    }
    if (MZ_READ_LE32(pLocal_header) != MZ_ZIP_LOCAL_DIR_HEADER_SIG)
    {
        mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED);
        pZip->m_pFree(pZip->m_pAlloc_opaque, pState);
        return NULL;
    }
    /* Skip the local header's filename and extra field to reach the file data. */
    pState->cur_file_ofs += MZ_ZIP_LOCAL_DIR_HEADER_SIZE + MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_FILENAME_LEN_OFS) + MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_EXTRA_LEN_OFS);
    if ((pState->cur_file_ofs + pState->file_stat.m_comp_size) > pZip->m_archive_size)
    {
        mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED);
        pZip->m_pFree(pZip->m_pAlloc_opaque, pState);
        return NULL;
    }
    /* Decompress the file either directly from memory or from a file input buffer. */
    if (pZip->m_pState->m_pMem)
    {
        pState->pRead_buf = (mz_uint8 *)pZip->m_pState->m_pMem + pState->cur_file_ofs;
        pState->read_buf_size = pState->read_buf_avail = pState->file_stat.m_comp_size;
        pState->comp_remaining = pState->file_stat.m_comp_size;
    }
    else
    {
        if (!((flags & MZ_ZIP_FLAG_COMPRESSED_DATA) || (!pState->file_stat.m_method)))
        {
            /* Decompression required, therefore intermediate read buffer required */
            pState->read_buf_size = MZ_MIN(pState->file_stat.m_comp_size, (mz_uint64)MZ_ZIP_MAX_IO_BUF_SIZE);
            if (NULL == (pState->pRead_buf = pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, (size_t)pState->read_buf_size)))
            {
                mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED);
                pZip->m_pFree(pZip->m_pAlloc_opaque, pState);
                return NULL;
            }
        }
        else
        {
            /* Decompression not required - we will be reading directly into user buffer, no temp buf required */
            pState->read_buf_size = 0;
        }
        pState->read_buf_avail = 0;
        pState->comp_remaining = pState->file_stat.m_comp_size;
    }
    if (!((flags & MZ_ZIP_FLAG_COMPRESSED_DATA) || (!pState->file_stat.m_method)))
    {
        /* Decompression required, init decompressor */
        tinfl_init( &pState->inflator );
        /* Allocate write buffer */
        if (NULL == (pState->pWrite_buf = pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, TINFL_LZ_DICT_SIZE)))
        {
            mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED);
            if (pState->pRead_buf)
                pZip->m_pFree(pZip->m_pAlloc_opaque, pState->pRead_buf);
            pZip->m_pFree(pZip->m_pAlloc_opaque, pState);
            return NULL;
        }
    }
    return pState;
}

/* Convenience constructor: locate the entry by filename, then build an
   extraction iterator for it (see mz_zip_reader_extract_iter_new). */
mz_zip_reader_extract_iter_state* mz_zip_reader_extract_file_iter_new(mz_zip_archive *pZip, const char *pFilename, mz_uint flags)
{
    mz_uint32 file_index;
    /* Locate file index by name */
    if (!mz_zip_reader_locate_file_v2(pZip, pFilename, NULL, flags, &file_index))
        return NULL;
    /* Construct iterator */
    return mz_zip_reader_extract_iter_new(pZip, file_index, flags);
}

/* Pull up to buf_size bytes of (de)compressed entry data from the iterator
   into pvBuf.  Returns the number of bytes copied; 0 on bad arguments or
   after a failure (pState->status records the failure). */
size_t mz_zip_reader_extract_iter_read(mz_zip_reader_extract_iter_state* pState, void* pvBuf, size_t buf_size)
{
    size_t copied_to_caller = 0;
    /* Argument sanity check */
    if ((!pState) || (!pState->pZip) || (!pState->pZip->m_pState) || (!pvBuf))
        return 0;
    if ((pState->flags & MZ_ZIP_FLAG_COMPRESSED_DATA) || (!pState->file_stat.m_method))
    {
        /* The file is stored or the caller has requested the compressed data, calc amount to return. */
        copied_to_caller = (size_t)MZ_MIN( buf_size, pState->comp_remaining );
        /* Zip is in memory....or requires reading from a file?
        */
        if (pState->pZip->m_pState->m_pMem)
        {
            /* Copy data to caller's buffer */
            memcpy( pvBuf, pState->pRead_buf, copied_to_caller );
            pState->pRead_buf = ((mz_uint8*)pState->pRead_buf) + copied_to_caller;
        }
        else
        {
            /* Read directly into caller's buffer */
            if (pState->pZip->m_pRead(pState->pZip->m_pIO_opaque, pState->cur_file_ofs, pvBuf, copied_to_caller) != copied_to_caller)
            {
                /* Failed to read all that was asked for, flag failure and alert user */
                mz_zip_set_error(pState->pZip, MZ_ZIP_FILE_READ_FAILED);
                pState->status = TINFL_STATUS_FAILED;
                copied_to_caller = 0;
            }
        }
#ifndef MINIZ_DISABLE_ZIP_READER_CRC32_CHECKS
        /* Compute CRC if not returning compressed data only */
        if (!(pState->flags & MZ_ZIP_FLAG_COMPRESSED_DATA))
            pState->file_crc32 = (mz_uint32)mz_crc32(pState->file_crc32, (const mz_uint8 *)pvBuf, copied_to_caller);
#endif
        /* Advance offsets, dec counters */
        pState->cur_file_ofs += copied_to_caller;
        pState->out_buf_ofs += copied_to_caller;
        pState->comp_remaining -= copied_to_caller;
    }
    else
    {
        /* Deflate path: inflate into the circular write buffer and drain it
           into the caller's buffer, repeating until buf_size is satisfied or
           the stream ends/fails. */
        do
        {
            /* Calc ptr to write buffer - given current output pos and block size */
            mz_uint8 *pWrite_buf_cur = (mz_uint8 *)pState->pWrite_buf + (pState->out_buf_ofs & (TINFL_LZ_DICT_SIZE - 1));
            /* Calc max output size - given current output pos and block size */
            size_t in_buf_size, out_buf_size = TINFL_LZ_DICT_SIZE - (pState->out_buf_ofs & (TINFL_LZ_DICT_SIZE - 1));
            if (!pState->out_blk_remain)
            {
                /* Read more data from file if none available (and reading from file) */
                if ((!pState->read_buf_avail) && (!pState->pZip->m_pState->m_pMem))
                {
                    /* Calc read size */
                    pState->read_buf_avail = MZ_MIN(pState->read_buf_size, pState->comp_remaining);
                    if (pState->pZip->m_pRead(pState->pZip->m_pIO_opaque, pState->cur_file_ofs, pState->pRead_buf, (size_t)pState->read_buf_avail) != pState->read_buf_avail)
                    {
                        mz_zip_set_error(pState->pZip, MZ_ZIP_FILE_READ_FAILED);
                        pState->status = TINFL_STATUS_FAILED;
                        break;
                    }
                    /* Advance offsets, dec counters */
                    pState->cur_file_ofs += pState->read_buf_avail;
                    pState->comp_remaining -= pState->read_buf_avail;
                    pState->read_buf_ofs = 0;
                }
                /* Perform decompression */
                in_buf_size = (size_t)pState->read_buf_avail;
                pState->status = tinfl_decompress(&pState->inflator, (const mz_uint8 *)pState->pRead_buf + pState->read_buf_ofs, &in_buf_size, (mz_uint8 *)pState->pWrite_buf, pWrite_buf_cur, &out_buf_size, pState->comp_remaining ? TINFL_FLAG_HAS_MORE_INPUT : 0);
                pState->read_buf_avail -= in_buf_size;
                pState->read_buf_ofs += in_buf_size;
                /* Update current output block size remaining */
                pState->out_blk_remain = out_buf_size;
            }
            if (pState->out_blk_remain)
            {
                /* Calc amount to return. */
                size_t to_copy = MZ_MIN( (buf_size - copied_to_caller), pState->out_blk_remain );
                /* Copy data to caller's buffer */
                memcpy( (mz_uint8*)pvBuf + copied_to_caller, pWrite_buf_cur, to_copy );
#ifndef MINIZ_DISABLE_ZIP_READER_CRC32_CHECKS
                /* Perform CRC */
                pState->file_crc32 = (mz_uint32)mz_crc32(pState->file_crc32, pWrite_buf_cur, to_copy);
#endif
                /* Decrement data consumed from block */
                pState->out_blk_remain -= to_copy;
                /* Inc output offset, while performing sanity check */
                if ((pState->out_buf_ofs += to_copy) > pState->file_stat.m_uncomp_size)
                {
                    mz_zip_set_error(pState->pZip, MZ_ZIP_DECOMPRESSION_FAILED);
                    pState->status = TINFL_STATUS_FAILED;
                    break;
                }
                /* Increment counter of data copied to caller */
                copied_to_caller += to_copy;
            }
        } while ( (copied_to_caller < buf_size) && ((pState->status == TINFL_STATUS_NEEDS_MORE_INPUT) || (pState->status == TINFL_STATUS_HAS_MORE_OUTPUT)) );
    }
    /* Return how many bytes were copied into user buffer */
    return copied_to_caller;
}

/* Finish an extraction iterator: verify size/CRC (when full decompression was
   requested and completed), free all buffers and the iterator itself, and
   return MZ_TRUE only when the whole extraction succeeded. */
mz_bool mz_zip_reader_extract_iter_free(mz_zip_reader_extract_iter_state* pState)
{
    int status;
    /* Argument sanity check */
    if ((!pState) || (!pState->pZip) || (!pState->pZip->m_pState))
        return MZ_FALSE;
    /* Was decompression completed and requested?
    */
    if ((pState->status == TINFL_STATUS_DONE) && (!(pState->flags & MZ_ZIP_FLAG_COMPRESSED_DATA)))
    {
        /* Make sure the entire file was decompressed, and check its CRC. */
        if (pState->out_buf_ofs != pState->file_stat.m_uncomp_size)
        {
            mz_zip_set_error(pState->pZip, MZ_ZIP_UNEXPECTED_DECOMPRESSED_SIZE);
            pState->status = TINFL_STATUS_FAILED;
        }
#ifndef MINIZ_DISABLE_ZIP_READER_CRC32_CHECKS
        else if (pState->file_crc32 != pState->file_stat.m_crc32)
        {
            mz_zip_set_error(pState->pZip, MZ_ZIP_DECOMPRESSION_FAILED);
            pState->status = TINFL_STATUS_FAILED;
        }
#endif
    }
    /* Free buffers */
    if (!pState->pZip->m_pState->m_pMem)
        pState->pZip->m_pFree(pState->pZip->m_pAlloc_opaque, pState->pRead_buf);
    if (pState->pWrite_buf)
        pState->pZip->m_pFree(pState->pZip->m_pAlloc_opaque, pState->pWrite_buf);
    /* Save status */
    status = pState->status;
    /* Free context */
    pState->pZip->m_pFree(pState->pZip->m_pAlloc_opaque, pState);
    return status == TINFL_STATUS_DONE;
}

#ifndef MINIZ_NO_STDIO
/* mz_file_write_func adapter that appends to an open MZ_FILE; the offset is
   ignored because extraction writes sequentially. */
static size_t mz_zip_file_write_callback(void *pOpaque, mz_uint64 ofs, const void *pBuf, size_t n)
{
    (void)ofs;
    return MZ_FWRITE(pBuf, 1, n, (MZ_FILE *)pOpaque);
}

/* Extract an entry by index to a newly created file on disk; on success the
   file's times are set from the archive entry (unless MINIZ_NO_TIME). */
mz_bool mz_zip_reader_extract_to_file(mz_zip_archive *pZip, mz_uint file_index, const char *pDst_filename, mz_uint flags)
{
    mz_bool status;
    mz_zip_archive_file_stat file_stat;
    MZ_FILE *pFile;
    if (!mz_zip_reader_file_stat(pZip, file_index, &file_stat))
        return MZ_FALSE;
    if ((file_stat.m_is_directory) || (!file_stat.m_is_supported))
        return mz_zip_set_error(pZip, MZ_ZIP_UNSUPPORTED_FEATURE);
    pFile = MZ_FOPEN(pDst_filename, "wb");
    if (!pFile)
        return mz_zip_set_error(pZip, MZ_ZIP_FILE_OPEN_FAILED);
    status = mz_zip_reader_extract_to_callback(pZip, file_index, mz_zip_file_write_callback, pFile, flags);
    /* A failed close invalidates an otherwise successful extraction. */
    if (MZ_FCLOSE(pFile) == EOF)
    {
        if (status)
            mz_zip_set_error(pZip, MZ_ZIP_FILE_CLOSE_FAILED);
        status = MZ_FALSE;
    }
#if !defined(MINIZ_NO_TIME) && !defined(MINIZ_NO_STDIO)
    if (status)
        mz_zip_set_file_times(pDst_filename, file_stat.m_time, file_stat.m_time);
#endif
    return status;
}

/* Locate an entry by archive filename, then extract it to a disk file. */
mz_bool mz_zip_reader_extract_file_to_file(mz_zip_archive *pZip, const char *pArchive_filename, const char *pDst_filename, mz_uint flags)
{
    mz_uint32 file_index;
    if (!mz_zip_reader_locate_file_v2(pZip, pArchive_filename, NULL, flags, &file_index))
        return MZ_FALSE;
    return mz_zip_reader_extract_to_file(pZip, file_index, pDst_filename, flags);
}

/* Extract an entry by index, writing into an already-open MZ_FILE stream. */
mz_bool mz_zip_reader_extract_to_cfile(mz_zip_archive *pZip, mz_uint file_index, MZ_FILE *pFile, mz_uint flags)
{
    mz_zip_archive_file_stat file_stat;
    if (!mz_zip_reader_file_stat(pZip, file_index, &file_stat))
        return MZ_FALSE;
    if ((file_stat.m_is_directory) || (!file_stat.m_is_supported))
        return mz_zip_set_error(pZip, MZ_ZIP_UNSUPPORTED_FEATURE);
    return mz_zip_reader_extract_to_callback(pZip, file_index, mz_zip_file_write_callback, pFile, flags);
}

/* Locate an entry by archive filename, then extract into an open MZ_FILE. */
mz_bool mz_zip_reader_extract_file_to_cfile(mz_zip_archive *pZip, const char *pArchive_filename, MZ_FILE *pFile, mz_uint flags)
{
    mz_uint32 file_index;
    if (!mz_zip_reader_locate_file_v2(pZip, pArchive_filename, NULL, flags, &file_index))
        return MZ_FALSE;
    return mz_zip_reader_extract_to_cfile(pZip, file_index, pFile, flags);
}
#endif /* #ifndef MINIZ_NO_STDIO */

/* mz_file_write_func adapter that folds extracted bytes into the mz_uint32
   CRC accumulator pointed to by pOpaque (used by mz_zip_validate_file). */
static size_t mz_zip_compute_crc32_callback(void *pOpaque, mz_uint64 file_ofs, const void *pBuf, size_t n)
{
    mz_uint32 *p = (mz_uint32 *)pOpaque;
    (void)file_ofs;
    *p = (mz_uint32)mz_crc32(*p, (const mz_uint8 *)pBuf, n);
    return n;
}

/* Validate a single archive entry: cross-checks the local header (filename,
   sizes, CRC, zip64 extra data, optional data descriptor) against the central
   directory, and - unless MZ_ZIP_FLAG_VALIDATE_HEADERS_ONLY - fully extracts
   the entry to verify its CRC32. */
mz_bool mz_zip_validate_file(mz_zip_archive *pZip, mz_uint file_index, mz_uint flags)
{
    mz_zip_archive_file_stat file_stat;
    mz_zip_internal_state *pState;
    const mz_uint8 *pCentral_dir_header;
    mz_bool found_zip64_ext_data_in_cdir = MZ_FALSE;
    mz_bool found_zip64_ext_data_in_ldir = MZ_FALSE;
    mz_uint32 local_header_u32[(MZ_ZIP_LOCAL_DIR_HEADER_SIZE + sizeof(mz_uint32) - 1) / sizeof(mz_uint32)];
    mz_uint8 *pLocal_header = (mz_uint8 *)local_header_u32;
    mz_uint64 local_header_ofs = 0;
    mz_uint32 local_header_filename_len, local_header_extra_len, local_header_crc32;
    mz_uint64
              local_header_comp_size, local_header_uncomp_size;
    mz_uint32 uncomp_crc32 = MZ_CRC32_INIT;
    mz_bool has_data_descriptor;
    mz_uint32 local_header_bit_flags;
    mz_zip_array file_data_array;
    mz_zip_array_init(&file_data_array, 1);
    if ((!pZip) || (!pZip->m_pState) || (!pZip->m_pAlloc) || (!pZip->m_pFree) || (!pZip->m_pRead))
        return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER);
    /* NOTE(review): this bound uses '>' not '>=', so file_index == m_total_files
       slips through to mz_zip_get_cdh() below - confirm that helper rejects it. */
    if (file_index > pZip->m_total_files)
        return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER);
    pState = pZip->m_pState;
    pCentral_dir_header = mz_zip_get_cdh(pZip, file_index);
    if (!mz_zip_file_stat_internal(pZip, file_index, pCentral_dir_header, &file_stat, &found_zip64_ext_data_in_cdir))
        return MZ_FALSE;
    /* A directory or zero length file */
    if ((file_stat.m_is_directory) || (!file_stat.m_uncomp_size))
        return MZ_TRUE;
    /* Encryption and patch files are not supported. */
    if (file_stat.m_is_encrypted)
        return mz_zip_set_error(pZip, MZ_ZIP_UNSUPPORTED_ENCRYPTION);
    /* This function only supports stored and deflate. */
    if ((file_stat.m_method != 0) && (file_stat.m_method != MZ_DEFLATED))
        return mz_zip_set_error(pZip, MZ_ZIP_UNSUPPORTED_METHOD);
    if (!file_stat.m_is_supported)
        return mz_zip_set_error(pZip, MZ_ZIP_UNSUPPORTED_FEATURE);
    /* Read and parse the local directory entry. */
    local_header_ofs = file_stat.m_local_header_ofs;
    if (pZip->m_pRead(pZip->m_pIO_opaque, local_header_ofs, pLocal_header, MZ_ZIP_LOCAL_DIR_HEADER_SIZE) != MZ_ZIP_LOCAL_DIR_HEADER_SIZE)
        return mz_zip_set_error(pZip, MZ_ZIP_FILE_READ_FAILED);
    if (MZ_READ_LE32(pLocal_header) != MZ_ZIP_LOCAL_DIR_HEADER_SIG)
        return mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED);
    /* Pull the fields we validate out of the raw local header. */
    local_header_filename_len = MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_FILENAME_LEN_OFS);
    local_header_extra_len = MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_EXTRA_LEN_OFS);
    local_header_comp_size = MZ_READ_LE32(pLocal_header + MZ_ZIP_LDH_COMPRESSED_SIZE_OFS);
    local_header_uncomp_size = MZ_READ_LE32(pLocal_header + MZ_ZIP_LDH_DECOMPRESSED_SIZE_OFS);
    local_header_crc32 = MZ_READ_LE32(pLocal_header + MZ_ZIP_LDH_CRC32_OFS);
    local_header_bit_flags = MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_BIT_FLAG_OFS);
    /* Bit 3 of the general-purpose flags marks a trailing data descriptor. */
    has_data_descriptor = (local_header_bit_flags & 8) != 0;
    if (local_header_filename_len != strlen(file_stat.m_filename))
        return mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED);
    if ((local_header_ofs + MZ_ZIP_LOCAL_DIR_HEADER_SIZE + local_header_filename_len + local_header_extra_len + file_stat.m_comp_size) > pZip->m_archive_size)
        return mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED);
    /* One scratch buffer reused for both the filename and the extra data. */
    if (!mz_zip_array_resize(pZip, &file_data_array, MZ_MAX(local_header_filename_len, local_header_extra_len), MZ_FALSE))
    {
        mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED);
        goto handle_failure;
    }
    if (local_header_filename_len)
    {
        if (pZip->m_pRead(pZip->m_pIO_opaque, local_header_ofs + MZ_ZIP_LOCAL_DIR_HEADER_SIZE, file_data_array.m_p, local_header_filename_len) != local_header_filename_len)
        {
            mz_zip_set_error(pZip, MZ_ZIP_FILE_READ_FAILED);
            goto handle_failure;
        }
        /* I've seen 1 archive that had the same pathname, but used backslashes in the local dir and forward slashes in the central dir. Do we care about this? For now, this case will fail validation. */
        if (memcmp(file_stat.m_filename, file_data_array.m_p, local_header_filename_len) != 0)
        {
            mz_zip_set_error(pZip, MZ_ZIP_VALIDATION_FAILED);
            goto handle_failure;
        }
    }
    if ((local_header_extra_len) && ((local_header_comp_size == MZ_UINT32_MAX) || (local_header_uncomp_size == MZ_UINT32_MAX)))
    {
        /* Sizes are sentinel 0xFFFFFFFF: the real 64-bit sizes live in the
           zip64 extended-information extra field; walk the extra records. */
        mz_uint32 extra_size_remaining = local_header_extra_len;
        const mz_uint8 *pExtra_data = (const mz_uint8 *)file_data_array.m_p;
        if (pZip->m_pRead(pZip->m_pIO_opaque, local_header_ofs + MZ_ZIP_LOCAL_DIR_HEADER_SIZE + local_header_filename_len, file_data_array.m_p, local_header_extra_len) != local_header_extra_len)
        {
            mz_zip_set_error(pZip, MZ_ZIP_FILE_READ_FAILED);
            goto handle_failure;
        }
        do
        {
            mz_uint32 field_id, field_data_size, field_total_size;
            if (extra_size_remaining < (sizeof(mz_uint16) * 2))
            {
                mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED);
                goto handle_failure;
            }
            field_id = MZ_READ_LE16(pExtra_data);
            field_data_size = MZ_READ_LE16(pExtra_data + sizeof(mz_uint16));
            field_total_size = field_data_size + sizeof(mz_uint16) * 2;
            if (field_total_size > extra_size_remaining)
            {
                mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED);
                goto handle_failure;
            }
            if (field_id == MZ_ZIP64_EXTENDED_INFORMATION_FIELD_HEADER_ID)
            {
                const mz_uint8 *pSrc_field_data = pExtra_data + sizeof(mz_uint32);
                if (field_data_size < sizeof(mz_uint64) * 2)
                {
                    mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED);
                    goto handle_failure;
                }
                local_header_uncomp_size = MZ_READ_LE64(pSrc_field_data);
                local_header_comp_size = MZ_READ_LE64(pSrc_field_data + sizeof(mz_uint64));
                found_zip64_ext_data_in_ldir = MZ_TRUE;
                break;
            }
            pExtra_data += field_total_size;
            extra_size_remaining -= field_total_size;
        } while (extra_size_remaining);
    }
    /* TODO: parse local header extra data when local_header_comp_size is 0xFFFFFFFF! (big_descriptor.zip) */
    /* I've seen zips in the wild with the data descriptor bit set, but proper local header values and bogus data descriptors */
    if ((has_data_descriptor) && (!local_header_comp_size) && (!local_header_crc32))
    {
        mz_uint8 descriptor_buf[32];
        mz_bool has_id;
        const mz_uint8 *pSrc;
        mz_uint32 file_crc32;
        mz_uint64 comp_size = 0, uncomp_size = 0;
        /* zip64 descriptors carry 64-bit sizes (6 uint32 slots), else 4. */
        mz_uint32 num_descriptor_uint32s = ((pState->m_zip64) || (found_zip64_ext_data_in_ldir)) ? 6 : 4;
        if (pZip->m_pRead(pZip->m_pIO_opaque, local_header_ofs + MZ_ZIP_LOCAL_DIR_HEADER_SIZE + local_header_filename_len + local_header_extra_len + file_stat.m_comp_size, descriptor_buf, sizeof(mz_uint32) * num_descriptor_uint32s) != (sizeof(mz_uint32) * num_descriptor_uint32s))
        {
            mz_zip_set_error(pZip, MZ_ZIP_FILE_READ_FAILED);
            goto handle_failure;
        }
        /* The leading descriptor signature is optional per the ZIP spec. */
        has_id = (MZ_READ_LE32(descriptor_buf) == MZ_ZIP_DATA_DESCRIPTOR_ID);
        pSrc = has_id ? (descriptor_buf + sizeof(mz_uint32)) : descriptor_buf;
        file_crc32 = MZ_READ_LE32(pSrc);
        if ((pState->m_zip64) || (found_zip64_ext_data_in_ldir))
        {
            comp_size = MZ_READ_LE64(pSrc + sizeof(mz_uint32));
            uncomp_size = MZ_READ_LE64(pSrc + sizeof(mz_uint32) + sizeof(mz_uint64));
        }
        else
        {
            comp_size = MZ_READ_LE32(pSrc + sizeof(mz_uint32));
            uncomp_size = MZ_READ_LE32(pSrc + sizeof(mz_uint32) + sizeof(mz_uint32));
        }
        if ((file_crc32 != file_stat.m_crc32) || (comp_size != file_stat.m_comp_size) || (uncomp_size != file_stat.m_uncomp_size))
        {
            mz_zip_set_error(pZip, MZ_ZIP_VALIDATION_FAILED);
            goto handle_failure;
        }
    }
    else
    {
        if ((local_header_crc32 != file_stat.m_crc32) || (local_header_comp_size != file_stat.m_comp_size) || (local_header_uncomp_size != file_stat.m_uncomp_size))
        {
            mz_zip_set_error(pZip, MZ_ZIP_VALIDATION_FAILED);
            goto handle_failure;
        }
    }
    mz_zip_array_clear(pZip, &file_data_array);
    if ((flags & MZ_ZIP_FLAG_VALIDATE_HEADERS_ONLY) == 0)
    {
        /* Full validation: decompress the whole entry through the CRC callback. */
        if (!mz_zip_reader_extract_to_callback(pZip, file_index, mz_zip_compute_crc32_callback, &uncomp_crc32, 0))
            return MZ_FALSE;
        /* 1 more check to be sure,
although the extract checks too. */ if (uncomp_crc32 != file_stat.m_crc32) { mz_zip_set_error(pZip, MZ_ZIP_VALIDATION_FAILED); return MZ_FALSE; } } return MZ_TRUE; handle_failure: mz_zip_array_clear(pZip, &file_data_array); return MZ_FALSE; } mz_bool mz_zip_validate_archive(mz_zip_archive *pZip, mz_uint flags) { mz_zip_internal_state *pState; mz_uint32 i; if ((!pZip) || (!pZip->m_pState) || (!pZip->m_pAlloc) || (!pZip->m_pFree) || (!pZip->m_pRead)) return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); pState = pZip->m_pState; /* Basic sanity checks */ if (!pState->m_zip64) { if (pZip->m_total_files > MZ_UINT16_MAX) return mz_zip_set_error(pZip, MZ_ZIP_ARCHIVE_TOO_LARGE); if (pZip->m_archive_size > MZ_UINT32_MAX) return mz_zip_set_error(pZip, MZ_ZIP_ARCHIVE_TOO_LARGE); } else { if (pState->m_central_dir.m_size >= MZ_UINT32_MAX) return mz_zip_set_error(pZip, MZ_ZIP_ARCHIVE_TOO_LARGE); } for (i = 0; i < pZip->m_total_files; i++) { if (MZ_ZIP_FLAG_VALIDATE_LOCATE_FILE_FLAG & flags) { mz_uint32 found_index; mz_zip_archive_file_stat stat; if (!mz_zip_reader_file_stat(pZip, i, &stat)) return MZ_FALSE; if (!mz_zip_reader_locate_file_v2(pZip, stat.m_filename, NULL, 0, &found_index)) return MZ_FALSE; /* This check can fail if there are duplicate filenames in the archive (which we don't check for when writing - that's up to the user) */ if (found_index != i) return mz_zip_set_error(pZip, MZ_ZIP_VALIDATION_FAILED); } if (!mz_zip_validate_file(pZip, i, flags)) return MZ_FALSE; } return MZ_TRUE; } mz_bool mz_zip_validate_mem_archive(const void *pMem, size_t size, mz_uint flags, mz_zip_error *pErr) { mz_bool success = MZ_TRUE; mz_zip_archive zip; mz_zip_error actual_err = MZ_ZIP_NO_ERROR; if ((!pMem) || (!size)) { if (pErr) *pErr = MZ_ZIP_INVALID_PARAMETER; return MZ_FALSE; } mz_zip_zero_struct(&zip); if (!mz_zip_reader_init_mem(&zip, pMem, size, flags)) { if (pErr) *pErr = zip.m_last_error; return MZ_FALSE; } if (!mz_zip_validate_archive(&zip, flags)) { actual_err = 
zip.m_last_error;
        success = MZ_FALSE;
    }
    if (!mz_zip_reader_end_internal(&zip, success))
    {
        if (!actual_err)
            actual_err = zip.m_last_error;
        success = MZ_FALSE;
    }
    if (pErr)
        *pErr = actual_err;
    return success;
}

#ifndef MINIZ_NO_STDIO
/* Convenience wrapper: open a zip file from disk, validate it, and report the
   failing step's error (if any) through pErr. */
mz_bool mz_zip_validate_file_archive(const char *pFilename, mz_uint flags, mz_zip_error *pErr)
{
    mz_bool success = MZ_TRUE;
    mz_zip_archive zip;
    mz_zip_error actual_err = MZ_ZIP_NO_ERROR;
    if (!pFilename)
    {
        if (pErr)
            *pErr = MZ_ZIP_INVALID_PARAMETER;
        return MZ_FALSE;
    }
    mz_zip_zero_struct(&zip);
    if (!mz_zip_reader_init_file_v2(&zip, pFilename, flags, 0, 0))
    {
        if (pErr)
            *pErr = zip.m_last_error;
        return MZ_FALSE;
    }
    if (!mz_zip_validate_archive(&zip, flags))
    {
        actual_err = zip.m_last_error;
        success = MZ_FALSE;
    }
    if (!mz_zip_reader_end_internal(&zip, success))
    {
        if (!actual_err)
            actual_err = zip.m_last_error;
        success = MZ_FALSE;
    }
    if (pErr)
        *pErr = actual_err;
    return success;
}
#endif /* #ifndef MINIZ_NO_STDIO */

/* ------------------- .ZIP archive writing */

#ifndef MINIZ_NO_ARCHIVE_WRITING_APIS

/* Little-endian store helpers used when emitting zip headers. */
static MZ_FORCEINLINE void mz_write_le16(mz_uint8 *p, mz_uint16 v)
{
    p[0] = (mz_uint8)v;
    p[1] = (mz_uint8)(v >> 8);
}
static MZ_FORCEINLINE void mz_write_le32(mz_uint8 *p, mz_uint32 v)
{
    p[0] = (mz_uint8)v;
    p[1] = (mz_uint8)(v >> 8);
    p[2] = (mz_uint8)(v >> 16);
    p[3] = (mz_uint8)(v >> 24);
}
static MZ_FORCEINLINE void mz_write_le64(mz_uint8 *p, mz_uint64 v)
{
    mz_write_le32(p, (mz_uint32)v);
    mz_write_le32(p + sizeof(mz_uint32), (mz_uint32)(v >> 32));
}

#define MZ_WRITE_LE16(p, v) mz_write_le16((mz_uint8 *)(p), (mz_uint16)(v))
#define MZ_WRITE_LE32(p, v) mz_write_le32((mz_uint8 *)(p), (mz_uint32)(v))
#define MZ_WRITE_LE64(p, v) mz_write_le64((mz_uint8 *)(p), (mz_uint64)(v))

/* mz_zip_archive write callback that appends into a growable heap block
   (pState->m_pMem), doubling the capacity until the write fits. */
static size_t mz_zip_heap_write_func(void *pOpaque, mz_uint64 file_ofs, const void *pBuf, size_t n)
{
    mz_zip_archive *pZip = (mz_zip_archive *)pOpaque;
    mz_zip_internal_state *pState = pZip->m_pState;
    mz_uint64 new_size = MZ_MAX(file_ofs + n, pState->m_mem_size);
    if (!n)
        return 0;
    /* An allocation this big is likely to just fail on 32-bit systems, so don't even go there. */
    if ((sizeof(size_t) == sizeof(mz_uint32)) && (new_size > 0x7FFFFFFF))
    {
        mz_zip_set_error(pZip, MZ_ZIP_FILE_TOO_LARGE);
        return 0;
    }
    if (new_size > pState->m_mem_capacity)
    {
        void *pNew_block;
        size_t new_capacity = MZ_MAX(64, pState->m_mem_capacity);
        while (new_capacity < new_size)
            new_capacity *= 2;
        if (NULL == (pNew_block = pZip->m_pRealloc(pZip->m_pAlloc_opaque, pState->m_pMem, 1, new_capacity)))
        {
            mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED);
            return 0;
        }
        pState->m_pMem = pNew_block;
        pState->m_mem_capacity = new_capacity;
    }
    memcpy((mz_uint8 *)pState->m_pMem + file_ofs, pBuf, n);
    pState->m_mem_size = (size_t)new_size;
    return n;
}

/* Tears down writer state: clears the central dir arrays, closes the FILE when
   this archive owns it, frees the heap block and the internal state. Returns
   MZ_FALSE (optionally recording an error) on invalid state or close failure. */
static mz_bool mz_zip_writer_end_internal(mz_zip_archive *pZip, mz_bool set_last_error)
{
    mz_zip_internal_state *pState;
    mz_bool status = MZ_TRUE;
    if ((!pZip) || (!pZip->m_pState) || (!pZip->m_pAlloc) || (!pZip->m_pFree) || ((pZip->m_zip_mode != MZ_ZIP_MODE_WRITING) && (pZip->m_zip_mode != MZ_ZIP_MODE_WRITING_HAS_BEEN_FINALIZED)))
    {
        if (set_last_error)
            mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER);
        return MZ_FALSE;
    }
    pState = pZip->m_pState;
    pZip->m_pState = NULL;
    mz_zip_array_clear(pZip, &pState->m_central_dir);
    mz_zip_array_clear(pZip, &pState->m_central_dir_offsets);
    mz_zip_array_clear(pZip, &pState->m_sorted_central_dir_offsets);
#ifndef MINIZ_NO_STDIO
    if (pState->m_pFile)
    {
        /* Only close the FILE if we opened it (MZ_ZIP_TYPE_FILE), not for
           caller-supplied CFILE handles. */
        if (pZip->m_zip_type == MZ_ZIP_TYPE_FILE)
        {
            if (MZ_FCLOSE(pState->m_pFile) == EOF)
            {
                if (set_last_error)
                    mz_zip_set_error(pZip, MZ_ZIP_FILE_CLOSE_FAILED);
                status = MZ_FALSE;
            }
        }
        pState->m_pFile = NULL;
    }
#endif /* #ifndef MINIZ_NO_STDIO */
    if ((pZip->m_pWrite == mz_zip_heap_write_func) && (pState->m_pMem))
    {
        pZip->m_pFree(pZip->m_pAlloc_opaque, pState->m_pMem);
        pState->m_pMem = NULL;
    }
    pZip->m_pFree(pZip->m_pAlloc_opaque, pState);
    pZip->m_zip_mode = MZ_ZIP_MODE_INVALID;
    return status;
}

mz_bool mz_zip_writer_init_v2(mz_zip_archive *pZip, mz_uint64 existing_size, mz_uint flags)
{
    /* Core writer initialization: validates the archive struct, installs
       default allocators, and allocates/zeroes the internal state. The caller
       must have installed m_pWrite (and m_pRead when reading is allowed). */
    mz_bool zip64 = (flags & MZ_ZIP_FLAG_WRITE_ZIP64) != 0;
    if ((!pZip) || (pZip->m_pState) || (!pZip->m_pWrite) || (pZip->m_zip_mode != MZ_ZIP_MODE_INVALID))
        return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER);
    if (flags & MZ_ZIP_FLAG_WRITE_ALLOW_READING)
    {
        if (!pZip->m_pRead)
            return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER);
    }
    if (pZip->m_file_offset_alignment)
    {
        /* Ensure user specified file offset alignment is a power of 2. */
        if (pZip->m_file_offset_alignment & (pZip->m_file_offset_alignment - 1))
            return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER);
    }
    if (!pZip->m_pAlloc)
        pZip->m_pAlloc = miniz_def_alloc_func;
    if (!pZip->m_pFree)
        pZip->m_pFree = miniz_def_free_func;
    if (!pZip->m_pRealloc)
        pZip->m_pRealloc = miniz_def_realloc_func;
    pZip->m_archive_size = existing_size;
    pZip->m_central_directory_file_ofs = 0;
    pZip->m_total_files = 0;
    if (NULL == (pZip->m_pState = (mz_zip_internal_state *)pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, sizeof(mz_zip_internal_state))))
        return mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED);
    memset(pZip->m_pState, 0, sizeof(mz_zip_internal_state));
    MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(&pZip->m_pState->m_central_dir, sizeof(mz_uint8));
    MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(&pZip->m_pState->m_central_dir_offsets, sizeof(mz_uint32));
    MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(&pZip->m_pState->m_sorted_central_dir_offsets, sizeof(mz_uint32));
    pZip->m_pState->m_zip64 = zip64;
    pZip->m_pState->m_zip64_has_extended_info_fields = zip64;
    pZip->m_zip_type = MZ_ZIP_TYPE_USER;
    pZip->m_zip_mode = MZ_ZIP_MODE_WRITING;
    return MZ_TRUE;
}

mz_bool mz_zip_writer_init(mz_zip_archive *pZip, mz_uint64 existing_size)
{
    return mz_zip_writer_init_v2(pZip, existing_size, 0);
}

/* Initializes a writer that targets an in-memory, growable heap buffer. */
mz_bool mz_zip_writer_init_heap_v2(mz_zip_archive *pZip, size_t size_to_reserve_at_beginning, size_t initial_allocation_size, mz_uint flags)
{
    pZip->m_pWrite = mz_zip_heap_write_func;
    pZip->m_pNeeds_keepalive = NULL;
    if (flags & MZ_ZIP_FLAG_WRITE_ALLOW_READING)
        pZip->m_pRead = mz_zip_mem_read_func;
    pZip->m_pIO_opaque = pZip;
    if (!mz_zip_writer_init_v2(pZip, size_to_reserve_at_beginning, flags))
        return MZ_FALSE;
    pZip->m_zip_type = MZ_ZIP_TYPE_HEAP;
    if (0 != (initial_allocation_size = MZ_MAX(initial_allocation_size, size_to_reserve_at_beginning)))
    {
        if (NULL == (pZip->m_pState->m_pMem = pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, initial_allocation_size)))
        {
            mz_zip_writer_end_internal(pZip, MZ_FALSE);
            return mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED);
        }
        pZip->m_pState->m_mem_capacity = initial_allocation_size;
    }
    return MZ_TRUE;
}

mz_bool mz_zip_writer_init_heap(mz_zip_archive *pZip, size_t size_to_reserve_at_beginning, size_t initial_allocation_size)
{
    return mz_zip_writer_init_heap_v2(pZip, size_to_reserve_at_beginning, initial_allocation_size, 0);
}

#ifndef MINIZ_NO_STDIO
/* mz_zip_archive write callback backed by a stdio FILE; seeks only when the
   requested offset differs from the current position. */
static size_t mz_zip_file_write_func(void *pOpaque, mz_uint64 file_ofs, const void *pBuf, size_t n)
{
    mz_zip_archive *pZip = (mz_zip_archive *)pOpaque;
    mz_int64 cur_ofs = MZ_FTELL64(pZip->m_pState->m_pFile);
    file_ofs += pZip->m_pState->m_file_archive_start_ofs;
    if (((mz_int64)file_ofs < 0) || (((cur_ofs != (mz_int64)file_ofs)) && (MZ_FSEEK64(pZip->m_pState->m_pFile, (mz_int64)file_ofs, SEEK_SET))))
    {
        mz_zip_set_error(pZip, MZ_ZIP_FILE_SEEK_FAILED);
        return 0;
    }
    return MZ_FWRITE(pBuf, 1, n, pZip->m_pState->m_pFile);
}

mz_bool mz_zip_writer_init_file(mz_zip_archive *pZip, const char *pFilename, mz_uint64 size_to_reserve_at_beginning)
{
    return mz_zip_writer_init_file_v2(pZip, pFilename, size_to_reserve_at_beginning, 0);
}

/* Initializes a writer that creates/truncates pFilename; optionally reserves
   (zero-fills) size_to_reserve_at_beginning bytes at the front of the file. */
mz_bool mz_zip_writer_init_file_v2(mz_zip_archive *pZip, const char *pFilename, mz_uint64 size_to_reserve_at_beginning, mz_uint flags)
{
    MZ_FILE *pFile;
    pZip->m_pWrite = mz_zip_file_write_func;
    pZip->m_pNeeds_keepalive = NULL;
    if (flags & MZ_ZIP_FLAG_WRITE_ALLOW_READING)
        pZip->m_pRead = mz_zip_file_read_func;
    pZip->m_pIO_opaque = pZip;
    if (!mz_zip_writer_init_v2(pZip, size_to_reserve_at_beginning, flags))
        return MZ_FALSE;
    if (NULL == (pFile = MZ_FOPEN(pFilename, (flags & MZ_ZIP_FLAG_WRITE_ALLOW_READING) ? "w+b" : "wb")))
    {
        mz_zip_writer_end(pZip);
        return mz_zip_set_error(pZip, MZ_ZIP_FILE_OPEN_FAILED);
    }
    pZip->m_pState->m_pFile = pFile;
    pZip->m_zip_type = MZ_ZIP_TYPE_FILE;
    if (size_to_reserve_at_beginning)
    {
        mz_uint64 cur_ofs = 0;
        char buf[4096];
        MZ_CLEAR_ARR(buf);
        do
        {
            size_t n = (size_t)MZ_MIN(sizeof(buf), size_to_reserve_at_beginning);
            if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_ofs, buf, n) != n)
            {
                mz_zip_writer_end(pZip);
                return mz_zip_set_error(pZip, MZ_ZIP_FILE_WRITE_FAILED);
            }
            cur_ofs += n;
            size_to_reserve_at_beginning -= n;
        } while (size_to_reserve_at_beginning);
    }
    return MZ_TRUE;
}

/* Initializes a writer on an already-open FILE*; the archive starts at the
   FILE's current position (recorded in m_file_archive_start_ofs). */
mz_bool mz_zip_writer_init_cfile(mz_zip_archive *pZip, MZ_FILE *pFile, mz_uint flags)
{
    pZip->m_pWrite = mz_zip_file_write_func;
    pZip->m_pNeeds_keepalive = NULL;
    if (flags & MZ_ZIP_FLAG_WRITE_ALLOW_READING)
        pZip->m_pRead = mz_zip_file_read_func;
    pZip->m_pIO_opaque = pZip;
    if (!mz_zip_writer_init_v2(pZip, 0, flags))
        return MZ_FALSE;
    pZip->m_pState->m_pFile = pFile;
    pZip->m_pState->m_file_archive_start_ofs = MZ_FTELL64(pZip->m_pState->m_pFile);
    pZip->m_zip_type = MZ_ZIP_TYPE_CFILE;
    return MZ_TRUE;
}
#endif /* #ifndef MINIZ_NO_STDIO */

/* Converts an archive that was opened for reading into one that can be
   appended to in place. */
mz_bool mz_zip_writer_init_from_reader_v2(mz_zip_archive *pZip, const char *pFilename, mz_uint flags)
{
    mz_zip_internal_state *pState;
    if ((!pZip) || (!pZip->m_pState) || (pZip->m_zip_mode != MZ_ZIP_MODE_READING))
        return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER);
    if (flags & MZ_ZIP_FLAG_WRITE_ZIP64)
    {
        /* We don't support converting a non-zip64 file to zip64 - this seems like more trouble than it's worth. (What about the existing 32-bit data descriptors that could follow the compressed data?)
*/
        if (!pZip->m_pState->m_zip64)
            return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER);
    }
    /* No sense in trying to write to an archive that's already at the support max size */
    if (pZip->m_pState->m_zip64)
    {
        if (pZip->m_total_files == MZ_UINT32_MAX)
            return mz_zip_set_error(pZip, MZ_ZIP_TOO_MANY_FILES);
    }
    else
    {
        if (pZip->m_total_files == MZ_UINT16_MAX)
            return mz_zip_set_error(pZip, MZ_ZIP_TOO_MANY_FILES);
        if ((pZip->m_archive_size + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + MZ_ZIP_LOCAL_DIR_HEADER_SIZE) > MZ_UINT32_MAX)
            return mz_zip_set_error(pZip, MZ_ZIP_FILE_TOO_LARGE);
    }
    pState = pZip->m_pState;
    if (pState->m_pFile)
    {
#ifdef MINIZ_NO_STDIO
        (void)pFilename;
        return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER);
#else
        if (pZip->m_pIO_opaque != pZip)
            return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER);
        if (pZip->m_zip_type == MZ_ZIP_TYPE_FILE)
        {
            if (!pFilename)
                return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER);
            /* Archive is being read from stdio and was originally opened only for reading. Try to reopen as writable. */
            if (NULL == (pState->m_pFile = MZ_FREOPEN(pFilename, "r+b", pState->m_pFile)))
            {
                /* The mz_zip_archive is now in a bogus state because pState->m_pFile is NULL, so just close it. */
                mz_zip_reader_end_internal(pZip, MZ_FALSE);
                return mz_zip_set_error(pZip, MZ_ZIP_FILE_OPEN_FAILED);
            }
        }
        pZip->m_pWrite = mz_zip_file_write_func;
        pZip->m_pNeeds_keepalive = NULL;
#endif /* #ifdef MINIZ_NO_STDIO */
    }
    else if (pState->m_pMem)
    {
        /* Archive lives in a memory block. Assume it's from the heap that we can resize using the realloc callback. */
        if (pZip->m_pIO_opaque != pZip)
            return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER);
        pState->m_mem_capacity = pState->m_mem_size;
        pZip->m_pWrite = mz_zip_heap_write_func;
        pZip->m_pNeeds_keepalive = NULL;
    }
    /* Archive is being read via a user provided read function - make sure the user has specified a write function too. */
    else if (!pZip->m_pWrite)
        return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER);
    /* Start writing new files at the archive's current central directory location. */
    /* TODO: We could add a flag that lets the user start writing immediately AFTER the existing central dir - this would be safer. */
    pZip->m_archive_size = pZip->m_central_directory_file_ofs;
    pZip->m_central_directory_file_ofs = 0;
    /* Clear the sorted central dir offsets, they aren't useful or maintained now. */
    /* Even though we're now in write mode, files can still be extracted and verified, but file locates will be slow. */
    /* TODO: We could easily maintain the sorted central directory offsets. */
    mz_zip_array_clear(pZip, &pZip->m_pState->m_sorted_central_dir_offsets);
    pZip->m_zip_mode = MZ_ZIP_MODE_WRITING;
    return MZ_TRUE;
}

mz_bool mz_zip_writer_init_from_reader(mz_zip_archive *pZip, const char *pFilename)
{
    return mz_zip_writer_init_from_reader_v2(pZip, pFilename, 0);
}

/* TODO: pArchive_name is a terrible name here!
*/
mz_bool mz_zip_writer_add_mem(mz_zip_archive *pZip, const char *pArchive_name, const void *pBuf, size_t buf_size, mz_uint level_and_flags)
{
    return mz_zip_writer_add_mem_ex(pZip, pArchive_name, pBuf, buf_size, NULL, 0, level_and_flags, 0, 0);
}

/* Bookkeeping handed to the tdefl put-buf callback while compressing a file
   straight into the archive. */
typedef struct
{
    mz_zip_archive *m_pZip;
    mz_uint64 m_cur_archive_file_ofs;
    mz_uint64 m_comp_size;
} mz_zip_writer_add_state;

/* tdefl output callback: writes each compressed chunk at the current archive
   offset and tracks the running compressed size. */
static mz_bool mz_zip_writer_add_put_buf_callback(const void *pBuf, int len, void *pUser)
{
    mz_zip_writer_add_state *pState = (mz_zip_writer_add_state *)pUser;
    if ((int)pState->m_pZip->m_pWrite(pState->m_pZip->m_pIO_opaque, pState->m_cur_archive_file_ofs, pBuf, len) != len)
        return MZ_FALSE;
    pState->m_cur_archive_file_ofs += len;
    pState->m_comp_size += len;
    return MZ_TRUE;
}

/* Worst-case sizes of the zip64 extended-information extra field:
   2 values (sizes) for a local header, 3 (sizes + header offset) for central. */
#define MZ_ZIP64_MAX_LOCAL_EXTRA_FIELD_SIZE (sizeof(mz_uint16) * 2 + sizeof(mz_uint64) * 2)
#define MZ_ZIP64_MAX_CENTRAL_EXTRA_FIELD_SIZE (sizeof(mz_uint16) * 2 + sizeof(mz_uint64) * 3)

/* Emits a zip64 extended-information extra field into pBuf containing
   whichever of the three 64-bit values are non-NULL; returns its total size. */
static mz_uint32 mz_zip_writer_create_zip64_extra_data(mz_uint8 *pBuf, mz_uint64 *pUncomp_size, mz_uint64 *pComp_size, mz_uint64 *pLocal_header_ofs)
{
    mz_uint8 *pDst = pBuf;
    mz_uint32 field_size = 0;
    MZ_WRITE_LE16(pDst + 0, MZ_ZIP64_EXTENDED_INFORMATION_FIELD_HEADER_ID);
    MZ_WRITE_LE16(pDst + 2, 0);
    pDst += sizeof(mz_uint16) * 2;
    if (pUncomp_size)
    {
        MZ_WRITE_LE64(pDst, *pUncomp_size);
        pDst += sizeof(mz_uint64);
        field_size += sizeof(mz_uint64);
    }
    if (pComp_size)
    {
        MZ_WRITE_LE64(pDst, *pComp_size);
        pDst += sizeof(mz_uint64);
        field_size += sizeof(mz_uint64);
    }
    if (pLocal_header_ofs)
    {
        MZ_WRITE_LE64(pDst, *pLocal_header_ofs);
        pDst += sizeof(mz_uint64);
        field_size += sizeof(mz_uint64);
    }
    /* Patch the field's data-size word now that we know how much was written. */
    MZ_WRITE_LE16(pBuf + 2, field_size);
    return (mz_uint32)(pDst - pBuf);
}

/* Fills pDst with a complete zip local directory header. 64-bit sizes are
   clamped to MZ_UINT32_MAX; the real values then travel in the zip64 extra
   field. */
static mz_bool mz_zip_writer_create_local_dir_header(mz_zip_archive *pZip, mz_uint8 *pDst, mz_uint16 filename_size, mz_uint16 extra_size, mz_uint64 uncomp_size, mz_uint64 comp_size, mz_uint32 uncomp_crc32, mz_uint16 method, mz_uint16 bit_flags, mz_uint16 dos_time, mz_uint16 dos_date)
{
    (void)pZip;
    memset(pDst, 0, MZ_ZIP_LOCAL_DIR_HEADER_SIZE);
    MZ_WRITE_LE32(pDst + MZ_ZIP_LDH_SIG_OFS, MZ_ZIP_LOCAL_DIR_HEADER_SIG);
    /* version-needed 20 (2.0) when deflated, 0 when stored. */
    MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_VERSION_NEEDED_OFS, method ? 20 : 0);
    MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_BIT_FLAG_OFS, bit_flags);
    MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_METHOD_OFS, method);
    MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_FILE_TIME_OFS, dos_time);
    MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_FILE_DATE_OFS, dos_date);
    MZ_WRITE_LE32(pDst + MZ_ZIP_LDH_CRC32_OFS, uncomp_crc32);
    MZ_WRITE_LE32(pDst + MZ_ZIP_LDH_COMPRESSED_SIZE_OFS, MZ_MIN(comp_size, MZ_UINT32_MAX));
    MZ_WRITE_LE32(pDst + MZ_ZIP_LDH_DECOMPRESSED_SIZE_OFS, MZ_MIN(uncomp_size, MZ_UINT32_MAX));
    MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_FILENAME_LEN_OFS, filename_size);
    MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_EXTRA_LEN_OFS, extra_size);
    return MZ_TRUE;
}

/* Fills pDst with a complete zip central directory header (the local-header
   variant above plus comment size, external attributes, and the local header
   offset). */
static mz_bool mz_zip_writer_create_central_dir_header(mz_zip_archive *pZip, mz_uint8 *pDst, mz_uint16 filename_size, mz_uint16 extra_size, mz_uint16 comment_size, mz_uint64 uncomp_size, mz_uint64 comp_size, mz_uint32 uncomp_crc32, mz_uint16 method, mz_uint16 bit_flags, mz_uint16 dos_time, mz_uint16 dos_date, mz_uint64 local_header_ofs, mz_uint32 ext_attributes)
{
    (void)pZip;
    memset(pDst, 0, MZ_ZIP_CENTRAL_DIR_HEADER_SIZE);
    MZ_WRITE_LE32(pDst + MZ_ZIP_CDH_SIG_OFS, MZ_ZIP_CENTRAL_DIR_HEADER_SIG);
    MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_VERSION_NEEDED_OFS, method ?
20 : 0); MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_BIT_FLAG_OFS, bit_flags); MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_METHOD_OFS, method); MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_FILE_TIME_OFS, dos_time); MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_FILE_DATE_OFS, dos_date); MZ_WRITE_LE32(pDst + MZ_ZIP_CDH_CRC32_OFS, uncomp_crc32); MZ_WRITE_LE32(pDst + MZ_ZIP_CDH_COMPRESSED_SIZE_OFS, MZ_MIN(comp_size, MZ_UINT32_MAX)); MZ_WRITE_LE32(pDst + MZ_ZIP_CDH_DECOMPRESSED_SIZE_OFS, MZ_MIN(uncomp_size, MZ_UINT32_MAX)); MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_FILENAME_LEN_OFS, filename_size); MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_EXTRA_LEN_OFS, extra_size); MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_COMMENT_LEN_OFS, comment_size); MZ_WRITE_LE32(pDst + MZ_ZIP_CDH_EXTERNAL_ATTR_OFS, ext_attributes); MZ_WRITE_LE32(pDst + MZ_ZIP_CDH_LOCAL_HEADER_OFS, MZ_MIN(local_header_ofs, MZ_UINT32_MAX)); return MZ_TRUE; } static mz_bool mz_zip_writer_add_to_central_dir(mz_zip_archive *pZip, const char *pFilename, mz_uint16 filename_size, const void *pExtra, mz_uint16 extra_size, const void *pComment, mz_uint16 comment_size, mz_uint64 uncomp_size, mz_uint64 comp_size, mz_uint32 uncomp_crc32, mz_uint16 method, mz_uint16 bit_flags, mz_uint16 dos_time, mz_uint16 dos_date, mz_uint64 local_header_ofs, mz_uint32 ext_attributes, const char *user_extra_data, mz_uint user_extra_data_len) { mz_zip_internal_state *pState = pZip->m_pState; mz_uint32 central_dir_ofs = (mz_uint32)pState->m_central_dir.m_size; size_t orig_central_dir_size = pState->m_central_dir.m_size; mz_uint8 central_dir_header[MZ_ZIP_CENTRAL_DIR_HEADER_SIZE]; if (!pZip->m_pState->m_zip64) { if (local_header_ofs > 0xFFFFFFFF) return mz_zip_set_error(pZip, MZ_ZIP_FILE_TOO_LARGE); } /* miniz doesn't support central dirs >= MZ_UINT32_MAX bytes yet */ if (((mz_uint64)pState->m_central_dir.m_size + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + filename_size + extra_size + user_extra_data_len + comment_size) >= MZ_UINT32_MAX) return mz_zip_set_error(pZip, MZ_ZIP_UNSUPPORTED_CDIR_SIZE); if 
(!mz_zip_writer_create_central_dir_header(pZip, central_dir_header, filename_size, (mz_uint16)(extra_size + user_extra_data_len), comment_size, uncomp_size, comp_size, uncomp_crc32, method, bit_flags, dos_time, dos_date, local_header_ofs, ext_attributes)) return mz_zip_set_error(pZip, MZ_ZIP_INTERNAL_ERROR); if ((!mz_zip_array_push_back(pZip, &pState->m_central_dir, central_dir_header, MZ_ZIP_CENTRAL_DIR_HEADER_SIZE)) || (!mz_zip_array_push_back(pZip, &pState->m_central_dir, pFilename, filename_size)) || (!mz_zip_array_push_back(pZip, &pState->m_central_dir, pExtra, extra_size)) || (!mz_zip_array_push_back(pZip, &pState->m_central_dir, user_extra_data, user_extra_data_len)) || (!mz_zip_array_push_back(pZip, &pState->m_central_dir, pComment, comment_size)) || (!mz_zip_array_push_back(pZip, &pState->m_central_dir_offsets, ¢ral_dir_ofs, 1))) { /* Try to resize the central directory array back into its original state. */ mz_zip_array_resize(pZip, &pState->m_central_dir, orig_central_dir_size, MZ_FALSE); return mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED); } return MZ_TRUE; } static mz_bool mz_zip_writer_validate_archive_name(const char *pArchive_name) { /* Basic ZIP archive filename validity checks: Valid filenames cannot start with a forward slash, cannot contain a drive letter, and cannot use DOS-style backward slashes. 
*/ if (*pArchive_name == '/') return MZ_FALSE; /* Making sure the name does not contain drive letters or DOS style backward slashes is the responsibility of the program using miniz*/ return MZ_TRUE; } static mz_uint mz_zip_writer_compute_padding_needed_for_file_alignment(mz_zip_archive *pZip) { mz_uint32 n; if (!pZip->m_file_offset_alignment) return 0; n = (mz_uint32)(pZip->m_archive_size & (pZip->m_file_offset_alignment - 1)); return (mz_uint)((pZip->m_file_offset_alignment - n) & (pZip->m_file_offset_alignment - 1)); } static mz_bool mz_zip_writer_write_zeros(mz_zip_archive *pZip, mz_uint64 cur_file_ofs, mz_uint32 n) { char buf[4096]; memset(buf, 0, MZ_MIN(sizeof(buf), n)); while (n) { mz_uint32 s = MZ_MIN(sizeof(buf), n); if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_file_ofs, buf, s) != s) return mz_zip_set_error(pZip, MZ_ZIP_FILE_WRITE_FAILED); cur_file_ofs += s; n -= s; } return MZ_TRUE; } mz_bool mz_zip_writer_add_mem_ex(mz_zip_archive *pZip, const char *pArchive_name, const void *pBuf, size_t buf_size, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags, mz_uint64 uncomp_size, mz_uint32 uncomp_crc32) { return mz_zip_writer_add_mem_ex_v2(pZip, pArchive_name, pBuf, buf_size, pComment, comment_size, level_and_flags, uncomp_size, uncomp_crc32, NULL, NULL, 0, NULL, 0); } mz_bool mz_zip_writer_add_mem_ex_v2(mz_zip_archive *pZip, const char *pArchive_name, const void *pBuf, size_t buf_size, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags, mz_uint64 uncomp_size, mz_uint32 uncomp_crc32, MZ_TIME_T *last_modified, const char *user_extra_data, mz_uint user_extra_data_len, const char *user_extra_data_central, mz_uint user_extra_data_central_len) { mz_uint16 method = 0, dos_time = 0, dos_date = 0; mz_uint level, ext_attributes = 0, num_alignment_padding_bytes; mz_uint64 local_dir_header_ofs = pZip->m_archive_size, cur_archive_file_ofs = pZip->m_archive_size, comp_size = 0; size_t archive_name_size; mz_uint8 
local_dir_header[MZ_ZIP_LOCAL_DIR_HEADER_SIZE]; tdefl_compressor *pComp = NULL; mz_bool store_data_uncompressed; mz_zip_internal_state *pState; mz_uint8 *pExtra_data = NULL; mz_uint32 extra_size = 0; mz_uint8 extra_data[MZ_ZIP64_MAX_CENTRAL_EXTRA_FIELD_SIZE]; mz_uint16 bit_flags = 0; if ((int)level_and_flags < 0) level_and_flags = MZ_DEFAULT_LEVEL; if (uncomp_size || (buf_size && !(level_and_flags & MZ_ZIP_FLAG_COMPRESSED_DATA))) bit_flags |= MZ_ZIP_LDH_BIT_FLAG_HAS_LOCATOR; if (!(level_and_flags & MZ_ZIP_FLAG_ASCII_FILENAME)) bit_flags |= MZ_ZIP_GENERAL_PURPOSE_BIT_FLAG_UTF8; level = level_and_flags & 0xF; store_data_uncompressed = ((!level) || (level_and_flags & MZ_ZIP_FLAG_COMPRESSED_DATA)); if ((!pZip) || (!pZip->m_pState) || (pZip->m_zip_mode != MZ_ZIP_MODE_WRITING) || ((buf_size) && (!pBuf)) || (!pArchive_name) || ((comment_size) && (!pComment)) || (level > MZ_UBER_COMPRESSION)) return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); pState = pZip->m_pState; if (pState->m_zip64) { if (pZip->m_total_files == MZ_UINT32_MAX) return mz_zip_set_error(pZip, MZ_ZIP_TOO_MANY_FILES); } else { if (pZip->m_total_files == MZ_UINT16_MAX) { pState->m_zip64 = MZ_TRUE; /*return mz_zip_set_error(pZip, MZ_ZIP_TOO_MANY_FILES); */ } if (((mz_uint64)buf_size > 0xFFFFFFFF) || (uncomp_size > 0xFFFFFFFF)) { pState->m_zip64 = MZ_TRUE; /*return mz_zip_set_error(pZip, MZ_ZIP_ARCHIVE_TOO_LARGE); */ } } if ((!(level_and_flags & MZ_ZIP_FLAG_COMPRESSED_DATA)) && (uncomp_size)) return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); if (!mz_zip_writer_validate_archive_name(pArchive_name)) return mz_zip_set_error(pZip, MZ_ZIP_INVALID_FILENAME); #ifndef MINIZ_NO_TIME if (last_modified != NULL) { mz_zip_time_t_to_dos_time(*last_modified, &dos_time, &dos_date); } else { MZ_TIME_T cur_time; time(&cur_time); mz_zip_time_t_to_dos_time(cur_time, &dos_time, &dos_date); } #endif /* #ifndef MINIZ_NO_TIME */ if (!(level_and_flags & MZ_ZIP_FLAG_COMPRESSED_DATA)) { uncomp_crc32 = 
(mz_uint32)mz_crc32(MZ_CRC32_INIT, (const mz_uint8 *)pBuf, buf_size); uncomp_size = buf_size; if (uncomp_size <= 3) { level = 0; store_data_uncompressed = MZ_TRUE; } } archive_name_size = strlen(pArchive_name); if (archive_name_size > MZ_UINT16_MAX) return mz_zip_set_error(pZip, MZ_ZIP_INVALID_FILENAME); num_alignment_padding_bytes = mz_zip_writer_compute_padding_needed_for_file_alignment(pZip); /* miniz doesn't support central dirs >= MZ_UINT32_MAX bytes yet */ if (((mz_uint64)pState->m_central_dir.m_size + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + archive_name_size + MZ_ZIP64_MAX_CENTRAL_EXTRA_FIELD_SIZE + comment_size) >= MZ_UINT32_MAX) return mz_zip_set_error(pZip, MZ_ZIP_UNSUPPORTED_CDIR_SIZE); if (!pState->m_zip64) { /* Bail early if the archive would obviously become too large */ if ((pZip->m_archive_size + num_alignment_padding_bytes + MZ_ZIP_LOCAL_DIR_HEADER_SIZE + archive_name_size + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + archive_name_size + comment_size + user_extra_data_len + pState->m_central_dir.m_size + MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE + user_extra_data_central_len + MZ_ZIP_DATA_DESCRIPTER_SIZE32) > 0xFFFFFFFF) { pState->m_zip64 = MZ_TRUE; /*return mz_zip_set_error(pZip, MZ_ZIP_ARCHIVE_TOO_LARGE); */ } } if ((archive_name_size) && (pArchive_name[archive_name_size - 1] == '/')) { /* Set DOS Subdirectory attribute bit. */ ext_attributes |= MZ_ZIP_DOS_DIR_ATTRIBUTE_BITFLAG; /* Subdirectories cannot contain data. */ if ((buf_size) || (uncomp_size)) return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); } /* Try to do any allocations before writing to the archive, so if an allocation fails the file remains unmodified. (A good idea if we're doing an in-place modification.) */ if ((!mz_zip_array_ensure_room(pZip, &pState->m_central_dir, MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + archive_name_size + comment_size + (pState->m_zip64 ? 
MZ_ZIP64_MAX_CENTRAL_EXTRA_FIELD_SIZE : 0))) || (!mz_zip_array_ensure_room(pZip, &pState->m_central_dir_offsets, 1))) return mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED); if ((!store_data_uncompressed) && (buf_size)) { if (NULL == (pComp = (tdefl_compressor *)pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, sizeof(tdefl_compressor)))) return mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED); } if (!mz_zip_writer_write_zeros(pZip, cur_archive_file_ofs, num_alignment_padding_bytes)) { pZip->m_pFree(pZip->m_pAlloc_opaque, pComp); return MZ_FALSE; } local_dir_header_ofs += num_alignment_padding_bytes; if (pZip->m_file_offset_alignment) { MZ_ASSERT((local_dir_header_ofs & (pZip->m_file_offset_alignment - 1)) == 0); } cur_archive_file_ofs += num_alignment_padding_bytes; MZ_CLEAR_ARR(local_dir_header); if (!store_data_uncompressed || (level_and_flags & MZ_ZIP_FLAG_COMPRESSED_DATA)) { method = MZ_DEFLATED; } if (pState->m_zip64) { if (uncomp_size >= MZ_UINT32_MAX || local_dir_header_ofs >= MZ_UINT32_MAX) { pExtra_data = extra_data; extra_size = mz_zip_writer_create_zip64_extra_data(extra_data, (uncomp_size >= MZ_UINT32_MAX) ? &uncomp_size : NULL, (uncomp_size >= MZ_UINT32_MAX) ? &comp_size : NULL, (local_dir_header_ofs >= MZ_UINT32_MAX) ? 
/* NOTE(review): continuation of a zip-writer "add from memory" function whose head
   is above this chunk. At this point the zip64 extra field (if any) has been built;
   below we emit the local directory header, the filename, the extra field, the
   payload (stored or deflated), and finally the optional data descriptor.
   Code is unchanged; only comments were added. */
&local_dir_header_ofs : NULL);
    }
    /* zip64 path: local header carries (extra_size + user_extra_data_len) bytes of extra data. */
    if (!mz_zip_writer_create_local_dir_header(pZip, local_dir_header, (mz_uint16)archive_name_size, (mz_uint16)(extra_size + user_extra_data_len), 0, 0, 0, method, bit_flags, dos_time, dos_date))
        return mz_zip_set_error(pZip, MZ_ZIP_INTERNAL_ERROR);
    if (pZip->m_pWrite(pZip->m_pIO_opaque, local_dir_header_ofs, local_dir_header, sizeof(local_dir_header)) != sizeof(local_dir_header))
        return mz_zip_set_error(pZip, MZ_ZIP_FILE_WRITE_FAILED);
    cur_archive_file_ofs += sizeof(local_dir_header);
    /* Filename immediately follows the local header. */
    if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_file_ofs, pArchive_name, archive_name_size) != archive_name_size) {
        pZip->m_pFree(pZip->m_pAlloc_opaque, pComp);
        return mz_zip_set_error(pZip, MZ_ZIP_FILE_WRITE_FAILED);
    }
    cur_archive_file_ofs += archive_name_size;
    if (pExtra_data != NULL) {
        /* zip64 extended-information extra field built above. */
        if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_file_ofs, extra_data, extra_size) != extra_size)
            return mz_zip_set_error(pZip, MZ_ZIP_FILE_WRITE_FAILED);
        cur_archive_file_ofs += extra_size;
    }
} else {
    /* Non-zip64 path: compressed size and header offset must fit in 32 bits. */
    if ((comp_size > MZ_UINT32_MAX) || (cur_archive_file_ofs > MZ_UINT32_MAX))
        return mz_zip_set_error(pZip, MZ_ZIP_ARCHIVE_TOO_LARGE);
    if (!mz_zip_writer_create_local_dir_header(pZip, local_dir_header, (mz_uint16)archive_name_size, (mz_uint16)user_extra_data_len, 0, 0, 0, method, bit_flags, dos_time, dos_date))
        return mz_zip_set_error(pZip, MZ_ZIP_INTERNAL_ERROR);
    if (pZip->m_pWrite(pZip->m_pIO_opaque, local_dir_header_ofs, local_dir_header, sizeof(local_dir_header)) != sizeof(local_dir_header))
        return mz_zip_set_error(pZip, MZ_ZIP_FILE_WRITE_FAILED);
    cur_archive_file_ofs += sizeof(local_dir_header);
    if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_file_ofs, pArchive_name, archive_name_size) != archive_name_size) {
        pZip->m_pFree(pZip->m_pAlloc_opaque, pComp);
        return mz_zip_set_error(pZip, MZ_ZIP_FILE_WRITE_FAILED);
    }
    cur_archive_file_ofs += archive_name_size;
}
/* Caller-supplied raw extra data appended after name/extra field. */
if (user_extra_data_len > 0) {
    if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_file_ofs, user_extra_data,
                       user_extra_data_len) != user_extra_data_len)
        return mz_zip_set_error(pZip, MZ_ZIP_FILE_WRITE_FAILED);
    cur_archive_file_ofs += user_extra_data_len;
}
if (store_data_uncompressed) {
    /* Method 0 (stored): write the buffer verbatim. */
    if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_file_ofs, pBuf, buf_size) != buf_size) {
        pZip->m_pFree(pZip->m_pAlloc_opaque, pComp);
        return mz_zip_set_error(pZip, MZ_ZIP_FILE_WRITE_FAILED);
    }
    cur_archive_file_ofs += buf_size;
    comp_size = buf_size;
} else if (buf_size) {
    /* Deflate the whole buffer in one shot through the put-buf callback,
       which advances state.m_cur_archive_file_ofs as it writes. */
    mz_zip_writer_add_state state;
    state.m_pZip = pZip;
    state.m_cur_archive_file_ofs = cur_archive_file_ofs;
    state.m_comp_size = 0;
    if ((tdefl_init(pComp, mz_zip_writer_add_put_buf_callback, &state, tdefl_create_comp_flags_from_zip_params(level, -15, MZ_DEFAULT_STRATEGY)) != TDEFL_STATUS_OKAY) || (tdefl_compress_buffer(pComp, pBuf, buf_size, TDEFL_FINISH) != TDEFL_STATUS_DONE)) {
        pZip->m_pFree(pZip->m_pAlloc_opaque, pComp);
        return mz_zip_set_error(pZip, MZ_ZIP_COMPRESSION_FAILED);
    }
    comp_size = state.m_comp_size;
    cur_archive_file_ofs = state.m_cur_archive_file_ofs;
}
pZip->m_pFree(pZip->m_pAlloc_opaque, pComp);
pComp = NULL;
if (uncomp_size) {
    /* Data descriptor (the local header was written with zero sizes/crc).
       Note: the constant is spelled DESCRIPTER in miniz. */
    mz_uint8 local_dir_footer[MZ_ZIP_DATA_DESCRIPTER_SIZE64];
    mz_uint32 local_dir_footer_size = MZ_ZIP_DATA_DESCRIPTER_SIZE32;
    MZ_ASSERT(bit_flags & MZ_ZIP_LDH_BIT_FLAG_HAS_LOCATOR);
    MZ_WRITE_LE32(local_dir_footer + 0, MZ_ZIP_DATA_DESCRIPTOR_ID);
    MZ_WRITE_LE32(local_dir_footer + 4, uncomp_crc32);
    if (pExtra_data == NULL) {
        /* 32-bit descriptor. */
        if (comp_size > MZ_UINT32_MAX)
            return mz_zip_set_error(pZip, MZ_ZIP_ARCHIVE_TOO_LARGE);
        MZ_WRITE_LE32(local_dir_footer + 8, comp_size);
        MZ_WRITE_LE32(local_dir_footer + 12, uncomp_size);
    } else {
        /* zip64 descriptor: 64-bit sizes. */
        MZ_WRITE_LE64(local_dir_footer + 8, comp_size);
        MZ_WRITE_LE64(local_dir_footer + 16, uncomp_size);
        local_dir_footer_size = MZ_ZIP_DATA_DESCRIPTER_SIZE64;
    }
    if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_file_ofs, local_dir_footer, local_dir_footer_size) != local_dir_footer_size)
        return MZ_FALSE;
    cur_archive_file_ofs += local_dir_footer_size;
}
if (pExtra_data != NULL) {
    /* Rebuild the zip64 extra field with the now-known final sizes/offset for the
       central directory copy of this entry. */
    extra_size = mz_zip_writer_create_zip64_extra_data(extra_data, (uncomp_size >= MZ_UINT32_MAX) ? &uncomp_size : NULL, (uncomp_size >= MZ_UINT32_MAX) ? &comp_size : NULL, (local_dir_header_ofs >= MZ_UINT32_MAX) ? &local_dir_header_ofs : NULL);
}
/* Record the entry in the in-memory central directory; written out at finalize. */
if (!mz_zip_writer_add_to_central_dir(pZip, pArchive_name, (mz_uint16)archive_name_size, pExtra_data, (mz_uint16)extra_size, pComment, comment_size, uncomp_size, comp_size, uncomp_crc32, method, bit_flags, dos_time, dos_date, local_dir_header_ofs, ext_attributes, user_extra_data_central, user_extra_data_central_len))
    return MZ_FALSE;
pZip->m_total_files++;
pZip->m_archive_size = cur_archive_file_ofs;
return MZ_TRUE;
}

/* Streams a file into the archive, pulling data through read_callback in
   MZ_ZIP_MAX_IO_BUF_SIZE chunks (stored or deflated). With
   MZ_ZIP_FLAG_WRITE_HEADER_SET_SIZE the local header is rewritten afterwards
   with the real sizes/CRC instead of emitting a trailing data descriptor.
   Returns MZ_TRUE on success; on failure sets the archive's last error. */
mz_bool mz_zip_writer_add_read_buf_callback(mz_zip_archive *pZip, const char *pArchive_name, mz_file_read_func read_callback, void* callback_opaque, mz_uint64 max_size, const MZ_TIME_T *pFile_time, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags, const char *user_extra_data, mz_uint user_extra_data_len, const char *user_extra_data_central, mz_uint user_extra_data_central_len)
{
    mz_uint16 gen_flags;
    mz_uint uncomp_crc32 = MZ_CRC32_INIT, level, num_alignment_padding_bytes;
    mz_uint16 method = 0, dos_time = 0, dos_date = 0, ext_attributes = 0;
    mz_uint64 local_dir_header_ofs, cur_archive_file_ofs = pZip->m_archive_size, uncomp_size = 0, comp_size = 0;
    size_t archive_name_size;
    mz_uint8 local_dir_header[MZ_ZIP_LOCAL_DIR_HEADER_SIZE];
    mz_uint8 *pExtra_data = NULL;
    mz_uint32 extra_size = 0;
    mz_uint8 extra_data[MZ_ZIP64_MAX_CENTRAL_EXTRA_FIELD_SIZE];
    mz_zip_internal_state *pState;
    mz_uint64 file_ofs = 0, cur_archive_header_file_ofs;
    /* Negative level_and_flags means "use the default compression level". */
    if ((int)level_and_flags < 0)
        level_and_flags = MZ_DEFAULT_LEVEL;
    level = level_and_flags & 0xF;
    /* If the caller will let us rewrite the header with real sizes, no data
       descriptor (locator bit) is needed. */
    gen_flags = (level_and_flags & MZ_ZIP_FLAG_WRITE_HEADER_SET_SIZE) ?
                0 : MZ_ZIP_LDH_BIT_FLAG_HAS_LOCATOR;
    if (!(level_and_flags & MZ_ZIP_FLAG_ASCII_FILENAME))
        gen_flags |= MZ_ZIP_GENERAL_PURPOSE_BIT_FLAG_UTF8;
    /* Sanity checks */
    if ((!pZip) || (!pZip->m_pState) || (pZip->m_zip_mode != MZ_ZIP_MODE_WRITING) || (!pArchive_name) || ((comment_size) && (!pComment)) || (level > MZ_UBER_COMPRESSION))
        return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER);
    pState = pZip->m_pState;
    if ((!pState->m_zip64) && (max_size > MZ_UINT32_MAX)) {
        /* Source file is too large for non-zip64 */
        /*return mz_zip_set_error(pZip, MZ_ZIP_ARCHIVE_TOO_LARGE); */
        /* NOTE(review): instead of failing, the archive is silently upgraded to zip64. */
        pState->m_zip64 = MZ_TRUE;
    }
    /* We could support this, but why? */
    if (level_and_flags & MZ_ZIP_FLAG_COMPRESSED_DATA)
        return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER);
    if (!mz_zip_writer_validate_archive_name(pArchive_name))
        return mz_zip_set_error(pZip, MZ_ZIP_INVALID_FILENAME);
    if (pState->m_zip64) {
        if (pZip->m_total_files == MZ_UINT32_MAX)
            return mz_zip_set_error(pZip, MZ_ZIP_TOO_MANY_FILES);
    } else {
        if (pZip->m_total_files == MZ_UINT16_MAX) {
            /* Auto-upgrade to zip64 rather than failing on the 16-bit entry limit. */
            pState->m_zip64 = MZ_TRUE;
            /*return mz_zip_set_error(pZip, MZ_ZIP_TOO_MANY_FILES); */
        }
    }
    archive_name_size = strlen(pArchive_name);
    if (archive_name_size > MZ_UINT16_MAX)
        return mz_zip_set_error(pZip, MZ_ZIP_INVALID_FILENAME);
    num_alignment_padding_bytes = mz_zip_writer_compute_padding_needed_for_file_alignment(pZip);
    /* miniz doesn't support central dirs >= MZ_UINT32_MAX bytes yet */
    if (((mz_uint64)pState->m_central_dir.m_size + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + archive_name_size + MZ_ZIP64_MAX_CENTRAL_EXTRA_FIELD_SIZE + comment_size) >= MZ_UINT32_MAX)
        return mz_zip_set_error(pZip, MZ_ZIP_UNSUPPORTED_CDIR_SIZE);
    if (!pState->m_zip64) {
        /* Bail early if the archive would obviously become too large */
        if ((pZip->m_archive_size + num_alignment_padding_bytes + MZ_ZIP_LOCAL_DIR_HEADER_SIZE + archive_name_size + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + archive_name_size + comment_size + user_extra_data_len + pState->m_central_dir.m_size +
             MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE + 1024 + MZ_ZIP_DATA_DESCRIPTER_SIZE32 + user_extra_data_central_len) > 0xFFFFFFFF) {
            pState->m_zip64 = MZ_TRUE;
            /*return mz_zip_set_error(pZip, MZ_ZIP_ARCHIVE_TOO_LARGE); */
        }
    }
#ifndef MINIZ_NO_TIME
    if (pFile_time) {
        mz_zip_time_t_to_dos_time(*pFile_time, &dos_time, &dos_date);
    }
#endif
    /* Tiny payloads are not worth deflating. */
    if (max_size <= 3)
        level = 0;
    if (!mz_zip_writer_write_zeros(pZip, cur_archive_file_ofs, num_alignment_padding_bytes)) {
        return mz_zip_set_error(pZip, MZ_ZIP_FILE_WRITE_FAILED);
    }
    cur_archive_file_ofs += num_alignment_padding_bytes;
    local_dir_header_ofs = cur_archive_file_ofs;
    if (pZip->m_file_offset_alignment) {
        MZ_ASSERT((cur_archive_file_ofs & (pZip->m_file_offset_alignment - 1)) == 0);
    }
    if (max_size && level) {
        method = MZ_DEFLATED;
    }
    MZ_CLEAR_ARR(local_dir_header);
    if (pState->m_zip64) {
        if (max_size >= MZ_UINT32_MAX || local_dir_header_ofs >= MZ_UINT32_MAX) {
            pExtra_data = extra_data;
            /* When we will rewrite the header later the sizes are placeholders now;
               otherwise only the header offset may need the zip64 field. */
            if (level_and_flags & MZ_ZIP_FLAG_WRITE_HEADER_SET_SIZE)
                extra_size = mz_zip_writer_create_zip64_extra_data(extra_data, (max_size >= MZ_UINT32_MAX) ? &uncomp_size : NULL, (max_size >= MZ_UINT32_MAX) ? &comp_size : NULL, (local_dir_header_ofs >= MZ_UINT32_MAX) ? &local_dir_header_ofs : NULL);
            else
                extra_size = mz_zip_writer_create_zip64_extra_data(extra_data, NULL, NULL, (local_dir_header_ofs >= MZ_UINT32_MAX) ?
                                                                   &local_dir_header_ofs : NULL);
        }
        if (!mz_zip_writer_create_local_dir_header(pZip, local_dir_header, (mz_uint16)archive_name_size, (mz_uint16)(extra_size + user_extra_data_len), 0, 0, 0, method, gen_flags, dos_time, dos_date))
            return mz_zip_set_error(pZip, MZ_ZIP_INTERNAL_ERROR);
        if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_file_ofs, local_dir_header, sizeof(local_dir_header)) != sizeof(local_dir_header))
            return mz_zip_set_error(pZip, MZ_ZIP_FILE_WRITE_FAILED);
        cur_archive_file_ofs += sizeof(local_dir_header);
        if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_file_ofs, pArchive_name, archive_name_size) != archive_name_size) {
            return mz_zip_set_error(pZip, MZ_ZIP_FILE_WRITE_FAILED);
        }
        cur_archive_file_ofs += archive_name_size;
        if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_file_ofs, extra_data, extra_size) != extra_size)
            return mz_zip_set_error(pZip, MZ_ZIP_FILE_WRITE_FAILED);
        cur_archive_file_ofs += extra_size;
    } else {
        /* Non-zip64: all sizes/offsets must stay within 32 bits. */
        if ((comp_size > MZ_UINT32_MAX) || (cur_archive_file_ofs > MZ_UINT32_MAX))
            return mz_zip_set_error(pZip, MZ_ZIP_ARCHIVE_TOO_LARGE);
        if (!mz_zip_writer_create_local_dir_header(pZip, local_dir_header, (mz_uint16)archive_name_size, (mz_uint16)user_extra_data_len, 0, 0, 0, method, gen_flags, dos_time, dos_date))
            return mz_zip_set_error(pZip, MZ_ZIP_INTERNAL_ERROR);
        if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_file_ofs, local_dir_header, sizeof(local_dir_header)) != sizeof(local_dir_header))
            return mz_zip_set_error(pZip, MZ_ZIP_FILE_WRITE_FAILED);
        cur_archive_file_ofs += sizeof(local_dir_header);
        if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_file_ofs, pArchive_name, archive_name_size) != archive_name_size) {
            return mz_zip_set_error(pZip, MZ_ZIP_FILE_WRITE_FAILED);
        }
        cur_archive_file_ofs += archive_name_size;
    }
    if (user_extra_data_len > 0) {
        if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_file_ofs, user_extra_data,
                           user_extra_data_len) != user_extra_data_len)
            return mz_zip_set_error(pZip, MZ_ZIP_FILE_WRITE_FAILED);
        cur_archive_file_ofs += user_extra_data_len;
    }
    if (max_size) {
        void *pRead_buf = pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, MZ_ZIP_MAX_IO_BUF_SIZE);
        if (!pRead_buf) {
            return mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED);
        }
        if (!level) {
            /* Stored: copy chunks straight through, accumulating the CRC. */
            while (1) {
                size_t n = read_callback(callback_opaque, file_ofs, pRead_buf, MZ_ZIP_MAX_IO_BUF_SIZE);
                if (n == 0)
                    break;
                if ((n > MZ_ZIP_MAX_IO_BUF_SIZE) || (file_ofs + n > max_size)) {
                    pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf);
                    return mz_zip_set_error(pZip, MZ_ZIP_FILE_READ_FAILED);
                }
                if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_file_ofs, pRead_buf, n) != n) {
                    pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf);
                    return mz_zip_set_error(pZip, MZ_ZIP_FILE_WRITE_FAILED);
                }
                file_ofs += n;
                uncomp_crc32 = (mz_uint32)mz_crc32(uncomp_crc32, (const mz_uint8 *)pRead_buf, n);
                cur_archive_file_ofs += n;
            }
            uncomp_size = file_ofs;
            comp_size = uncomp_size;
        } else {
            /* Deflated: feed chunks into tdefl; output goes to the archive via the
               put-buf callback which tracks the write offset in 'state'. */
            mz_bool result = MZ_FALSE;
            mz_zip_writer_add_state state;
            tdefl_compressor *pComp = (tdefl_compressor *)pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, sizeof(tdefl_compressor));
            if (!pComp) {
                pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf);
                return mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED);
            }
            state.m_pZip = pZip;
            state.m_cur_archive_file_ofs = cur_archive_file_ofs;
            state.m_comp_size = 0;
            if (tdefl_init(pComp, mz_zip_writer_add_put_buf_callback, &state, tdefl_create_comp_flags_from_zip_params(level, -15, MZ_DEFAULT_STRATEGY)) != TDEFL_STATUS_OKAY) {
                pZip->m_pFree(pZip->m_pAlloc_opaque, pComp);
                pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf);
                return mz_zip_set_error(pZip, MZ_ZIP_INTERNAL_ERROR);
            }
            for (;;) {
                tdefl_status status;
                tdefl_flush flush = TDEFL_NO_FLUSH;
                size_t n = read_callback(callback_opaque, file_ofs, pRead_buf, MZ_ZIP_MAX_IO_BUF_SIZE);
                if ((n > MZ_ZIP_MAX_IO_BUF_SIZE) || (file_ofs + n > max_size)) {
                    mz_zip_set_error(pZip, MZ_ZIP_FILE_READ_FAILED);
                    break;
                }
                file_ofs += n;
                uncomp_crc32 = (mz_uint32)mz_crc32(uncomp_crc32, (const mz_uint8 *)pRead_buf, n);
                /* Optional keepalive hook: force a full flush so the consumer sees data. */
                if
                (pZip->m_pNeeds_keepalive != NULL && pZip->m_pNeeds_keepalive(pZip->m_pIO_opaque))
                    flush = TDEFL_FULL_FLUSH;
                if (n == 0)
                    flush = TDEFL_FINISH;
                status = tdefl_compress_buffer(pComp, pRead_buf, n, flush);
                if (status == TDEFL_STATUS_DONE) {
                    result = MZ_TRUE;
                    break;
                } else if (status != TDEFL_STATUS_OKAY) {
                    mz_zip_set_error(pZip, MZ_ZIP_COMPRESSION_FAILED);
                    break;
                }
            }
            pZip->m_pFree(pZip->m_pAlloc_opaque, pComp);
            if (!result) {
                pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf);
                return MZ_FALSE;
            }
            uncomp_size = file_ofs;
            comp_size = state.m_comp_size;
            cur_archive_file_ofs = state.m_cur_archive_file_ofs;
        }
        pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf);
    }
    if (!(level_and_flags & MZ_ZIP_FLAG_WRITE_HEADER_SET_SIZE)) {
        /* No header rewrite requested: append a data descriptor with the real
           CRC and sizes (32-bit form, or 64-bit when a zip64 extra field is used). */
        mz_uint8 local_dir_footer[MZ_ZIP_DATA_DESCRIPTER_SIZE64];
        mz_uint32 local_dir_footer_size = MZ_ZIP_DATA_DESCRIPTER_SIZE32;
        MZ_WRITE_LE32(local_dir_footer + 0, MZ_ZIP_DATA_DESCRIPTOR_ID);
        MZ_WRITE_LE32(local_dir_footer + 4, uncomp_crc32);
        if (pExtra_data == NULL) {
            if (comp_size > MZ_UINT32_MAX)
                return mz_zip_set_error(pZip, MZ_ZIP_ARCHIVE_TOO_LARGE);
            MZ_WRITE_LE32(local_dir_footer + 8, comp_size);
            MZ_WRITE_LE32(local_dir_footer + 12, uncomp_size);
        } else {
            MZ_WRITE_LE64(local_dir_footer + 8, comp_size);
            MZ_WRITE_LE64(local_dir_footer + 16, uncomp_size);
            local_dir_footer_size = MZ_ZIP_DATA_DESCRIPTER_SIZE64;
        }
        if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_file_ofs, local_dir_footer, local_dir_footer_size) != local_dir_footer_size)
            return MZ_FALSE;
        cur_archive_file_ofs += local_dir_footer_size;
    }
    if (level_and_flags & MZ_ZIP_FLAG_WRITE_HEADER_SET_SIZE) {
        /* Seek back and rewrite the local header (and zip64 extra field) with the
           final CRC and sizes instead of using a data descriptor. */
        if (pExtra_data != NULL) {
            extra_size = mz_zip_writer_create_zip64_extra_data(extra_data, (max_size >= MZ_UINT32_MAX) ? &uncomp_size : NULL, (max_size >= MZ_UINT32_MAX) ? &comp_size : NULL, (local_dir_header_ofs >= MZ_UINT32_MAX) ?
                                                               &local_dir_header_ofs : NULL);
        }
        if (!mz_zip_writer_create_local_dir_header(pZip, local_dir_header, (mz_uint16)archive_name_size, (mz_uint16)(extra_size + user_extra_data_len), (max_size >= MZ_UINT32_MAX) ? MZ_UINT32_MAX : uncomp_size, (max_size >= MZ_UINT32_MAX) ? MZ_UINT32_MAX : comp_size, uncomp_crc32, method, gen_flags, dos_time, dos_date))
            return mz_zip_set_error(pZip, MZ_ZIP_INTERNAL_ERROR);
        cur_archive_header_file_ofs = local_dir_header_ofs;
        if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_header_file_ofs, local_dir_header, sizeof(local_dir_header)) != sizeof(local_dir_header))
            return mz_zip_set_error(pZip, MZ_ZIP_FILE_WRITE_FAILED);
        if (pExtra_data != NULL) {
            cur_archive_header_file_ofs += sizeof(local_dir_header);
            if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_header_file_ofs, pArchive_name, archive_name_size) != archive_name_size) {
                return mz_zip_set_error(pZip, MZ_ZIP_FILE_WRITE_FAILED);
            }
            cur_archive_header_file_ofs += archive_name_size;
            if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_header_file_ofs, extra_data, extra_size) != extra_size)
                return mz_zip_set_error(pZip, MZ_ZIP_FILE_WRITE_FAILED);
            cur_archive_header_file_ofs += extra_size;
        }
    }
    if (pExtra_data != NULL) {
        /* Fresh zip64 extra field for the central directory entry. */
        extra_size = mz_zip_writer_create_zip64_extra_data(extra_data, (uncomp_size >= MZ_UINT32_MAX) ? &uncomp_size : NULL, (uncomp_size >= MZ_UINT32_MAX) ? &comp_size : NULL, (local_dir_header_ofs >= MZ_UINT32_MAX) ?
                                                           &local_dir_header_ofs : NULL);
    }
    if (!mz_zip_writer_add_to_central_dir(pZip, pArchive_name, (mz_uint16)archive_name_size, pExtra_data, (mz_uint16)extra_size, pComment, comment_size, uncomp_size, comp_size, uncomp_crc32, method, gen_flags, dos_time, dos_date, local_dir_header_ofs, ext_attributes, user_extra_data_central, user_extra_data_central_len))
        return MZ_FALSE;
    pZip->m_total_files++;
    pZip->m_archive_size = cur_archive_file_ofs;
    return MZ_TRUE;
}

#ifndef MINIZ_NO_STDIO
/* read_callback adapter over a stdio FILE*: seeks only when the current position
   differs from the requested offset, then reads up to n bytes. Returns bytes read
   (0 on seek failure or negative offset). */
static size_t mz_file_read_func_stdio(void *pOpaque, mz_uint64 file_ofs, void *pBuf, size_t n)
{
    MZ_FILE *pSrc_file = (MZ_FILE *)pOpaque;
    mz_int64 cur_ofs = MZ_FTELL64(pSrc_file);
    if (((mz_int64)file_ofs < 0) || (((cur_ofs != (mz_int64)file_ofs)) && (MZ_FSEEK64(pSrc_file, (mz_int64)file_ofs, SEEK_SET))))
        return 0;
    return MZ_FREAD(pBuf, 1, n, pSrc_file);
}

/* Adds up to max_size bytes from an already-open FILE* by delegating to
   mz_zip_writer_add_read_buf_callback with the stdio read adapter. */
mz_bool mz_zip_writer_add_cfile(mz_zip_archive *pZip, const char *pArchive_name, MZ_FILE *pSrc_file, mz_uint64 max_size, const MZ_TIME_T *pFile_time, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags, const char *user_extra_data, mz_uint user_extra_data_len, const char *user_extra_data_central, mz_uint user_extra_data_central_len)
{
    return mz_zip_writer_add_read_buf_callback(pZip, pArchive_name, mz_file_read_func_stdio, pSrc_file, max_size, pFile_time, pComment, comment_size, level_and_flags, user_extra_data, user_extra_data_len, user_extra_data_central, user_extra_data_central_len);
}

/* Adds a file from disk by name: stats its mtime (when time support is compiled
   in), opens it, measures its size, then delegates to mz_zip_writer_add_cfile. */
mz_bool mz_zip_writer_add_file(mz_zip_archive *pZip, const char *pArchive_name, const char *pSrc_filename, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags)
{
    MZ_FILE *pSrc_file = NULL;
    mz_uint64 uncomp_size = 0;
    MZ_TIME_T file_modified_time;
    MZ_TIME_T *pFile_time = NULL;
    mz_bool status;
    memset(&file_modified_time, 0, sizeof(file_modified_time));
#if !defined(MINIZ_NO_TIME) && !defined(MINIZ_NO_STDIO)
    pFile_time = &file_modified_time;
    if (!mz_zip_get_file_modified_time(pSrc_filename, &file_modified_time))
        return
        mz_zip_set_error(pZip, MZ_ZIP_FILE_STAT_FAILED);
#endif
    pSrc_file = MZ_FOPEN(pSrc_filename, "rb");
    if (!pSrc_file)
        return mz_zip_set_error(pZip, MZ_ZIP_FILE_OPEN_FAILED);
    /* Determine the file size by seeking to the end. */
    MZ_FSEEK64(pSrc_file, 0, SEEK_END);
    uncomp_size = MZ_FTELL64(pSrc_file);
    MZ_FSEEK64(pSrc_file, 0, SEEK_SET);
    status = mz_zip_writer_add_cfile(pZip, pArchive_name, pSrc_file, uncomp_size, pFile_time, pComment, comment_size, level_and_flags, NULL, 0, NULL, 0);
    MZ_FCLOSE(pSrc_file);
    return status;
}
#endif /* #ifndef MINIZ_NO_STDIO */

/* Rebuilds an entry's extra-field block into pNew_ext: writes a fresh zip64
   extended-information field (for whichever of uncomp/comp size, local header
   offset, disk start are supplied), then copies every non-zip64 field from the
   original extra data pExt/ext_len, validating field sizes as it goes.
   Returns MZ_TRUE on success; sets the zip error and returns MZ_FALSE otherwise. */
static mz_bool mz_zip_writer_update_zip64_extension_block(mz_zip_array *pNew_ext, mz_zip_archive *pZip, const mz_uint8 *pExt, mz_uint32 ext_len, mz_uint64 *pComp_size, mz_uint64 *pUncomp_size, mz_uint64 *pLocal_header_ofs, mz_uint32 *pDisk_start)
{
    /* + 64 should be enough for any new zip64 data */
    if (!mz_zip_array_reserve(pZip, pNew_ext, ext_len + 64, MZ_FALSE))
        return mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED);
    mz_zip_array_resize(pZip, pNew_ext, 0, MZ_FALSE);
    if ((pUncomp_size) || (pComp_size) || (pLocal_header_ofs) || (pDisk_start)) {
        /* Build the zip64 extended-information field: header id, placeholder
           length, then only the values actually requested, in spec order. */
        mz_uint8 new_ext_block[64];
        mz_uint8 *pDst = new_ext_block;
        mz_write_le16(pDst, MZ_ZIP64_EXTENDED_INFORMATION_FIELD_HEADER_ID);
        mz_write_le16(pDst + sizeof(mz_uint16), 0);
        pDst += sizeof(mz_uint16) * 2;
        if (pUncomp_size) {
            mz_write_le64(pDst, *pUncomp_size);
            pDst += sizeof(mz_uint64);
        }
        if (pComp_size) {
            mz_write_le64(pDst, *pComp_size);
            pDst += sizeof(mz_uint64);
        }
        if (pLocal_header_ofs) {
            mz_write_le64(pDst, *pLocal_header_ofs);
            pDst += sizeof(mz_uint64);
        }
        if (pDisk_start) {
            mz_write_le32(pDst, *pDisk_start);
            pDst += sizeof(mz_uint32);
        }
        /* Patch the real data length into the field header. */
        mz_write_le16(new_ext_block + sizeof(mz_uint16), (mz_uint16)((pDst - new_ext_block) - sizeof(mz_uint16) * 2));
        if (!mz_zip_array_push_back(pZip, pNew_ext, new_ext_block, pDst - new_ext_block))
            return mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED);
    }
    if ((pExt) && (ext_len)) {
        /* Copy through every original field except any old zip64 field. */
        mz_uint32 extra_size_remaining = ext_len;
        const mz_uint8 *pExtra_data = pExt;
        do {
            mz_uint32 field_id, field_data_size,
                      field_total_size;
            if (extra_size_remaining < (sizeof(mz_uint16) * 2))
                return mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED);
            field_id = MZ_READ_LE16(pExtra_data);
            field_data_size = MZ_READ_LE16(pExtra_data + sizeof(mz_uint16));
            field_total_size = field_data_size + sizeof(mz_uint16) * 2;
            if (field_total_size > extra_size_remaining)
                return mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED);
            if (field_id != MZ_ZIP64_EXTENDED_INFORMATION_FIELD_HEADER_ID) {
                if (!mz_zip_array_push_back(pZip, pNew_ext, pExtra_data, field_total_size))
                    return mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED);
            }
            pExtra_data += field_total_size;
            extra_size_remaining -= field_total_size;
        } while (extra_size_remaining);
    }
    return MZ_TRUE;
}

/* TODO: This func is now pretty freakin complex due to zip64, split it up? */
/* Copies entry src_file_index from pSource_zip into pZip verbatim (no
   recompression), rewriting headers/extra fields as needed for zip64. */
mz_bool mz_zip_writer_add_from_zip_reader(mz_zip_archive *pZip, mz_zip_archive *pSource_zip, mz_uint src_file_index)
{
    mz_uint n, bit_flags, num_alignment_padding_bytes, src_central_dir_following_data_size;
    mz_uint64 src_archive_bytes_remaining, local_dir_header_ofs;
    mz_uint64 cur_src_file_ofs, cur_dst_file_ofs;
    /* mz_uint32 array guarantees alignment for the byte-wise header buffer. */
    mz_uint32 local_header_u32[(MZ_ZIP_LOCAL_DIR_HEADER_SIZE + sizeof(mz_uint32) - 1) / sizeof(mz_uint32)];
    mz_uint8 *pLocal_header = (mz_uint8 *)local_header_u32;
    mz_uint8 new_central_header[MZ_ZIP_CENTRAL_DIR_HEADER_SIZE];
    size_t orig_central_dir_size;
    mz_zip_internal_state *pState;
    void *pBuf;
    const mz_uint8 *pSrc_central_header;
    mz_zip_archive_file_stat src_file_stat;
    mz_uint32 src_filename_len, src_comment_len, src_ext_len;
    mz_uint32 local_header_filename_size, local_header_extra_len;
    mz_uint64 local_header_comp_size, local_header_uncomp_size;
    mz_bool found_zip64_ext_data_in_ldir = MZ_FALSE;
    /* Sanity checks */
    if ((!pZip) || (!pZip->m_pState) || (pZip->m_zip_mode != MZ_ZIP_MODE_WRITING) || (!pSource_zip->m_pRead))
        return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER);
    pState = pZip->m_pState;
    /* Don't support copying files from zip64 archives to non-zip64,
       even though in some cases this is possible */
    if ((pSource_zip->m_pState->m_zip64) && (!pZip->m_pState->m_zip64))
        return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER);
    /* Get pointer to the source central dir header and crack it */
    if (NULL == (pSrc_central_header = mz_zip_get_cdh(pSource_zip, src_file_index)))
        return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER);
    if (MZ_READ_LE32(pSrc_central_header + MZ_ZIP_CDH_SIG_OFS) != MZ_ZIP_CENTRAL_DIR_HEADER_SIG)
        return mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED);
    src_filename_len = MZ_READ_LE16(pSrc_central_header + MZ_ZIP_CDH_FILENAME_LEN_OFS);
    src_comment_len = MZ_READ_LE16(pSrc_central_header + MZ_ZIP_CDH_COMMENT_LEN_OFS);
    src_ext_len = MZ_READ_LE16(pSrc_central_header + MZ_ZIP_CDH_EXTRA_LEN_OFS);
    src_central_dir_following_data_size = src_filename_len + src_ext_len + src_comment_len;
    /* TODO: We don't support central dir's >= MZ_UINT32_MAX bytes right now (+32 fudge factor in case we need to add more extra data) */
    if ((pState->m_central_dir.m_size + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + src_central_dir_following_data_size + 32) >= MZ_UINT32_MAX)
        return mz_zip_set_error(pZip, MZ_ZIP_UNSUPPORTED_CDIR_SIZE);
    num_alignment_padding_bytes = mz_zip_writer_compute_padding_needed_for_file_alignment(pZip);
    if (!pState->m_zip64) {
        if (pZip->m_total_files == MZ_UINT16_MAX)
            return mz_zip_set_error(pZip, MZ_ZIP_TOO_MANY_FILES);
    } else {
        /* TODO: Our zip64 support still has some 32-bit limits that may not be worth fixing.
         */
        if (pZip->m_total_files == MZ_UINT32_MAX)
            return mz_zip_set_error(pZip, MZ_ZIP_TOO_MANY_FILES);
    }
    if (!mz_zip_file_stat_internal(pSource_zip, src_file_index, pSrc_central_header, &src_file_stat, NULL))
        return MZ_FALSE;
    cur_src_file_ofs = src_file_stat.m_local_header_ofs;
    cur_dst_file_ofs = pZip->m_archive_size;
    /* Read the source archive's local dir header */
    if (pSource_zip->m_pRead(pSource_zip->m_pIO_opaque, cur_src_file_ofs, pLocal_header, MZ_ZIP_LOCAL_DIR_HEADER_SIZE) != MZ_ZIP_LOCAL_DIR_HEADER_SIZE)
        return mz_zip_set_error(pZip, MZ_ZIP_FILE_READ_FAILED);
    if (MZ_READ_LE32(pLocal_header) != MZ_ZIP_LOCAL_DIR_HEADER_SIG)
        return mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED);
    cur_src_file_ofs += MZ_ZIP_LOCAL_DIR_HEADER_SIZE;
    /* Compute the total size we need to copy (filename+extra data+compressed data) */
    local_header_filename_size = MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_FILENAME_LEN_OFS);
    local_header_extra_len = MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_EXTRA_LEN_OFS);
    local_header_comp_size = MZ_READ_LE32(pLocal_header + MZ_ZIP_LDH_COMPRESSED_SIZE_OFS);
    local_header_uncomp_size = MZ_READ_LE32(pLocal_header + MZ_ZIP_LDH_DECOMPRESSED_SIZE_OFS);
    src_archive_bytes_remaining = local_header_filename_size + local_header_extra_len + src_file_stat.m_comp_size;
    /* Try to find a zip64 extended information field */
    if ((local_header_extra_len) && ((local_header_comp_size == MZ_UINT32_MAX) || (local_header_uncomp_size == MZ_UINT32_MAX))) {
        mz_zip_array file_data_array;
        const mz_uint8 *pExtra_data;
        mz_uint32 extra_size_remaining = local_header_extra_len;
        mz_zip_array_init(&file_data_array, 1);
        if (!mz_zip_array_resize(pZip, &file_data_array, local_header_extra_len, MZ_FALSE)) {
            return mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED);
        }
        /* Read the local header's extra data (it follows the filename). */
        if (pSource_zip->m_pRead(pSource_zip->m_pIO_opaque, src_file_stat.m_local_header_ofs + MZ_ZIP_LOCAL_DIR_HEADER_SIZE + local_header_filename_size, file_data_array.m_p, local_header_extra_len) != local_header_extra_len) {
            mz_zip_array_clear(pZip, &file_data_array);
            return mz_zip_set_error(pZip, MZ_ZIP_FILE_READ_FAILED);
        }
        pExtra_data = (const mz_uint8 *)file_data_array.m_p;
        /* Walk the extra-field list looking for the zip64 field. */
        do {
            mz_uint32 field_id, field_data_size, field_total_size;
            if (extra_size_remaining < (sizeof(mz_uint16) * 2)) {
                mz_zip_array_clear(pZip, &file_data_array);
                return mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED);
            }
            field_id = MZ_READ_LE16(pExtra_data);
            field_data_size = MZ_READ_LE16(pExtra_data + sizeof(mz_uint16));
            field_total_size = field_data_size + sizeof(mz_uint16) * 2;
            if (field_total_size > extra_size_remaining) {
                mz_zip_array_clear(pZip, &file_data_array);
                return mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED);
            }
            if (field_id == MZ_ZIP64_EXTENDED_INFORMATION_FIELD_HEADER_ID) {
                const mz_uint8 *pSrc_field_data = pExtra_data + sizeof(mz_uint32);
                if (field_data_size < sizeof(mz_uint64) * 2) {
                    mz_zip_array_clear(pZip, &file_data_array);
                    return mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED);
                }
                local_header_uncomp_size = MZ_READ_LE64(pSrc_field_data);
                local_header_comp_size = MZ_READ_LE64(pSrc_field_data + sizeof(mz_uint64)); /* may be 0 if there's a descriptor */
                found_zip64_ext_data_in_ldir = MZ_TRUE;
                break;
            }
            pExtra_data += field_total_size;
            extra_size_remaining -= field_total_size;
        } while (extra_size_remaining);
        mz_zip_array_clear(pZip, &file_data_array);
    }
    if (!pState->m_zip64) {
        /* Try to detect if the new archive will most likely wind up too big and bail early (+(sizeof(mz_uint32) * 4) is for the optional descriptor which could be present, +64 is a fudge factor). */
        /* We also check when the archive is finalized so this doesn't need to be perfect.
         */
        mz_uint64 approx_new_archive_size = cur_dst_file_ofs + num_alignment_padding_bytes + MZ_ZIP_LOCAL_DIR_HEADER_SIZE + src_archive_bytes_remaining + (sizeof(mz_uint32) * 4) + pState->m_central_dir.m_size + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + src_central_dir_following_data_size + MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE + 64;
        if (approx_new_archive_size >= MZ_UINT32_MAX)
            return mz_zip_set_error(pZip, MZ_ZIP_ARCHIVE_TOO_LARGE);
    }
    /* Write dest archive padding */
    if (!mz_zip_writer_write_zeros(pZip, cur_dst_file_ofs, num_alignment_padding_bytes))
        return MZ_FALSE;
    cur_dst_file_ofs += num_alignment_padding_bytes;
    local_dir_header_ofs = cur_dst_file_ofs;
    if (pZip->m_file_offset_alignment) {
        MZ_ASSERT((local_dir_header_ofs & (pZip->m_file_offset_alignment - 1)) == 0);
    }
    /* The original zip's local header+ext block doesn't change, even with zip64, so we can just copy it over to the dest zip */
    if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_dst_file_ofs, pLocal_header, MZ_ZIP_LOCAL_DIR_HEADER_SIZE) != MZ_ZIP_LOCAL_DIR_HEADER_SIZE)
        return mz_zip_set_error(pZip, MZ_ZIP_FILE_WRITE_FAILED);
    cur_dst_file_ofs += MZ_ZIP_LOCAL_DIR_HEADER_SIZE;
    /* Copy over the source archive bytes to the dest archive, also ensure we have enough buf space to handle optional data descriptor */
    if (NULL == (pBuf = pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, (size_t)MZ_MAX(32U, MZ_MIN((mz_uint64)MZ_ZIP_MAX_IO_BUF_SIZE, src_archive_bytes_remaining)))))
        return mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED);
    while (src_archive_bytes_remaining) {
        n = (mz_uint)MZ_MIN((mz_uint64)MZ_ZIP_MAX_IO_BUF_SIZE, src_archive_bytes_remaining);
        if (pSource_zip->m_pRead(pSource_zip->m_pIO_opaque, cur_src_file_ofs, pBuf, n) != n) {
            pZip->m_pFree(pZip->m_pAlloc_opaque, pBuf);
            return mz_zip_set_error(pZip, MZ_ZIP_FILE_READ_FAILED);
        }
        cur_src_file_ofs += n;
        if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_dst_file_ofs, pBuf, n) != n) {
            pZip->m_pFree(pZip->m_pAlloc_opaque, pBuf);
            return mz_zip_set_error(pZip, MZ_ZIP_FILE_WRITE_FAILED);
        }
        cur_dst_file_ofs += n;
        src_archive_bytes_remaining -= n;
    }
    /* Now deal with the optional data descriptor */
    bit_flags = MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_BIT_FLAG_OFS);
    if (bit_flags & 8) {
        /* Copy data descriptor */
        if ((pSource_zip->m_pState->m_zip64) || (found_zip64_ext_data_in_ldir)) {
            /* src is zip64, dest must be zip64 */
            /* name uint32_t's */
            /* id 1 (optional in zip64?) */
            /* crc 1 */
            /* comp_size 2 */
            /* uncomp_size 2 */
            if (pSource_zip->m_pRead(pSource_zip->m_pIO_opaque, cur_src_file_ofs, pBuf, (sizeof(mz_uint32) * 6)) != (sizeof(mz_uint32) * 6)) {
                pZip->m_pFree(pZip->m_pAlloc_opaque, pBuf);
                return mz_zip_set_error(pZip, MZ_ZIP_FILE_READ_FAILED);
            }
            n = sizeof(mz_uint32) * ((MZ_READ_LE32(pBuf) == MZ_ZIP_DATA_DESCRIPTOR_ID) ? 6 : 5);
        } else {
            /* src is NOT zip64 */
            mz_bool has_id;
            if (pSource_zip->m_pRead(pSource_zip->m_pIO_opaque, cur_src_file_ofs, pBuf, sizeof(mz_uint32) * 4) != sizeof(mz_uint32) * 4) {
                pZip->m_pFree(pZip->m_pAlloc_opaque, pBuf);
                return mz_zip_set_error(pZip, MZ_ZIP_FILE_READ_FAILED);
            }
            has_id = (MZ_READ_LE32(pBuf) == MZ_ZIP_DATA_DESCRIPTOR_ID);
            if (pZip->m_pState->m_zip64) {
                /* dest is zip64, so upgrade the data descriptor */
                const mz_uint8 *pSrc_descriptor = (const mz_uint8 *)pBuf + (has_id ? sizeof(mz_uint32) : 0);
                const mz_uint32 src_crc32 = MZ_READ_LE32(pSrc_descriptor);
                const mz_uint64 src_comp_size = MZ_READ_LE32(pSrc_descriptor + sizeof(mz_uint32));
                const mz_uint64 src_uncomp_size = MZ_READ_LE32(pSrc_descriptor + 2*sizeof(mz_uint32));
                mz_write_le32((mz_uint8 *)pBuf, MZ_ZIP_DATA_DESCRIPTOR_ID);
                mz_write_le32((mz_uint8 *)pBuf + sizeof(mz_uint32) * 1, src_crc32);
                mz_write_le64((mz_uint8 *)pBuf + sizeof(mz_uint32) * 2, src_comp_size);
                mz_write_le64((mz_uint8 *)pBuf + sizeof(mz_uint32) * 4, src_uncomp_size);
                n = sizeof(mz_uint32) * 6;
            } else {
                /* dest is NOT zip64, just copy it as-is */
                n = sizeof(mz_uint32) * (has_id ?
                                         4 : 3);
            }
        }
        if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_dst_file_ofs, pBuf, n) != n) {
            pZip->m_pFree(pZip->m_pAlloc_opaque, pBuf);
            return mz_zip_set_error(pZip, MZ_ZIP_FILE_WRITE_FAILED);
        }
        cur_src_file_ofs += n;
        cur_dst_file_ofs += n;
    }
    pZip->m_pFree(pZip->m_pAlloc_opaque, pBuf);
    /* Finally, add the new central dir header */
    orig_central_dir_size = pState->m_central_dir.m_size;
    memcpy(new_central_header, pSrc_central_header, MZ_ZIP_CENTRAL_DIR_HEADER_SIZE);
    if (pState->m_zip64) {
        /* This is the painful part: We need to write a new central dir header + ext block with updated zip64 fields, and ensure the old fields (if any) are not included. */
        const mz_uint8 *pSrc_ext = pSrc_central_header + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + src_filename_len;
        mz_zip_array new_ext_block;
        mz_zip_array_init(&new_ext_block, sizeof(mz_uint8));
        /* Sentinel values redirect readers to the zip64 extra field. */
        MZ_WRITE_LE32(new_central_header + MZ_ZIP_CDH_COMPRESSED_SIZE_OFS, MZ_UINT32_MAX);
        MZ_WRITE_LE32(new_central_header + MZ_ZIP_CDH_DECOMPRESSED_SIZE_OFS, MZ_UINT32_MAX);
        MZ_WRITE_LE32(new_central_header + MZ_ZIP_CDH_LOCAL_HEADER_OFS, MZ_UINT32_MAX);
        if (!mz_zip_writer_update_zip64_extension_block(&new_ext_block, pZip, pSrc_ext, src_ext_len, &src_file_stat.m_comp_size, &src_file_stat.m_uncomp_size, &local_dir_header_ofs, NULL)) {
            mz_zip_array_clear(pZip, &new_ext_block);
            return MZ_FALSE;
        }
        MZ_WRITE_LE16(new_central_header + MZ_ZIP_CDH_EXTRA_LEN_OFS, new_ext_block.m_size);
        /* Append: header, filename, new ext block, comment — rolling back the
           central dir to its original size on any allocation failure. */
        if (!mz_zip_array_push_back(pZip, &pState->m_central_dir, new_central_header, MZ_ZIP_CENTRAL_DIR_HEADER_SIZE)) {
            mz_zip_array_clear(pZip, &new_ext_block);
            return mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED);
        }
        if (!mz_zip_array_push_back(pZip, &pState->m_central_dir, pSrc_central_header + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE, src_filename_len)) {
            mz_zip_array_clear(pZip, &new_ext_block);
            mz_zip_array_resize(pZip, &pState->m_central_dir, orig_central_dir_size, MZ_FALSE);
            return mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED);
        }
        if (!mz_zip_array_push_back(pZip, &pState->m_central_dir,
                                    new_ext_block.m_p, new_ext_block.m_size)) {
            mz_zip_array_clear(pZip, &new_ext_block);
            mz_zip_array_resize(pZip, &pState->m_central_dir, orig_central_dir_size, MZ_FALSE);
            return mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED);
        }
        if (!mz_zip_array_push_back(pZip, &pState->m_central_dir, pSrc_central_header + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + src_filename_len + src_ext_len, src_comment_len)) {
            mz_zip_array_clear(pZip, &new_ext_block);
            mz_zip_array_resize(pZip, &pState->m_central_dir, orig_central_dir_size, MZ_FALSE);
            return mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED);
        }
        mz_zip_array_clear(pZip, &new_ext_block);
    } else {
        /* sanity checks */
        if (cur_dst_file_ofs > MZ_UINT32_MAX)
            return mz_zip_set_error(pZip, MZ_ZIP_ARCHIVE_TOO_LARGE);
        if (local_dir_header_ofs >= MZ_UINT32_MAX)
            return mz_zip_set_error(pZip, MZ_ZIP_ARCHIVE_TOO_LARGE);
        MZ_WRITE_LE32(new_central_header + MZ_ZIP_CDH_LOCAL_HEADER_OFS, local_dir_header_ofs);
        if (!mz_zip_array_push_back(pZip, &pState->m_central_dir, new_central_header, MZ_ZIP_CENTRAL_DIR_HEADER_SIZE))
            return mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED);
        if (!mz_zip_array_push_back(pZip, &pState->m_central_dir, pSrc_central_header + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE, src_central_dir_following_data_size)) {
            mz_zip_array_resize(pZip, &pState->m_central_dir, orig_central_dir_size, MZ_FALSE);
            return mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED);
        }
    }
    /* This shouldn't trigger unless we screwed up during the initial sanity checks */
    if (pState->m_central_dir.m_size >= MZ_UINT32_MAX) {
        /* TODO: Support central dirs >= 32-bits in size */
        mz_zip_array_resize(pZip, &pState->m_central_dir, orig_central_dir_size, MZ_FALSE);
        return mz_zip_set_error(pZip, MZ_ZIP_UNSUPPORTED_CDIR_SIZE);
    }
    n = (mz_uint32)orig_central_dir_size;
    if (!mz_zip_array_push_back(pZip, &pState->m_central_dir_offsets, &n, 1)) {
        mz_zip_array_resize(pZip, &pState->m_central_dir, orig_central_dir_size, MZ_FALSE);
        return mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED);
    }
    pZip->m_total_files++;
pZip->m_archive_size = cur_dst_file_ofs;
    return MZ_TRUE;
}

/* Finalizes a writable archive: writes the in-memory central directory image,
   then (for zip64 archives) the zip64 end-of-central-directory record and its
   locator, and finally the classic end-of-central-directory record. On
   success the writer switches to MZ_ZIP_MODE_WRITING_HAS_BEEN_FINALIZED and
   no more files may be added. Returns MZ_FALSE with the error recorded via
   mz_zip_set_error() on failure. */
mz_bool mz_zip_writer_finalize_archive(mz_zip_archive *pZip)
{
    mz_zip_internal_state *pState;
    mz_uint64 central_dir_ofs, central_dir_size;
    mz_uint8 hdr[256]; /* scratch buffer for each fixed-size record written below */
    if ((!pZip) || (!pZip->m_pState) || (pZip->m_zip_mode != MZ_ZIP_MODE_WRITING))
        return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER);
    pState = pZip->m_pState;
    if (pState->m_zip64)
    {
        /* NOTE(review): even in zip64 mode a central dir >= 4 GB is rejected here. */
        if ((mz_uint64)pState->m_central_dir.m_size >= MZ_UINT32_MAX)
            return mz_zip_set_error(pZip, MZ_ZIP_TOO_MANY_FILES);
    }
    else
    {
        /* Classic limits: at most 0xFFFF entries, and data + central dir +
           end-of-central-dir record must fit in 32 bits. */
        if ((pZip->m_total_files > MZ_UINT16_MAX) || ((pZip->m_archive_size + pState->m_central_dir.m_size + MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE) > MZ_UINT32_MAX))
            return mz_zip_set_error(pZip, MZ_ZIP_TOO_MANY_FILES);
    }
    central_dir_ofs = 0;
    central_dir_size = 0;
    if (pZip->m_total_files)
    {
        /* Write central directory */
        central_dir_ofs = pZip->m_archive_size;
        central_dir_size = pState->m_central_dir.m_size;
        pZip->m_central_directory_file_ofs = central_dir_ofs;
        if (pZip->m_pWrite(pZip->m_pIO_opaque, central_dir_ofs, pState->m_central_dir.m_p, (size_t)central_dir_size) != central_dir_size)
            return mz_zip_set_error(pZip, MZ_ZIP_FILE_WRITE_FAILED);
        pZip->m_archive_size += central_dir_size;
    }
    if (pState->m_zip64)
    {
        /* Write zip64 end of central directory header */
        mz_uint64 rel_ofs_to_zip64_ecdr = pZip->m_archive_size;
        MZ_CLEAR_ARR(hdr);
        MZ_WRITE_LE32(hdr + MZ_ZIP64_ECDH_SIG_OFS, MZ_ZIP64_END_OF_CENTRAL_DIR_HEADER_SIG);
        /* "size of record" excludes the signature and the size field itself. */
        MZ_WRITE_LE64(hdr + MZ_ZIP64_ECDH_SIZE_OF_RECORD_OFS, MZ_ZIP64_END_OF_CENTRAL_DIR_HEADER_SIZE - sizeof(mz_uint32) - sizeof(mz_uint64));
        MZ_WRITE_LE16(hdr + MZ_ZIP64_ECDH_VERSION_MADE_BY_OFS, 0x031E); /* TODO: always Unix */
        MZ_WRITE_LE16(hdr + MZ_ZIP64_ECDH_VERSION_NEEDED_OFS, 0x002D);
        MZ_WRITE_LE64(hdr + MZ_ZIP64_ECDH_CDIR_NUM_ENTRIES_ON_DISK_OFS, pZip->m_total_files);
        MZ_WRITE_LE64(hdr + MZ_ZIP64_ECDH_CDIR_TOTAL_ENTRIES_OFS, pZip->m_total_files);
        MZ_WRITE_LE64(hdr + MZ_ZIP64_ECDH_CDIR_SIZE_OFS, central_dir_size);
        MZ_WRITE_LE64(hdr + MZ_ZIP64_ECDH_CDIR_OFS_OFS, central_dir_ofs);
        if (pZip->m_pWrite(pZip->m_pIO_opaque, pZip->m_archive_size, hdr, MZ_ZIP64_END_OF_CENTRAL_DIR_HEADER_SIZE) != MZ_ZIP64_END_OF_CENTRAL_DIR_HEADER_SIZE)
            return mz_zip_set_error(pZip, MZ_ZIP_FILE_WRITE_FAILED);
        pZip->m_archive_size += MZ_ZIP64_END_OF_CENTRAL_DIR_HEADER_SIZE;
        /* Write zip64 end of central directory locator */
        MZ_CLEAR_ARR(hdr);
        MZ_WRITE_LE32(hdr + MZ_ZIP64_ECDL_SIG_OFS, MZ_ZIP64_END_OF_CENTRAL_DIR_LOCATOR_SIG);
        MZ_WRITE_LE64(hdr + MZ_ZIP64_ECDL_REL_OFS_TO_ZIP64_ECDR_OFS, rel_ofs_to_zip64_ecdr);
        MZ_WRITE_LE32(hdr + MZ_ZIP64_ECDL_TOTAL_NUMBER_OF_DISKS_OFS, 1);
        if (pZip->m_pWrite(pZip->m_pIO_opaque, pZip->m_archive_size, hdr, MZ_ZIP64_END_OF_CENTRAL_DIR_LOCATOR_SIZE) != MZ_ZIP64_END_OF_CENTRAL_DIR_LOCATOR_SIZE)
            return mz_zip_set_error(pZip, MZ_ZIP_FILE_WRITE_FAILED);
        pZip->m_archive_size += MZ_ZIP64_END_OF_CENTRAL_DIR_LOCATOR_SIZE;
    }
    /* Write end of central directory record. Counts/sizes that overflow the
       classic 16/32-bit fields are clamped to the max sentinel values
       (readers then fall back to the zip64 records written above). */
    MZ_CLEAR_ARR(hdr);
    MZ_WRITE_LE32(hdr + MZ_ZIP_ECDH_SIG_OFS, MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIG);
    MZ_WRITE_LE16(hdr + MZ_ZIP_ECDH_CDIR_NUM_ENTRIES_ON_DISK_OFS, MZ_MIN(MZ_UINT16_MAX, pZip->m_total_files));
    MZ_WRITE_LE16(hdr + MZ_ZIP_ECDH_CDIR_TOTAL_ENTRIES_OFS, MZ_MIN(MZ_UINT16_MAX, pZip->m_total_files));
    MZ_WRITE_LE32(hdr + MZ_ZIP_ECDH_CDIR_SIZE_OFS, MZ_MIN(MZ_UINT32_MAX, central_dir_size));
    MZ_WRITE_LE32(hdr + MZ_ZIP_ECDH_CDIR_OFS_OFS, MZ_MIN(MZ_UINT32_MAX, central_dir_ofs));
    if (pZip->m_pWrite(pZip->m_pIO_opaque, pZip->m_archive_size, hdr, MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE) != MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE)
        return mz_zip_set_error(pZip, MZ_ZIP_FILE_WRITE_FAILED);
#ifndef MINIZ_NO_STDIO
    if ((pState->m_pFile) && (MZ_FFLUSH(pState->m_pFile) == EOF))
        return mz_zip_set_error(pZip, MZ_ZIP_FILE_CLOSE_FAILED);
#endif /* #ifndef MINIZ_NO_STDIO */
    pZip->m_archive_size += MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE;
    pZip->m_zip_mode = MZ_ZIP_MODE_WRITING_HAS_BEEN_FINALIZED;
    return MZ_TRUE;
}
/* Return type of the next function; its signature continues on the following chunk line. */
mz_bool
mz_zip_writer_finalize_heap_archive(mz_zip_archive *pZip, void **ppBuf, size_t *pSize)
{
    /* (Return type `mz_bool` is at the end of the previous chunk line.)
       Finalizes a heap-backed archive and transfers ownership of the heap
       block to the caller: *ppBuf/*pSize receive the memory and its size,
       and the internal state drops its pointer/sizes so later cleanup in
       mz_zip_writer_end() will not free the returned block. */
    if ((!ppBuf) || (!pSize))
        return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER);
    *ppBuf = NULL;
    *pSize = 0;
    if ((!pZip) || (!pZip->m_pState))
        return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER);
    /* Only valid for archives that write through the heap write callback. */
    if (pZip->m_pWrite != mz_zip_heap_write_func)
        return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER);
    if (!mz_zip_writer_finalize_archive(pZip))
        return MZ_FALSE;
    *ppBuf = pZip->m_pState->m_pMem;
    *pSize = pZip->m_pState->m_mem_size;
    pZip->m_pState->m_pMem = NULL;
    pZip->m_pState->m_mem_size = pZip->m_pState->m_mem_capacity = 0;
    return MZ_TRUE;
}

/* Public wrapper around mz_zip_writer_end_internal(pZip, MZ_TRUE). */
mz_bool mz_zip_writer_end(mz_zip_archive *pZip)
{
    return mz_zip_writer_end_internal(pZip, MZ_TRUE);
}

#ifndef MINIZ_NO_STDIO
/* Convenience wrapper for the _v2 variant below, with no error reporting. */
mz_bool mz_zip_add_mem_to_archive_file_in_place(const char *pZip_filename, const char *pArchive_name, const void *pBuf, size_t buf_size, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags)
{
    return mz_zip_add_mem_to_archive_file_in_place_v2(pZip_filename, pArchive_name, pBuf, buf_size, pComment, comment_size, level_and_flags, NULL);
}

/* Adds one in-memory buffer (pBuf/buf_size, optional pComment) as entry
   pArchive_name to the on-disk archive pZip_filename: creates a new archive
   when stat() fails, otherwise appends in place. Not atomic — see the notes
   in miniz.h. A newly created archive is deleted again if anything fails.
   pErr (optional) receives the first error encountered. */
mz_bool mz_zip_add_mem_to_archive_file_in_place_v2(const char *pZip_filename, const char *pArchive_name, const void *pBuf, size_t buf_size, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags, mz_zip_error *pErr)
{
    mz_bool status, created_new_archive = MZ_FALSE;
    mz_zip_archive zip_archive;
    struct MZ_FILE_STAT_STRUCT file_stat;
    mz_zip_error actual_err = MZ_ZIP_NO_ERROR;
    mz_zip_zero_struct(&zip_archive);
    if ((int)level_and_flags < 0)
        level_and_flags = MZ_DEFAULT_LEVEL;
    /* Validate arguments; the low nibble of level_and_flags is the compression level. */
    if ((!pZip_filename) || (!pArchive_name) || ((buf_size) && (!pBuf)) || ((comment_size) && (!pComment)) || ((level_and_flags & 0xF) > MZ_UBER_COMPRESSION))
    {
        if (pErr)
            *pErr = MZ_ZIP_INVALID_PARAMETER;
        return MZ_FALSE;
    }
    if (!mz_zip_writer_validate_archive_name(pArchive_name))
    {
        if (pErr)
            *pErr = MZ_ZIP_INVALID_FILENAME;
        return MZ_FALSE;
    }
    /* Important: The regular non-64 bit version of stat() can fail here if the file is very large, which could cause the archive to be overwritten. */
    /* So be sure to compile with _LARGEFILE64_SOURCE 1 */
    if (MZ_FILE_STAT(pZip_filename, &file_stat) != 0)
    {
        /* Create a new archive. */
        if (!mz_zip_writer_init_file_v2(&zip_archive, pZip_filename, 0, level_and_flags))
        {
            if (pErr)
                *pErr = zip_archive.m_last_error;
            return MZ_FALSE;
        }
        created_new_archive = MZ_TRUE;
    }
    else
    {
        /* Append to an existing archive: open it as a reader first, then
           convert the reader into a writer in place. */
        if (!mz_zip_reader_init_file_v2(&zip_archive, pZip_filename, level_and_flags | MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY, 0, 0))
        {
            if (pErr)
                *pErr = zip_archive.m_last_error;
            return MZ_FALSE;
        }
        if (!mz_zip_writer_init_from_reader_v2(&zip_archive, pZip_filename, level_and_flags))
        {
            if (pErr)
                *pErr = zip_archive.m_last_error;
            mz_zip_reader_end_internal(&zip_archive, MZ_FALSE);
            return MZ_FALSE;
        }
    }
    status = mz_zip_writer_add_mem_ex(&zip_archive, pArchive_name, pBuf, buf_size, pComment, comment_size, level_and_flags, 0, 0);
    actual_err = zip_archive.m_last_error;
    /* Always finalize, even if adding failed for some reason, so we have a valid central directory. (This may not always succeed, but we can try.) */
    if (!mz_zip_writer_finalize_archive(&zip_archive))
    {
        if (!actual_err)
            actual_err = zip_archive.m_last_error;
        status = MZ_FALSE;
    }
    if (!mz_zip_writer_end_internal(&zip_archive, status))
    {
        if (!actual_err)
            actual_err = zip_archive.m_last_error;
        status = MZ_FALSE;
    }
    if ((!status) && (created_new_archive))
    {
        /* It's a new archive and something went wrong, so just delete it.
*/ int ignoredStatus = MZ_DELETE_FILE(pZip_filename); (void)ignoredStatus; } if (pErr) *pErr = actual_err; return status; } void *mz_zip_extract_archive_file_to_heap_v2(const char *pZip_filename, const char *pArchive_name, const char *pComment, size_t *pSize, mz_uint flags, mz_zip_error *pErr) { mz_uint32 file_index; mz_zip_archive zip_archive; void *p = NULL; if (pSize) *pSize = 0; if ((!pZip_filename) || (!pArchive_name)) { if (pErr) *pErr = MZ_ZIP_INVALID_PARAMETER; return NULL; } mz_zip_zero_struct(&zip_archive); if (!mz_zip_reader_init_file_v2(&zip_archive, pZip_filename, flags | MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY, 0, 0)) { if (pErr) *pErr = zip_archive.m_last_error; return NULL; } if (mz_zip_reader_locate_file_v2(&zip_archive, pArchive_name, pComment, flags, &file_index)) { p = mz_zip_reader_extract_to_heap(&zip_archive, file_index, pSize, flags); } mz_zip_reader_end_internal(&zip_archive, p != NULL); if (pErr) *pErr = zip_archive.m_last_error; return p; } void *mz_zip_extract_archive_file_to_heap(const char *pZip_filename, const char *pArchive_name, size_t *pSize, mz_uint flags) { return mz_zip_extract_archive_file_to_heap_v2(pZip_filename, pArchive_name, NULL, pSize, flags, NULL); } #endif /* #ifndef MINIZ_NO_STDIO */ #endif /* #ifndef MINIZ_NO_ARCHIVE_WRITING_APIS */ /* ------------------- Misc utils */ mz_zip_mode mz_zip_get_mode(mz_zip_archive *pZip) { return pZip ? pZip->m_zip_mode : MZ_ZIP_MODE_INVALID; } mz_zip_type mz_zip_get_type(mz_zip_archive *pZip) { return pZip ? 
pZip->m_zip_type : MZ_ZIP_TYPE_INVALID; }

/* Stores err_num as the archive's last error and returns the previous value;
   safe to call with pZip == NULL. */
mz_zip_error mz_zip_set_last_error(mz_zip_archive *pZip, mz_zip_error err_num)
{
    mz_zip_error prev_err;
    if (!pZip)
        return MZ_ZIP_INVALID_PARAMETER;
    prev_err = pZip->m_last_error;
    pZip->m_last_error = err_num;
    return prev_err;
}

/* Returns the last error without clearing it. */
mz_zip_error mz_zip_peek_last_error(mz_zip_archive *pZip)
{
    if (!pZip)
        return MZ_ZIP_INVALID_PARAMETER;
    return pZip->m_last_error;
}

/* Resets the last error to MZ_ZIP_NO_ERROR; returns the previous value. */
mz_zip_error mz_zip_clear_last_error(mz_zip_archive *pZip)
{
    return mz_zip_set_last_error(pZip, MZ_ZIP_NO_ERROR);
}

/* Read-and-reset: returns the last error and clears it. */
mz_zip_error mz_zip_get_last_error(mz_zip_archive *pZip)
{
    mz_zip_error prev_err;
    if (!pZip)
        return MZ_ZIP_INVALID_PARAMETER;
    prev_err = pZip->m_last_error;
    pZip->m_last_error = MZ_ZIP_NO_ERROR;
    return prev_err;
}

/* Maps an mz_zip_error code to a static, human-readable description. */
const char *mz_zip_get_error_string(mz_zip_error mz_err)
{
    switch (mz_err)
    {
        case MZ_ZIP_NO_ERROR:
            return "no error";
        case MZ_ZIP_UNDEFINED_ERROR:
            return "undefined error";
        case MZ_ZIP_TOO_MANY_FILES:
            return "too many files";
        case MZ_ZIP_FILE_TOO_LARGE:
            return "file too large";
        case MZ_ZIP_UNSUPPORTED_METHOD:
            return "unsupported method";
        case MZ_ZIP_UNSUPPORTED_ENCRYPTION:
            return "unsupported encryption";
        case MZ_ZIP_UNSUPPORTED_FEATURE:
            return "unsupported feature";
        case MZ_ZIP_FAILED_FINDING_CENTRAL_DIR:
            return "failed finding central directory";
        case MZ_ZIP_NOT_AN_ARCHIVE:
            return "not a ZIP archive";
        case MZ_ZIP_INVALID_HEADER_OR_CORRUPTED:
            return "invalid header or archive is corrupted";
        case MZ_ZIP_UNSUPPORTED_MULTIDISK:
            return "unsupported multidisk archive";
        case MZ_ZIP_DECOMPRESSION_FAILED:
            return "decompression failed or archive is corrupted";
        case MZ_ZIP_COMPRESSION_FAILED:
            return "compression failed";
        case MZ_ZIP_UNEXPECTED_DECOMPRESSED_SIZE:
            return "unexpected decompressed size";
        case MZ_ZIP_CRC_CHECK_FAILED:
            return "CRC-32 check failed";
        case MZ_ZIP_UNSUPPORTED_CDIR_SIZE:
            return "unsupported central directory size";
        case MZ_ZIP_ALLOC_FAILED:
            return "allocation failed";
        case MZ_ZIP_FILE_OPEN_FAILED:
            return "file open failed";
        case MZ_ZIP_FILE_CREATE_FAILED:
            return "file create failed";
        case MZ_ZIP_FILE_WRITE_FAILED:
            return "file write failed";
        case MZ_ZIP_FILE_READ_FAILED:
            return "file read failed";
        case MZ_ZIP_FILE_CLOSE_FAILED:
            return "file close failed";
        case MZ_ZIP_FILE_SEEK_FAILED:
            return "file seek failed";
        case MZ_ZIP_FILE_STAT_FAILED:
            return "file stat failed";
        case MZ_ZIP_INVALID_PARAMETER:
            return "invalid parameter";
        case MZ_ZIP_INVALID_FILENAME:
            return "invalid filename";
        case MZ_ZIP_BUF_TOO_SMALL:
            return "buffer too small";
        case MZ_ZIP_INTERNAL_ERROR:
            return "internal error";
        case MZ_ZIP_FILE_NOT_FOUND:
            return "file not found";
        case MZ_ZIP_ARCHIVE_TOO_LARGE:
            return "archive is too large";
        case MZ_ZIP_VALIDATION_FAILED:
            return "validation failed";
        case MZ_ZIP_WRITE_CALLBACK_FAILED:
            return "write callback failed";
        case MZ_ZIP_TOTAL_ERRORS:
            return "total errors";
        default:
            break;
    }
    return "unknown error";
}

/* Note: Just because the archive is not zip64 doesn't necessarily mean it doesn't have Zip64 extended information extra field, argh. */
mz_bool mz_zip_is_zip64(mz_zip_archive *pZip)
{
    if ((!pZip) || (!pZip->m_pState))
        return MZ_FALSE;
    return pZip->m_pState->m_zip64;
}

/* Size in bytes of the in-memory central directory image (0 if not open). */
size_t mz_zip_get_central_dir_size(mz_zip_archive *pZip)
{
    if ((!pZip) || (!pZip->m_pState))
        return 0;
    return pZip->m_pState->m_central_dir.m_size;
}

/* Number of entries in the archive, 0 when pZip is NULL (statement completes on the next chunk line). */
mz_uint mz_zip_reader_get_num_files(mz_zip_archive *pZip) { return pZip ?
pZip->m_total_files : 0; } mz_uint64 mz_zip_get_archive_size(mz_zip_archive *pZip) { if (!pZip) return 0; return pZip->m_archive_size; } mz_uint64 mz_zip_get_archive_file_start_offset(mz_zip_archive *pZip) { if ((!pZip) || (!pZip->m_pState)) return 0; return pZip->m_pState->m_file_archive_start_ofs; } MZ_FILE *mz_zip_get_cfile(mz_zip_archive *pZip) { if ((!pZip) || (!pZip->m_pState)) return 0; return pZip->m_pState->m_pFile; } size_t mz_zip_read_archive_data(mz_zip_archive *pZip, mz_uint64 file_ofs, void *pBuf, size_t n) { if ((!pZip) || (!pZip->m_pState) || (!pBuf) || (!pZip->m_pRead)) return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); return pZip->m_pRead(pZip->m_pIO_opaque, file_ofs, pBuf, n); } mz_uint mz_zip_reader_get_filename(mz_zip_archive *pZip, mz_uint file_index, char *pFilename, mz_uint filename_buf_size) { mz_uint n; const mz_uint8 *p = mz_zip_get_cdh(pZip, file_index); if (!p) { if (filename_buf_size) pFilename[0] = '\0'; mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); return 0; } n = MZ_READ_LE16(p + MZ_ZIP_CDH_FILENAME_LEN_OFS); if (filename_buf_size) { n = MZ_MIN(n, filename_buf_size - 1); memcpy(pFilename, p + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE, n); pFilename[n] = '\0'; } return n + 1; } mz_bool mz_zip_reader_file_stat(mz_zip_archive *pZip, mz_uint file_index, mz_zip_archive_file_stat *pStat) { return mz_zip_file_stat_internal(pZip, file_index, mz_zip_get_cdh(pZip, file_index), pStat, NULL); } mz_bool mz_zip_end(mz_zip_archive *pZip) { if (!pZip) return MZ_FALSE; if (pZip->m_zip_mode == MZ_ZIP_MODE_READING) return mz_zip_reader_end(pZip); #ifndef MINIZ_NO_ARCHIVE_WRITING_APIS else if ((pZip->m_zip_mode == MZ_ZIP_MODE_WRITING) || (pZip->m_zip_mode == MZ_ZIP_MODE_WRITING_HAS_BEEN_FINALIZED)) return mz_zip_writer_end(pZip); #endif return MZ_FALSE; } #ifdef __cplusplus } #endif #endif /*#ifndef MINIZ_NO_ARCHIVE_APIS*/ 
luametatex-2.10.08/source/libraries/miniz/miniz.h000066400000000000000000002127201442250314700217420ustar00rootroot00000000000000#ifndef MINIZ_EXPORT #define MINIZ_EXPORT #endif /* miniz.c 3.0.0 - public domain deflate/inflate, zlib-subset, ZIP reading/writing/appending, PNG writing See "unlicense" statement at the end of this file. Rich Geldreich , last updated Oct. 13, 2013 Implements RFC 1950: http://www.ietf.org/rfc/rfc1950.txt and RFC 1951: http://www.ietf.org/rfc/rfc1951.txt Most API's defined in miniz.c are optional. For example, to disable the archive related functions just define MINIZ_NO_ARCHIVE_APIS, or to get rid of all stdio usage define MINIZ_NO_STDIO (see the list below for more macros). * Low-level Deflate/Inflate implementation notes: Compression: Use the "tdefl" API's. The compressor supports raw, static, and dynamic blocks, lazy or greedy parsing, match length filtering, RLE-only, and Huffman-only streams. It performs and compresses approximately as well as zlib. Decompression: Use the "tinfl" API's. The entire decompressor is implemented as a single function coroutine: see tinfl_decompress(). It supports decompression into a 32KB (or larger power of 2) wrapping buffer, or into a memory block large enough to hold the entire file. The low-level tdefl/tinfl API's do not make any use of dynamic memory allocation. * zlib-style API notes: miniz.c implements a fairly large subset of zlib. There's enough functionality present for it to be a drop-in zlib replacement in many apps: The z_stream struct, optional memory allocation callbacks deflateInit/deflateInit2/deflate/deflateReset/deflateEnd/deflateBound inflateInit/inflateInit2/inflate/inflateReset/inflateEnd compress, compress2, compressBound, uncompress CRC-32, Adler-32 - Using modern, minimal code size, CPU cache friendly routines. Supports raw deflate streams or standard zlib streams with adler-32 checking. Limitations: The callback API's are not implemented yet. 
No support for gzip headers or zlib static dictionaries. I've tried to closely emulate zlib's various flavors of stream flushing and return status codes, but there are no guarantees that miniz.c pulls this off perfectly. * PNG writing: See the tdefl_write_image_to_png_file_in_memory() function, originally written by Alex Evans. Supports 1-4 bytes/pixel images. * ZIP archive API notes: The ZIP archive API's were designed with simplicity and efficiency in mind, with just enough abstraction to get the job done with minimal fuss. There are simple API's to retrieve file information, read files from existing archives, create new archives, append new files to existing archives, or clone archive data from one archive to another. It supports archives located in memory or the heap, on disk (using stdio.h), or you can specify custom file read/write callbacks. - Archive reading: Just call this function to read a single file from a disk archive: void *mz_zip_extract_archive_file_to_heap(const char *pZip_filename, const char *pArchive_name, size_t *pSize, mz_uint zip_flags); For more complex cases, use the "mz_zip_reader" functions. Upon opening an archive, the entire central directory is located and read as-is into memory, and subsequent file access only occurs when reading individual files. - Archive file scanning: The simple way is to use this function to scan a loaded archive for a specific file: int mz_zip_reader_locate_file(mz_zip_archive *pZip, const char *pName, const char *pComment, mz_uint flags); The locate operation can optionally check file comments too, which (as one example) can be used to identify multiple versions of the same file in an archive. This function uses a simple linear search through the central directory, so it's not very fast. Alternately, you can iterate through all the files in an archive (using mz_zip_reader_get_num_files()) and retrieve detailed info on each file by calling mz_zip_reader_file_stat(). 
- Archive creation: Use the "mz_zip_writer" functions. The ZIP writer immediately writes compressed file data to disk and builds an exact image of the central directory in memory. The central directory image is written all at once at the end of the archive file when the archive is finalized. The archive writer can optionally align each file's local header and file data to any power of 2 alignment, which can be useful when the archive will be read from optical media. Also, the writer supports placing arbitrary data blobs at the very beginning of ZIP archives. Archives written using either feature are still readable by any ZIP tool. - Archive appending: The simple way to add a single file to an archive is to call this function: mz_bool mz_zip_add_mem_to_archive_file_in_place(const char *pZip_filename, const char *pArchive_name, const void *pBuf, size_t buf_size, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags); The archive will be created if it doesn't already exist, otherwise it'll be appended to. Note the appending is done in-place and is not an atomic operation, so if something goes wrong during the operation it's possible the archive could be left without a central directory (although the local file headers and file data will be fine, so the archive will be recoverable). For more complex archive modification scenarios: 1. The safest way is to use a mz_zip_reader to read the existing archive, cloning only those bits you want to preserve into a new archive using the mz_zip_writer_add_from_zip_reader() function (which copies the compressed file data as-is). When you're done, delete the old archive and rename the newly written archive, and you're done. This is safe but requires a bunch of temporary disk space or heap memory. 2. 
Or, you can convert an mz_zip_reader in-place to an mz_zip_writer using mz_zip_writer_init_from_reader(), append new files as needed, then finalize the archive which will write an updated central directory to the original archive. (This is basically what mz_zip_add_mem_to_archive_file_in_place() does.) There's a possibility that the archive's central directory could be lost with this method if anything goes wrong, though. - ZIP archive support limitations: No spanning support. Extraction functions can only handle unencrypted, stored or deflated files. Requires streams capable of seeking. * This is a header file library, like stb_image.c. To get only a header file, either cut and paste the below header, or create miniz.h, #define MINIZ_HEADER_FILE_ONLY, and then include miniz.c from it. * Important: For best perf. be sure to customize the below macros for your target platform: #define MINIZ_USE_UNALIGNED_LOADS_AND_STORES 1 #define MINIZ_LITTLE_ENDIAN 1 #define MINIZ_HAS_64BIT_REGISTERS 1 * On platforms using glibc, Be sure to "#define _LARGEFILE64_SOURCE 1" before including miniz.c to ensure miniz uses the 64-bit variants: fopen64(), stat64(), etc. Otherwise you won't be able to process large files (i.e. 32-bit stat() fails for me on files > 0x7FFFFFFF bytes). */ #pragma once /* Defines to completely disable specific portions of miniz.c: If all macros here are defined the only functionality remaining will be CRC-32 and adler-32. */ /* Define MINIZ_NO_STDIO to disable all usage and any functions which rely on stdio for file I/O. */ /*#define MINIZ_NO_STDIO */ /* If MINIZ_NO_TIME is specified then the ZIP archive functions will not be able to get the current time, or */ /* get/set file times, and the C run-time funcs that get/set times won't be called. */ /* The current downside is the times written to your archives will be from 1979. */ /*#define MINIZ_NO_TIME */ /* Define MINIZ_NO_DEFLATE_APIS to disable all compression API's. 
*/ /*#define MINIZ_NO_DEFLATE_APIS */ /* Define MINIZ_NO_INFLATE_APIS to disable all decompression API's. */ /*#define MINIZ_NO_INFLATE_APIS */ /* Define MINIZ_NO_ARCHIVE_APIS to disable all ZIP archive API's. */ /*#define MINIZ_NO_ARCHIVE_APIS */ /* Define MINIZ_NO_ARCHIVE_WRITING_APIS to disable all writing related ZIP archive API's. */ /*#define MINIZ_NO_ARCHIVE_WRITING_APIS */ /* Define MINIZ_NO_ZLIB_APIS to remove all ZLIB-style compression/decompression API's. */ /*#define MINIZ_NO_ZLIB_APIS */ /* Define MINIZ_NO_ZLIB_COMPATIBLE_NAME to disable zlib names, to prevent conflicts against stock zlib. */ /*#define MINIZ_NO_ZLIB_COMPATIBLE_NAMES */ /* Define MINIZ_NO_MALLOC to disable all calls to malloc, free, and realloc. Note if MINIZ_NO_MALLOC is defined then the user must always provide custom user alloc/free/realloc callbacks to the zlib and archive API's, and a few stand-alone helper API's which don't provide custom user functions (such as tdefl_compress_mem_to_heap() and tinfl_decompress_mem_to_heap()) won't work. */ /*#define MINIZ_NO_MALLOC */ #ifdef MINIZ_NO_INFLATE_APIS #define MINIZ_NO_ARCHIVE_APIS #endif #ifdef MINIZ_NO_DEFLATE_APIS #define MINIZ_NO_ARCHIVE_WRITING_APIS #endif #if defined(__TINYC__) && (defined(__linux) || defined(__linux__)) /* TODO: Work around "error: include file 'sys\utime.h' when compiling with tcc on Linux */ #define MINIZ_NO_TIME #endif #include #if !defined(MINIZ_NO_TIME) && !defined(MINIZ_NO_ARCHIVE_APIS) #include #endif #if defined(_M_IX86) || defined(_M_X64) || defined(__i386__) || defined(__i386) || defined(__i486__) || defined(__i486) || defined(i386) || defined(__ia64__) || defined(__x86_64__) /* MINIZ_X86_OR_X64_CPU is only used to help set the below macros. 
*/ #define MINIZ_X86_OR_X64_CPU 1 #else #define MINIZ_X86_OR_X64_CPU 0 #endif /* Set MINIZ_LITTLE_ENDIAN only if not set */ #if !defined(MINIZ_LITTLE_ENDIAN) #if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) #if (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) /* Set MINIZ_LITTLE_ENDIAN to 1 if the processor is little endian. */ #define MINIZ_LITTLE_ENDIAN 1 #else #define MINIZ_LITTLE_ENDIAN 0 #endif #else #if MINIZ_X86_OR_X64_CPU #define MINIZ_LITTLE_ENDIAN 1 #else #define MINIZ_LITTLE_ENDIAN 0 #endif #endif #endif /* Using unaligned loads and stores causes errors when using UBSan */ #if defined(__has_feature) #if __has_feature(undefined_behavior_sanitizer) #define MINIZ_USE_UNALIGNED_LOADS_AND_STORES 0 #endif #endif /* Set MINIZ_USE_UNALIGNED_LOADS_AND_STORES only if not set */ #if !defined(MINIZ_USE_UNALIGNED_LOADS_AND_STORES) #if MINIZ_X86_OR_X64_CPU /* Set MINIZ_USE_UNALIGNED_LOADS_AND_STORES to 1 on CPU's that permit efficient integer loads and stores from unaligned addresses. */ #define MINIZ_USE_UNALIGNED_LOADS_AND_STORES 0 #define MINIZ_UNALIGNED_USE_MEMCPY #else #define MINIZ_USE_UNALIGNED_LOADS_AND_STORES 0 #endif #endif #if defined(_M_X64) || defined(_WIN64) || defined(__MINGW64__) || defined(_LP64) || defined(__LP64__) || defined(__ia64__) || defined(__x86_64__) /* Set MINIZ_HAS_64BIT_REGISTERS to 1 if operations on 64-bit integers are reasonably fast (and don't involve compiler generated calls to helper functions). */ #define MINIZ_HAS_64BIT_REGISTERS 1 #else #define MINIZ_HAS_64BIT_REGISTERS 0 #endif #ifdef __cplusplus extern "C" { #endif /* ------------------- zlib-style API Definitions. */ /* For more compatibility with zlib, miniz.c uses unsigned long for some parameters/struct members. Beware: mz_ulong can be either 32 or 64-bits! */ typedef unsigned long mz_ulong; /* mz_free() internally uses the MZ_FREE() macro (which by default calls free() unless you've modified the MZ_MALLOC macro) to release a block allocated from the heap. 
*/ MINIZ_EXPORT void mz_free(void *p); #define MZ_ADLER32_INIT (1) /* mz_adler32() returns the initial adler-32 value to use when called with ptr==NULL. */ MINIZ_EXPORT mz_ulong mz_adler32(mz_ulong adler, const unsigned char *ptr, size_t buf_len); #define MZ_CRC32_INIT (0) /* mz_crc32() returns the initial CRC-32 value to use when called with ptr==NULL. */ MINIZ_EXPORT mz_ulong mz_crc32(mz_ulong crc, const unsigned char *ptr, size_t buf_len); /* Compression strategies. */ enum { MZ_DEFAULT_STRATEGY = 0, MZ_FILTERED = 1, MZ_HUFFMAN_ONLY = 2, MZ_RLE = 3, MZ_FIXED = 4 }; /* Method */ #define MZ_DEFLATED 8 /* Heap allocation callbacks. Note that mz_alloc_func parameter types purposely differ from zlib's: items/size is size_t, not unsigned long. */ typedef void *(*mz_alloc_func)(void *opaque, size_t items, size_t size); typedef void (*mz_free_func)(void *opaque, void *address); typedef void *(*mz_realloc_func)(void *opaque, void *address, size_t items, size_t size); /* Compression levels: 0-9 are the standard zlib-style levels, 10 is best possible compression (not zlib compatible, and may be very slow), MZ_DEFAULT_COMPRESSION=MZ_DEFAULT_LEVEL. */ enum { MZ_NO_COMPRESSION = 0, MZ_BEST_SPEED = 1, MZ_BEST_COMPRESSION = 9, MZ_UBER_COMPRESSION = 10, MZ_DEFAULT_LEVEL = 6, MZ_DEFAULT_COMPRESSION = -1 }; #define MZ_VERSION "11.0.1" #define MZ_VERNUM 0xB001 #define MZ_VER_MAJOR 11 #define MZ_VER_MINOR 1 #define MZ_VER_REVISION 0 #define MZ_VER_SUBREVISION 0 #ifndef MINIZ_NO_ZLIB_APIS /* Flush values. For typical usage you only need MZ_NO_FLUSH and MZ_FINISH. The other values are for advanced use (refer to the zlib docs). */ enum { MZ_NO_FLUSH = 0, MZ_PARTIAL_FLUSH = 1, MZ_SYNC_FLUSH = 2, MZ_FULL_FLUSH = 3, MZ_FINISH = 4, MZ_BLOCK = 5 }; /* Return status codes. MZ_PARAM_ERROR is non-standard. 
*/ enum { MZ_OK = 0, MZ_STREAM_END = 1, MZ_NEED_DICT = 2, MZ_ERRNO = -1, MZ_STREAM_ERROR = -2, MZ_DATA_ERROR = -3, MZ_MEM_ERROR = -4, MZ_BUF_ERROR = -5, MZ_VERSION_ERROR = -6, MZ_PARAM_ERROR = -10000 }; /* Window bits */ #define MZ_DEFAULT_WINDOW_BITS 15 struct mz_internal_state; /* Compression/decompression stream struct. */ typedef struct mz_stream_s { const unsigned char *next_in; /* pointer to next byte to read */ unsigned int avail_in; /* number of bytes available at next_in */ mz_ulong total_in; /* total number of bytes consumed so far */ unsigned char *next_out; /* pointer to next byte to write */ unsigned int avail_out; /* number of bytes that can be written to next_out */ mz_ulong total_out; /* total number of bytes produced so far */ char *msg; /* error msg (unused) */ struct mz_internal_state *state; /* internal state, allocated by zalloc/zfree */ mz_alloc_func zalloc; /* optional heap allocation function (defaults to malloc) */ mz_free_func zfree; /* optional heap free function (defaults to free) */ void *opaque; /* heap alloc function user pointer */ int data_type; /* data_type (unused) */ mz_ulong adler; /* adler32 of the source or uncompressed data */ mz_ulong reserved; /* not used */ } mz_stream; typedef mz_stream *mz_streamp; /* Returns the version string of miniz.c. */ MINIZ_EXPORT const char *mz_version(void); #ifndef MINIZ_NO_DEFLATE_APIS /* mz_deflateInit() initializes a compressor with default options: */ /* Parameters: */ /* pStream must point to an initialized mz_stream struct. */ /* level must be between [MZ_NO_COMPRESSION, MZ_BEST_COMPRESSION]. */ /* level 1 enables a specially optimized compression function that's been optimized purely for performance, not ratio. */ /* (This special func. is currently only enabled when MINIZ_USE_UNALIGNED_LOADS_AND_STORES and MINIZ_LITTLE_ENDIAN are defined.) */ /* Return values: */ /* MZ_OK on success. */ /* MZ_STREAM_ERROR if the stream is bogus. */ /* MZ_PARAM_ERROR if the input parameters are bogus. 
*/ /* MZ_MEM_ERROR on out of memory. */ MINIZ_EXPORT int mz_deflateInit(mz_streamp pStream, int level); /* mz_deflateInit2() is like mz_deflate(), except with more control: */ /* Additional parameters: */ /* method must be MZ_DEFLATED */ /* window_bits must be MZ_DEFAULT_WINDOW_BITS (to wrap the deflate stream with zlib header/adler-32 footer) or -MZ_DEFAULT_WINDOW_BITS (raw deflate/no header or footer) */ /* mem_level must be between [1, 9] (it's checked but ignored by miniz.c) */ MINIZ_EXPORT int mz_deflateInit2(mz_streamp pStream, int level, int method, int window_bits, int mem_level, int strategy); /* Quickly resets a compressor without having to reallocate anything. Same as calling mz_deflateEnd() followed by mz_deflateInit()/mz_deflateInit2(). */ MINIZ_EXPORT int mz_deflateReset(mz_streamp pStream); /* mz_deflate() compresses the input to output, consuming as much of the input and producing as much output as possible. */ /* Parameters: */ /* pStream is the stream to read from and write to. You must initialize/update the next_in, avail_in, next_out, and avail_out members. */ /* flush may be MZ_NO_FLUSH, MZ_PARTIAL_FLUSH/MZ_SYNC_FLUSH, MZ_FULL_FLUSH, or MZ_FINISH. */ /* Return values: */ /* MZ_OK on success (when flushing, or if more input is needed but not available, and/or there's more output to be written but the output buffer is full). */ /* MZ_STREAM_END if all input has been consumed and all output bytes have been written. Don't call mz_deflate() on the stream anymore. */ /* MZ_STREAM_ERROR if the stream is bogus. */ /* MZ_PARAM_ERROR if one of the parameters is invalid. */ /* MZ_BUF_ERROR if no forward progress is possible because the input and/or output buffers are empty. (Fill up the input buffer or free up some output space and try again.) */ MINIZ_EXPORT int mz_deflate(mz_streamp pStream, int flush); /* mz_deflateEnd() deinitializes a compressor: */ /* Return values: */ /* MZ_OK on success. */ /* MZ_STREAM_ERROR if the stream is bogus. 
*/ MINIZ_EXPORT int mz_deflateEnd(mz_streamp pStream); /* mz_deflateBound() returns a (very) conservative upper bound on the amount of data that could be generated by deflate(), assuming flush is set to only MZ_NO_FLUSH or MZ_FINISH. */ MINIZ_EXPORT mz_ulong mz_deflateBound(mz_streamp pStream, mz_ulong source_len); /* Single-call compression functions mz_compress() and mz_compress2(): */ /* Returns MZ_OK on success, or one of the error codes from mz_deflate() on failure. */ MINIZ_EXPORT int mz_compress(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len); MINIZ_EXPORT int mz_compress2(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len, int level); /* mz_compressBound() returns a (very) conservative upper bound on the amount of data that could be generated by calling mz_compress(). */ MINIZ_EXPORT mz_ulong mz_compressBound(mz_ulong source_len); #endif /*#ifndef MINIZ_NO_DEFLATE_APIS*/ #ifndef MINIZ_NO_INFLATE_APIS /* Initializes a decompressor. */ MINIZ_EXPORT int mz_inflateInit(mz_streamp pStream); /* mz_inflateInit2() is like mz_inflateInit() with an additional option that controls the window size and whether or not the stream has been wrapped with a zlib header/footer: */ /* window_bits must be MZ_DEFAULT_WINDOW_BITS (to parse zlib header/footer) or -MZ_DEFAULT_WINDOW_BITS (raw deflate). */ MINIZ_EXPORT int mz_inflateInit2(mz_streamp pStream, int window_bits); /* Quickly resets a compressor without having to reallocate anything. Same as calling mz_inflateEnd() followed by mz_inflateInit()/mz_inflateInit2(). */ MINIZ_EXPORT int mz_inflateReset(mz_streamp pStream); /* Decompresses the input stream to the output, consuming only as much of the input as needed, and writing as much to the output as possible. */ /* Parameters: */ /* pStream is the stream to read from and write to. You must initialize/update the next_in, avail_in, next_out, and avail_out members. 
*/
/* flush may be MZ_NO_FLUSH, MZ_SYNC_FLUSH, or MZ_FINISH. */
/* On the first call, if flush is MZ_FINISH it's assumed the input and output buffers are both sized large enough to decompress the entire stream in a single call (this is slightly faster). */
/* MZ_FINISH implies that there are no more source bytes available beside what's already in the input buffer, and that the output buffer is large enough to hold the rest of the decompressed data. */
/* Return values: */
/* MZ_OK on success. Either more input is needed but not available, and/or there's more output to be written but the output buffer is full. */
/* MZ_STREAM_END if all needed input has been consumed and all output bytes have been written. For zlib streams, the adler-32 of the decompressed data has also been verified. */
/* MZ_STREAM_ERROR if the stream is bogus. */
/* MZ_DATA_ERROR if the deflate stream is invalid. */
/* MZ_PARAM_ERROR if one of the parameters is invalid. */
/* MZ_BUF_ERROR if no forward progress is possible because the input buffer is empty but the inflater needs more input to continue, or if the output buffer is not large enough. Call mz_inflate() again */
/* with more input data, or with more room in the output buffer (except when using single call decompression, described above). */
MINIZ_EXPORT int mz_inflate(mz_streamp pStream, int flush);

/* Deinitializes a decompressor. */
MINIZ_EXPORT int mz_inflateEnd(mz_streamp pStream);

/* Single-call decompression. */
/* Returns MZ_OK on success, or one of the error codes from mz_inflate() on failure. */
MINIZ_EXPORT int mz_uncompress(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len);
MINIZ_EXPORT int mz_uncompress2(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong *pSource_len);

#endif /*#ifndef MINIZ_NO_INFLATE_APIS*/

/* Returns a string description of the specified error code, or NULL if the error code is invalid.
*/
MINIZ_EXPORT const char *mz_error(int err);

/* Redefine zlib-compatible names to miniz equivalents, so miniz.c can be used as a drop-in replacement for the subset of zlib that miniz.c supports. */
/* Define MINIZ_NO_ZLIB_COMPATIBLE_NAMES to disable zlib-compatibility if you use zlib in the same project. */
#ifndef MINIZ_NO_ZLIB_COMPATIBLE_NAMES
/* zlib-style basic type aliases. */
typedef unsigned char Byte;
typedef unsigned int uInt;
typedef mz_ulong uLong;
typedef Byte Bytef;
typedef uInt uIntf;
typedef char charf;
typedef int intf;
typedef void *voidpf;
typedef uLong uLongf;
typedef void *voidp;
typedef void *const voidpc;
/* zlib flush/status/strategy constants mapped onto the MZ_* equivalents. */
#define Z_NULL 0
#define Z_NO_FLUSH MZ_NO_FLUSH
#define Z_PARTIAL_FLUSH MZ_PARTIAL_FLUSH
#define Z_SYNC_FLUSH MZ_SYNC_FLUSH
#define Z_FULL_FLUSH MZ_FULL_FLUSH
#define Z_FINISH MZ_FINISH
#define Z_BLOCK MZ_BLOCK
#define Z_OK MZ_OK
#define Z_STREAM_END MZ_STREAM_END
#define Z_NEED_DICT MZ_NEED_DICT
#define Z_ERRNO MZ_ERRNO
#define Z_STREAM_ERROR MZ_STREAM_ERROR
#define Z_DATA_ERROR MZ_DATA_ERROR
#define Z_MEM_ERROR MZ_MEM_ERROR
#define Z_BUF_ERROR MZ_BUF_ERROR
#define Z_VERSION_ERROR MZ_VERSION_ERROR
#define Z_PARAM_ERROR MZ_PARAM_ERROR
#define Z_NO_COMPRESSION MZ_NO_COMPRESSION
#define Z_BEST_SPEED MZ_BEST_SPEED
#define Z_BEST_COMPRESSION MZ_BEST_COMPRESSION
#define Z_DEFAULT_COMPRESSION MZ_DEFAULT_COMPRESSION
#define Z_DEFAULT_STRATEGY MZ_DEFAULT_STRATEGY
#define Z_FILTERED MZ_FILTERED
#define Z_HUFFMAN_ONLY MZ_HUFFMAN_ONLY
#define Z_RLE MZ_RLE
#define Z_FIXED MZ_FIXED
#define Z_DEFLATED MZ_DEFLATED
#define Z_DEFAULT_WINDOW_BITS MZ_DEFAULT_WINDOW_BITS
#define alloc_func mz_alloc_func
#define free_func mz_free_func
#define internal_state mz_internal_state
#define z_stream mz_stream
#ifndef MINIZ_NO_DEFLATE_APIS
#define deflateInit mz_deflateInit
#define deflateInit2 mz_deflateInit2
#define deflateReset mz_deflateReset
#define deflate mz_deflate
#define deflateEnd mz_deflateEnd
#define deflateBound mz_deflateBound
#define compress mz_compress
#define compress2 mz_compress2
#define compressBound mz_compressBound #endif /*#ifndef MINIZ_NO_DEFLATE_APIS*/ #ifndef MINIZ_NO_INFLATE_APIS #define inflateInit mz_inflateInit #define inflateInit2 mz_inflateInit2 #define inflateReset mz_inflateReset #define inflate mz_inflate #define inflateEnd mz_inflateEnd #define uncompress mz_uncompress #define uncompress2 mz_uncompress2 #endif /*#ifndef MINIZ_NO_INFLATE_APIS*/ #define crc32 mz_crc32 #define adler32 mz_adler32 #define MAX_WBITS 15 #define MAX_MEM_LEVEL 9 #define zError mz_error #define ZLIB_VERSION MZ_VERSION #define ZLIB_VERNUM MZ_VERNUM #define ZLIB_VER_MAJOR MZ_VER_MAJOR #define ZLIB_VER_MINOR MZ_VER_MINOR #define ZLIB_VER_REVISION MZ_VER_REVISION #define ZLIB_VER_SUBREVISION MZ_VER_SUBREVISION #define zlibVersion mz_version #define zlib_version mz_version() #endif /* #ifndef MINIZ_NO_ZLIB_COMPATIBLE_NAMES */ #endif /* MINIZ_NO_ZLIB_APIS */ #ifdef __cplusplus } #endif #pragma once #include #include #include #include /* ------------------- Types and macros */ typedef unsigned char mz_uint8; typedef signed short mz_int16; typedef unsigned short mz_uint16; typedef unsigned int mz_uint32; typedef unsigned int mz_uint; typedef int64_t mz_int64; typedef uint64_t mz_uint64; typedef int mz_bool; #define MZ_FALSE (0) #define MZ_TRUE (1) /* Works around MSVC's spammy "warning C4127: conditional expression is constant" message. 
*/ #ifdef _MSC_VER #define MZ_MACRO_END while (0, 0) #else #define MZ_MACRO_END while (0) #endif #ifdef MINIZ_NO_STDIO #define MZ_FILE void * #else #include #define MZ_FILE FILE #endif /* #ifdef MINIZ_NO_STDIO */ #ifdef MINIZ_NO_TIME typedef struct mz_dummy_time_t_tag { mz_uint32 m_dummy1; mz_uint32 m_dummy2; } mz_dummy_time_t; #define MZ_TIME_T mz_dummy_time_t #else #define MZ_TIME_T time_t #endif #define MZ_ASSERT(x) assert(x) #ifdef MINIZ_NO_MALLOC #define MZ_MALLOC(x) NULL #define MZ_FREE(x) (void)x, ((void)0) #define MZ_REALLOC(p, x) NULL #else #define MZ_MALLOC(x) malloc(x) #define MZ_FREE(x) free(x) #define MZ_REALLOC(p, x) realloc(p, x) #endif #define MZ_MAX(a, b) (((a) > (b)) ? (a) : (b)) #define MZ_MIN(a, b) (((a) < (b)) ? (a) : (b)) #define MZ_CLEAR_OBJ(obj) memset(&(obj), 0, sizeof(obj)) #define MZ_CLEAR_ARR(obj) memset((obj), 0, sizeof(obj)) #define MZ_CLEAR_PTR(obj) memset((obj), 0, sizeof(*obj)) #if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN #define MZ_READ_LE16(p) *((const mz_uint16 *)(p)) #define MZ_READ_LE32(p) *((const mz_uint32 *)(p)) #else #define MZ_READ_LE16(p) ((mz_uint32)(((const mz_uint8 *)(p))[0]) | ((mz_uint32)(((const mz_uint8 *)(p))[1]) << 8U)) #define MZ_READ_LE32(p) ((mz_uint32)(((const mz_uint8 *)(p))[0]) | ((mz_uint32)(((const mz_uint8 *)(p))[1]) << 8U) | ((mz_uint32)(((const mz_uint8 *)(p))[2]) << 16U) | ((mz_uint32)(((const mz_uint8 *)(p))[3]) << 24U)) #endif #define MZ_READ_LE64(p) (((mz_uint64)MZ_READ_LE32(p)) | (((mz_uint64)MZ_READ_LE32((const mz_uint8 *)(p) + sizeof(mz_uint32))) << 32U)) #ifdef _MSC_VER #define MZ_FORCEINLINE __forceinline #elif defined(__GNUC__) #define MZ_FORCEINLINE __inline__ __attribute__((__always_inline__)) #else #define MZ_FORCEINLINE inline #endif #ifdef __cplusplus extern "C" { #endif extern MINIZ_EXPORT void *miniz_def_alloc_func(void *opaque, size_t items, size_t size); extern MINIZ_EXPORT void miniz_def_free_func(void *opaque, void *address); extern MINIZ_EXPORT void 
*miniz_def_realloc_func(void *opaque, void *address, size_t items, size_t size);

#define MZ_UINT16_MAX (0xFFFFU)
#define MZ_UINT32_MAX (0xFFFFFFFFU)

#ifdef __cplusplus
}
#endif

#pragma once

#ifndef MINIZ_NO_DEFLATE_APIS

#ifdef __cplusplus
extern "C" {
#endif

/* ------------------- Low-level Compression API Definitions */

/* Set TDEFL_LESS_MEMORY to 1 to use less memory (compression will be slightly slower, and raw/dynamic blocks will be output more frequently). */
#define TDEFL_LESS_MEMORY 0

/* tdefl_init() compression flags logically OR'd together (low 12 bits contain the max. number of probes per dictionary search): */
/* TDEFL_DEFAULT_MAX_PROBES: The compressor defaults to 128 dictionary probes per dictionary search. 0=Huffman only, 1=Huffman+LZ (fastest/crap compression), 4095=Huffman+LZ (slowest/best compression). */
enum
{
    TDEFL_HUFFMAN_ONLY = 0,
    TDEFL_DEFAULT_MAX_PROBES = 128,
    TDEFL_MAX_PROBES_MASK = 0xFFF
};

/* TDEFL_WRITE_ZLIB_HEADER: If set, the compressor outputs a zlib header before the deflate data, and the Adler-32 of the source data at the end. Otherwise, you'll get raw deflate data. */
/* TDEFL_COMPUTE_ADLER32: Always compute the adler-32 of the input data (even when not writing zlib headers). */
/* TDEFL_GREEDY_PARSING_FLAG: Set to use faster greedy parsing, instead of more efficient lazy parsing. */
/* TDEFL_NONDETERMINISTIC_PARSING_FLAG: Enable to decrease the compressor's initialization time to the minimum, but the output may vary from run to run given the same input (depending on the contents of memory). */
/* TDEFL_RLE_MATCHES: Only look for RLE matches (matches with a distance of 1) */
/* TDEFL_FILTER_MATCHES: Discards matches <= 5 chars if enabled. */
/* TDEFL_FORCE_ALL_STATIC_BLOCKS: Disable usage of optimized Huffman tables. */
/* TDEFL_FORCE_ALL_RAW_BLOCKS: Only use raw (uncompressed) deflate blocks. */
/* The low 12 bits are reserved to control the max # of hash probes per dictionary lookup (see TDEFL_MAX_PROBES_MASK).
*/
enum
{
    TDEFL_WRITE_ZLIB_HEADER = 0x01000,
    TDEFL_COMPUTE_ADLER32 = 0x02000,
    TDEFL_GREEDY_PARSING_FLAG = 0x04000,
    TDEFL_NONDETERMINISTIC_PARSING_FLAG = 0x08000,
    TDEFL_RLE_MATCHES = 0x10000,
    TDEFL_FILTER_MATCHES = 0x20000,
    TDEFL_FORCE_ALL_STATIC_BLOCKS = 0x40000,
    TDEFL_FORCE_ALL_RAW_BLOCKS = 0x80000
};

/* High level compression functions: */
/* tdefl_compress_mem_to_heap() compresses a block in memory to a heap block allocated via malloc(). */
/* On entry: */
/* pSrc_buf, src_buf_len: Pointer and size of source block to compress. */
/* flags: The max match finder probes (default is 128) logically OR'd against the above flags. Higher probes are slower but improve compression. */
/* On return: */
/* Function returns a pointer to the compressed data, or NULL on failure. */
/* *pOut_len will be set to the compressed data's size, which could be larger than src_buf_len on uncompressible data. */
/* The caller must free() the returned block when it's no longer needed. */
MINIZ_EXPORT void *tdefl_compress_mem_to_heap(const void *pSrc_buf, size_t src_buf_len, size_t *pOut_len, int flags);

/* tdefl_compress_mem_to_mem() compresses a block in memory to another block in memory. */
/* Returns 0 on failure. */
MINIZ_EXPORT size_t tdefl_compress_mem_to_mem(void *pOut_buf, size_t out_buf_len, const void *pSrc_buf, size_t src_buf_len, int flags);

/* Compresses an image to a compressed PNG file in memory. */
/* On entry: */
/* pImage, w, h, and num_chans describe the image to compress. num_chans may be 1, 2, 3, or 4. */
/* The image pitch in bytes per scanline will be w*num_chans. The leftmost pixel on the top scanline is stored first in memory. */
/* level may range from [0,10], use MZ_NO_COMPRESSION, MZ_BEST_SPEED, MZ_BEST_COMPRESSION, etc. or a decent default is MZ_DEFAULT_LEVEL */
/* If flip is true, the image will be flipped on the Y axis (useful for OpenGL apps). */
/* On return: */
/* Function returns a pointer to the compressed data, or NULL on failure. */
/* *pLen_out will be set to the size of the PNG image file. */
/* The caller must mz_free() the returned heap block (which will typically be larger than *pLen_out) when it's no longer needed. */
MINIZ_EXPORT void *tdefl_write_image_to_png_file_in_memory_ex(const void *pImage, int w, int h, int num_chans, size_t *pLen_out, mz_uint level, mz_bool flip);
MINIZ_EXPORT void *tdefl_write_image_to_png_file_in_memory(const void *pImage, int w, int h, int num_chans, size_t *pLen_out);

/* Output stream interface. The compressor uses this interface to write compressed data. It'll typically be called TDEFL_OUT_BUF_SIZE at a time. */
typedef mz_bool (*tdefl_put_buf_func_ptr)(const void *pBuf, int len, void *pUser);

/* tdefl_compress_mem_to_output() compresses a block to an output stream. The above helpers use this function internally. */
MINIZ_EXPORT mz_bool tdefl_compress_mem_to_output(const void *pBuf, size_t buf_len, tdefl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags);

enum
{
    TDEFL_MAX_HUFF_TABLES = 3,
    TDEFL_MAX_HUFF_SYMBOLS_0 = 288,
    TDEFL_MAX_HUFF_SYMBOLS_1 = 32,
    TDEFL_MAX_HUFF_SYMBOLS_2 = 19,
    TDEFL_LZ_DICT_SIZE = 32768,
    TDEFL_LZ_DICT_SIZE_MASK = TDEFL_LZ_DICT_SIZE - 1,
    TDEFL_MIN_MATCH_LEN = 3,
    TDEFL_MAX_MATCH_LEN = 258
};

/* TDEFL_OUT_BUF_SIZE MUST be large enough to hold a single entire compressed output block (using static/fixed Huffman codes).
*/
/* Two sizing profiles: the TDEFL_LESS_MEMORY variant shrinks the LZ code
   buffer and hash table (12-bit hash) at some cost in speed/ratio. */
#if TDEFL_LESS_MEMORY
enum
{
    TDEFL_LZ_CODE_BUF_SIZE = 24 * 1024,
    TDEFL_OUT_BUF_SIZE = (TDEFL_LZ_CODE_BUF_SIZE * 13) / 10,
    TDEFL_MAX_HUFF_SYMBOLS = 288,
    TDEFL_LZ_HASH_BITS = 12,
    TDEFL_LEVEL1_HASH_SIZE_MASK = 4095,
    TDEFL_LZ_HASH_SHIFT = (TDEFL_LZ_HASH_BITS + 2) / 3,
    TDEFL_LZ_HASH_SIZE = 1 << TDEFL_LZ_HASH_BITS
};
#else
enum
{
    TDEFL_LZ_CODE_BUF_SIZE = 64 * 1024,
    TDEFL_OUT_BUF_SIZE = (TDEFL_LZ_CODE_BUF_SIZE * 13) / 10,
    TDEFL_MAX_HUFF_SYMBOLS = 288,
    TDEFL_LZ_HASH_BITS = 15,
    TDEFL_LEVEL1_HASH_SIZE_MASK = 4095,
    TDEFL_LZ_HASH_SHIFT = (TDEFL_LZ_HASH_BITS + 2) / 3,
    TDEFL_LZ_HASH_SIZE = 1 << TDEFL_LZ_HASH_BITS
};
#endif

/* The low-level tdefl functions below may be used directly if the above helper functions aren't flexible enough. The low-level functions don't make any heap allocations, unlike the above helper functions. */
typedef enum {
    TDEFL_STATUS_BAD_PARAM = -2,
    TDEFL_STATUS_PUT_BUF_FAILED = -1,
    TDEFL_STATUS_OKAY = 0,
    TDEFL_STATUS_DONE = 1
} tdefl_status;

/* Must map to MZ_NO_FLUSH, MZ_SYNC_FLUSH, etc. enums */
typedef enum {
    TDEFL_NO_FLUSH = 0,
    TDEFL_SYNC_FLUSH = 2,
    TDEFL_FULL_FLUSH = 3,
    TDEFL_FINISH = 4
} tdefl_flush;

/* tdefl's compression state structure.
*/
typedef struct
{
    tdefl_put_buf_func_ptr m_pPut_buf_func;
    void *m_pPut_buf_user;
    mz_uint m_flags, m_max_probes[2];
    int m_greedy_parsing;
    mz_uint m_adler32, m_lookahead_pos, m_lookahead_size, m_dict_size;
    mz_uint8 *m_pLZ_code_buf, *m_pLZ_flags, *m_pOutput_buf, *m_pOutput_buf_end;
    mz_uint m_num_flags_left, m_total_lz_bytes, m_lz_code_buf_dict_pos, m_bits_in, m_bit_buffer;
    mz_uint m_saved_match_dist, m_saved_match_len, m_saved_lit, m_output_flush_ofs, m_output_flush_remaining, m_finished, m_block_index, m_wants_to_finish;
    tdefl_status m_prev_return_status;
    const void *m_pIn_buf;
    void *m_pOut_buf;
    size_t *m_pIn_buf_size, *m_pOut_buf_size;
    tdefl_flush m_flush;
    const mz_uint8 *m_pSrc;
    size_t m_src_buf_left, m_out_buf_ofs;
    mz_uint8 m_dict[TDEFL_LZ_DICT_SIZE + TDEFL_MAX_MATCH_LEN - 1];
    mz_uint16 m_huff_count[TDEFL_MAX_HUFF_TABLES][TDEFL_MAX_HUFF_SYMBOLS];
    mz_uint16 m_huff_codes[TDEFL_MAX_HUFF_TABLES][TDEFL_MAX_HUFF_SYMBOLS];
    mz_uint8 m_huff_code_sizes[TDEFL_MAX_HUFF_TABLES][TDEFL_MAX_HUFF_SYMBOLS];
    mz_uint8 m_lz_code_buf[TDEFL_LZ_CODE_BUF_SIZE];
    mz_uint16 m_next[TDEFL_LZ_DICT_SIZE];
    mz_uint16 m_hash[TDEFL_LZ_HASH_SIZE];
    mz_uint8 m_output_buf[TDEFL_OUT_BUF_SIZE];
} tdefl_compressor;

/* Initializes the compressor. */
/* There is no corresponding deinit() function because the tdefl API's do not dynamically allocate memory. */
/* pPut_buf_func: If non-NULL, compressed output is delivered to this callback and the user should drive compression with the tdefl_compress_buffer() API. */
/* If pPut_buf_func is NULL the user should always call the tdefl_compress() API. */
/* NOTE(review): the original comment read "pBut_buf_func: If NULL, output data will be supplied to the specified callback", which contradicts tdefl_compress_buffer()'s documented requirement of a non-NULL callback; wording corrected accordingly. */
/* flags: See the above enums (TDEFL_HUFFMAN_ONLY, TDEFL_WRITE_ZLIB_HEADER, etc.) */
MINIZ_EXPORT tdefl_status tdefl_init(tdefl_compressor *d, tdefl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags);

/* Compresses a block of data, consuming as much of the specified input buffer as possible, and writing as much compressed data to the specified output buffer as possible.
*/
MINIZ_EXPORT tdefl_status tdefl_compress(tdefl_compressor *d, const void *pIn_buf, size_t *pIn_buf_size, void *pOut_buf, size_t *pOut_buf_size, tdefl_flush flush);

/* tdefl_compress_buffer() is only usable when the tdefl_init() is called with a non-NULL tdefl_put_buf_func_ptr. */
/* tdefl_compress_buffer() always consumes the entire input buffer. */
MINIZ_EXPORT tdefl_status tdefl_compress_buffer(tdefl_compressor *d, const void *pIn_buf, size_t in_buf_size, tdefl_flush flush);

MINIZ_EXPORT tdefl_status tdefl_get_prev_return_status(tdefl_compressor *d);
MINIZ_EXPORT mz_uint32 tdefl_get_adler32(tdefl_compressor *d);

/* Create tdefl_compress() flags given zlib-style compression parameters. */
/* level may range from [0,10] (where 10 is absolute max compression, but may be much slower on some files) */
/* window_bits may be -15 (raw deflate) or 15 (zlib) */
/* strategy may be either MZ_DEFAULT_STRATEGY, MZ_FILTERED, MZ_HUFFMAN_ONLY, MZ_RLE, or MZ_FIXED */
MINIZ_EXPORT mz_uint tdefl_create_comp_flags_from_zip_params(int level, int window_bits, int strategy);

#ifndef MINIZ_NO_MALLOC
/* Allocate the tdefl_compressor structure in C so that */
/* non-C language bindings to tdefl_ API don't need to worry about */
/* structure size and allocation mechanism. */
MINIZ_EXPORT tdefl_compressor *tdefl_compressor_alloc(void);
MINIZ_EXPORT void tdefl_compressor_free(tdefl_compressor *pComp);
#endif

#ifdef __cplusplus
}
#endif

#endif /*#ifndef MINIZ_NO_DEFLATE_APIS*/

#pragma once

/* ------------------- Low-level Decompression API Definitions */

#ifndef MINIZ_NO_INFLATE_APIS

#ifdef __cplusplus
extern "C" {
#endif

/* Decompression flags used by tinfl_decompress(). */
/* TINFL_FLAG_PARSE_ZLIB_HEADER: If set, the input has a valid zlib header and ends with an adler32 checksum (it's a valid zlib stream). Otherwise, the input is a raw deflate stream. */
/* TINFL_FLAG_HAS_MORE_INPUT: If set, there are more input bytes available beyond the end of the supplied input buffer. If clear, the input buffer contains all remaining input. */
/* TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF: If set, the output buffer is large enough to hold the entire decompressed stream. If clear, the output buffer is at least the size of the dictionary (typically 32KB). */
/* TINFL_FLAG_COMPUTE_ADLER32: Force adler-32 checksum computation of the decompressed bytes. */
enum
{
    TINFL_FLAG_PARSE_ZLIB_HEADER = 1,
    TINFL_FLAG_HAS_MORE_INPUT = 2,
    TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF = 4,
    TINFL_FLAG_COMPUTE_ADLER32 = 8
};

/* High level decompression functions: */
/* tinfl_decompress_mem_to_heap() decompresses a block in memory to a heap block allocated via malloc(). */
/* On entry: */
/* pSrc_buf, src_buf_len: Pointer and size of the Deflate or zlib source data to decompress. */
/* On return: */
/* Function returns a pointer to the decompressed data, or NULL on failure. */
/* *pOut_len will be set to the decompressed data's size, which could be larger than src_buf_len on uncompressible data. */
/* The caller must call mz_free() on the returned block when it's no longer needed. */
MINIZ_EXPORT void *tinfl_decompress_mem_to_heap(const void *pSrc_buf, size_t src_buf_len, size_t *pOut_len, int flags);

/* tinfl_decompress_mem_to_mem() decompresses a block in memory to another block in memory. */
/* Returns TINFL_DECOMPRESS_MEM_TO_MEM_FAILED on failure, or the number of bytes written on success. */
#define TINFL_DECOMPRESS_MEM_TO_MEM_FAILED ((size_t)(-1))
MINIZ_EXPORT size_t tinfl_decompress_mem_to_mem(void *pOut_buf, size_t out_buf_len, const void *pSrc_buf, size_t src_buf_len, int flags);

/* tinfl_decompress_mem_to_callback() decompresses a block in memory to an internal 32KB buffer, and a user provided callback function will be called to flush the buffer. */
/* Returns 1 on success or 0 on failure.
*/
typedef int (*tinfl_put_buf_func_ptr)(const void *pBuf, int len, void *pUser);
MINIZ_EXPORT int tinfl_decompress_mem_to_callback(const void *pIn_buf, size_t *pIn_buf_size, tinfl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags);

struct tinfl_decompressor_tag;
typedef struct tinfl_decompressor_tag tinfl_decompressor;

#ifndef MINIZ_NO_MALLOC
/* Allocate the tinfl_decompressor structure in C so that */
/* non-C language bindings to tinfl_ API don't need to worry about */
/* structure size and allocation mechanism. */
MINIZ_EXPORT tinfl_decompressor *tinfl_decompressor_alloc(void);
MINIZ_EXPORT void tinfl_decompressor_free(tinfl_decompressor *pDecomp);
#endif

/* Max size of LZ dictionary. */
#define TINFL_LZ_DICT_SIZE 32768

/* Return status. */
typedef enum {
    /* This flag indicates the inflator needs 1 or more input bytes to make forward progress, but the caller is indicating that no more are available. The compressed data */
    /* is probably corrupted. If you call the inflator again with more bytes it'll try to continue processing the input but this is a BAD sign (either the data is corrupted or you called it incorrectly). */
    /* If you call it again with no input you'll just get TINFL_STATUS_FAILED_CANNOT_MAKE_PROGRESS again. */
    TINFL_STATUS_FAILED_CANNOT_MAKE_PROGRESS = -4,

    /* This flag indicates that one or more of the input parameters was obviously bogus. (You can try calling it again, but if you get this error the calling code is wrong.) */
    TINFL_STATUS_BAD_PARAM = -3,

    /* This flag indicates the inflator is finished but the adler32 check of the uncompressed data didn't match. If you call it again it'll return TINFL_STATUS_DONE. */
    TINFL_STATUS_ADLER32_MISMATCH = -2,

    /* This flag indicates the inflator has somehow failed (bad code, corrupted input, etc.). If you call it again without resetting via tinfl_init() it it'll just keep on returning the same status failure code. */
    TINFL_STATUS_FAILED = -1,

    /* Any status code less than TINFL_STATUS_DONE must indicate a failure. */

    /* This flag indicates the inflator has returned every byte of uncompressed data that it can, has consumed every byte that it needed, has successfully reached the end of the deflate stream, and */
    /* if zlib headers and adler32 checking enabled that it has successfully checked the uncompressed data's adler32. If you call it again you'll just get TINFL_STATUS_DONE over and over again. */
    TINFL_STATUS_DONE = 0,

    /* This flag indicates the inflator MUST have more input data (even 1 byte) before it can make any more forward progress, or you need to clear the TINFL_FLAG_HAS_MORE_INPUT */
    /* flag on the next call if you don't have any more source data. If the source data was somehow corrupted it's also possible (but unlikely) for the inflator to keep on demanding input to */
    /* proceed, so be sure to properly set the TINFL_FLAG_HAS_MORE_INPUT flag. */
    TINFL_STATUS_NEEDS_MORE_INPUT = 1,

    /* This flag indicates the inflator definitely has 1 or more bytes of uncompressed data available, but it cannot write this data into the output buffer. */
    /* Note if the source compressed data was corrupted it's possible for the inflator to return a lot of uncompressed data to the caller. I've been assuming you know how much uncompressed data to expect */
    /* (either exact or worst case) and will stop calling the inflator and fail after receiving too much. In pure streaming scenarios where you have no idea how many bytes to expect this may not be possible */
    /* so I may need to add some code to address this. */
    TINFL_STATUS_HAS_MORE_OUTPUT = 2
} tinfl_status;

/* Initializes the decompressor to its initial state. */
#define tinfl_init(r)     \
    do                    \
    {                     \
        (r)->m_state = 0; \
    }                     \
    MZ_MACRO_END
#define tinfl_get_adler32(r) (r)->m_check_adler32

/* Main low-level decompressor coroutine function. This is the only function actually needed for decompression.
All the other functions are just high-level helpers for improved usability. */
/* This is a universal API, i.e. it can be used as a building block to build any desired higher level decompression API. In the limit case, it can be called once per every byte input or output. */
MINIZ_EXPORT tinfl_status tinfl_decompress(tinfl_decompressor *r, const mz_uint8 *pIn_buf_next, size_t *pIn_buf_size, mz_uint8 *pOut_buf_start, mz_uint8 *pOut_buf_next, size_t *pOut_buf_size, const mz_uint32 decomp_flags);

/* Internal/private bits follow. */
enum
{
    TINFL_MAX_HUFF_TABLES = 3,
    TINFL_MAX_HUFF_SYMBOLS_0 = 288,
    TINFL_MAX_HUFF_SYMBOLS_1 = 32,
    TINFL_MAX_HUFF_SYMBOLS_2 = 19,
    TINFL_FAST_LOOKUP_BITS = 10,
    TINFL_FAST_LOOKUP_SIZE = 1 << TINFL_FAST_LOOKUP_BITS
};

/* Bit buffer width: 64-bit when the platform has 64-bit registers, else 32. */
#if MINIZ_HAS_64BIT_REGISTERS
#define TINFL_USE_64BIT_BITBUF 1
#else
#define TINFL_USE_64BIT_BITBUF 0
#endif

#if TINFL_USE_64BIT_BITBUF
typedef mz_uint64 tinfl_bit_buf_t;
#define TINFL_BITBUF_SIZE (64)
#else
typedef mz_uint32 tinfl_bit_buf_t;
#define TINFL_BITBUF_SIZE (32)
#endif

struct tinfl_decompressor_tag
{
    mz_uint32 m_state, m_num_bits, m_zhdr0, m_zhdr1, m_z_adler32, m_final, m_type, m_check_adler32, m_dist, m_counter, m_num_extra, m_table_sizes[TINFL_MAX_HUFF_TABLES];
    tinfl_bit_buf_t m_bit_buf;
    size_t m_dist_from_out_buf_start;
    mz_int16 m_look_up[TINFL_MAX_HUFF_TABLES][TINFL_FAST_LOOKUP_SIZE];
    mz_int16 m_tree_0[TINFL_MAX_HUFF_SYMBOLS_0 * 2];
    mz_int16 m_tree_1[TINFL_MAX_HUFF_SYMBOLS_1 * 2];
    mz_int16 m_tree_2[TINFL_MAX_HUFF_SYMBOLS_2 * 2];
    mz_uint8 m_code_size_0[TINFL_MAX_HUFF_SYMBOLS_0];
    mz_uint8 m_code_size_1[TINFL_MAX_HUFF_SYMBOLS_1];
    mz_uint8 m_code_size_2[TINFL_MAX_HUFF_SYMBOLS_2];
    mz_uint8 m_raw_header[4], m_len_codes[TINFL_MAX_HUFF_SYMBOLS_0 + TINFL_MAX_HUFF_SYMBOLS_1 + 137];
};

#ifdef __cplusplus
}
#endif

#endif /*#ifndef MINIZ_NO_INFLATE_APIS*/

#pragma once

/* ------------------- ZIP archive reading/writing */

#ifndef MINIZ_NO_ARCHIVE_APIS

#ifdef __cplusplus
extern "C" {
#endif

enum
{
    /* Note: These enums can be
reduced as needed to save memory or stack space - they are pretty conservative. */
    MZ_ZIP_MAX_IO_BUF_SIZE = 64 * 1024,
    MZ_ZIP_MAX_ARCHIVE_FILENAME_SIZE = 512,
    MZ_ZIP_MAX_ARCHIVE_FILE_COMMENT_SIZE = 512
};

typedef struct
{
    /* Central directory file index. */
    mz_uint32 m_file_index;

    /* Byte offset of this entry in the archive's central directory. Note we currently only support up to UINT_MAX or less bytes in the central dir. */
    mz_uint64 m_central_dir_ofs;

    /* These fields are copied directly from the zip's central dir. */
    mz_uint16 m_version_made_by;
    mz_uint16 m_version_needed;
    mz_uint16 m_bit_flag;
    mz_uint16 m_method;

    /* CRC-32 of uncompressed data. */
    mz_uint32 m_crc32;

    /* File's compressed size. */
    mz_uint64 m_comp_size;

    /* File's uncompressed size. Note, I've seen some old archives where directory entries had 512 bytes for their uncompressed sizes, but when you try to unpack them you actually get 0 bytes. */
    mz_uint64 m_uncomp_size;

    /* Zip internal and external file attributes. */
    mz_uint16 m_internal_attr;
    mz_uint32 m_external_attr;

    /* Entry's local header file offset in bytes. */
    mz_uint64 m_local_header_ofs;

    /* Size of comment in bytes. */
    mz_uint32 m_comment_size;

    /* MZ_TRUE if the entry appears to be a directory. */
    mz_bool m_is_directory;

    /* MZ_TRUE if the entry uses encryption/strong encryption (which miniz_zip doesn't support) */
    mz_bool m_is_encrypted;

    /* MZ_TRUE if the file is not encrypted, a patch file, and if it uses a compression method we support. */
    mz_bool m_is_supported;

    /* Filename. If string ends in '/' it's a subdirectory entry. */
    /* Guaranteed to be zero terminated, may be truncated to fit. */
    char m_filename[MZ_ZIP_MAX_ARCHIVE_FILENAME_SIZE];

    /* Comment field. */
    /* Guaranteed to be zero terminated, may be truncated to fit. */
    char m_comment[MZ_ZIP_MAX_ARCHIVE_FILE_COMMENT_SIZE];

#ifdef MINIZ_NO_TIME
    MZ_TIME_T m_padding;
#else
    MZ_TIME_T m_time;
#endif
} mz_zip_archive_file_stat;

/* I/O callbacks used by the archive reader/writer. */
typedef size_t (*mz_file_read_func)(void *pOpaque, mz_uint64 file_ofs, void *pBuf, size_t n);
typedef size_t (*mz_file_write_func)(void *pOpaque, mz_uint64 file_ofs, const void *pBuf, size_t n);
typedef mz_bool (*mz_file_needs_keepalive)(void *pOpaque);

struct mz_zip_internal_state_tag;
typedef struct mz_zip_internal_state_tag mz_zip_internal_state;

typedef enum {
    MZ_ZIP_MODE_INVALID = 0,
    MZ_ZIP_MODE_READING = 1,
    MZ_ZIP_MODE_WRITING = 2,
    MZ_ZIP_MODE_WRITING_HAS_BEEN_FINALIZED = 3
} mz_zip_mode;

typedef enum {
    MZ_ZIP_FLAG_CASE_SENSITIVE = 0x0100,
    MZ_ZIP_FLAG_IGNORE_PATH = 0x0200,
    MZ_ZIP_FLAG_COMPRESSED_DATA = 0x0400,
    MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY = 0x0800,
    MZ_ZIP_FLAG_VALIDATE_LOCATE_FILE_FLAG = 0x1000, /* if enabled, mz_zip_reader_locate_file() will be called on each file as it's validated to ensure the func finds the file in the central dir (intended for testing) */
    MZ_ZIP_FLAG_VALIDATE_HEADERS_ONLY = 0x2000,     /* validate the local headers, but don't decompress the entire file and check the crc32 */
    MZ_ZIP_FLAG_WRITE_ZIP64 = 0x4000,               /* always use the zip64 file format, instead of the original zip file format with automatic switch to zip64. Use as flags parameter with mz_zip_writer_init*_v2 */
    MZ_ZIP_FLAG_WRITE_ALLOW_READING = 0x8000,
    MZ_ZIP_FLAG_ASCII_FILENAME = 0x10000,
    /*After adding a compressed file, seek back to local file header and set the correct sizes*/
    MZ_ZIP_FLAG_WRITE_HEADER_SET_SIZE = 0x20000
} mz_zip_flags;

typedef enum {
    MZ_ZIP_TYPE_INVALID = 0,
    MZ_ZIP_TYPE_USER,
    MZ_ZIP_TYPE_MEMORY,
    MZ_ZIP_TYPE_HEAP,
    MZ_ZIP_TYPE_FILE,
    MZ_ZIP_TYPE_CFILE,
    MZ_ZIP_TOTAL_TYPES
} mz_zip_type;

/* miniz error codes. Be sure to update mz_zip_get_error_string() if you add or modify this enum.
*/
typedef enum {
    MZ_ZIP_NO_ERROR = 0,
    MZ_ZIP_UNDEFINED_ERROR,
    MZ_ZIP_TOO_MANY_FILES,
    MZ_ZIP_FILE_TOO_LARGE,
    MZ_ZIP_UNSUPPORTED_METHOD,
    MZ_ZIP_UNSUPPORTED_ENCRYPTION,
    MZ_ZIP_UNSUPPORTED_FEATURE,
    MZ_ZIP_FAILED_FINDING_CENTRAL_DIR,
    MZ_ZIP_NOT_AN_ARCHIVE,
    MZ_ZIP_INVALID_HEADER_OR_CORRUPTED,
    MZ_ZIP_UNSUPPORTED_MULTIDISK,
    MZ_ZIP_DECOMPRESSION_FAILED,
    MZ_ZIP_COMPRESSION_FAILED,
    MZ_ZIP_UNEXPECTED_DECOMPRESSED_SIZE,
    MZ_ZIP_CRC_CHECK_FAILED,
    MZ_ZIP_UNSUPPORTED_CDIR_SIZE,
    MZ_ZIP_ALLOC_FAILED,
    MZ_ZIP_FILE_OPEN_FAILED,
    MZ_ZIP_FILE_CREATE_FAILED,
    MZ_ZIP_FILE_WRITE_FAILED,
    MZ_ZIP_FILE_READ_FAILED,
    MZ_ZIP_FILE_CLOSE_FAILED,
    MZ_ZIP_FILE_SEEK_FAILED,
    MZ_ZIP_FILE_STAT_FAILED,
    MZ_ZIP_INVALID_PARAMETER,
    MZ_ZIP_INVALID_FILENAME,
    MZ_ZIP_BUF_TOO_SMALL,
    MZ_ZIP_INTERNAL_ERROR,
    MZ_ZIP_FILE_NOT_FOUND,
    MZ_ZIP_ARCHIVE_TOO_LARGE,
    MZ_ZIP_VALIDATION_FAILED,
    MZ_ZIP_WRITE_CALLBACK_FAILED,
    MZ_ZIP_TOTAL_ERRORS
} mz_zip_error;

/* Archive handle: sizes, mode/type, error slot, allocator and I/O callbacks. */
typedef struct
{
    mz_uint64 m_archive_size;
    mz_uint64 m_central_directory_file_ofs;

    /* We only support up to UINT32_MAX files in zip64 mode. */
    mz_uint32 m_total_files;
    mz_zip_mode m_zip_mode;
    mz_zip_type m_zip_type;
    mz_zip_error m_last_error;

    mz_uint64 m_file_offset_alignment;

    mz_alloc_func m_pAlloc;
    mz_free_func m_pFree;
    mz_realloc_func m_pRealloc;
    void *m_pAlloc_opaque;

    mz_file_read_func m_pRead;
    mz_file_write_func m_pWrite;
    mz_file_needs_keepalive m_pNeeds_keepalive;
    void *m_pIO_opaque;

    mz_zip_internal_state *m_pState;
} mz_zip_archive;

/* State for the streaming (iterator-style) extraction API. */
typedef struct
{
    mz_zip_archive *pZip;
    mz_uint flags;

    int status;

    mz_uint64 read_buf_size, read_buf_ofs, read_buf_avail, comp_remaining, out_buf_ofs, cur_file_ofs;
    mz_zip_archive_file_stat file_stat;
    void *pRead_buf;
    void *pWrite_buf;

    size_t out_blk_remain;

    tinfl_decompressor inflator;

#ifdef MINIZ_DISABLE_ZIP_READER_CRC32_CHECKS
    mz_uint padding;
#else
    mz_uint file_crc32;
#endif
} mz_zip_reader_extract_iter_state;

/* -------- ZIP reading */

/* Inits a ZIP archive reader. */
/* These functions read and validate the archive's central directory.
*/
MINIZ_EXPORT mz_bool mz_zip_reader_init(mz_zip_archive *pZip, mz_uint64 size, mz_uint flags);
MINIZ_EXPORT mz_bool mz_zip_reader_init_mem(mz_zip_archive *pZip, const void *pMem, size_t size, mz_uint flags);

#ifndef MINIZ_NO_STDIO
/* Read an archive from a disk file. */
/* file_start_ofs is the file offset where the archive actually begins, or 0. */
/* actual_archive_size is the true total size of the archive, which may be smaller than the file's actual size on disk. If zero the entire file is treated as the archive. */
MINIZ_EXPORT mz_bool mz_zip_reader_init_file(mz_zip_archive *pZip, const char *pFilename, mz_uint32 flags);
MINIZ_EXPORT mz_bool mz_zip_reader_init_file_v2(mz_zip_archive *pZip, const char *pFilename, mz_uint flags, mz_uint64 file_start_ofs, mz_uint64 archive_size);

/* Read an archive from an already opened FILE, beginning at the current file position. */
/* The archive is assumed to be archive_size bytes long. If archive_size is 0, then the entire rest of the file is assumed to contain the archive. */
/* The FILE will NOT be closed when mz_zip_reader_end() is called. */
MINIZ_EXPORT mz_bool mz_zip_reader_init_cfile(mz_zip_archive *pZip, MZ_FILE *pFile, mz_uint64 archive_size, mz_uint flags);
#endif

/* Ends archive reading, freeing all allocations, and closing the input archive file if mz_zip_reader_init_file() was used. */
MINIZ_EXPORT mz_bool mz_zip_reader_end(mz_zip_archive *pZip);

/* -------- ZIP reading or writing */

/* Clears a mz_zip_archive struct to all zeros. */
/* Important: This must be done before passing the struct to any mz_zip functions. */
MINIZ_EXPORT void mz_zip_zero_struct(mz_zip_archive *pZip);

MINIZ_EXPORT mz_zip_mode mz_zip_get_mode(mz_zip_archive *pZip);
MINIZ_EXPORT mz_zip_type mz_zip_get_type(mz_zip_archive *pZip);

/* Returns the total number of files in the archive. */
MINIZ_EXPORT mz_uint mz_zip_reader_get_num_files(mz_zip_archive *pZip);

MINIZ_EXPORT mz_uint64 mz_zip_get_archive_size(mz_zip_archive *pZip);
MINIZ_EXPORT mz_uint64 mz_zip_get_archive_file_start_offset(mz_zip_archive *pZip);
MINIZ_EXPORT MZ_FILE *mz_zip_get_cfile(mz_zip_archive *pZip);

/* Reads n bytes of raw archive data, starting at file offset file_ofs, to pBuf. */
MINIZ_EXPORT size_t mz_zip_read_archive_data(mz_zip_archive *pZip, mz_uint64 file_ofs, void *pBuf, size_t n);

/* All mz_zip funcs set the m_last_error field in the mz_zip_archive struct. These functions retrieve/manipulate this field. */
/* Note that the m_last_error functionality is not thread safe. */
MINIZ_EXPORT mz_zip_error mz_zip_set_last_error(mz_zip_archive *pZip, mz_zip_error err_num);
MINIZ_EXPORT mz_zip_error mz_zip_peek_last_error(mz_zip_archive *pZip);
MINIZ_EXPORT mz_zip_error mz_zip_clear_last_error(mz_zip_archive *pZip);
MINIZ_EXPORT mz_zip_error mz_zip_get_last_error(mz_zip_archive *pZip);
MINIZ_EXPORT const char *mz_zip_get_error_string(mz_zip_error mz_err);

/* MZ_TRUE if the archive file entry is a directory entry. */
MINIZ_EXPORT mz_bool mz_zip_reader_is_file_a_directory(mz_zip_archive *pZip, mz_uint file_index);

/* MZ_TRUE if the file is encrypted/strong encrypted. */
MINIZ_EXPORT mz_bool mz_zip_reader_is_file_encrypted(mz_zip_archive *pZip, mz_uint file_index);

/* MZ_TRUE if the compression method is supported, and the file is not encrypted, and the file is not a compressed patch file. */
MINIZ_EXPORT mz_bool mz_zip_reader_is_file_supported(mz_zip_archive *pZip, mz_uint file_index);

/* Retrieves the filename of an archive file entry. */
/* Returns the number of bytes written to pFilename, or if filename_buf_size is 0 this function returns the number of bytes needed to fully store the filename. */
MINIZ_EXPORT mz_uint mz_zip_reader_get_filename(mz_zip_archive *pZip, mz_uint file_index, char *pFilename, mz_uint filename_buf_size);

/* Attempts to locate a file in the archive's central directory. */
/* Valid flags: MZ_ZIP_FLAG_CASE_SENSITIVE, MZ_ZIP_FLAG_IGNORE_PATH */
/* Returns -1 if the file cannot be found. */
MINIZ_EXPORT int mz_zip_reader_locate_file(mz_zip_archive *pZip, const char *pName, const char *pComment, mz_uint flags);
MINIZ_EXPORT mz_bool mz_zip_reader_locate_file_v2(mz_zip_archive *pZip, const char *pName, const char *pComment, mz_uint flags, mz_uint32 *file_index);

/* Returns detailed information about an archive file entry. */
MINIZ_EXPORT mz_bool mz_zip_reader_file_stat(mz_zip_archive *pZip, mz_uint file_index, mz_zip_archive_file_stat *pStat);

/* MZ_TRUE if the file is in zip64 format. */
/* A file is considered zip64 if it contained a zip64 end of central directory marker, or if it contained any zip64 extended file information fields in the central directory. */
MINIZ_EXPORT mz_bool mz_zip_is_zip64(mz_zip_archive *pZip);

/* Returns the total central directory size in bytes. */
/* The current max supported size is <= MZ_UINT32_MAX. */
MINIZ_EXPORT size_t mz_zip_get_central_dir_size(mz_zip_archive *pZip);

/* Extracts an archive file to a memory buffer using no memory allocation. */
/* There must be at least enough room on the stack to store the inflator's state (~34KB or so). */
MINIZ_EXPORT mz_bool mz_zip_reader_extract_to_mem_no_alloc(mz_zip_archive *pZip, mz_uint file_index, void *pBuf, size_t buf_size, mz_uint flags, void *pUser_read_buf, size_t user_read_buf_size);
MINIZ_EXPORT mz_bool mz_zip_reader_extract_file_to_mem_no_alloc(mz_zip_archive *pZip, const char *pFilename, void *pBuf, size_t buf_size, mz_uint flags, void *pUser_read_buf, size_t user_read_buf_size);

/* Extracts an archive file to a memory buffer.
*/ MINIZ_EXPORT mz_bool mz_zip_reader_extract_to_mem(mz_zip_archive *pZip, mz_uint file_index, void *pBuf, size_t buf_size, mz_uint flags); MINIZ_EXPORT mz_bool mz_zip_reader_extract_file_to_mem(mz_zip_archive *pZip, const char *pFilename, void *pBuf, size_t buf_size, mz_uint flags); /* Extracts a archive file to a dynamically allocated heap buffer. */ /* The memory will be allocated via the mz_zip_archive's alloc/realloc functions. */ /* Returns NULL and sets the last error on failure. */ MINIZ_EXPORT void *mz_zip_reader_extract_to_heap(mz_zip_archive *pZip, mz_uint file_index, size_t *pSize, mz_uint flags); MINIZ_EXPORT void *mz_zip_reader_extract_file_to_heap(mz_zip_archive *pZip, const char *pFilename, size_t *pSize, mz_uint flags); /* Extracts a archive file using a callback function to output the file's data. */ MINIZ_EXPORT mz_bool mz_zip_reader_extract_to_callback(mz_zip_archive *pZip, mz_uint file_index, mz_file_write_func pCallback, void *pOpaque, mz_uint flags); MINIZ_EXPORT mz_bool mz_zip_reader_extract_file_to_callback(mz_zip_archive *pZip, const char *pFilename, mz_file_write_func pCallback, void *pOpaque, mz_uint flags); /* Extract a file iteratively */ MINIZ_EXPORT mz_zip_reader_extract_iter_state* mz_zip_reader_extract_iter_new(mz_zip_archive *pZip, mz_uint file_index, mz_uint flags); MINIZ_EXPORT mz_zip_reader_extract_iter_state* mz_zip_reader_extract_file_iter_new(mz_zip_archive *pZip, const char *pFilename, mz_uint flags); MINIZ_EXPORT size_t mz_zip_reader_extract_iter_read(mz_zip_reader_extract_iter_state* pState, void* pvBuf, size_t buf_size); MINIZ_EXPORT mz_bool mz_zip_reader_extract_iter_free(mz_zip_reader_extract_iter_state* pState); #ifndef MINIZ_NO_STDIO /* Extracts a archive file to a disk file and sets its last accessed and modified times. */ /* This function only extracts files, not archive directory records. 
*/ MINIZ_EXPORT mz_bool mz_zip_reader_extract_to_file(mz_zip_archive *pZip, mz_uint file_index, const char *pDst_filename, mz_uint flags); MINIZ_EXPORT mz_bool mz_zip_reader_extract_file_to_file(mz_zip_archive *pZip, const char *pArchive_filename, const char *pDst_filename, mz_uint flags); /* Extracts a archive file starting at the current position in the destination FILE stream. */ MINIZ_EXPORT mz_bool mz_zip_reader_extract_to_cfile(mz_zip_archive *pZip, mz_uint file_index, MZ_FILE *File, mz_uint flags); MINIZ_EXPORT mz_bool mz_zip_reader_extract_file_to_cfile(mz_zip_archive *pZip, const char *pArchive_filename, MZ_FILE *pFile, mz_uint flags); #endif #if 0 /* TODO */ typedef void *mz_zip_streaming_extract_state_ptr; mz_zip_streaming_extract_state_ptr mz_zip_streaming_extract_begin(mz_zip_archive *pZip, mz_uint file_index, mz_uint flags); mz_uint64 mz_zip_streaming_extract_get_size(mz_zip_archive *pZip, mz_zip_streaming_extract_state_ptr pState); mz_uint64 mz_zip_streaming_extract_get_cur_ofs(mz_zip_archive *pZip, mz_zip_streaming_extract_state_ptr pState); mz_bool mz_zip_streaming_extract_seek(mz_zip_archive *pZip, mz_zip_streaming_extract_state_ptr pState, mz_uint64 new_ofs); size_t mz_zip_streaming_extract_read(mz_zip_archive *pZip, mz_zip_streaming_extract_state_ptr pState, void *pBuf, size_t buf_size); mz_bool mz_zip_streaming_extract_end(mz_zip_archive *pZip, mz_zip_streaming_extract_state_ptr pState); #endif /* This function compares the archive's local headers, the optional local zip64 extended information block, and the optional descriptor following the compressed data vs. the data in the central directory. */ /* It also validates that each file can be successfully uncompressed unless the MZ_ZIP_FLAG_VALIDATE_HEADERS_ONLY is specified. */ MINIZ_EXPORT mz_bool mz_zip_validate_file(mz_zip_archive *pZip, mz_uint file_index, mz_uint flags); /* Validates an entire archive by calling mz_zip_validate_file() on each file. 
*/ MINIZ_EXPORT mz_bool mz_zip_validate_archive(mz_zip_archive *pZip, mz_uint flags); /* Misc utils/helpers, valid for ZIP reading or writing */ MINIZ_EXPORT mz_bool mz_zip_validate_mem_archive(const void *pMem, size_t size, mz_uint flags, mz_zip_error *pErr); #ifndef MINIZ_NO_STDIO MINIZ_EXPORT mz_bool mz_zip_validate_file_archive(const char *pFilename, mz_uint flags, mz_zip_error *pErr); #endif /* Universal end function - calls either mz_zip_reader_end() or mz_zip_writer_end(). */ MINIZ_EXPORT mz_bool mz_zip_end(mz_zip_archive *pZip); /* -------- ZIP writing */ #ifndef MINIZ_NO_ARCHIVE_WRITING_APIS /* Inits a ZIP archive writer. */ /*Set pZip->m_pWrite (and pZip->m_pIO_opaque) before calling mz_zip_writer_init or mz_zip_writer_init_v2*/ /*The output is streamable, i.e. file_ofs in mz_file_write_func always increases only by n*/ MINIZ_EXPORT mz_bool mz_zip_writer_init(mz_zip_archive *pZip, mz_uint64 existing_size); MINIZ_EXPORT mz_bool mz_zip_writer_init_v2(mz_zip_archive *pZip, mz_uint64 existing_size, mz_uint flags); MINIZ_EXPORT mz_bool mz_zip_writer_init_heap(mz_zip_archive *pZip, size_t size_to_reserve_at_beginning, size_t initial_allocation_size); MINIZ_EXPORT mz_bool mz_zip_writer_init_heap_v2(mz_zip_archive *pZip, size_t size_to_reserve_at_beginning, size_t initial_allocation_size, mz_uint flags); #ifndef MINIZ_NO_STDIO MINIZ_EXPORT mz_bool mz_zip_writer_init_file(mz_zip_archive *pZip, const char *pFilename, mz_uint64 size_to_reserve_at_beginning); MINIZ_EXPORT mz_bool mz_zip_writer_init_file_v2(mz_zip_archive *pZip, const char *pFilename, mz_uint64 size_to_reserve_at_beginning, mz_uint flags); MINIZ_EXPORT mz_bool mz_zip_writer_init_cfile(mz_zip_archive *pZip, MZ_FILE *pFile, mz_uint flags); #endif /* Converts a ZIP archive reader object into a writer object, to allow efficient in-place file appends to occur on an existing archive. 
*/ /* For archives opened using mz_zip_reader_init_file, pFilename must be the archive's filename so it can be reopened for writing. If the file can't be reopened, mz_zip_reader_end() will be called. */ /* For archives opened using mz_zip_reader_init_mem, the memory block must be growable using the realloc callback (which defaults to realloc unless you've overridden it). */ /* Finally, for archives opened using mz_zip_reader_init, the mz_zip_archive's user provided m_pWrite function cannot be NULL. */ /* Note: In-place archive modification is not recommended unless you know what you're doing, because if execution stops or something goes wrong before */ /* the archive is finalized the file's central directory will be hosed. */ MINIZ_EXPORT mz_bool mz_zip_writer_init_from_reader(mz_zip_archive *pZip, const char *pFilename); MINIZ_EXPORT mz_bool mz_zip_writer_init_from_reader_v2(mz_zip_archive *pZip, const char *pFilename, mz_uint flags); /* Adds the contents of a memory buffer to an archive. These functions record the current local time into the archive. */ /* To add a directory entry, call this method with an archive name ending in a forwardslash with an empty buffer. */ /* level_and_flags - compression level (0-10, see MZ_BEST_SPEED, MZ_BEST_COMPRESSION, etc.) logically OR'd with zero or more mz_zip_flags, or just set to MZ_DEFAULT_COMPRESSION. */ MINIZ_EXPORT mz_bool mz_zip_writer_add_mem(mz_zip_archive *pZip, const char *pArchive_name, const void *pBuf, size_t buf_size, mz_uint level_and_flags); /* Like mz_zip_writer_add_mem(), except you can specify a file comment field, and optionally supply the function with already compressed data. */ /* uncomp_size/uncomp_crc32 are only used if the MZ_ZIP_FLAG_COMPRESSED_DATA flag is specified. 
*/ MINIZ_EXPORT mz_bool mz_zip_writer_add_mem_ex(mz_zip_archive *pZip, const char *pArchive_name, const void *pBuf, size_t buf_size, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags, mz_uint64 uncomp_size, mz_uint32 uncomp_crc32); MINIZ_EXPORT mz_bool mz_zip_writer_add_mem_ex_v2(mz_zip_archive *pZip, const char *pArchive_name, const void *pBuf, size_t buf_size, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags, mz_uint64 uncomp_size, mz_uint32 uncomp_crc32, MZ_TIME_T *last_modified, const char *user_extra_data_local, mz_uint user_extra_data_local_len, const char *user_extra_data_central, mz_uint user_extra_data_central_len); /* Adds the contents of a file to an archive. This function also records the disk file's modified time into the archive. */ /* File data is supplied via a read callback function. User mz_zip_writer_add_(c)file to add a file directly.*/ MINIZ_EXPORT mz_bool mz_zip_writer_add_read_buf_callback(mz_zip_archive *pZip, const char *pArchive_name, mz_file_read_func read_callback, void* callback_opaque, mz_uint64 max_size, const MZ_TIME_T *pFile_time, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags, const char *user_extra_data_local, mz_uint user_extra_data_local_len, const char *user_extra_data_central, mz_uint user_extra_data_central_len); #ifndef MINIZ_NO_STDIO /* Adds the contents of a disk file to an archive. This function also records the disk file's modified time into the archive. */ /* level_and_flags - compression level (0-10, see MZ_BEST_SPEED, MZ_BEST_COMPRESSION, etc.) logically OR'd with zero or more mz_zip_flags, or just set to MZ_DEFAULT_COMPRESSION. */ MINIZ_EXPORT mz_bool mz_zip_writer_add_file(mz_zip_archive *pZip, const char *pArchive_name, const char *pSrc_filename, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags); /* Like mz_zip_writer_add_file(), except the file data is read from the specified FILE stream. 
*/ MINIZ_EXPORT mz_bool mz_zip_writer_add_cfile(mz_zip_archive *pZip, const char *pArchive_name, MZ_FILE *pSrc_file, mz_uint64 max_size, const MZ_TIME_T *pFile_time, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags, const char *user_extra_data_local, mz_uint user_extra_data_local_len, const char *user_extra_data_central, mz_uint user_extra_data_central_len); #endif /* Adds a file to an archive by fully cloning the data from another archive. */ /* This function fully clones the source file's compressed data (no recompression), along with its full filename, extra data (it may add or modify the zip64 local header extra data field), and the optional descriptor following the compressed data. */ MINIZ_EXPORT mz_bool mz_zip_writer_add_from_zip_reader(mz_zip_archive *pZip, mz_zip_archive *pSource_zip, mz_uint src_file_index); /* Finalizes the archive by writing the central directory records followed by the end of central directory record. */ /* After an archive is finalized, the only valid call on the mz_zip_archive struct is mz_zip_writer_end(). */ /* An archive must be manually finalized by calling this function for it to be valid. */ MINIZ_EXPORT mz_bool mz_zip_writer_finalize_archive(mz_zip_archive *pZip); /* Finalizes a heap archive, returning a pointer to the heap block and its size. */ /* The heap block will be allocated using the mz_zip_archive's alloc/realloc callbacks. */ MINIZ_EXPORT mz_bool mz_zip_writer_finalize_heap_archive(mz_zip_archive *pZip, void **ppBuf, size_t *pSize); /* Ends archive writing, freeing all allocations, and closing the output file if mz_zip_writer_init_file() was used. */ /* Note for the archive to be valid, it *must* have been finalized before ending (this function will not do it for you). */ MINIZ_EXPORT mz_bool mz_zip_writer_end(mz_zip_archive *pZip); /* -------- Misc. 
high-level helper functions: */ /* mz_zip_add_mem_to_archive_file_in_place() efficiently (but not atomically) appends a memory blob to a ZIP archive. */ /* Note this is NOT a fully safe operation. If it crashes or dies in some way your archive can be left in a screwed up state (without a central directory). */ /* level_and_flags - compression level (0-10, see MZ_BEST_SPEED, MZ_BEST_COMPRESSION, etc.) logically OR'd with zero or more mz_zip_flags, or just set to MZ_DEFAULT_COMPRESSION. */ /* TODO: Perhaps add an option to leave the existing central dir in place in case the add dies? We could then truncate the file (so the old central dir would be at the end) if something goes wrong. */ MINIZ_EXPORT mz_bool mz_zip_add_mem_to_archive_file_in_place(const char *pZip_filename, const char *pArchive_name, const void *pBuf, size_t buf_size, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags); MINIZ_EXPORT mz_bool mz_zip_add_mem_to_archive_file_in_place_v2(const char *pZip_filename, const char *pArchive_name, const void *pBuf, size_t buf_size, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags, mz_zip_error *pErr); #ifndef MINIZ_NO_STDIO /* Reads a single file from an archive into a heap block. */ /* If pComment is not NULL, only the file with the specified comment will be extracted. */ /* Returns NULL on failure. 
*/ MINIZ_EXPORT void *mz_zip_extract_archive_file_to_heap(const char *pZip_filename, const char *pArchive_name, size_t *pSize, mz_uint flags); MINIZ_EXPORT void *mz_zip_extract_archive_file_to_heap_v2(const char *pZip_filename, const char *pArchive_name, const char *pComment, size_t *pSize, mz_uint flags, mz_zip_error *pErr); #endif #endif /* #ifndef MINIZ_NO_ARCHIVE_WRITING_APIS */ #ifdef __cplusplus } #endif #endif /* MINIZ_NO_ARCHIVE_APIS */ luametatex-2.10.08/source/libraries/miniz/readme.md000066400000000000000000000112601442250314700222160ustar00rootroot00000000000000## Miniz Miniz is a lossless, high performance data compression library in a single source file that implements the zlib (RFC 1950) and Deflate (RFC 1951) compressed data format specification standards. It supports the most commonly used functions exported by the zlib library, but is a completely independent implementation so zlib's licensing requirements do not apply. Miniz also contains simple to use functions for writing .PNG format image files and reading/writing/appending .ZIP format archives. Miniz's compression speed has been tuned to be comparable to zlib's, and it also has a specialized real-time compressor function designed to compare well against fastlz/minilzo. ## Usage Releases are available at the [releases page](https://github.com/richgel999/miniz/releases) as a pair of `miniz.c`/`miniz.h` files which can be simply added to a project. To create this file pair the different source and header files are [amalgamated](https://www.sqlite.org/amalgamation.html) during build. Alternatively use as cmake or meson module (or build system of your choice). ## Features * MIT licensed * A portable, single source and header file library written in plain C. Tested with GCC, clang and Visual Studio. * Easily tuned and trimmed down by defines * A drop-in replacement for zlib's most used API's (tested in several open source projects that use zlib, such as libpng and libzip). 
* Fills a single threaded performance vs. compression ratio gap between several popular real-time compressors and zlib. For example, at level 1, miniz.c compresses around 5-9% better than minilzo, but is approx. 35% slower. At levels 2-9, miniz.c is designed to compare favorably against zlib's ratio and speed. See the miniz performance comparison page for example timings. * Not a block based compressor: miniz.c fully supports stream based processing using a coroutine-style implementation. The zlib-style API functions can be called a single byte at a time if that's all you've got. * Easy to use. The low-level compressor (tdefl) and decompressor (tinfl) have simple state structs which can be saved/restored as needed with simple memcpy's. The low-level codec API's don't use the heap in any way. * Entire inflater (including optional zlib header parsing and Adler-32 checking) is implemented in a single function as a coroutine, which is separately available in a small (~550 line) source file: miniz_tinfl.c * A fairly complete (but totally optional) set of .ZIP archive manipulation and extraction API's. The archive functionality is intended to solve common problems encountered in embedded, mobile, or game development situations. (The archive API's are purposely just powerful enough to write an entire archiver given a bit of additional higher-level logic.) ## Building miniz - Using vcpkg You can download and install miniz using the [vcpkg](https://github.com/Microsoft/vcpkg) dependency manager: git clone https://github.com/Microsoft/vcpkg.git cd vcpkg ./bootstrap-vcpkg.sh ./vcpkg integrate install ./vcpkg install miniz The miniz port in vcpkg is kept up to date by Microsoft team members and community contributors. If the version is out of date, please [create an issue or pull request](https://github.com/Microsoft/vcpkg) on the vcpkg repository. ## Known Problems * No support for encrypted archives. Not sure how useful this stuff is in practice. * Minimal documentation. 
The assumption is that the user is already familiar with the basic zlib API. I need to write an API wiki - for now I've tried to place key comments before each enum/API, and I've included 6 examples that demonstrate how to use the module's major features. ## Special Thanks Thanks to Alex Evans for the PNG writer function. Also, thanks to Paul Holden and Thorsten Scheuermann for feedback and testing, Matt Pritchard for all his encouragement, and Sean Barrett's various public domain libraries for inspiration (and encouraging me to write miniz.c in C, which was much more enjoyable and less painful than I thought it would be considering I've been programming in C++ for so long). Thanks to Bruce Dawson for reporting a problem with the level_and_flags archive API parameter (which is fixed in v1.12) and general feedback, and Janez Zemva for indirectly encouraging me into writing more examples. ## Patents I was recently asked if miniz avoids patent issues. miniz purposely uses the same core algorithms as the ones used by zlib. The compressor uses vanilla hash chaining as described [here](https://datatracker.ietf.org/doc/html/rfc1951#section-4). Also see the [gzip FAQ](https://web.archive.org/web/20160308045258/http://www.gzip.org/#faq11). In my opinion, if miniz falls prey to a patent attack then zlib/gzip are likely to be at serious risk too. luametatex-2.10.08/source/libraries/miniz/readme.txt000066400000000000000000000007251442250314700224410ustar00rootroot00000000000000Remark Conform the recommendation we use the official merged files (release) not the github files. Also, we only use part of that single file because we do all file handling ourselves because we operate within the file name regime of LuaMetaTeX that is aware of operating system specifics like wide filenames on MSWindows). 
We don't drop in updates without careful checking them first for potential clashes.\\ release url: https://github.com/richgel999/miniz/releasesluametatex-2.10.08/source/libraries/pplib/000077500000000000000000000000001442250314700204175ustar00rootroot00000000000000luametatex-2.10.08/source/libraries/pplib/html.zip000066400000000000000000010430061442250314700221130ustar00rootroot00000000000000PKŻ?vqx2i+n}wO+!F)6S\+N*+~++Bn uF'ɻ]$< 8O^icvaŨ,5ɠ u^yQX!4nu߽PKO&p html/genindex.htmlW[o0~߯0qHl- qCBڧ[6zKu _sirpp05su $wu!Es4]V8ff ]{L4> d~%^"X.JDBDVEo_^ɞ5s]n :X[?%,ƂUnl49$*krQvtJR 9L2VjB JĀEl" %X:?e¬O[Rاg{FeFn3KNQh' ]jѸ2hK҈X6>\)m4xsSRI3iT乭)(jkݠicݤTR"sNe6-(yd95:I<g<x6M)wܣxl@8yJ5,>m#]&/V6nsW|a)aX(eMdajĔHG޾"l.+ p(.8L4Dp$LRSBRBDAcI' {ܷ!Iy<Ͳ@oSrɂPq8|FrTr+f_נPIZ{GkP#q0!)% 1kÆ5ky<= FH|oVo{[B0w(P+C BDυ-%4Nt0llAM7v;6Eg]| , ܆ZBvcDmhsdI!Ls9Nˁ-.5+mk&ÒRn3?R.;|჻})( ^rPKO} zUDhtml/ppapi-1.x.htmliv4Na@ʾ|W@yP<}(:4l;.8G$̌d9Ėwf˨kU07Gɲ,xrOg="v8\1N7#>/Y',8Y@o0+߻7+++(^/}X0w :>U==0|![ _UG l Bs꽉TG\["`sk>cbF۷kP&լ\- SK3.I\QaA)fs\X!, o@?h>P׺ETor̵$af< ȳ;F }=OX9+;FrQ ]<~w.$;>!} b X55}\pQf QXRLRb46͔ܳ%~17e!PҚX劘y&Kw4Cm ؼ2Ec0?qw5-FGq(Yw5者![ugǠ#p 9[AS/7 |j~f8pa~ W AG!fFAqN& Uhc~=~6鈇F9v9`?d]J^8!d~ {# KttPhљwB5*q,ɔcK8֫)2^uꠄ#2"/yx." :=XRNDHR5Sv$rN9axcAJDKasi}DM3RDL2E '(pE@ǥV,cL*H>HEUڬwl"}8d]t*E[>+Ed!5s_s8KܾH7_\{gn)K8pF IMٯ:yAG{}DMdۍ/9)Rp}ZkH:"%Ul4sPG7?m3*n<Ľr6_:7:6`?P)L7h~0琅+ h ҂[!C"EkBh9V yLa | L8 * j-ɿKR V]EL?)e轃Sj?y|cq"`k$*t&%"$xVuI;9 H<xhzy[jd^? &8yO@lp@#QfsYHJu`AcHn8FM0&e~sD~nHFR$D s]Mk-㳅ͬx/u;mྵ䰎ȷ=o-=Z(ap<Sj0~/p^6 vͮMa~ nMAv aG.)T9֧sN~Ɲf|o!q" V~bb,[0':&n-ܣj]:AC nMAw J܄Ϊ3CBw,a jo'ZۥɮVAw@1<ހ&ò=J.SVá.!yyz.s`|OPĐ"+hM*bfՑQpWjcCIvRmX>CA:NgN_S0٢Ua.&K1M?Z_!pužtyNВÓHtNlHf_?# 7) 55uxmu$5&k_8 {5rsRUP8Z_FoAq p20:X+C(]YPJI&@D]i zs(ϐ)%7ֳK! 
D5s}6Rn:ք[,* hήq#8a2s_̌k8Tb_FHC1Pbya, C8BA6c%IZNP][+N5ꡆm6CP2:,ib&H6d<>ȑ%"@ gzi' 1^2H@rsFQ9{M=(+Xeϐ 8ew9군x"_9RDmn3d[`|sx|aQb*x9QQB G\x&k.A,> m|5=BXV5 4znk*Qq)MkQAUpĶ]cXm +uY䐊^v =T81Pr=z߿|\O:_;{{J/.?{ 3>?<>ٞ}xe,dD6nuW< `$$sp(e\Py$]{&h )\RkBnunbj* h`opQFO6:m>D CF +~Pk4A=9{voA5H=_iĨ!t)SCML@M>(;I)ZuU#x#ANb.DVGAXnuU57M]?D#n |_%xU*5L{s5Ŋd^lRLDyږp/,^tP͡DL 2)^j.RNW,އlKe)?@VϗNI\"%5 R2Ẩ c7wQL4(Jl)"^'1[P]y0UW_7H*Ou(<}dp./uiȮwu'y}6 `  P;7 +.>4:JP jL2֥ӢyK˘ƽ?;yKeܗrc j?Ii GPQ? yS]i"RI$Qﲇ(˻Lj&mK%:{hZ7MK?;}Co6MCz'JLAޟiEnMS Rpa:btvF&z<ϿB8:(b!}d].boW#/2J'UkWv Z[:dS !dIUڡԓs7wc^zK8Dq]uG&F>Co pd5gq+ 4q~kOJ/(㙼;vpc=?"coE7Bp`Uo'n)Inj]y߶RȄ-Ӯ7fV- tNYN$g0<1|v,HyAY:T5xtK56KR:M%W}J~S8]XS J,1cAm ̏'1Q7U^ɬ(?xwqڀAæG}_z'|KGo8-ΠeW;hܼOwх] ]RS]1q$34s^Jdtazv5&ZzByCUP6Q\fEz_~@\OK,m%+tcLVpsnINd9* D>\A/% ZSf^rEΫGϐ^<|vue-8>|嗃{êr[6J$%SՉ׊`U/Cxx># Vd[KMU׭J9+}Ll}vN86@De':' )KYP vON/dGU'8F~%V%Ƀ\.4r!'k '񤌒A ƭv= `w<{`U&2hrŃX%=%|Ocb~. `Lmqxgc3|=n&Uh:N=$OG'Q{;q1RdjWLҾZy< YQW<Ng9&\}i7~ٟP)Zrҧ&u1Q ~%$DS4ӢՅrcQ% ph["\ Z`֧9#LI9ҳtj; ~98:d.)e9#%BҬ]3#|wr54B-"%q&2/Ӱ;-2P:"gL3ٳQO$"}洯R$a;|K&1,e;[#>_ޤ ^AyȭQh3$k pj1ZUYܞ@t*cf4=!PҴ eon⺳K#@TG('U 񈓦yč_XD6 …qʌ17֔R@qz  ,Ef&G'Ji>4uXRdMHY$@}_\ zIZ(y)g>K] {ƍX矡'P3p%Q6$"VKww" 'xw d/:  :;5HN^!-$.-; j)j߲"> MAܬOVV&=y m~SNng"+ 0e]o( BU1 zވUAXVLh;{hUoDID1,ߥ;..֌/2J2ƴ 0&Yx؎#e%PFD0kgh\O+.EZ`>6Qp vgCJGԌ%NCw)8 Ju-YLZ`Pʞ+LUrofٹ"[A҇ba*(Ƴ$%E+O>~YY"-6%;et/itod`|:0:ok&qջkycV(ыNٯ%iun,;1֡gTrpPzNGEd@_ؕǷZV`q}^x\@9]7:x{BoR9E#`^|qiI~*i6xL43Uxf_ҟBV3כBM)ʨ;]^-BKCoO(JҜ^6N@RG9+8hE@aCz(O{e@peH~͚^'̪1D )cWAp\Sy`QTHw^֯?5 Cwd*)hր[tw I>NtAnLvoQ5|;=SQzZǽV>r:|d|p+.ڭzb05e: _$e!5 ŦR$O<"D6I 7|p6R!B:BPu{l^..)m#JH~+v.Sg@L%6w6榈2!?q5p[SM^_Ba(326Rl{% hJ^Y-9Uq`Ʒ t$/7MV.eG*Fjg+an<)ŒlK:V,)"k;J()ʌ"k;F(tE RF`g#qms-_R:9@Ypk_fο;i} $²܈[*P VnЬlkz< 3cI$ hG}4ɇwH lH(֤NZ)` =PjxPeU KA>ͩhM+Jg\5mT+`C{D9wc@;Jweh:*8'-.z+$zb:*+;d:-y]`R"bW*NPA9]۳QuX7:ZYvVHKoB yq%<!c|LI}/]#ׯ&yʹ04 B$oH x~@{6n:ȍᏭ|t=1J%ߘ^hk[C( ^`j}fj SZ$x_0-%cNh4&ѣƴt_#fDyFr.Rs/(jZM#ͬHܱ84q-w\\P,YehDP<'FV㡖#GrN69F_JmG#v4+֩2{LR^nGjqp.Εǁxi^⸶YwbKnRI7j>)Gq\d >BK꨾ut()Y,4NPX{W) 
4Fy=>ӣP5əKRxQL;q*ݚƗӽUg`^-s=0({)H?}gSbaKFiQnhl2Fp iDu߸-pSW WQ%[+XS^0 QʷU_,nnsp:ljb28z22+83Ka_H"jW} G t# -򭲎4av~w)襞k ;~ >呾UG{1!<AqjLr6|&Bwi$/M^|18yŗ|͗^?_;::84U#:Ctp+<fut>6sd[=,ҷAYׄl$?\ x>}3G!ƻSlے p a*O]cKԓ%c8WByi},U{!E󨅴kNZ y-j{(;qwCvUOMO 1%xJut\⅚]) \,* viR4CSҨt(.:vs$ =c!qѨkp`݀6X [ CZ//zr~gW/n:xQ=~Ipyߏj{{mZ/>;>1oܓxxm賳GHr;xkϾƎp0hm#^!?b=e9'̡o~1GCBsIs0dOw}\IҐ/Qݾf2MJ*n 4@z " ¼KVGscFhQ&2f݋^V#\zWS>Q_5CrE! [t%[O-8Zg Em4zR<#653| -k,I*88 HEQ!Wf3&P?AϾ?҆9[f`1bf^ƷaF y: %Eo5a4j-pXŔJ;UNU7r7w[U_h`f_YOe\5\Oj}|w?Agwy6pt/ȵra R/s1C|+_G6j˽whJ(_~~ t2oƤ`6{ޑPж#5;Chr=[y& ous;F"&[Xd 9#s.0'a8q/Io{ _zjNMPw?O6JuwV̕No'CVww@]N:[W\Y$W)oSBxPk$$cl>7xӜd4Z`6aR4~WĒ|ZЊzFҢ vyx&ԑdžƛ1. q4 m5yjڃ zuj%x~kDF5bayT^"4ZaZtAWiV (k+,Z1t߶n'PȐJqmZ>-&58 '7$ 'Nh:A0,SE82vSw.POc9u@g7./)/8s ś2`(ftw3~xhbL$]cþ:lq)L[%^g' AJH+']fV$Zrp)g!f V 2ܵOaEk5:;jm0(st(apxs|Ƕrafيzeki!YPrTVXW[l͹ -NlZB7LR3!RVβۏ%auK& Q.񦠨lȗڍZ) ]ʲNG9>m kCR}reAŚj:R|i0US.MԊdX[2̪_/r W*2dNeL&ֿ52s8o#^5CwM]-Ƭ!{{چ^ vɈͪ/ׁ6N#qk.mmq2%< \-]+ 74[X;};oAU ?|a}i[ȨGQ5} 7:-| .p;֍ oZ0i)( Q1~a~H$ۅMe1OL<(O^h,F^:t) !/NzpV$ih(ЖFц>+Fƙc6Qvjp$޷A0ڇT%2UeL>n//?>'_5sEsDGߙ7}'¾0OcW㰅meWP|y;m{b6ӟ(}4]w_~uO!Ds<<~샯Zv"CcWG}L[skY1ŧO!xT :=&Gq2U2^e]BR>ʢ X4WTH-Q\&(eT J霝6 2/fGI+lY2Rt}YD@U2VlE#SDO?PJVFcˀw8l`E*z$Bpڼ+rR$on1PKbϵv +~G1sbug,Aw弡N5/)3OpX8h*;56pkCrc[?L- )nc'gOe'0Lm|D!r^<@H ;FJYUCh?G}Ӡa+S (}ԓW0=P 80å1p)&FLeEpMO<$èB(17rlHuTG/OǶb@"'lKvIow+0p ˾ֵ_4НM#;;mWiX9vCGHugU2xMYcG;sס:3ǾF5Իd얷/R뻂1Z(#gdnj|hhQIi1ǂq6SLsRW .*%_A5h&lX($F2>DeieLgAhca h.OS)K_D[iU#;tPzhؾz );d[b3 6G+EQzƳ75DO$vt?zgAoՊٻeTLUD 1$r YʉK,кD[ɔo$t^9l¹R4i*tl((ͶuSRn n⚉lI8/Q 6ψ6#,7`X<И2Lxd+/ ' p(X6rQiog8D }"w6n툑rj7m)҈guRq?_|pDk7S*K(1!]'͛ M #~?s*)9%FK4D#( c5<{Ůѕjeg@{?AiٞD7x*3Hra.CsXtǸV97kWah ؐ*2 dHd"O-ڗՒƈȉQC@ikIƳ2L  +I_3hgS% vwʭ%Dlbcm~WX 51 &o|8SJx|gyty8iHI|wߊ,u1}#海qU,ی58շc\ E ʲasܞ'vځ("쿉&HV拕[aaR0%_5v{3-9+/85{ ii7r;l9X@3n WTf鴕;`S]V6_hCf$3הv+G[C+ZM_'UVUJ!#O4I S>UJ=tёzN7T1*4-ʍbn,G8`JѹnA!G1P,y80AG:lT*:Y {|ܜ(fE9Ie8??8l A_*hQU;ZK(Q86l#Мɍq[Bp{QAyRYuϼ`bgX| SbaS>bߺxbx28d:)gseL̪ *2ZC0]bQ㌏5`UROWRyjw+â2!C}0eN }T#!ɻr?w)@ٍO>FQH',t{{uCfFrd Ty4‡Gdk׎Z}"\$iX$s 
ۼn(c"w2$L@WGP4棃49$t^kAn4KYѾ zG`Mq kj&fe\#DﳗtxkC/HЌ]\KzTI <{c`l/$>zY[W#= hW|/q515PoPKŻl)N~ ZZ8%՜|ˮf1V~_ ۖ t ; P5?*26.b#@~+6I1iiT- +voo9m>d߲!PY8O  ߅_o@1}p?럿qwwx2ɘ(7&!E0i[DWLS.dY`ϒOU;[׃'5B<[>ȭuR Zzj/)C*\ \AF g~-lާ=P0/m l ׭2 sBs~wV#-e2i[صm[,1wbV]O ӣɗUIb:É+r΂% 5c*s-EX/6򊯀>#|6d-WW,aepOGHسcɾc?h؅ɽ"s+~U?NH  P.f»c"96 hrłZxz.?@SUU3ln (x̍8wx\ WZP=? 'q9AQ`^\|60liXN` v(=zV72# ?%:r;lާ%va i{1- jOٿ&JOK40.Aeo>Uog*/ko oBד -i6-E{Z247)ؿ1ߔq eA?cҊz^XIן>3@oBV/}6WeA X7*"D>njl{ AoƁ[|<#l'kQ)-GJ? (Ҧ˙RtѾLGɶ$4Ur @& 7t⻮&6T5T6HEpb@"":B.\z q%#Œ}HWz1 b@W62-m(0"ZZUN)B2fnWp&{q{W_dG&W\`3Ij9|"7Lu!aCTk'G:= @;OvO7Uf7~dpJ޼˖B='["[Fё)`OkpmVg>j)g:,3\ GNx0gXBjzO-ʞp:cX--ؿ"2.Bk5IocB-ϱ R:P;τ&+'F@s1E’J4ss*mS'0G#&^p$ZA/)DsH$`TxR{H-=e:bqqS hg[h&|?Ŷ<P[/JyُT[O숾;ƝO\kg7A+(# (ddde40׵ؿ bt>U jܳ!}+j.ynB ZR|3|Ooۋ :]jl`]oZvKOV,- 25d/XXVu|ɼi3`7e}a၎c*Q5Ȇ!qm9HcȝW'xK7g^HrL l-V݊@ӧ4wV$H|Z$AK-]%vh&[L/e=KYS&ݳppui ӡZ%,{0$XmoPpkHB)a_kY^d&F h5Uk zwk3joAiSY.9` t.~EM9I>S ˋe]ӎyO-jFִ3drV2J#U˼7`c]A~Sph0rJkȧ*d֣U1 f[}z"ןÈ:|: d3_~WCn ͯ8myE ص/=R# }U8z<%w'(jc%c91QH>eXf$9 ~{s:spߪ8Vs:(qϏ34c[2 *Ԍ]$π9 `0}sQ7kP\;sЏװBk=j]Yf$Y4l(JpL+{X(r6('c,_j(o+cn3A$SeD.Ӣ% Z@2 L3S{|H"`7 {DiUFu1^2fH@jqJ &CYdfm6~~e Hl`Is2acx^O;}|xr_x1}|{o??v_?3^/gpؚ1gkҬHQ9 ~3NopmdibpY1tt0 _pvṘüӸ)OڣG~c"ghH%Ops)ō_qðFR,Awr;{|us?% ۤgQ\#!5z df:YuD 6e$d-^m~CԓAN'؂ĐC)ߊ2ˠ[~Jy 䘪6 K+aK/Qgńb}Pڸ&7s[nod8(KŶKsԕ /fjng+X;!Go*  #rN%c?#h7F8wɤ #mL!`>BmSŽpz򠲜Aҙ1i`2/d!V+Ӫ.c_YR/[C Eە RS%-:Cꓭhy]QRH^Ǿ_1P}y0љa" *X>`08q]mv?B]n}AM); P;K4˅.>%ia`>rsOS0IsO! 
d(S9!&B=4A' M1HE<*{%;gzP)"v4L<5{K di"NyдY&v z!&?O2Єz֪.%Q9$G9 b SKG%+Itf_~;t_d'DOAlpA)<X):tYT`* $@ܑ IMwOx&ͭ%S{.ԗf1ӿ-(6B\R=5pLʰ!M*u"t!s+E|)Cfd.u:ՃGx\LƕM`[Ӊ}?"aNoO3SaǺܠpil=X&iz-ZP] `:6QAJIh!7 O :3Aİ %=ՐS`Jr{φCr9Eu>G#Uw$$W9殦7|y5e3JEy4t\wк*ıJ mҞίZBcjX4a\ؚ6S3P(o+z+%1%ˬ"}'"}u,;" ܛ3@=, Xƫ3t6Y ]3U .S Q^z.MX4XU:k fI% 2֫H܊sI5B#?\@(7P&^A !9m[Av_JYv$ӊ˵Qvqs >ـ˃XZ B6vӠ| FTW8,pEQB'ƠuWiVΛ$m7*S *eK=5\4d ȷ++̧owSzhDq`x!n*EQ\ĝ4q*iQ\PɩEZ *"᛫kXXX^)v^rj,j9E:C|͓ݜd9%v1YrOUS$Ws1fkzL.,p8,nLOr}*pA-@ySCrKL~Lɴ*9:X JᩕBوV!ETifD2BD!Ck1H% 4@#Q}[YlruE-T~ue}6b*zGyzC0"rh T,!dPo&t(Ss\tN_BAUî㶜oV݂݊>5K}c6^9GOn6O}I'WӫHn[/c/ս*T=`tt,`;SYR jфrD})A1TG=ScN$ĆgMBdk( E!YשJGx`W%\-i_ 8땨m3~_4e:h +FV¼/ K-iuBwϳщ6v zv[?,' ^d ȂG/o:|::`jNUVe\{ҫ-|UFb6NWgI`ZX8|L>j&N57*w]xm"#1qJ.9nW9EHW0fau"* 6 IW^ňŚA:9Rj\64Yx8x%0e9ՆU81EVCS6'Ait -Qq]V=D-ւ-KyH0`VOO@cpjMl "G>S :ן\q6Kq :G}6'Z0 hCIKV GKwjLVRXp<6s:^@HziY`qVlV}4KȐ|$GJ8b8 b PdM7:o0_I;~c9x2ޗ|[`IMxrziQW?(sR-ݯ֤RG ӠtYW1U>yQjx h[\Wc0W!XE/*|$KV0 lY} b^hp@J̦ifPLXJF ɣWKq LBEGJ6q0 GQwZPh {caF"azUD==R$U*AЍ:j}ׇ NȟVZc/9 #Lك`h|wn!o}^KEHG[PB01b16 =c42aem<3QCT1D=*1"8Tg'$!#NUSt9D@7cI)h-Zc O@G1rkg1zgOP#*2UЩJHٚK}2'Mps i=h/!3e`Gp[{Q}Bp]!EK&EJ\{q?k9(0^T{q/:x9k[0/z}/1`NjHdp) g^RQ5dMX, q.YJHO~,ֆB7EcɛR,BRHLZ"1%B#kbX^ bZvޯ!#h@dXGX,'mH*F$|Cէ98t z?$B6Y6>9Tܫv=9S:Ba8UfDPSI[DXOIZu62(9wapthT=kFdO1`è%Izvf@2q^44PY%)&ì@}B&Z@*n5dS.jܭ'/ՠf; ~ cٲaXQnNdnS*,.w[95ݑ![㖁`BTQ))3i]ForbKP ѧCM@uA.Co叛ֽ)*]԰lEf1 yUnW²p+_ױB"\` cGD/R5ޱ-Nfxi!;} C ),i ~/aw%_Ƕd;n0-V.e=;~Su^a9ZE`O6 sXdYUh=Xߎ{, JGΞ<_@;J2?VUڂbsosRT&=cJqncyzU.R[LaGiHMUO˾.z+0ˉJ7?!{{gsd'cܹZi0ofw>ĸ'MܳÕ>(bP/xū2_?zؓA u1;zS/w^iՀ1j[+*ʄ|0x%!S|܄ vNDMی1aaiڸE HPW\jy8(-B&N^M:ώk/U vB-ڐ9HnT" 5nVji^3'tu_fUDYKymm~mW3~Mka;?&0TeLUEjKAYZ9I:!şlԆRnǏu1IW(Ϥ+zlq ν\900vkK7o"^Q%3IyY:dy!GqI=}9Ht()8A@)b_(]7f͉,iP4%%?yj-)1UxQzqjWӿh`^F` T?|gZSb1r$!Qnhl8g*Z'+a%Z*Έ^|6ϲ2_O8v˯{y- [z3VxGh` m;f|!x|v ܏:$|3Cƶ\םi LMb"kl] lи":dቇ|o*}fS:tU:w39u`FNJ]ϓe )U†'-h=ی ńqxnE1.fl9yRS35I{:Kksj5]sl'HGU{ݩ7`G1ULSc;4;IWRFh(b席0T߉&VPnc5A2#]kx* SqjKBm0{W@ ɹ~hԴR(Lr^ZOqytשU)n:hW *al U9 ~B` jKĔҡO'iq Q*@ ){rUŢ0gl:xNr'~°N0vTU9S!sA` 
M'*VXsmo[=}غ;n[G١{C1 xJRlℚ])r#&ɍtj,gaM(i$"g7DT#Wa?zT2naң6ںaKUŘ܋˫gg_^tzy[歽'矱LnU}?i77aܳ~qӫ̛,_o p|qzu}? >A[7}7C8 91C9]6,hAhb*l3!˺6OÐ됛(#ʌ6s7,ď()=XQ!ڏ` eIf g50"腗-0klYukz`[-fd|;_h QRiU͸֐/U_ϻS5 ܾUp"^/+XOxX[˹ VaUrL\YImR#.~t8?'Kj9{J;8WƦfɝ=:,_}U-uh]{=kӔP8%h2䋍IOmTbP hL7d̦bWf{^ M36o Άv >nu@YM@Nb1{,QbKGmªGJiW ~#x~dɩWlv$+8X5uJ\%A"RI"ke)U2m)?j-tUBC%m@L-޾j. F(BFw5yZ|^0':nhauG{okq@ gʛ;_{5aDCrrrRgE%m9f͆&t20ViB٦ 7f%OqaΏRgG ]w;15:s1/D Jb3QN@{RVQ r,$s*b` jRkyXT{mpb]jY2c] 7W5oj프dl1s fn# M7xu@,ɧusQ'%E}kG1 Iz_߶'}=KK>\ 9v7؃pbT\R$¯6GH&3g{DQ 娎"eMڢ'd* a^_ ?+PL;)̹u} U󋵗i>X?>yts4eH#Rj?#tG0=tl\ טo4-v9^ac('^Y(w$5 x VCgf!*.{,2ə5=Z-"mz`67LB6kq[P4F>rIp:5%ͺkx+-%,m P'/Ũiz5>x佗*dl aaՆj5L.־J:Z:76.i5xĔ3xG+b->q\Z@瞗ͺ[JU!mj9ByS b95aC#*FOƇ% @!aPXf|c s棇DQ]B02d|'voz)%tkMN 3U(QOpoC486|G0Low<=EfIg|mS{>tf1jǕг)&(!K[?D0}lnJF" _ ]p~ٳM!2q po-'/Z髅{dHg%n6-ż_Պ֓7o'Efvv)0,"L|;);g nMe]?Ye&o^HG1;}3ee"+,K hq_eA(LK~4da8Sax>qBՊD0^A8Y7S"!6r.{, $WC w]b1,ع}FW[ƞx.#;J`X ߰(cŰm[$eZ-(rTVHW[d͘ZW IY2-3!$L&U&,͒%sx& Q.flȗڏfķ[ ]ʲ*]VB#yڐD_F(.LʊAŚj+F RȥZ&$@ -V?qTsW^I0;o<Ʈۊnm~n3W`1kwH$8/mUސ/!YVyL?u7-[7pQ4n-QG$QtFtnZQx윾qע Jɰjh=E W @LdÞlyY%x3#DEh5@jK&X73Y0[G(8~E¤ٶt+.XOMMQG }?X:i}{UR$h (܃in !c9F?;JA =q0YfJFi% $룛,PxLLQNsTed,})LJA)k3eQ%ՁrQ&[x:FGB2}Y74fwHhGԭbmpA S#AFaev)hf2zTmiDȶo 28eަ"&@jV75KyVlxV^}uM֚7#pleܻ8laE*:![1yPmfc>,4`=> "Ŏ$%Kspݱ!6bzj*;Qaj''MU^2ɔ0gDu0pT; FJ@kՁ丂VBx_=Yݲlm51Jò5ϩZB;os]ϺL3=_4O{Й90ҭKέ X}&oY;"1±rusW0ߦ~/Z7f&7 >4Gt(|j Oc^"q6SLwRS &T Y//y4^6,QflV<4Y2>Dl2tRa3e Uow0^4DZ)K_p&B+I6tTz hؾ );d5н0'jeQaޚeƳM74"OH;Z͟>񀳠*|~<5SmgV4 33Fb2&0ï '܏oLm,gJ{ Θz coqSctM>{>"WT\fsJ=i:x_lpBDcϦ2U\MPbcPqO7D&K"PfqR sJ(oi"F Qjx2r+]+՞-Ζ3_h Yg{}kMkӭH`"p<"* aa85#Zh[E;ަa D!Mh4 |ÔnfQVswvYN$$qw;ؐ#$gȐ'A~bkWK{N>e1]z-dl/jҭlQ>`ri:66sXT`{"K]Ay-BB6cN-Vd!Yvr [EiB(v"Kw$$:&tfQf 3FM8]LYVӒUrRMQ\eا2QX '*g͉OzQ쾘Z$' ]-ưkd# x=x3 >:9@ ^/!| BU(jwPtxZU- i[1ZEE*mxt&pJ"4Tj{@&Bs Z{x,g7 {" ޻u7]=Hܖ&JpEEA&+%bCk]Xq&YEJL lOqQtۯi~P𽩦;xU -g6\off}"#-ZVI"5!Y!VzʲZ]tSY1 {cL Q)17SvV8PrDas@Kx}{rFߑ9Tpp:ʫfc%3wD).iِu*éLi1h3ƪ%ZF*+}%k> 08T0'5^G8Wj e]̉ v,vXǣcg ʋQv4}ΛqD,g ep,unjUDgVEoFsHQ_j 
?~z4t^)ٵsQ_CJ#vk88).;ʍHfYs)1e݌DHػ8tNuW5h/ݎM]\3%~e0pχ+AT[1)X43裵C&\kFuQl30:j;fB1 VVLF]6JFeތ\)B[k~~n$~“;ٌV>^߹nj?(<tC2\y(,L2%I =8{{{O{SIL1+ M"RqJv1g=׍?uZXփ7lH;mkݖld2Խ|a&9‰8Z|@_` 8>U==0|)[ _W' , A s$DY lE ,N }ąǮŌmb`MYYJƗg.I\Qa_9N,^! gЏ/ZO7Եnv1&[!s-w.6+쎩#ržqmB3T^vȦnʏZ [I.{ b`]ȃ?R~( &-Y@ː.i}@{B`Ž-h-)gsK@cn*6!5Mp#1!"\EP5 Vb)5$,¿d #Q oL :`aV1hh0x0xkT M`@w45ho4_U e¹lQf{)ezPĤcxͿ1ɗ.p M:a`<ǥuN?XYRί&iY%A ~vaڽ@͝OuZcWS+!\¾[};%~$zuWiI>_)J& ծrU ^ڟ==о~DG3ç,q!6)g0~5 # 6n7h4?ޢuk #H}T18 HcymLKNxWKLx`XNJqaczOb0Gi0f! W{2Elފהls A{/rW2$Tc]~qBUP_LYHRN%uT:JnK ^T'CO'yM]>HB" Q~~Q"8"{S61ˌI-˳O]{"7CK(?sA ۷!1( G5죝18hHAN81H Sv AHY.S-e@`k7T`|U~NVR[K|+xޓ]-%܂s@^ ¸H|srOmxalACm ͭ](\s< [AU?2o MykY_HݮOJK =OK,7OnvD FVdaߚo?b9:@ZQ]+_ťmbkؑK*U'i]I%][}\"=&":X>;0K@^GC{TKh]G>Xe@U"271* KlےviU~l6P7Ϡ7ɢzzpFu^n7Ἒ`|O3P̐$+hM*jfUHԨ:,\@U%I5`ݱlUIu8 pvs[) ˅di4m5ҕ:aOpi)+l( /`ހ#)8TD8JJsGpj P-ïa(<%5tqw# 7}u9ogȕƒ"}딕Mglw2QZ<Q[06i_ vNGxCaS#N<ӵkvq̩jv@WBRIx,q5(8ܦqZ4\FBlP j&@:ᶋ~eC&Cj'6?s=|[\)d_)<{oϘ>䣋˷>7.^dog{H\y[킎Wv4L/Ao eO[6[qlre)b{^T<2qTٰ{+m{QQȌNG|ꅗg>0O60I$U).` HIøA.A3w f*㺅ʜT /3Ȥ}DLnN쐛\SreWuW+UE)E_z 2zb7*0i[!^2fh>ݯAф<7E0 է֨#>҉QBݭb)SB]L L>;I)ZuU+t#ANR.DV'_!xSʁS:٪:YΧ{7x |y|K8yfx" VLOD/0OwF"t[a#Q^A!-k!Ϧ4}s$B>eʿWhUxO{k`b+N$-nJU՛ytUp]TwQM4"P%o=l.<8ܪa嫝_7X*ωu8<`p,ui̮3ww'{}6 `&@vnTW]} y$=(%P!6T][zԗ!Z}c=!yS]y~4GG'-Ds=LA4^ey"M=жX\gO@hҏNƛM_d-SOɴ"&}r)a#mtnRkxt_~;q<r$V '$rU=};E8yQBQ{xoG 95;Kak0 !KB؜,1֭\)l@B*UelRjt3Ȗ |hI GP~&0 & wA1+[и)ci6PZp b%d a3 ;$-8%ŭyY+=OVJ@up`n- @IOrI`cA ҩ05r7ţ`^YLPi*s:š @LU Č90Oc|dnFYQ~y[ g#YeNӇM8 z'|KGo8-ɠu+hҼOw񅈲] ѓ]RSU1q򫵤3ts^JduBa|v5%zB#UX6Q\fYydw,Ŋ-rY V=Tj{ŧ3OQUhõ)#jC ѯxtK*ʫP߁6 X7ܛsШ<$扡2qKplZcNz ڙ`{R)U&MB4R}v}17ρ(zi !RI[rY>-1cVHW:5%[hHV:<b)aՊrkEt+*t^=~"aӈ}08[Dmu+\V۲_T"9!rFN^D,@}3;DgݿK= o5YQIո: ETeq/sTj֧n脃RS 예h./Wd7e0y98~)+#J)%jCtxG#k?AG.4r!'k xRF ׈YVv][dP@B{{`V&G7g:ZԪb(S=>q|7$3_J( VaL:*=D ;rtI^&Kh0Jjl3HgE]ъS|W:a ?2w~I|IfBhɹR/KښV=HkGE۳~ xNH! \+ǢJa6p E6+k[rObEP$\8|)#aG?g+}j qB%in[pIVdrM Uz83zFu7Rsx^)1Gz6NB^]G"Q>%5,r2 Ha. 
ƈ&<\ M;F!iwEnsBL4pNKL~L_uD2$[doBs"9U8u$1 ;ى]2 =Kȧ>바8JM1 =54 }ƂDz} s^U}_r9Ako6M4#-{qٛ[H("; I z2Gq0%+MErVrc5+F_\^v|؄DHano"X@ae4h}D~:?t,}h׋ @}7ߠ^ zIz(y)kg>K] {ƅX矡'P3Ov(@EjuH ^ޥл@MͻB;urSx s Cvw@O$CDnΐΖ{IT|Y 2CDж4aUIÞj6)}'D3%"AdSӲ7n*ALDoD ,+HCN׽?n) 6$"u8/.֜/2J ƴ &,8ʤGy6t0jֶz0q/I@|lO<}zk}u֑H=AMwGc4zyd`| fuw -@ݵG+Eג`,6YG֑ڝ@HRKEx~9-Pϡ*tߨs7 Xh;n}Zjﯿj(^*}d) ?(J<#%Sh8 ?=Mڦ)ʨ􋪚:> 1[ -I 4}Rh$Uߺ"z!ͺ8lq%} x~1ߴdz%/l8Jc0T^/$\ҝ|fʳr 5VqBֹ+M8&yzЧ[-B?wZp׋)jVOZ6>v:|d|@V~Sp=5V~2}W|s$I@9q >,DobiTv ѤMcM}!WW%C&ޗ/5[EhqbLdD4z.Ұ1*`x34s3M~@}|&G 嫜q[%Djm}u/ 1 hТZ)TUs\c1)9Xk2퀈 +@iHZo0|U<_@PeX>h*;Kvve4Zah=OU_w2&Q['Zc>58{{mMU;b]ӨYl{Y YAk CƝ @"[N{X6%IB9Mc$W|U1uqXW2W+[DDkKphS,%n 3Qv`޿ɍ8/Wꋴ3L #n8綀 EV@ʦ=*ku+Gl4&b%%  ௲_9^T!4fMQ刦~up|J,sVwD\Fx"GuTGm# "C!) Y9ќ/~2;r Q 8cG4uvk vkUbs1wE ,= U4|VݚZx kSTHZ,@( <{eZ6A߂RK6j^=eŇ}_xnSuXlSEF(vPjSE6(vPDzf#qns-_R:9Dpk떁z_$S<~G?n4>EjaYn-(PQ}qx7mhqE5x=d$vMˁMbsXq؀ʆbMke}Cg UVۜOtƕ[s6M6a@9+s7 Stǎ^~ ⢷BG{!<(nJAShJqiwٕ ,5!3v֍,It lB<6=$| O 0FSȸ">Il{U%tWͱ8d*CKyJ(m#%tX=;fG݁R+| acYEX&'%!m=;o(4L05F,SRhV Z5k)t -:Ezև_jm]#lgM/ƌ*/3Wljzq ?h|aP [&)/{u9\n}o杌FRc"k*ߟq&?U!yR˽(`T|,&@Oܱ> 3ؒg(8ƹkqX|@)-EltFT:{1(My8VߛCg{ǟ)Dia@SRo_KwH+Ior.=,Mt< jj64:!^rAc++gE|7& vCb?ZH~T"  &eˤEر:Z0 I1-Cݗ@ك6QQzu˶6 ڬVS$2{3ka"w,M\a0]$W**KV9xȑMblїRH͊u* kE^j4J 6c. "֞UuCk&xlOrzl:e@^v >&mtow` e OFߙfGǒAZ>LQEk:;z%DCEQ(7ng #(>boT q5  xci|40Fm;f|a@8[|3C.ϑ^w'?+C4Ä1 ,RNf-RD<ЦKwPl6?OCWՉ&{ԔZ9+ite>OnB%fT\ <*lsn4MG4x8<[[M6LsE̙ݺʗb#P$p +e_t􃄇Y͌ 8`F%x640zc:(=6R$ lO`!nb~@U֑&nT.s!PpztЧ<ҷ*/߿?⫝Ub{VڋqA$?x 3t%Ւ+b()Rh E9׬ DTcQ#R=狉-rމP;O d|&c1[-ߚE\Gmʼn_Kqh4+oLs{ OVUW`<zM꫞\c5|V.9Z63(7Ey՜xL#˔t2 KG!ޫi3?a}n۶d`'H4DSטd/╷P{lf`o-Kf^zegZxxmV=οƠ=d%>#޲^>~0a￱# Z[ȟ/hȏX|Y}c s[}_ѐx >xe4n4d3p/KdLS)M=^0r^=EscFhQ&2f݋^V#\zWS>Q_5CrE! 
[t%[O-8Zg Em4zR<#653| -k,I*88 HEQ!Wf3&P?AϾ?҆9[f`1bf^ƷaF y: %Eo5a4j-pXrQ5ywFw8goX*o ~9_J;|,jΕӹ 5rOxld@-_kΕ^rb/W Uhm{=єP8%/d޶Iry{mb#2,r3:mF/Jkw%f{^ M>  vEYMܱ@`slG\@?aNp^85-&~xn"Ab%KNbu#YM8`h,Q26NA6FɖT˴4RTQ Ʒ1:$,Uh9ilZ^ -i%yPF2o娲2#F8iV u&}ay_a4T(WI$<KD dh3#*cڭL 8p"a8pkԗӗx$78 m= k.3zp9L#J5)dQE&U(Ǖ<8sYSn?i@E@$ܽ8y y5:˯TnfJ*{(c{&YDE_sluǢX 2Q /NedyM*Pڛ, []d0lq)Dg+|QelkxmF1[G,0`Sqާs!=KCP_] k]vu3(s3[ܹu歽9o̔k**`[`{|ySIj%m>}7mnNtӼxG&x&(&Xh=|~~`!Xx&O!EéQȿkz +jؠG()e^,ز= u̬@Uç,?9dq8[xثi"xKLuk:J8 vBٰ>[^`V4݌|dރ߀CTh{F{7gyDGHZ4۔ne降 ?a&2]`[\J&XPJh4QNxQ'RP8m?@H4F4KxhWhCv^O#1xڨT;58 w|C[ GCVK*ȒE\?\* U~g}哋/v֢fϿ̛a L՟8la[@3Lp*\`//{mOSAfq<9F1Pξ˯)Hcۯ}URdhu (?ikn !9F=?JA(q0YJƫK@HG7]YTKjx% 3+JA)f3Q%Ձ,y(sE-+^Fyqo!h*BƊ h$:E:|ʶmR6ձ 0O2Ier423n2:0;N4%۾I&,`beަ"&@jV75KlVlp-k>RGdz&k̛Rx2`i,`'\6o??%|lCኜ!ۡ[A sm$.&Ѫnx ܄X#F"# G+]9ohfKELm0ZáN ƽ P/\e@.qwBJ؉)Sى" SQ7醩׽d)$+?dŽnVd>ڏQe_4hŊTJ_k/$Lt,g,)Lrxpis 6)fSdl470j)Jx z9*[j~fwGRuZoGy 13~y뜔z J WFG {(&l36(n}G)O}7QoZY0ә|7;XC/ӔmJfⒺVڳzȎ?T0^1;6EBJ9"|kl?7YnohE BߡJaC}w2_egnx,cMpg"'`ϟYf< In<Smg0#YoKe1"rbT5uz/dbƚnjҮ}-GBʥm TIrk 7Xw{4 ί@ ְ2cH/M>0qdΎI6@BhMbw=b &Gko\k<poQGAe/;p=O!>Qh Ex.Ud&̑T6,6f0,he5-_$F+Gu 09͕8G@ 1JI8;gHRns^ET3/pg%Is=19n1ѻvZ:Ҷ9+19P+~Y'+]OU_Ap UNv O7EWa;m@#VHe!!l$,"VIJWc!o8Kawm!բC|iYn"{D,wݼwkPjV}[F 94y:Ba/Fԯ w)kb݌0 *~nl`3zάS̍8 ~M6M5W-}OhBzlHknkWa$Wd.%31k*WT.Cg`x 1Y떭pЛ-Y*tsf%=2hMf|KS=+( 찵Ca>-ڎtLTBPOg!DyI/U<ܒv乃*zC\ b@͠ ;)td-&6CSGvVA.dDk-=O!!WFՆfQB Uo'&nLoj- MeAQf [6 ޡiT/5 ZUG8|sj$]'[BS]˹>o"I&b'+o+o z#Ftɗ crKl0f c/N:H0CZbrAg, zh7V+os&tYBn)ĮG+TE!][oݙEkJVNM݁Պmu*j'dX)ɞ֪Gk=tёzN7T1*4I-ʍF"4#3h[0(#f %g;]0U'JE'+b9aL5gC6 bcvՁOl0ZtTՎV1R7J@, 82Q0~Kn/#8Wj#C3˺CXLcwZ;lސ\[O Pǒ\'8`xLIYuN[CtZUK;:LɭGBDk_ﲃ܈4{6˚K(f$ġtXLZɻٌ j%RӟHصkGuRL?L7Hyc\-5ۍVгڝAC.XH;*|[Ƚ%{P*k_!5JÀ 7oG<IxcL;e~gbckv^)05ȼ>?v_lxqŁ2rU4Ac?:ymQƱKQBDž/gr vi|af ^$I \u ea0Ks[ÚiiX-`n{azzm=.^ sVKƎ[:04/2| #m\b6F2ι5ƒIhx7vgƬ$c 3\#ɉ]vKuݩ@=3c։r{Ť!\c kPƎ -,+C=*!}A3GB-3?@q""RN?zMAO;6oUi0EU@y˽Uo7̜|ԨH@Z({pfB}6x(߈A՝!aXq44%l$h64H7vC{AD1}<)_ع\o?R=H?,w[}]+u4 {\G|Ch&?\ޓ:`t-"F곝nqV^a``` 3s1N.pΐѸ3 3l}1T]ÖU-lu=41oswV};fwBطMq]A;s 
y9\5v^+R%ĕ8;wbhe\biq!4o|40qc1&J :ą8DZ3&HdnQ;|bBK 1q*1!r?<)Ԑ иB -A͏ߟuǫm9ɦ}XٜS0}ZgkdH=1,*P9+ [,8IñYy̽U %B - _lFVHo1DHYnUx MZ:@)ٽ4}X!͏`tl*:fh ZbX A?(*2p?`Q'ncBQ"ոً2ks<S.`u@m0 nXS]qYzld%1< p-I9 /&|F%'D +䥬%-GT5m 2tBX(XEjv;zAZ~lȥugBw}9.u]+eÏ+3ˆC8K2%ȶd`Fǧãý_qT818C.66O7۔ݗ/o)u&i1{3T3M˚&IX76i&cЍ[ ,ʃh,M-?Y]P @ sS3FMO$Ħ2WC6{.# M"h1{l:8ت7;[= Fڨ#Di}}r')WbEJ[<_D$a,ꃯX-_cC"e%5j!aS(4p5pdK;JF(j/![6<ыF d.-BdXd72-Q]\DiLk}D@. J9  oE X}A]%_B֍f =SYU'nkpč ΕoNS#_35@Gmv=q eWj9N WBՄ-YY{ؔe%tL6:q]1\Se+MR)pv`SO<=$5ʇ2& FW"U4h,Յo[ SվQP܋ޖRaroU&e-ݪ5#b!df0DVck:VnQ#z`#m]" ?ǫ8*bg&ŧsNlz~)Kׅ#ƺɺLJp$ pFwܹ_4?)4^[8Sq:mq|,SϜ,c PUZAB2̧6b y*Lc~1.+4ݍYMGtYp pRK3I:,NB@D9z"yiea^}-7r/FyW)Ku!̈NռL.y^Yu]@ĚRCxђ¤Ęp|HO1WueXsvIZ֗˦sч xsc[ Q= nh#8#g dOIZat6ƴTg1`gP/сO`kLa3ԗTt7얿P5"Sq-j7*-ָr>' P(4yR3XXwƄJKBkd5CCmaU5[J(0 Sr$Yё"dRJdR-ʞqYzHU⟔թ}te9\cO7yqcqvvH֢o+qZ!Zq!:IB%5oTĮ~ _YqJ\YTGZ`9 "$4SIj QzPOxC>wH3wr=m1aJvŻ9G ULV3]a;KGWonD`MҾa':'ј!t`ėciw0XqcW vfnfzkrPwfdz>D%GȩZi u/T+Z/i;w)K#w5焺֊g3C=Tm~G O ׃\wKve~^ ;VT$ s˵^<1Rfkq6r7>vX!_9IFyMJVE`jLH&f_#n̵TMp`FxrUDmN#[]~5? \,NcԆjyRuTHHEҙnpf;WsMenXl4tL?4NaFc0GK2:Vo퇴9.e2F-}F6+EʙÌe㽓ӣL|eC.Vw |0RrL#g9t3/ 6iR+D,lgzzo/tQ"&jĹKxc~ 7Et:e'p K Kg+*fXSfx}".X]_fN6wIr-)Ln3j ڹB>.L`z7W5xGZ"*k>Dʆ5bC:!37W.Y3(|!ζjmhW\Ry-kĎ7Z0WQbR(Mm<)I4q< abDnkB_gg5)m3A#??m>6<:G9($| 1+3J ʅC l⩬%RYPD>:Glڗ2hK8O’őϑ*bg*pQͬ5;ʳ3^Y 5J'z *`K{CH{"v"dߒblϡ|Ԕ!0@ ,`.G9a^Pp,|~8 h; mz8غ ~rsmYGh1 IS6%N"&r#IEuE96us`@ oYd;ve}\M8M ϴưi[N M}$=V?5/B!{@uXCH@GReq )KSL/1" m3}d=ZSx<فo2Iu>Ӛ$5Q ]:A EF6>*^(H E/*lA-@F~/ork3Ú8~ _TJ/g*MΕ"]]k/{u +ypeZ/>k]!J$Y~I j9˵׬ EExMWv㋛q~TS)4$*Il{r(UfP,W+||=!ߍtҹ Uj׳ĩr6R.Nm%D*j.D*P5GDFelxq?t dTG9n9[ |r-C6*>_ɫÃoO[ӽ%~q OOOt[%2 =A!QYZFHָح WnaˬrMh}2&s3rݐ8Wt+|!rS+LQ˲5%)P rzS*W1*6jzd=ͳծ Zl`]ۗɃ$b$Hr m$H82ˊRJ^HOm|98뫕{0I).v{q ۟tH~SbD)(V9*`ҬqQ+˷ wQ -&TE<2t4&5}nxHNEZ%Y.fLjjVeZ9|2+  >a̗|I8&VHQImA%1HZdE``3Y41MטM`6(cCٚ8g_iWErwH[RQLAa J)lsF.SeZ}1çGF8wU:!~V]"Q֗9 Czތ Q։f~E]k<**^ ;$[L˹0C{ 65z| #ęDro%~dos*U@? 
F mJw*l.ܗ%@Pnkpqv% k< OL#t7 Hxol=k62 @,A!`ŒUfC1%Œ#(8U-MɡL8P 0f # qc$$#N_ 04tYSkrR?2((?SbH\%#aT[S rn\"H\$_oZ64 ^9{5X{KMhByvRy) z n@\LWȾs8ցז8(X'^Fc?/y:RƷr ΃15碪IpW.jr\P+vAbu .tBEG2| jo,{ca ,Lr̳JXZMX c G57rdi$Tޣ!s̯bd6ve.i!4o氷rŊ|>\sSU5pW>:BAKYBTǯ9`TʶB?59bz"rْ\w9nd9upem2\FGCOs%n4M/J,qMij9C :؉M/JfSO,u4VٺvptfKgHQsﯻGLu[͢c" &og/[jNUhܓCb3,I<"Jzqg/O>%Ե}ym{3 81l3w#;`cJA|:AF %h 6#EUeܗi7uXېF[~2DmɛFN wUϖvJ,j:9>"YorNe9]gɠE3Efq9*3ȩ ŒYm9x?_1F&;~$?ݼWghǭu$GaJҒ iyxM N vrvׇ'UX~(b$;XYY`>aG/z۵ O<}"\#Zg[d~.ٍUǷllyKqDiOek>x4z7D F𑒱5c%d i%>۵2"AŢBUx2o[`|zڶB_eth3YG &9}2yaɺ*RU {ˣ"9V0`Q椚^IG8#HhJfx(C%Kk2d^sɊ?dnnd.+ ֿV{Hf"G6JH':0Ay =؇ff`?~њ=9>UV6Tq&AN3-%*O2%KuF|zU3n1y,ӮSnY[8ST0RBZ-},BEZWTp!cǂFqӋNޱjbTۍ0љuo5@m+LDik\SV~,GvzX4W /:}+5S-ЫԏN&6~ ]vR4R$mC}m?@OV# zﲨ۷tn6jZQ\ v>7 `!j~_=MA0 -ƥ [;Kr'o1=M$F@sb]Ik]zDp= QKEH(∄TTڤ#>y.S],vrmdd>Q v0aCA\z q+D1GIj!:7ꞹR^*t5k+ď@KI)$h%#-FA%COM_v dLHF}s3_G|:ZR֯ߏ^ zv V=&G=g;<ڇک>f:g fC#C; HCDP?(ڀv~d\DxZhjP-w0`Scr6gR)/޻G`( ć\ܳ)q{2q;rKyPs.rTE?LI}5Mx$ ̈́JuJ[ҖctH} xǦ 4J_^x%&!t-qйYT㕗O] A[L_#cƽ3OƷ3'HI.'T ,TvܼL@Ct,eT_ sLQ M8牲/0!hԧ8-ҨE !!~b߄)R 5z<$Ƒ8޸xJ„ `ʵu'L=hip{rlf{otH٭>,w[zM)^> 3; ExިƋVqPKŻ: =RcaZ@EH'IF/:@4, snAr=]ʼdFθGbY^wz[WtSn36!J@)ͬ2s\dxTxM:7 ĽJ@*$ؑ,%3Q%g3OMuʹH|{Z{VoɾTn]$c%JM-X̼^0Je` VqoMCv0߼&HU$\X*@RfSM}%l -U21yްuQÜ_ـ[.Bb?/{߻wխJQ %BQM oML.xaWL1g*N['q7-`-' d8heeWsW RU)*+t𥈹o_NB2_ `H}̐D0kI_"oKbhkL65A"$Θ 6a'Q RXS{ͳtJXrOFiv]27]33o':*c_փ7yˈ >,oˋ,4d`L|o_b?x#aX}axNӊ>Eh!zMÇhy7_K2ZfGϙ(t W%WuS>+#KCeЕ fm{oiPZ%Hl4ebN@f~ Mw\GsaXKXTxuh}(m6;Ieɕ=Ir*%nӯse0*oX.X3ƠBLeYk:WWwu.lɲ ]~0I‹/Ny$JD(k>gUfvvҫ~|G=:@_ح[;/{>+3ݷO-abő2pl0c|U)k%qerI\ סhو,0G{`[ӕܯUnL /I3Uksx@PN  1D*!e +vܔhD>+]CqSr8շ8㬨Juu\qoz^2`D{\>XPdECZ*q<=oP+ſv?= _[{|Rދ3.[.$BSm 1~"0r}!a.2X߰A s^χ^[ ?vclQuvcՂ-0H=L_DZø RZ\aM X RRi?S@fu;A3?^3?Ӂ3?Ӂ3?Ӂ3?3؟؟tLHg3hݜYAhO2rrw!.&[;!~(в5*rOzrLn)? 
>q?8PK!O4Chtml/ppnew.html\nD"XhXER$Ğ$:' w/(< ߙ۹KJ{fΙs?g.VXEf 62㤇e$'1ٻt: BNƴ{-ƎGDZ0D ~,mg~",R9i~tQS90Y+(!6/&"!yZ3؉Lf.LelF'H#Si$OqLp^ %~~wcKN.!Z#|;r}ؐڵ1 !nDf"Y|"uی ~GU3f{T{ү1ff4C]vFvmQWp|AUX" sGp"4= |V"ѵ2Mޖ{d43sc5n#[ cV>0u36;՗}d'a}{eHZƢ,pel~7OyRsea 'a!U8C^\b?*rr5xg#ҢMW7ʥICYikk @l,Xm"uLCeHe*+Rg ņrX_2b%@21Դ8Ҍj)Z>2͊|fXXb؄'%-iHƱHK|HDWSǺ };xԑŀ˚. a!DI^~79 /+:ĉwSVGQi:bu#Kl8J:0xt0I%xYBU9LcNC!$(4y з6QgK\JI -%΀PJ=pY_Q "1տ-vn[:r5fe*Ga{gV0p)h O 0|[W:dcD(A>1EYLߢ0C ӊHe"/]XR^;tj-A{"Cj(|nA Rb3wʘ_JN2uv:@p+ F Z^8ţ1$Ƃ$c>Q\mB1ean>g´8(BuH0ca,z&ב_j]UJմߣi^W/utkk4 jYIP: 4.j{k4RSGpM xڡhnΞ`R~-w@8z@Ue(Zh\*we|gyu>emoBkUJw .NYQg>R[Zk Xfi#t.>{ͣJe-4c{;/V:j=wXL,',~)00ǫ~A5%^[к.#`i^22x}(*GfF7l:d_^H_Ph1nȴHF>eV ?mo+V\)za=J￘tb7K6axs'XW{14/=6Y(>~5:h^>ټiա9 8{8±ɢg O3'UL5Q7+50;rΜ}nKê) SMY-ڋAp2(! nRM҉ıKBHm`-OtrRdzОxga[dx+_|ٓ;vᵴi;62DŰEc~WoLA6(@"x@ "Zw#c&^sP7J၅pXp(]pGop4 T¿+ < ~tLcO-ԹJ+JmσWK-6򡽶!4> B GyhKf_شYQ-Z{v""l+,~=US蠐C_C:Wn/^Hdtӌj]X2aٝ*0[ R&Sr`$RQWUdbemCB; 4ij6nwA9cFuuC=.ig.u^U@ʍOq&(lvkq;Ըjxd%ۿFh )/=u~y;^kEjƊ^o4[@bk)wiJO~/fnܵW:X]k Jd$֊!_@rT<G͔^:u fC7#%]b""Ỉm=} НԝKn9R+RQԃeU Gtas;j;>-oGzFJ/Aq2 $g- fFȾU|5 >OUQsHLiY;YOc]-YC6N+H[f$CY >(ڪ (rII[M1h&`&gkM#4xοQbL.oO~RY/\$v(ip"#U Ah18JQ8#EK&q?N`.ULחdʴIfr WJZ6-a'T.xBUi&|LT71QmDQ|H׸!3~d'7G/?QV eese|kٜ 75u_0FQOH4k}k\u|xU2qۇРEBaky\Lc?&alt-6DrLL`I,dٺT&ؚ6l-.(B cn\ /<a38դYr7,X5$g&S} ?'s֭DW4t"Pв4\$OPs Ι`wgodnͲKo/r|Czx {`im$̘cVWH|!nRzi8:Qt]&-)S6G}Q5 R\XLw[WNlvхz8ޕrەؕin;!4s>tlx"Az@G 18`Z(ɝQ{7uP72?vݤl~'Fij|E CI&9& ;֑oXBKU9 jUAEqdK&ZVyj&)%'M*~X(XOIԚ:ݪmKֱ;sK"ݵ@/gFp*ks{tp hISI1uuQ)VfVq6'[樿EQ4-I?.PKŻhtml/searchindex.js[[4+{iRZH *Ml_.؞\7vs؉ok2o/;sεc3ij&Gw n]gH``0]xsCcQa%^l囁Ӵ%\z;mFxҧxqV ءOD͘EOvl̑([8Owy&@)q1,Л8$DRلef{uS{@j.O~&ŒwÏ?C?R{))$<%1";i  !z o.Œ6+)Ui[3ULC(䒨MT15u69A(gWbNG3svMM|]f,I;ʩԯ!Zd~cm =|'1Ο_Rb3@íx,.^K"B bSiz~aSt)BkڋJFx܌ i0cm!_sCh݀_>}`Zk?3+%/>aJp5 On{kcKn\Gp~O)BP Mۤ=QCM CY"yu#GjWzc\GW[8@p I̤IwAjTτv<9L2L,0QWjG-Zۄ ./<$&*,/3kCZG3U @vo S)OW0&!i -$vWHx~$/Sck-fR WV!B' : *4)3ݚ)Ӳ "sWxvf>"Xɰצ1JjJ KY!a1,Y>i̟\0/^+r Fv2ΆDvGăN] vb4c.sbGÔ(\{\pQ9Қ [g.;H$VU1{E TzZm sECGv!ZLfT=8F)فD 
gq5[SeH$b6eᮂu99L8C3 @ YRrގpJ337^v#m%NlܬI1%.KvL"ԴB뒄]a tEXΉV uq2{8 *Y],5X}2T #B(2|RrɋɗÊ!O$RdT6s v=Q;h_"5d#1BBbZKYЁ;gU,AfR}[1W)?D+ >wq-ǰ8f&F {RHFK !ソ`1ֶvXbH&긪&y&]ׄ<`ϗX t^HT'r}Tzp2Bw'kDn\$n apC5>h6K9)! TG7pM$)hÿ /Gž&.=-ۆ6q I$ |n ;Igc c>(OH]e cBW%L7~'pI`Jd6 *.e +N9 0b7:MqJ0t=᣾VM/…(afT~o&f 8'jwFvKG<_bk4`b5 )'bld%c9RB;J^'ڊ.iJ z6ځĹT_6̀v֡gw֡ o0yK7o:m}+ݵ7 KDZ^K ^RJ+)ͤR6Z*Ţ. K +uP_Ќ_Ec[>-[aބi#LjeY+ZPaVjW6J7 [D<NJJC˒&mTJ/j2}%`D8vm%o}y-k4eHKrZɵOenk%# k´:D|]W⪪4Tq͈V鋳bRIZ*J-8QjfшkZk@<ώ!!* +G9mloGݹCpX佺9uLg>8QxF$^K"k4n5( '.ʜpBEeK^XAc%>}QIpRDA^ ҟyetxkmZ6f>Yxb٥oS#:Aۙ3Di- .D:)dpSuZ[Z5_<~_-bTKO7JN*RXX[5U(_ f(Yb.3`pB Zk=kO:YD|ljܵ[Tݸ˹-ݖ ˷{: u[ xh2k=ވOt_3iP4ច.Q_nʙH&?tQCT6LMFuDcN8?2JRbώoDvBKQ3Jy¸Yu)o <&!N~y"nf A>p&X_XYLwQMU=Vq.Ta%@p ;k#O:kC`L\k4eO%1Β߲{-ϋ K|?HU] eWJ&op;ɭ"Qf7 saŌ =1NXb<+S&k\:Tu 8ٸGMVϓɀVA/ʝ(#&81وC7 `z\f&$ǖs7nxAEbR)CseF@'t:3 [˜}B 8<³ 2,˓WSN^({JS2e8/g{mAêݱotfo֑'1|ǘWoOśgx{@GGjU_h_DS;r 0_luS.,;zy@ݩ"]QI+u& JN ~;'? PKŻ;{o.;i!D`؞y3ͻgw~^/yvg/~^Yg7٢n&>{,͊=Nl7my=۬Cjq9/xUW4|Vd]m}ͬ4Er#g7^y.C~>΋EYj[<.|?X 5@фNnʇO~EU̥EWtV"ZŔf,B|U4 + H/üikLܣy9^?K{/i^6ŬphjTORy6./+ ^drqA4"1ɮ olr{(]K4]|ZeADˤ]4&~0WN={FTU} LdRO :p٣&^0{|)ĶQ. TOH;s)C"XB.Yvan9 w !cMh,(Y N1FH3Qxr0墤%8_n n q_yXPrKksdvb֊8Ss>B3dv >ۙ>}' Ϳa {-82NlN1*0Cg=w?~>=~|bӇ??VwzK_U~^ؚ ͕(evmq=+S}Ũ;+N`_a;cܵi&-67^4| <}o[}sb~;%5s\7˥Q>Nt1U]e&Jx)h킫Sfvw7!9}^N@FC7 UVHWN8odrVNhoyWN` nŵOP_E721r,xo>CH w.O\+DTn 6H5z͒d˖"ŜKMto`V5!]l=C' pM1z@-˞M&L ]g󃃔ַ9Ե1-7JwES*bമ x{KF!DR@|E毷)0 (ѿjԐ1D$O2Nz KzdVb&طCg|jlMԀ ߸Olf;t>IN؞%Еlϒ6G #{{x4k 0fFo+P%>af }`mm~&b yq,OA: e{ρMv%p{sډPF*.4+hlOXU^:)5BA74QN{vY,תo47Kڢ3d'a:}x~b,~̞ aQA+?iܣYM5žj /٣= }L1kf%Y#&(EZK~ 1m\l"E~!} 6q`܋N6|"J>@IKzU.8iQ4,~/Pdb@m"*-  6u¯jק1ˇC^:-`֪,IiVw"p+C|L W$PoCL-T%~ȵDѳ2Ktv%M9la-5C7g#^c,QǶB NXbYҵTW~DSih$/> G, w:!:Bѡ0f*n3q8I6{!(VKΰD.F!B}jug1cA) xz O}l4ą]=BLo.>k]}oqHӢqv.D!q#%ׯ"t%?3ɞ^C6Och`t]mVGVVOns W/;n9Bz'Um K&t׈1. 
Lw0m;rSklhEKQD+XA0JZi4r jKߛӾ7¬of1'U$[Ц'ʪ) JGmW϶eTugR‹U4,+Y.q TcDe8jH~p1]UENM̉Y84,^ӪH(_{ay<VK7AOHB!W*U-lSĦPP1[T%vBUF#V59%JeK~ɫj|) vEnhT\wfŴHھk-D4 ri" ^X|M)N: 5a8[F]ڊ , 5U R)w%4 JnO(0]k(<_tMEj4V^zV眝bpIb$\5#qz_KNfz8PU }_JBżZGx ឧ|,C 8_mOFICgaaǖݽ$E,UhJnT,G;:`gɵPLpz8#kCg>O`N2  D^ٳwOxp5i c-W^qS*}b|kSĄ7xJF(9Weǥ, Q֘|qZ${ީ-#!&myU܁{7iaM/_ i T4Os3wE- b.Zr(}<Oz!||Ygö]=ZJm.ՍFw|;=dM 1sV)BL7V?bR. a.IҚ}Ե[:Wte e"[]uthSC~ )>|Kdf> ^]҇-[ZU~o=YȵAUH UGX1EFh,`Q|EE^b ʴ: ,Y֊je|rGLA)+;eݥx:,C T 2[RKCYz,ågܥb3_yIhQcly^9qk p{w<[&v,>YtwB9̷c 8Tâ]>EX<0ֳϋ_h@t{Lz^f lbs Իp耍NH ewZ$a @nݵ>V4v2nܥ6sZ84];ι ktiO^yZ &5C9 ){Ϋ g" l6Jُ ifEoe^7݄*[Ky+MM^E ]!袁&ySJún$h54-̐a攫:"X p) h(vm֚9xj"GͰ-\mg&$ vprlZpx FSk8Z5T H6Bj !\ځ헔69T퍈c]2@ݩ/cOU\}('>]hVA3a`LrtUy|[m âK k1r.v% LɅT0YIU6Jw!:8ɖ JN,*8aZ?$05zu"$[`I}+4Ғaf(Yv]: v>TS8$$&0*I[iĈȂ"uSvE{١qF7^Իqdo1TaΡY`O`}a~zUo7EDOQ>'GA"i :Ǩ%Uܐx\֛b֛1 g#bDtTn@s^+aR6\5|[|o| t3bCRH?qˌIKpE>m(YM&Ct Q|<^B& o̰7 ݺhB{+я( 7 kXd[.bn$il\ԭ0,1[{D!ۣo5歞ĉ'ihܭ[҉^xd`X$li?"kYDA4fz@Mn:4cSZ(gtȾito&`:a@"UuQHsX5$̌nI5MEwHKdJ:_*3|;N/t$ ?@D]1%s$=I|0u%XDbl8i;Yq 6M F滤IIZ'!+8&[vDAesK>h֌5۞.0%d1l \ג#B W_Ro]XJbۉU7\H; w~I4 OMv fQ/_}/Q`,]yFP6=JK:Hg7hؗVI[\3jVsa* hKGwx>a6|dFik"TX&Ü=NOO/t^;Nl\nSr0Hv: dBҳ>-1p}̒)3F<K9?+Dԓ("Ƅ$}j;ւڔ;l@* ׊AoE/⠻]&\O=9.F8A%ԣXS-ecV3.~o1jF>a'fDO=VH% &GO(j m1{{m-Z:} kɏ %BCzǙm3٣a)RE>ZSي+(ـ"ΖzB[g| /LlTzWܯn ŵD+C= >gQu _X$tgue4¨͢ t4rV5:)Q%9I- -ڟ{SY죇M|~a²^vyT;St UCiS4X"lw/w2At..@pz~c)}Bߡ'(32qlgCgsama:lS*/SBםPvj݇y*J^0ˆf 'rZitCUDJtՔPڶÂxGce(E^ǟ-EB$ pG̖T6^Uf„Ip[Bs'A2gQ+''<+#,m"\a, `x.٦W &|AaX$>,2]Td*'ܓL$7[1<޻?"87&<A(e%d1Yӯn/bBNqq~D༚/xyt"]umĝ1$[rwwґU Ȓh)}^[g?θ^d G/ZSü'<izتof͹CJ1zwgzyITC.m.@ +zpăˤpN,&,Q&NX/g)i7Aoe/L_ 6C½I2z@:axz\T&9od?X<,9+a@)5ʚ)Pj3ߓ 2#nd_ɏsi-%'Uഞ_hH^BJylxiE,v]ˊ 3g^X<=Ee]N檕e0 \;oE]7 pVbss0+7QR=j[ Q:=rIL2 r4!0ʶ؏Yjg: WAd]/Rwx{RO?b2M+Xbū,v=RF6e a(x^Ql<`,X VdgL+/zyqQwǕv">rwo@M2/%\VX`[wxSN8ܟ <) >L>솚& ^߮.Xط^uqu˙VO%u~Ȭn=1t +.n=DQ1p6aVF9$]F~O}C -WH6@) "÷.y|N; (:/O;+ʟ^ImVɀ/"#7 e]'2MD<)TYjo'Am PHqIrz݆vY~Sg9,v@롘 q ):b\iŸ4R*KC^aᡏ9&>cr⋛J9>mqxnXAh]('ΆNn) ZMZS%C,kԴ3CBPItDȓ8 40wOZoa;+vFbd >%[č+^qT/p 
y.AGm/O#Z|kȹJ~[?hh*.m'F\5g|#bU.ޓ~ϕE]kOb g'_|D! r1!;Cr|P?Egn.`S[089"Gp5jՆ%p74_˝^kdcHRK 5QB9M,>/8 'v"+u;dm!{4]m+1NIX%L.,j,׋Cyjd6Zwkc6N R^*̈́EɇO^[亖Dr48xZ&v[6pcc#;1VCÓb1r(Hju_W0 JZ~-^I1 ǝ<"~u=olJ1/`bq?I`wonoγ:fF5ʲN存G?FAϕfm-)慠K ~ET~nr_d[izI*']H#M#Z{}:d#/pH34:m?>Q#kb|Ǝ٥m[XBϓʎi%knügNw'N%Fp_~qS<0߻>|/ ?/tO[,sg"4(ΧRlB椘Vkv ~2iX[+'>2\7,a;Nep@U9:-9~ Ġ'/;OAH[;QQv, EΪuEvi&zT9uV% |nw(o 9w_>Mkwdo:nCxf;]$B.fZmy'gߴ Nםtd#f{B rxn~n:Ғ \f 6vC=<Aw$2Kbh8fh,@nERpJtM-Gfak<p {Ya`~,+9eèW2i15, ƆiAeTc KoG[ `]0PZ4K_ߎro/<BsNBI׃zVes$]nevڼAnl* 5Wvnwѹ14&4 ^gRB?= xgqܒ;O8^,3R,;+(g2%76D L 8$HnƱ~fh[unč,ƱIq;*|,oT-3OU6X#t,ma6San=rF3%r.d{G 7PV W3V)'LMg;d$Uȃ,u!LpXt%Rr̃PPǠq2t)$[{Q+NIjɢ=mJЮ)+"ch{pWN I.HUrN W0ٛ4fxbi>E|=DWv,0,ADv=^fs_o& 9Y6I{#?&H<\&l#6IWs;ELub_V=ݬ2HP޽!+liO* XN%--<ƈkUAeHcy'R;ua7 ,8} H3qC!]Q0`4fLoF{l@;{[8VT6=8[(#Pc_021&0:\yiL}rO 诩κ4UVWeNLߝwP2dixR΢7y67зm_F%]d yn`&Dmcz-nXWWwExokW>NNE3Ko;l_e8Ak?mTϞ] v6R{VOEy*G#'CIv޻ ۧpz}[֡cGw.\OZ4I(,Ԟn,H*߹:IZ꥗asB~dy|/h{լezvwk:׶5O*d^Z8uM{I|Ei#޵hi?[O3ipjz;lc2u1g3/:.z*Umo e2l&v%5~a?- {ޫ`Ȳwi `Pyp6O,DGM*Snf}ZA3*LShho9pa "VeҔh&m6XְiW Z[;e'm9(6Yε3ݝr^ϳnvwމvwe>6ys&k?ɤhۃd--Z)֓VhUMeS_ӂ_Z,٪ƾv֮dnx.vwg?Ŭд-&g+eGMPt!x?c@lr7h9>]j`]tne̊z=NNd`euW0o?oѴ~ 1_æ'Ӵl*Caď$lRW0zy뫂;ZE@W: c]ٺ-x(]Kt]tVeECD %|DmPCoA@{vf/VFC~Yڻ*;@3ɫVĻf=Y7P|z Ձ)r pF.j z*E.~ -3rG먟pSWM9gU&;2_O29t  z=^_\ :o1f^^u5ۋ4"W`{=ᕳ'mV,SpJ[c]x2U`hEAҎXɍ#LjSrBS3i$Z%1CtQN<#pV9 [ e$29mOL- K#\Vs!u%PfmuI<2%b5tq'yjUCP2`_8H -eż l&" 6fˏj`YI}106$ 0 zׅ%$ʯ,1Z C)\~D:iUÿ/ 'lPR *lV ,!h@VJfKd!&]ڜfaǏW|3t=>}jδq.Ī\]Fk H{@Nu1 &m_i8u$]1~(&56ᷲ^0&Z82_Bp~@]bclcϓpGrz>7' nHWZzUĄ.37\#H6oe{$?υs2kck[!y 1Nz`}0ㄻ` &8.X&<NO.`5?RCɲ*DTpUOh57(\Dk/ٖ-3S:,[Z%& Fr8Qy4dK4ja}v0pvQțzqU+ 娇B#w]Eq]CpsOʧڑ飫_ Nh{ZPI_ -@e\pt -hI _%=h>Iyt %[JSl' pleR]Ef,Ar* l7In׫B Lꢙ5;JVuaq#ë-`c@lxΤX'vwA5 y;s/=~vt(;x9N~ynF 9YɖQfrjCQ!+.+n=GQ 46fH|]+ U_RH! e: Ln8. 
$gJ뢨_exֈxdbqQ%6n/_!_e]锦CEVl~"}d ,t؂܇6`w.BTJb\Ÿmn"/mTn2Ks8JMpi [C$.#Ot}~bJPM]L6a` -8~dЊ[ >!ܚ`R;ŒE3}җ-X4eeb f*7[Ԭ҅c#(twu.'Qު0匄TfƤ)q^HŜ+G;VP6~Қj~0_ɾ@ַOIռRj`uF* %xjL{lTI!{&,M:CچѨ C~'ҹh{]IT/6,ō+z2?m2s2j,e0yz'G'ŅV }X *'>1^{G_>zu ̣mݿxqW< C ({țl IF/ZrEQ F v<;pV7p Z A@fr*Qijfᰙmv:V*uSTUg̰cJAv1ۏM 2-/ pFrŸlI^0D[mr}y|r/޿U2um@$躬8NVm+x6f`co,Hj4;Q+\~!]ŁF1y:6#ې;=1^V2E ) kjnȃeNcV,gNyozV6ʖT{:׳>tbԚ x ]f_fz'sp@^\@ƛ0& %?˨/?k :tyS 9[f<ܓqd@<7~p8օxCkFGd^cy`8">VN>%'SX4dHL%>ftZg=]}I&%-(g8t3{Ɵ͆y٣ֆihƑQͽ7}=6}6i-u7(:W7P>K:BCNy]2oad;]dLK>l-hY>=;!̢u\<`\o,{M@!k ;bĮߠE7 sP\dCvt+1o-ƜԳ3ܱ(U2 t8ju:Wi VY)tge d>EN5[~ F64N΢؜8nԢt]Y=[]6<@<ʙB@ `@*I|pˈe ~,Lp~ E*iܼudӒ#ΰWeIEnXkކ0a9wbJ)06sC.If (mI0k6~?2ģ$AG5tFzXB! 1-"DgSƭM74mqNQ𐶻;Va0]Z%C aw/L%Bޟhu1vL?:-Q$l.?J NrGzvD!PY\Ȗ7ӥ8z-MoV 6$Qӵ*i;,}/[ݝ:4hͅQJὬ\+Z&!Bm:EUJ5AG+z KF^hv/.[ Mg ;oӮHT N#Iz{`e*͏;< νi}t߫e)tLIzOd3HQTV%-$$S7H%(wvYpi#䗆8$3xHU4!4iz +#UiM wsPEPHIL8`*"IX;YfxZ+Efz9PmD w3Ǒ]а9g=ۆ4dwt]Ft eޮ-|W( C`બ׭zZWzүWc1U"GJL[|hj NB~^<4[k9Qq07~ͷ dL%ޡ,Hd2~Gvh!\q-Y*zD#MWr={ ®e\8=<9`SZ'$$cIҳh+Ԣ"fk`Éuh\ k95wdKX8[% +gz/j_f|9?Ww 0&$7%?P*8UUMlu5T|,(OzWBEjq,?>#|oܧN>'zUa*(iSf3Zir3sZc淌:@/8qVBH\7"T!<(J[qX}dmH9D4I}il?- t,[RvdZ>AҫiNi3W8GZY7Y];\ʍ/5GXqCD;y!bBHr-hMͱeHEZg;eJ5=FIHbg#vJL'EYҩX5nn>vv$d35 "6r=P!(XEf xU+ c4ZRdy`Pp M̭6m9~-"RE>CRAv'0؀ "\%ܲ4_Xըx򂛠[q *# W^30YO=Mr+F7 ݭCf` {F s_y5bP8,i`EA4`X>$V9?Ǥ6Ð|+Y l PGө-. 
aZ"G==r홢khqtC㏴N= ~RPVDzY e#.Mm``fD%]l:\: hp[32KśwHQ!32Ily_IB%x*O=#źDPջ?Q [>O2b|aD.ȴ^1È ۝cϣ17&馫=%dHެnx:B//P%%I,ނn/܇6fҬH찋yt9.u\Xg:ݕ\@}^cK7EUeGE;RԖN ?v%HHq<0z"»elG+cwo L=z%qyM|8O7[䖈xew+Pv1r #0_RetK^,]zL֬fW)䢠VKOylAXToaX}}`z.J9F YL?Ǜ˨-5m٧//~DAW6vB}tw7M>-HoE1K_,4ھ-:F^vLP*!=O?>pg*js1Y3С%p^ں $P )9$ _'.ߘ/Fgv|@omS >̰C+H~r=xH8u6M,[)'ĵ#h%”-UavYy.kS?LN=ڌ[%po5]wVf2yrxv8Ynhl?F_7 Cv̍SK |ws-F8xsVotV3 1|;4ź^ː9NUӫ \Nc 1A=F@ጕ)`/…\q&?j) PLTB-q_x`h5op>i ~ vѤ=wVpU\_ ]0'?Kϥ] ܤVzz{ y I9q,崙bXp4w=Q/VlfN_2C*u*T'O$mZ(a= t dLOxؕ,|D80Gx'~#M@n>; kqfh|˕wAQ;g,O6m#w)?I lR.)/Ь+H֩L3GѻD 5dIݗ`OE̸E\ٳuLJbUiK;P3<vs0-.OOpi֐LxC-E.7 H~ 6`hTN_|qc o29W^qznXY+~%m>?-FoKG'K9,`aZ D˜Ibƒ_r;o)tw 8f ohXAdi4n #>(za#eCPGg,~'DӴPsU"|sio걱Ia|S_ qNX}_:!A8XUmNOO׋ӣ5iaTco1'm{d[E0<"~u;obJ1/?aέ ܝu.e,Z=YN=9@+ݍJQ`Mf6%)K"y!R8䃁 mP_WdHoJKR=Ei8[7\+X %e|Gb2?;>;Ͼ> ;25٭g|m옝Ra{N|W4t&$2%Evab#`}rYt=~w xGǧ_Aѧ}X9܈곏NkdcD)֫BSVik)ng 5]D.wv@wธtOMD,9~6O_Bމ'}K_0(ZU1 B /[J M(Rs뙜K34z 4ޡ)`ڋۓ y_?M>w:@3wlb{T.Wm|ٺ wf|WglEmvə7uq M]u#Ӥy0ۻ7;UCMsq;-bKL pԍ rhfnœY!DrdHNׇX[&u?zuxkW1 Pҿ4%[e8E酿?_Ϟ]7|jŇ<\x(%h{$z #|;dIҪh& $_9zSb0 ;cMy3}7kϮ$h-t7?xXx.֯ ogd̬P^_w,)Hك&6ѧ G*EOɉ3ipGn;$TbVcζ},Tۈ.Td)`#7.(5iH ׌l,Mg`[4#l:@Bs.kWAݢ^97lz) p[٭ 1,W.6Ee`lE|fH;Lb"rwf`h XF{`տ5s7pl(@?T5Knʞi U&}׹5}7qIE!Pa# "K@SAR",oݾ` D$m(Z9Eˌ}߷V3:I>5w%2:f"1W^ qNPKN_aք-html/_sources/ppapi.rst.txt]yoDRAK@, 8ĭIlbn>;{o.;i!D`؞y3ͻgw~^/63l:[MWg⦅YѶGi-Z;gUQuUͳuS?./fϊ/]uhQ.>?]B,7sY/몈Q^֛<YQ՛ˣT\-Fo2L  XQ1\(Z"vdU.n2(w-HVNXd(sE{ .JX y1WrS&Dk֒J K@L0{RԇLsZAu;{T4KtOVb/6@= ix.eHKt=ۮU"4CD2D[`x)-% :i3(IAp9; 71_\0#cM؍4+ Xnimy.NZ5oo'>[Y|!g;çC;7̓aG&_ԉ5F4v쳇>|`cگӇ_؏O_~~}'N <8j?x_K[֮-?.pbyvb ,z;lg6ͤf zp' Z]`\oSϣpGrf4R: B 浴kd8^ bO`"ͽ]pu A~r#$˺ h覡 J SMl] Mt1 VM 7@&X8FOWMg _[Iݐtț ԭ"QU_Ax\YցlR$㺘s*1dM@xı)7 FQeٳЄ ;}~p"'ЕV@<F hWe[ 5zooIר6d_jW=67&cR9[B2&II{`rTJ v,7vXMҀm=;;Il' p Y҂ن򨸁`doopm]!fų-z*]{G ̬-àmDLa> 3 `4SD,|b9>w97IzoN;QvHEfM-)˕;^߻+V0`FhC4Q!i.Z}Q)f80>zm~ڬִ@5+wDEH4tɯ5\3 ˕ XD//^&l<<{ sԆOD\֧(tQI=*WEQۿms]Wv#ܗTڻX5p6NhD>v٪s4D鼬tҗO1yU3/E3߮C ʁtܬ#vISqm2B&!\\.mY )@g#tCa{8t} cը:8[[@s!zÒp2⢦!Z*宄dBɭ IEk `z )H&ҊЫ@o6bꜳB N0Id֔kc$N`b asSo\ 
!/K?THԼYYV9oA{H'RubH8i,,5򡻷ޑej1TX-Jw#0hx~WXLS!J NoxdM|5=Ӈ LI&"(+{& q 偿!?K8¦ hyHT{˵)Ce%iySOje&zz:r6uaoN8&D`Ty0?J(*#b9#3INF5@mW "ˤ\VI.&x,ev\nl5/߿ZL"X{%*GQY0F=Pߋ+N)ɛnq;K},1Mڲ5 wo2 d^,xFicuc͞Ba*0:&ؕp°@ ЃS`gU1 sF:2(xm2=?;mJY&WR gL_H4>fWZ&-tS1kY!&ix~fnA]K/VgI/;˰blضCGKv Exˆ<.x!Ԛ;xΊ7%CICCL Zԥx=%IZӶ| δN$L1&2f|lxK+CP.kR# E9Sl;2S]O!KR}3sb0qݐHEs}yF쾽j2H!hL 굕yPU vS[D ;:SQ5y׷Eq{nv=5x`޸;'@e։޺hVnY~5VR!hr0o x{pq``zzAl75^ /֪$(\pNI^hC'y xJKLxI)׬=-o=qLz$<%X-E94_Ja@{dY- `Nz zUә ӄw>N%Y KXHwo#8xO6JyNLJv^'N,{Lq. t@t*`O]|T\ ŇUx gTA׋KEKտ^}ѯ ' Po5 I+5Th=˴_aZV% ZQO ;(eeu2 ֑Sg%|h*{Af` Z aIx2KetwZl ??O"i:rs,-K6UT|>.sM.vPv?'Z(vL1ǚjXTڧt}FzVy B.yoRaиqV벷UtVz}AlnzNy A ]$ȭӴ:~܊qVƍwN kQ19rt{P.mϋ4"OkĤb G!ey,Umz^_qZ)Ѹ!-̲H1Perz)7o}Prɫh]<+]4D!oJisX׍dq-bş7aTɉVZ1WF/N$Ds+,}Ey]Z2̌7+2KXG·j $V!P_8qW_s 5VS2qBYPn=ҁTr/;0Ƌz8 6=9<+W_7O3)'s(;Hv3m_$z:Z7zӑ=&SBbX̕ȑx*hNt%#L^ʆԾw+]nFl(] g:nq8v?|=ְ"ׇ E>zdHpn!jҠG]K=`UȄ!w62"ZA[WMhoe9EF|a ,vEҍ$M%sK½qO79b~{ƼՓ28 M qK: 2-Gd-k(1Lo\}Q mXf,tJw^.wq0-|-wtdLP'!,Hd[.jsi롆DM4NilUIg#Pev&{纹:(㠋[#VIJ"rN'/$Xl 7y'+a#ЦU|4)I$dp(Ltn-ښq"Fyef|9?W706Z^t[h K*ü+^+UIl;K)yQbO2 Sn,޻%S`Ͼq=~޿% Ŷ"OTP0fRivCLPvmJ0tkkFmj.L1|aa H.ҼOG= ֆ(mM Ka{dii]\Nߋ2}cGQbIkMpWxJNqL(Rzܧ%~Y2evf݈xp)wgEzEĘOZmZPcHEZq?"[t7UKc ''v:1hסDzTk 5¹ xl Vwƅ\? 
5FM'7,݌ D=PX]-u8foﻭEY+8^O|6a-d|D(xhBS8s;m6{4,[__'UKp*5>[qeeuw:p:BRR/[y loAГJz,/h]Y0!( y݉ ir )(+RuNֱIeH0r&Kt>6Aq~.d4#}7![9;i[8ݸIJpΑmxqd/HœTPe/ jRW_=P >h~?>A !䏿7]a\¢r~k0"ȸ 2|aggd\Ͽ,ssx יn׵U7R4WYT^ƐWΪ^'e=$'W[\svJӣ }s//uZXk:NVl}@Bb.-L>tXPrJE;xeW@(N=0/RU)kF,_<=R+Mce}E0mP{@@Ĕ lr= ϥB1Qޕ2178ćQ[˛L${\d:f"'{G'>ă"wC촣,;}WLHaI3.σhW;?N$"@#dKNP:}p"yYr0 t4aW׋AB P5#EkjGrQ83 B[ƒެ9whRB FLo5/jBu2p-%a}Exp9?c;}4܉"  ~,E6M&!,% g v{e:?}$;>zGHun<M4k)ص# iʣR”-aZy]kC?NMҌĸ%жoՋq k P|;4G*Ͱ$ޘ0_7}@SJ |w2.F+gfrHw7ڝ^Tqy^ÛRg= O4?Q8ޜʄz9 R%Gv= (&@Y3CUm{27AfĭGoEn=_ FJZ*]UqU0Gr͇1Qb~.$-K] mc6};®kQzYaF K \̶FkM Y.ԪQ ynv8XGK MFb@ei+, K߱)ӅEzq=?u"^LަPc}q)va2Y[ KӴBp(@P+v|\ײH5ԑC˄"4u#V4w`\BzllDr8>jȐXyxUQr=?4f]EsrzzYqj&aPIo+i _98$y[į'~P)LL6n8 yVgvlw¢fvǟ~}x?wdzm_O;}1u+\KyU83 w`y}?Cu܉0i(N>/n{ׇ/??cN pɻx %}SĚ%z9QMWh|؜*5uw.O& +t˙z01AvG& \% tǩ nS/G>/e'6"}>hTq+Ybby{:  4YTY.Dor 5g\@٪9!cmmA|ݞܲ9'˧iwMR m/lU_hl@\+ճQ"϶[Lz\5S@L~!loߠ@D 5m ZYZraKLntȹu0d[fSW <(֭H N > ,}PaPt^9-Sոj,{a Ty#7_B&ĵsѣ&oV'q=8=?# O5Yv ʡ̉asrRƙl< ZY&/{f[k܈ı d6Ō([mLMaj7H#m Jڇɩ]};C}IMQxǝ 'zw ʟSx!S[Cj(O>OHt~"v}H#i܎{dN"o;]:t΅ I^Bf# Ӎ_]OT;ՒU'I|W2L“s`SZO,3ߔO`ﶚlXNpM=>޺~C9ڶ_z Y .3z/ߗtȽ;-tĻ>MV'r08q6 NmT3XoGBmLn5l%_eYOst,`C06ZMΣ״R"/04uw{ut,Y1 aJ6uY|Z` RxЬY+(~Fi ͕-2 зr>AaSݪLb"r6D-1*Y>TnaaX j0ݾ=";q}'/;\BWWݘ?%@7Kd +.Հv2ك>D X>Ztt&-퓴h}g=kl5cDS#Ex,cK3b&n%}'PKŻX~#G535x% @$ opjf%#ҸS=UUumRn 'S V`KQbT3mnUUcr>,88DgxmYۼnZ4fw2]<% JN)Em6q;#'PK!Oe]html/_sources/ppnew.rst.txtXٮ0}ZeB X@bx@ɴ5ױĿsf4-;mb{3;]Y|Vۨ.>GwhSTcknӛ2?zNBu(^N;=sWuVg ;x9AFM"9E$=Pq1F̕fr&?q3j])(֕)Ӈ5SКD}eIM٬7۰ryxOL4a$LK%*'ǺI18uXaL5ڪ1C:Zv|j:V&bAޏgymrԈ WO5b ޴xK%Uy:LDE.HJ?0XZ"EeUχ}yĥ})`Xq|ydWڮ27u%p[7O5?"(}D^Vuoo3gDRg~jqj|П kaƟ@{b$2hL1ͤ=4I Db-s1tPou D?A\!edW&r8a:ÑSu({$!j5t[$OL%*G8/ҌLn+\vLN*r)ىi2/>iDΧw;gӧV(\A<("k/B2!&:,k|R43f:> Qƒvmw:ri(pϗ]; ٸTFzyCz~8)9sYIi( *\ zmPKN html/_static/PKN<>Lhtml/_static/ajax-loader.gif]]hRqq%hGj sV>`1lq:4W]­jEQcV$}2H&$ha- 4 {^x*e@*v:4y: ,͂-fl N`Z0Tc&/UUȥ %ҧ25vA$QljBL7Y;HQ>Ef&JV u~"&b( NÎ &;Ecyd -t5?Ir6~2U2lN?xO u+pCf3b"e3"nqIG%.`le-`_=E+Xn$l6b#muH.j限I7yi_G*iΗP?zYtI?㏣(s'"GQIX R.&NYR2Aw}Zc;V>*_ 
qԇ[O VFE«,C4޹ !WgED:Tt]mS Ҿn4W0,ywq#AVF+`j=6S^ic^Z/cl=-jhbN[6ִ]pG%Z+Ao$EYhkw0ܨLQ6] j[+=.mT1Bhs޵)MSxՙ6@]?fݛ"?SCuE+99q`vqfC P&oBIZFsg485K4R8̱qxˏ[)doNi6s -ֶ2hb` h_Bpژ d!f .`5N߼y4kofT96ctg`ƺ2c+ݣ[\h-n}nt;-ַB%hU~ygƑ*"&7DRF×#5i}p6h,; G*PǒZd[v LF )ITarIH:&n'a/``'dD ,c֜F7NGgLobMWg&ʻx^D5!7OKZ|˪][oW_h鹮Qb?3 ƙލ >AOT륎+KW:-u\_ z0Ĥ mefv1)~kNNjߋ}Tр Ja?:VHK s]!YL&Fz(-_D tI:jZAӰN2-d(46Fm$6׭rKjM obSBIJ۞)95Ț}KH1;&˷+ܓlvuu&q'~fHao3|/ g6٥ivotW&h["[Nd6vD Y5DYޔM; -ho: nRUYˌ/,ebDJ;0cϘ(M{$7Y]>?j˦M8 u&o IJjE0աc@{o?/!X䐘1b=888Q.vも5m:E9-]87%bt0htP7X)X@( >uq)ESe܊vڦHQt4faw!JlW )-XQ8EᙸFU  bͥI11]}S,PP LM7j$%8>tl6⑽MO}X(UP̛),>:R>Χ%y\0_t|9½_toH}-堁& oppaﰱ1f@\nB klMV/' *dVC_l.j.Z2me]XN g 8R=.N80)x^H= 2᷐>\!7S# 0ėkiGreets.3m+1(_<5Ek5% ;}>YjŤJt3#8a/1񽿾~A-PKNb%html/_static/bizstyle.cssZ듓0޿"8>9ީ''pD) ^ѿMCG֣lvl6wnmr5"}/?0N}‰giH-yHCV$%Ih0kF/B6G-r2o|}#8'st|I|0B۝^]I&n^]zGWo`S|AP&l_O F_ƳG}e8ɍpH#/ h|n%cxrhd$]*&aĈ0F2+OG9= % 9 }]2 G"#02>]d&-/l.CʈsajE4% $%ž/ څ$yVKdzG0qvAc pn~z>o;Zkt?#)GZNYdk!9rҭ5.m]'vrV]f8MI&{Rir5r@T3aF|GV(nӝx ɂ+ X3KRbFƒbhP!;Ilԗ{[MځV^bJ2*#Ay46dlb0?WŒSpVMvntE\R021Lo BP:!&M^]NLwOq2tupo"}p%+ 99I~k_T4J*mhn`ZU6HX>YVtk6傂)_~>vj]1wR_$ (4e"-m>Ä֣IiǴ:ˠppBVh7)Gn)i&h":MQ#>Unȥ\nNfwS,OztGkCH!6+p1+qIǹઇWތAjy Of){H9c rhMk%Imdr%k4MK4PLk:I@I}[d2\I )F?$XNO@Yu7T"uG S `e;# ߔ|2Z2&1?ۍ,'qZ!ߧ,g qᇗ¤eɜǬe,s]~C=2Or/k0HAmҵ<}n;Eڔ#<]Lf]coumg8>\im&`ی5~Ծ_ۊ݋Vo7 r4asAq=젼}~}Fr0LR&aKZe3eYd*u2;So?Et\N`qg,hzuST܅pWE8@KӽLTUلm:` =JYzjJ]:3"SK v!xLĽ:Z5,FYDGU6E $;j0[o/ x{̞w[lE?eⓡ 9j:LWӱsohsIG \-TkKxbG6!gE|shTNͩtojLH:m`ݳr=qBơVe.n>"b(("x H@NJ"m$zzVA;V%H=s iߝF% (ȕd]$`m+z`. [mw ņ% 39!eڱ7TUE2Ao<F-+O2xQ7 "}'%\&L$*֨//፵S;ͣ|Av2*CՉ0ue5#HNnWJD, ['fNfzԗ_PKNm html/_static/bizstyle.jsSۊ0}W f!2M [H -niPq<"ir+Ne[>gta<F#XӃFy믮z `Ydpej^%nQUIفrYT9O`@kl+6~KgU})y7G'ӡ_^g)j;Owߗ25ehcۯۨ6GV]y%-NחD3SeD_T0jOŕ;vpI&]! 
SxcxzJZV7/+h;$p *I%E$}?{ !yA14h I*Qeegixh6\zVeW{ec paDHg&6ي5&Qn~3sD{oS+ٻ؀=nnW?XumAHI%pHscYoo_{Z)48sڳۗ8YüYsj34s^#ǒtˋqkZܜwݿߵ>!8pVn{շ=n$p\^;=;wPIENDB`PK N5==html/_static/comment-close.pngPNG  IHDRaIDATxm8$km۶m۶m۶m۶AMfp:O'e$Qq aO[B3U9Og+ł-81dw=7q1CKa~ ʏ lϕ]O4l!A@@wny^xa*;1uSWݦO<*7g>b~yޞ mN\(t:+tU&>9Z}Ok=wԈ=ehjo OSd̳m#(2ڮ&!Q&$|~\>&nMK<+W 7zɫ ?w!8_O ާ4& MS'/қ=rּ`V0!?t'$#'P`iawP?Dãqف.`Ž lZ%9A {EҺ !;e`fT]P]ZCDX2e)ןryOZs߂Ј {1<*Bx `(B42|k@=PAȚe; HͭU`B@(IϚR F"a(. |R*wZB/bZ fMQ+d!!065.9Eq+@3ىVSËd8;&KpHh0f;hY,]|Lcne!fKcJFiySOhמ%ws vaJ{ڣ;/S3 ?qcC\qHxsemk2n%k8aJseI|ޡ*io&4A.qRB4މ-?}wEM~ QQBOg$='1-mJ2ZÒ ì'<'$=3oa<%OW3W{e~tMxHe`n{l0{sD`O$St!q˜L >rMD<ʮ6JI/8+0vu PBhqM鳫( כ0{z (""64͉3KR;"ӹC]kE<:1hW<%1I.88Nbm?ԞQFh<=SMWm+!GADiDdJZLDFM INdO%XT#$q(`! 4"TzVG8jt:dQ/J H,[&y.KѫƤlP*jL\ZrT1ȲS^WշW?lL?x6 K|%- 6Ǿ= Qq%sxouzx@ o><Ʀ1c ?kN~<9=rxy0yx߲wpLRzzv3xt sgLN1^҂w!`?]Øp&Gwqu=l\}d`;<: /ΎΙp&˫}ɭlTLs^Z )9D irr("2& ]_抚$ױ^$N*Ia0ä8L|1Lg#k+6o秔.Zy*#h{ Ӊi\`ƲdF͏['&IF'te5FM h*S0 hpt3<԰ \mRCYMB'[5cޤ?%[ 4,I]N`)2I_én?uɸσBcu:QvA1 #20B}~w:~E:#槡'ʗKGϖIDIHdKOMðFjbdŲi[$` p9H5͝5iJ%T;wđ QTUY);į irM(nM]N N4C+I\qΖQHM2| g5&6Xp&M1豱~IlμhƆS\V/2 -Top;Rixp27w ;\tsi^Xq //Q& i#hb IgEKKs! I m:m!@gj!m0)Hu1֪ɼIa oJ Md8Rʝ×P㬡5窾VЗ3C뷮7ةUrrj'2|H$DH>ʴpC)z}k5X_m~A2ٮZT]Ƣapdg H1L9X;aUq_zl?ܐAvN 8#Dφ:sAWG3[c 7evPqJU)VL>Ls?(ǃIQp($֠mlaBF:FuA* Q8mYe҆AЃA x&۸g!u̐63Ct3# I؇‘"[-~JO٬2+lˍoouBNڴl |>'fQü`NjYSk_PoXIϱJED2t0q3Cb*AvI(j@yЎ7ZsSkT͋e+Qj>Tڶd7q%8$RjFd᯳7a{=dRY+Msg͸Ww$bHcHE3SlHg츕7]9tU  kєެwg JRt1*br/P3r! I*DRxᄟf*HM@w&Xcd!P4B}zSF lW5UV*JfdhT :, 4tՍL$ !Iqzsq^:䔦} _MOC*'2̾}/@LYĨFliRFL&S0D9 0s$.γ^4og)R"8ߛd&M0 iw1ϒ-/D0p8uAݚ FyǏ;#yt׀29X>Sg) ۛK+ui.>kʔ \2u78_e$/e91[\fcd!wYd9>d- ]ƃsSt1\#r epyEx3󳗅fl-Mf Ϭi4kp],R\Z_8SFiiy#vk-3-iag ;V `PU,^HP6v<$&58㻉JvBבRAD @K(J 5߅ұ:jl n$릃aJEdX [8ygn[2u5{oEn{|eu)KxRu".|E! 
@+ C5E_eͤfr:3.%{5d3P ^fA &.* mˮG .}F Mֺ>} gޑO(-{rsb0# Qw|_PKN?=o%html/_static/css3-mediaqueries_src.jsiS#E3^FC<ʀ[C!Nf̄}eiTA2^~W~=l=]ųͩosDzZMqpv&^d&s#||&~]%2__$wŏټ7Q*FuEcKR,D|~#"]{;;Ϟmw֓x(B*&e9f#ٻβDtkU}bk}gɡ\,ų^_oA!I )+9ɑ( f%t;)V-&2qz-PJ#Dtb!XDwy"H9""Q"$QRl*"ӱL$W֟[[Rʻ-t(EFkt2{tqbW]E\9vQATHxA;3a0󨧔h 4*pkpQ\]n7qyG .ve8.7:[q/%u&.C . :T}-o2 @+pj@ݦTҭT84j!4/X9ʆsTZTYTg#*f=K_8i<0'06T9AGNwO?ȦQ`E]kTY4.(S^a@$$ݠXd4*z㴀=z5'Q~M(z(ISՐ5k`=lF9nǧHV/u9 ~x_Xá`H #7lQ@:H67Hqa>`s.+QRHrJ хUF &dg@<-DDcTSI)x$ DrgmBRy<0Utݚ”$WS5g&%&Uwd(%Ёp,ⰲryݿ<7gbkE,~Uy(غ O_5"c`MW燧'e_]m_߿:<[E~٥ύU\t8Va^ECI6%tMPw/?yrӓ9 lBO/bٸxzu1{Ǘo/Ӱ{{wCP?-|q(O_=^WÓgÃ#fik5;D\r5J^Dq <"xj6 gmTȟx1E6. z&Y8޽E)n<Ulc̝b0_BȂ Q%L6 %(":?~y_T(@%?k{\Wd%P|eG7Xen *$7Y<iZMi( P ("\KGxl 6K` 9l5M]WSB,oΣ-΢@lLo!1(1htq%) t!HhGb[˂Uh&Jz&H<'"MF%DPWy!€@O5?6#sF8@GGp1cweWUbu%ܜ琬iZ&{Aޭ[wCl Xh&҄V0#{TnwVmɛݜe27ͼȡ@yRDŽY6)DhU4' 紺a 7lf( R 9gP”qj^ta/ZcP]œ?Luq+cSNu:g:bUG h{SU}i؂p}fݑWV@̕>Y!pVtlf9hz UWٲէX2Km(CiL 4(,ޘlkHdL. 񑸎oNż-J8X7W4{ds;\?Նb{/ƒTBd`nmmM/\`Z3˻$9 scMۿҿU4T6NhUˤfHe;t%~ ذrf$0 5H &{[ͷ̙Fq#1%'O.'=f[Ax4l4߯VZnl bHݤD;1Sx3@ R&SQY\œ#܉xܲ9)"!vfsebDP y4fw* j@KZ5`[W怰kUod[2"kyO_gVeiat6aeyttg^;mzR3Sj,jFfJ94+4*RK*TVyv`H,-g7AUjUu\uL@7 VkŤq=EMn.RΤ7!P4 |vM>,@wVS %2tKiPi Bs`Ԏ y'lGXrV-֊汶Qaz֑aY]6S& JU-2/qլJϪ)<ʗs81dYOfҤփCّ^~v c"$"ŪbT+ `8g\6a箶9]M N !q^ 7"B$ &ⴆWz]bda A 3j,NqWZĉ>)@/ `[2c%b>׫BыӤ^v"I6^`ȌG0*µ%ZclPO;9\ V[ճen`ùu0XfU JrxwMj".`ga⃵A@⍤yo˔Dc R3w{( V²RZ;{*m5Ԑ יcf^QM[2!-UU͑9vo )M+PPV3.P!F *wnc'B #O[QFp Z[\q| jC C' %B q4JHp[ch(o)ᥚwINk wT5xe {l\Ihvܑž5Qx ;qO@l>;u/nmrC݁{Z5~jO\ߝ @a\$X"8'e@*c$"5AwgUĴ1/K[ 1spf^-ģ/_rx7?rz/ogqz?O~6켳ŷNǫ,͆Snku~Vj%aiWzܬ-K]Fx{l>I4omN(TJ6>ļk7JI z+!F 3F%p(~U/aB މ"\#ޟ|Gh.VIbiDݪc5 Ѐ$s PN reŮoPJނGluRZJ; {]].2 _OW)M4N%:~ct>(,FOʪ@_&y&ZlN(Bcu% Tֵ9 1]<0Ȕ1b? 
r CUCsz0Tfƽ|\i92)u:U+}Zu]Ɠ<~L27?;Y:A3 +8MNfq../4ˁf*ph3D8tsn}uxlSԼ h}[7o+2j(!ADɷ#\N0w8%;pZj*t02֢kh l 8[|.أ[[trE~9#u;U}ϊ/m9gw2P:C္aE#k+"Nƻ1Ju}ͽj{ĩ'Rg!Aθ]5]%؄D&*!R eb &0ˡ8 m=;Ğa^Ds.YndJ]Ex}\-r:$ ޑs׾sd .TgmC%{ġ^0fNS7QQNj[Qd#"?EdpN b=~f_8Ն }^Bǩf ge4c0t*M{]6sVӳ}c7>59kگcn]݉I<2 ;b<ϟr~W `dItM?4#Ȁ"콏>hv(Ҧ)(y'.q;&&nScI_Їff~63=}_'t$0ׯy4XD78sqG8'.bcUSK%)w+mЭr5W֘dVMGwϠYQpnvLCl{43Q҄'u4xacR }3jb=y$6kwWVUH 0go,CR$+ i?g_m͗>;9ѿ 3 p =@--ͿxPKN a$html/_static/doctools.jskw43Z6͜7麝. Pd& Jg_qeyzKK:r)R(př$S$HXTQEZ^$ugs#|~Wd&jJEHѫai$+ 9IIuJĬ&-$x:y,ET`JrXBD?.\ȪA V~ڥHk9XI'Ŕ'|WT d}TR>„35?$<M c 5WW8-4ٻsd, ȓi/Jt&5 j%kL3U)1/O~BL@,7Yx @ћVx[S͹ZA׌x TLq1"Ǵ &UH~(3Ւ&BP&4 ,bqQR4Yѵ\.o A-)pQ_cuƀ.lwS{:Pp?a""S$y4?4# e1|r s{>^@A } T`E\G/7b|A0tKG^|< 2@mU gx䁁4C<+m {WaVȹY,H FC霾r4}=_3d"RXyL-efnsLɠfBF?RciE T2]*!4MÊ*7|o3~vkqL&4O[`ԕf;!AnOhCg?t*q*V,S=-v@Al>ȓQ3x 9}ϯTThFJ 'oA%BB[^ژ.sh `5u]D3v:Ul`lJeNgZOC:Xt;@_S2p~R@+Kjk@C4y \/0 mUB -(AC"<_04q4y(d)K`cvAoZ!mcR<$0&q /^[-tC &ކ*QЖ]-Lx[s݀-|'B3Lʝvo E2uN |Oǧ-ۙX9ڹ?XJs?-mRko*jf՟w-iJm)gh470œDd'kmBAU =+ <8$dDhp[niwQ5璤:#'"43p\p4TW8M$_0@h pbsWni·@U4wj !?rbLtUxtBv"M;h\ W6 ?^BC {Br81jU]C?[y!~K[IK_Wo-3rciU&Ut[biPS.*0`K+L{N,D\$>&:{/e mSF{[$.7&dh. ޯRxGER?A(w? 
Fǣ g?|k~>~H y~֧\*c|*Q)m'sWizGgcT|qzťkђ~<']98$|:nRJ=#B@q "1I/H=X"&VHT*P(HO,Y( 25}fz NfHi m9MeBPmr567Z'!p4 ( ݭF.E@iڒz)AeJjgrjה''d7#U?p'JGׇvH3&'s^wI!l؊|SAV{dfpnVsIHvCJ1 YU ^̗c_gg, v67)5}Q ֳ7E⋧ -0U0(xs(h> ̵]|A$T 5FBXeR-Z)Q+\KZ[Ri30:Hf$tfBOKr.VװF3G_|yϫq.nMU)b/9c8،:.7c)>!j䠻Lmss%{%Y 57>|D߲B=tQ="w/GFC`<3|VBkJo|хnHt^u}z.Z(Y f߈e"®>Ҵ_,[SM|Nu󺮭mV' xg| B݋hl ͲFagY^*A/p6s t:v̵?s5/LW>Bk/6N]`88G[4՘Qm҇9AK>Z%l*r|970G-\oao Uf⺚Mu7U`R(ͽ-ul냯8v>oٸ67 nA 8 Ծw{/Mxqo ba#PKNQ]q*%html/_static/documentation_options.jsZs40B)eYcJ)pp\0tֽ3IIByoW0C{맏j.7q}GS%$l$=yo%U5lvUw.z~Sk5͸>6UՋ~c:Gխm{+y51jýG=F*>|G|VrL*Ãӣ'~Rg|ˏO?=3OO:|o77 "N "Lp9KPQLl5cKF6s9STXP8O!Ȁbm=holߛ%X7a}YWM5`qOzN|?IT,n ~^H^K>ji Tmv93nz[;xP IFo恱(C,5J ZoK꟫1J bـ-.)J)(l4\ !wF@[2w 0jXM&s3ot92<2<ʚL ZUgul}0zW]^YSy;,;caa Y=Χk9F<_գnfr1lbi+MVތnAlv+:IQ;=E$}]Zm8E6o}Sػ-<z7k~4kӞ0xֵڎwLkk[նd\ SxsDW-:Ěf'[ETwo~yd2oƺlɖ-W;M5i-b+ool^6*܏cG=u5~Y=|u5,K~~Qamzvu{S*W]^E\(uCpOknb7jĚvYwtQ;n%l0綕;fo~T&?l}y3n+7KOw'^Bٷ+k\_?\T?iS\>_ r$'QsgILfQ0 ̫>`%"1=btq4I39,5l.Rj!LNɾw//ti':خ;XdS3If9ziF` 0Y&J`g8l&/3Mg 'n$ZʭɬGە8wFfhbl,[K׮Ϫq[iml;+$/%zw|:i0~gE}'eG]n^H[iCҌ>ݾ(A'5Nzm*=ץ$)bPl'L vA} %xBXHM,rٍ@MXRXRF0M&舎-} I fh=1O `Rb&Ϲ`a,+ Ǧ @c,1ˁ a41E ѵ A *qiA4R7-y.X]*(Œ%z&lZ.G^ K|``25̶ A"4{a9Y4 U.liʠ jKE, @.c\X QeQF"U-c;^IM"W!ʗ)X9"G0KEdADp:v6NƷ 7ɤWc?I=`e\My[''/iwoL1'OcW8>qy2wʳ2f6|6 ;PȩµҀu︲^%׷g)"H*J,5f4\ cB1#q2CBbjQN ' ;3W@zUV* w80gby(E]2(TYbT #+xT-`+0 Cr+ lP ]9\:[(3iҮ< 0,籞a!oFdnqN9/pμ0YrG(2YWR"Hde/0U}u݂b+!i2Xc`Bz2ܦ f]@i#Vkh0,'sACyis~颡I 2W2Q( mLfr˅(Ђ\ q9aq`\d*w 0vE'ntsUse^XUyE\(c Q:)+EKX:EdAfܐy,e-CȢPI(ύܵƉsYu& 020,VDF-YU)W tDUˑ]o *t PȆeL\ʨLbU=bUӄʙ,&+0SVzد-^Ha,H l&(%Sr/k``&BA.Ţ2 0Y*ۿ򬠖̛r[]dW2xI+'; @mTH +N|^U9Y4ˬlL!L[BYIGr4ϔq 2-\Y% Oci$ŀYL&) p%(Sm ) H52y.ťFr&IQe r dlÒxaIa<wYcw|E{-.۞"v=ydCV _YAp,uFA X~E1yTG1o|Cro@,r1Ƚ^9+(`p΀yֶx]T{M:i]tܗuo\O'{E]6oE€k"0K)PK N'html/_static/down-pressed.pngPNG  IHDRaIDATxc@J@lKf[^g%_  HK ĿD Ab3CGhr.x/`X Wʱ 2 eF+,.xEJ lAR $WT?0i)1maUIENDB`PK N9.html/_static/down.pngPNG  IHDR7IDATxP@ @Iߗ`&"z6xK@kbϢxs]M :/+gPd2GeÐ~߸J_c S_ S%exdU](UH>&;4i$n3> ycdIENDB`PK 
NS[Shtml/_static/file.pngPNG  IHDRaIDATxR){l ۶f=@ :3~箄rX$AX-D ~ lj(P%8<<9:: PO&$ l~X&EW^4wQ}^ͣ i0/H/@F)Dzq+j[SU5h/oY G&Lfs|{3%U+S`AFIENDB`PKNص(html/_static/jquery-3.2.1.js]i{XdKd!؞Ej$lgE~{޺{&!y꺺zcmu%Z~<^F?t2^GIw[Tt6a~dy QRDc,'X54=9E{(y(g4lg[>'q"E6Dx>$+ӓ KųdUoswwGCJ;:gCC;:GC~uWVnEp8jGIGgh>NݨHtv9\L(Ҷoml S,~~$JO4Β MBIGi>0>(? ғQr 糄eOy^5Y4ʣ,E'溉)9֎|"z :s.Pw^T?ŰͱL8Kǹph?n@F?Ga.J4OI%8괥FgGjKtf{7 Mڭ.bG=A[:=;<(c97% f\ƭ4?ϣi>mG-Mea&43ȣs?GɸHemT :o@ [A1J sV>G*_,l C]YRi>Q z9bYA}txCs&TSS1ͱf/kk lK"Ϛ:a |jӸxsH[K-xwiJ[ (rQ 4С?(DU&J k/_p|LK>8"I˯,@yfG&O&I6;Mǣ5O)bJ>MOIR5.ώ1NRg Zг;pq8`2v"cqvLi h]jsx 0C,EEa{ !':ƗzvʬL ZP,fuZ]&AnBCp,~ rg'eX]B|?c2Z%cq^xیjcΩN0ݑv^HRZ"CF79 D(_*LSaF;$1~8S+x Fh#P{M5!A4={=^ʭ)F?_8xy &gӤl?K? oX nwV6HJOǓӘ{ÎkOY] 8Qn.w4A[+aς] cvC4 T^=[KmgxQ"A{zmFO}}]h}O[φtG/QL\b[:WKD }jU @ES8-Mq=%ӓ- Z';HSڌ @Ga%vJKe_@_˫?ܟ;)9-rjRFOP2PaN7(h8'׷ĐuضQW,+2v]!Y}ppabKƗ:hbveOhW7(Wlh7܏]ΏH[OQQX/ .ܾTes#吷a0lt4B^,1?p0eN>]Brw?%|%VB4/a'69z -$Jp <18\Ryμr NcHSaY5eAc\Z0HǨ}@g*"o>dYJdB!E$ :&`PE WHH>s:fXF}L'L40qŖ*YЬiK# Gj\Тi=GJ"=ijE˘/lhG'?E6|kXc0t~+.xRUݰ ^OFگZ9zAķyR6]3%!SFMVPe_Jo*<$A.iy>qG5"Y ,B*&x"aUQrl?hd4I+aK ו "š[+mX,d `l/Vvjnnck$_#JE3O/iS568J[g_RL q+-S#,lYq%˝6p-dOxLg#sI\s倲*-tuWF ݾM?5Mo5eCsWTUXt \\0uH2:*RSB6L+jugwMCn, ]iw;l^l8J;-:} q0AF_s uvjLRX'n:Z҂ COz7t=x4Q 4-GPff&$3lR|r$]M | z$8%K$aF{`XzP>INbrE)]Lǯ wHWt3Ewn7/Oa [/5T{*D8N쟈 =΍L v6|_U7p[ K?ߙo Dh$eXTmƭڦrr-^+:%'Fv1FԬعPFǷ0\JeTf-to11YޛgM}*C |HΏت-OZ8 7lݽ/Pi%m29&f2X½m})Nu_,Y~ PM0נO}J6fH3^%ZB%KIIgfos*ߝh.c1Mr!փ(+_>p-fv:E~<# {Ϩ }NLPDjytWt S _:5s-NC x+h: ׃ RZ|8b\Rlj#ի3'BxLby4R a3=uXD=OuW{])O75%\sQل-XRBMjQ&X< Q佮ՖyӦ?4<:%QV(՗IJZd46P2D)tmhet`4,}5l UФ4Z]\JpI=?oі7lT#k/SSj8v4^UnT1U(>nI>YGε=Iw%<('uzIES8${UȜl rk V/ G"ҕs?y&Y/°3p8gӮј,IALPmJ-˓Ddhq[,񵹆\T~>2p<^ϫRQVC4 _rxn: aOAIs@%)8Fy@'`>d#XMu3y!]{p'pjMز BR|MBÜ\Y c3` ZUZs&}-'5ؐ1F/Z)[ߩ]hT8WJ⦅ h)"Ffk1B볠":l'Qϝ5$\fDKp^d,h>4b!{-v4 s߽|/m6W )9DBgkJQV[X|.ewrP-IjQ)I?sw^8Vo)SqM=$bԔS7^&3B3om{::QBj@Dnx8b޽ީ`<;!B ʤom]y|-_:_} petxc cOJ|s:{/ɜ)|EΧQ왤PQ铜ėoG?{՝9\{ 5-yX!n*.g0#YYS [q vMj0Xb7Nl'!g *qQ/4k n@6+L3eMp5-JzN7&m ?T\1&'~WWi1 5RsgzuL&fc2g83@iw 
錪 UFz̛z~Ts` DREO%(H80\~?= v¤KmMAL2ZG\\fE s - |\,F 1i:%"-p/}bclN" t-5DH=0' xE3vϷv~Jdߜ(&ifw7fӃ].ofO:;"Tl7u~28 w;\pCag:|:DnQS-ȫwpO4My6QRZìTD90She_,Vyd\妥}tFȑ~Eom?rg۔j?xagsgqЪf(砵h|gQpp (1)(gs_mk MZ*~qgB/9`3O;/MP/RgrO|'65[\T3HY sUM:,vxv-K T#wck#hG :0IAw :UhWvo:!1BiTJ~aǀ5]m}uҨi-MZjE=PV~vи[{42K YiYwL<ܮrx(Eu~&$|5P}*w*X߫*ZCW*|i*ktm>&'{k| OJ Ĕ5 |4_Qi:٢iђjnꮃB4 2aR4g//\5{^0}]ҥ0A$Lh[}p٩j߾y'*z `Ti ߊO,,_[Sp'eC?p@Tp F[zǿ5\Db0s `,:Hi#d5NFT9u~>(5|"v"vr*}G/iCx}DU6oĂko ,ܷ>`vENC=zALt}K;>ys}y, xїuҳz( \p_|3^8߆$[ w6 2ao*{Uwȧ)i.X=%젦N/XA8GF3X,&o`=2ɺM[tC/,Qt *4Mk:[@7nnv%6~nx!{z5PgaWKGn * ~#w{VAI3BQq'bWLJzO@?n?6N FB.j[W'y^+vR ]=ӽx}ۧ?{{ovUf*xZ"[O{""mm$ԯyBeȈYC;C=_ 5Tx:kP.׷R-j f+(O|i󱑒Qqj\b~h?'Dئ_,.DST-mp6\5R4D4p~i] NG]av J1sM?(nmcogJdT |Ew햏0ܪ eGdvIJm @'9[S<؛ȁ.?_`5$~V-?s-R)Q-ekőr*~zI)Nʸ 7/vtt?bǘN lqā:\pW<ssE%JwqTjUd*q*yB%"EV̏aIȩ拜.% ^Nk]($/Mr6Um% JnVS|ǘĻ M" "=sYHU"=7uh奂G^Wo%?] (I|wr+Xݪծ?1޿yf`KmV%]s8Ƞ7|9S9'6 ?Fa,_n\VzoQ&_ne栻r1nE}ېS6!672Y,q[PmmM&wJ?X_|w>>y SWYtSݺۇ#ש̭P2exv_:=WPZ:S2cO㊝Qx:tm*%HEѡk:S> WѠd6S^[fE'9ȳA,gؒ~c!~9D#7̚ 3+WU߰yh/(ĕ9}_ŻT fP>OĶzV.|L36[ E)j?VUOgRlC5Bw\Kioix]noA:ZG]:us/$ξ ԴFy))12GGnՄ)tE6GoH`uKvJpw#SA"hSӮVw 2JӢ߼"E2wn0R7dUگңJZ-{ !}ҋϥÔ9a,d"b9*ըozX. xĻ|HD.НZ/,PWQ糊\r[ 'A(4vN%H>ai RW_6k{_ *QE× ՇlyC.`V ؋5v)Lj&/ʅ(S[;mdȆW<8UAm:%kˡo+{W?c\ڴe8. 
J' >7)H4=/]д\.D?ӏ}&EdKѥh5@Ng6(_Z,!8;zݼ{0J;}IAA<;p-q}5F[B0uoOmLg oo\n{jr(a(zH5mkf {kh /  TY˟`孫 0a𚜜H繱 %(|GB0(tzX}z@񎠇dĞs$md9@\ِh!3qeQ}l`v;ki)%¢3>I.)3ЖDFs[JXcQG735_|B gda'n6-d2W\:੾آD_))=G(5Co*rSXssʼngox%oYn ug]@ygpuI b9slHu bu(VcVZa=f0{g_+E%ȄX_6KzK/ZN]0"Yf@G1g)Xuݡk "фe 'S ~8}|pC)m®}9cGnv3}#f/Tb iQ)n%E/'l*3Q~d TJᏥ,<ʒcY&R7o]5=Q.`*쯜+'soLFHf~IIl\Α9$a0n{!/1 YEMJ]lTF4*BBDьH2j[*?5 O]YftvɯNB4}/WVK-$q\E!!n@JX֨[hWkFySsxPOob]}ȹlYc?7zڬԻkt%)y~TUB')EڞLaC̸ܑBBW|MTx44UgB`eswX\vԦ}Ly'鵣2d%aؽT z]: ]A/K=O^YFLjo5`]O߶ADh򂾥PwW@S 3wsh9Т0l ʰb,ZJZ}?~eKҨu\|W]1)IROP璯z qK_Ku߾|uU~E}־L>UF* ˦}yW'ĕ,fOVBTĵl+'R'_˵"|e]YS]`,ԓ߽Dz+z_Yh=sSzZN˅uW/1-](q#+lؾeS9H}<% )e=?J\E,kv]!W  -P*_X(Q42\C{-7|LZ~7&/꩔ uA}0WǠʅA<$d*T#cG|U:^/sNdȹ8<(-G _v'э( nGr^'"C`csDP,&G|a}J#)`|: .їI2zDL4~@ \uz˸v 50'MQ>.M\ 0G\ J/m!镯%L k)i -+4@~+zNy3<)J5LoPs4])*Z-Rl&>4&van߇|Aɤ*,01 _;>ވ02ZEmTG5F> {NPY\h]!߶yndwh9 rN*f^oVVc".Lr#M6NO=a?.&>ߋ15s 20ZoK=Zp9 wkZMRMUb%^zG]{ͳ\4a}"Wmzv؝^ք/0[cev̙[EPPkSu%֬ImXTqk:mr?5!"`)rmTZY+k쎉4 $lL R(qq/6UWk "_4m2 30e lךQuZcde2x㫆$|Udw/ .nB)ˢvaE/[ 6BֵW̶'a.}cnoJ=stqƙoE=Y. PX 1[{v)׿^, ZqG0N5O_uo%}pڦO? 
,ƍF^OpC344ܱperQ]z@Pթ!R&/ן=.&kn LWhXh7 2=YiD\3|4ً V#8 )6rEG2wlF_V[j8x<䇚5FWeԤIV XoyĔ Ɠrx>҉GF9Iܻ6FhڑJhY>b tnZSߔrCM}1N`ڜtK 6VM%'9Pa{g OOWjz3muO) nC.m5"9 m;5Q>;KdL{;9PZb.F 5)SQ$&lIXo<_SЫމ37iCb=N{TPV+gu52X__d/+k.ᅯ^OENJ&̳οlev2HU?}0Դ3Ct;׼xҟgp\KG7o騹% ?F!Hgp,R Lpec<׍˺Y_39,OmE `IѾq:^B۩R'1])UTȜ&S֔H--2__BPJ_s束PhQnဢ1J+yBbM-(_t V4ݰ פr'`%y%7B߇%Q&,8QF?qk$U9W^tFI kx!N2“#)!}؜nR˷zn:~ w f+ʗm]ݲ5 %[ CײE{r97L(e`xZ%G%?;sW⯗_mk?Ĉ?WIR>m݋.Iy&n~[:OB*܏ȵI hLrYy]6$m,IJKƌ@+-X߽H.LUU[}oV 9nm;W 1783k祶zS\6@}>9v1Dpisc+@{%O&Y7$8ެPd O?Jw_Y҈'l@t.G̿!]TZkWGuWwfnK7.+&bovdģ_M_j kp(mK.T :O9':M"p%1稢!8,{pոrߖ vL~>/jYHw KJ/7߮-GV!x,(f*FIBolLpe5CW)Ic.g,.-|"iv7/Eaբ殴h50t @N ``\_#~gѾwX.خj#eW.^[戳fȮlnaf-|vzhj_ӝuS'!šob.dX,4-V8Ӳ.3ܭW1c,}"Ux\'|8sQlh~,j D%VHePZPxѡ4h L5: 1Ϙ2Z{@1/tJ ڼ Z;^MuM(<WPߛF U,tPPVS/ Yzu@[%{gw\#gߢ&Z\_eT>2Y?|{(=cK:9f/ڤ'B<@|>cd uCuc0H3qd; 0.,&mX `s?Il^ pH׮qPT"{2B >;=EL^ON{[|0CH Ώt_Hg=cDJI,½,%FoZGI^a~x@ޕ3rOXF"*/ؠ}V/=UPKwMaiG_84|FW8h0'EC[pZ.㽝R\\~ F[1)%5-ԂwSèaPp}h a M1v^{:ԖDGT߮ _i|[5Md6ᪧcN7=-#?b=p/P Zx8[}ȻN껏uy˽Wk'fu7+7 -*74W%߻Կnם( I3MJ-&yI['3A'C{q4I ȵQRߙ)pe'DTkRnJG '^6{b{!QDafj#<5" ,5ǂ0ur4-8~mDgxe<.^+aX޵r=ݏʍXOԓC~V1@ 'XC??jlL+V3#.'mbNxƅrdU(+A3FsBv$ pfYφ 9B,#)]ќ#H妨)x*NrR'dQ$y(=֠y쑦S2Bʻ4|kXjRJ OF9٘iV (yd e0 -T8_R!/GY< jֲfHTvE?%+>bU(hl)'z /G56 ޻w怖ֹf"\~Ж΅F`#7v@dZV(mr] ECt@;;WZOF@yO?j}u[t7~FDT#ʶ0^r iP ؁nNÉ®xttmN4$ !6qGA@G∋rO 16K+O W2{ڸZø=^?R^hW%k^egkMhr~adêa6698ErGSAPS)p.W#(u`XF;= TD]]l.+3\bzάm7nw<ѠqTvG})U#NޘimcɨΊX~7ygtS{RWn6pw]ũûr_u-1jq EV ۴."!._c#zBZk @G8yH$aaR0Fv|4=N"#Ar4,7E|R>5Vﳅm=L'7DFQQi7K@𗖌c@Rzу6@R8_%XEM(D%`l`GᐣpňAn[b`ⲄyUꭋXs S+fW sƿrI K5 $!DzYbmL%E#-x{~uutdCy% RfQY%є!}#Kwݐi;{mLjh|db1J#VhAe-&lgfZo-qV)s`>l*EڭEy@ \yAq|0F)a:#w-8}Nc\ď7u)O[?0J$V.(H8qa'>^>]=oJM>:K_ͦzq^;ݟ~RH]Oڏ^a )vфv5i=ְ>U!O_n _58 l{s;fo~m B.n,T4ǖ{4ACvʟ&B"7lDLJ)^(.ɉ]f/U.f_/ӞsoMVzk^Q\)4Kr}y$ |G8FM\\r0;PdeJ'mfwEW+ve{m)T6ZCf(7EXz?@b!M@`\.S40j}QI%Q"u2|-$<p`<=ݨe'K PSosO,P;dui, yn}xg6N$} *諀4`QFh/l-t:e%nKp,隸#O[c;5A=UGOܰzeso*̈́w~ ˩-繁=IjG/Hp^:#Cy߁P7.X+Bΐ&%b/ke>^0$OhW/x~1://D5!wEػ>ePbm9Rb,~X"71$$#`DT([rGcbvYAKҴzѽdl+ &AͰF暕lk \FnzZ\\d#mʒ]Bt]@ k ^b%h 
]?^#nı8T9:jC>~QmWVj*bݯzk`縇ek%#*ǤTva3IR6o?N>z0ڶ5n[J[{ v:!QiMQ V[tUY,;) zMU%šyizy? n97?Ws OYm[I(wG]|ϘFK@1Xiaf 0Na>Y}FflCa5}Kbd["m/HFkFonXx%oV3߀D؇ 1\eA\54W7 `gCTntaC zӼʕPvՆh?I`ma1?!`FmQO+ʻÏVdO\WYaCi*ښX]qNes$z'nD-Xd<,V&=9Z}T~kj^jEPGRYf;qEP&(a@cs݃>튈)6K S4 .xd!ǖJV@тy[ H@ uƮ˵Gyվgju7*$\)J/Ş&n%NS+m6ܭLW @pk!'-xt. N#d.n 1jHT-iD{y&.;喂m3WQh9]}z?rZ [[f_.H{U.U{1I]3( _35%(el8yvH_LOJ1b2MJq/D*ܬNE[./n7QڼgN'2k(q͏Y ^x~QTjV[Vb~0-Se Kz%1#I\=4b"a8:3 *e>;e"H^C*{պ@D[)O/M)B+KX8h$:<YOxәˇo 1G̓Zp*\۠4?y%ί^uI>`]+ .lS~BbrooB6%8e4vRW&xUFk˪fV6RN*09#4VVA:E=ˤҴRИMk'Mؽ}"-(oHĦ,ܙ>2 N{Q2M\7rϦN&ޞ.U>DEV 4`a`12_Ě(\ftbq)֐ Z'PNVWLAP>{^P;jg5-]VS7Q\ǗR]!~0 `vƥ EwlRsBn͒"I}Q_,>O>$9 ̼lg_(1Bh htP]VŨ=/ݏ&#|妉r^{L $;*=  D.Qa|fDVBj VCҎYoLS%.;FNdUc|z%>5ۓE@=fj<_8ܴ͏,$;!j- [l0T8\t,^-Y 0f橼딼;'tXgh)H/I&#ugMtzͲT%Y==y\ܪ:y:ckC o)Z4;3"!>iqN92XRlCNՑ~keA|)LJ 0tԻzN@pnrXh~OCo6^n( nKO"_@z&-$*[Q5>fzݟީ-ՏGFwwtwd*?FZUsW؅"FV7^7Z+c1=B "iȦ%X[nI5Htܗa0 "G߉M"X=D9P.:;z"Z !O;r6\g'|&*ޖjt9fv$w&UsV;=Tw_w~/GfaKZM$V) M+ mოi6J*յWj^>if(<:!)ͧ!$␚v߱H%G#, ;h`7Pߴ9˚T^`'ję.y`l(W4֣[դdo8XmyT}6UGW]`ύx=]ՏOIFT@&<`eI@%ZB_C!?t$ճk`ĖӁ\x(f`_v{Cnc]h|8u*Hf/jȗTnܾ*"\*wFVz_P elYh 4PĔ\?)l ^׳eYKpo +ŽYEM`E˚j4XN0fס}vc+z"N?|Amӎ`Sƌ_> po|Y&V@ e 6ZUg{}hֆd)Jv?__pՇ;^-_-^M_~aԽb~BhhaWFÃQ,myq2|^P2[:`uOJfe29sL! O ӖOkkjuOqK_17|hhS>WR3#π2Q?, p~7*b' adTISC$%\㴜7h)wsEzly|O,ƒLթoӔtl̟KV-Q鼟]ats3dMHtF\5irAn!ѫ]SOG>HyAM[ r23%>ǰ:ntl^XJ6th"wuY@̎! 
w5okELy r4%l#,د"H`g &dw~¡n$6EU}iQҿDX#Au^vaj#a!9QC` usׯk^u˼8>1À_$[$sb ^a~7ߕ,)㯳1Q@KX6 {{VP2`JmÚ4ZlQfoaz71_.JgoEWG803&-SsRǦy+T4P& F+SdV{Ќ`K,ǧ][/q8Zi/w@txVw:e:@۷|yk=%,igI \@ށ6gX?|8Ã$iBy+g|>GrXůOw:{!!%HJs̭'%!=-/ٴ<&VҤ!Sx S?7V}sm'ǝdCg6?Qִ&q'pQbkaâUBT4;񱩀PhzDRN쪫tA63kdsRI2*|S괶1j{K( aǮFS] Aۣh" QҘj!~azOqN[;8#+5ߵ?Cns BI֪9ZDu8b)}i؇nJ ٧LhÉD$ܤuC5|:cQhq0/#2}d'iS]f:hONK\r0`\07 =-y0V%?zX%MP{FqS3b=V28 o;!̃VNR$^crHeۮ@lP4 .w]caedAUz㴪wlbK;kJn%W݊n"&\WOZZD&m]NЫɜrtk:xՍICڳ52w1(8|Q_2f7x 'ˍQ"vijnvV\bp0tAR4[63m 6\#qWy:2&ntβ*Zk/oEM2!,no_BOe->tEI4w8(-R~xPJ}ß cȨ]l=RTyK,Pb'Ĭi?@aOJ ,fK/Y˿m"p/:q*\^L Z17R1ry?ӎac"BM~}6i4^tׅ=Ec?O闺jv-!2MZ^ý=I(nVI|~Dz\_ljM]UMŊ}ZMJrm2>顦sf} rKLn_e\XiՀUѼ4ԙ zߨt.(>U4TI[wJXEތ|^(6dRL|#K*4ƿ nfj%NX-mr>urz||Z7s:Il1a]׹\iQ~Ķ ^j/!L YVDŽnP+ ^ i6D `ʳ"Xr9|IL Exz ϠlȖ.Ů^òL`y<"06/e|3Q-~4iq:ZFna7q !H/>IS)wSLm)ˊ.~!ś[Rԣ{,I(~I{@2,98tI5R%ٓy^>_7bhks 7tUe* ^FI-Z{Pd-d8}go$}5D\rvsmd.+yycZ>pZKυK5M,*Crr@ +~z{kq66Hx_>a؅aJͯVc֚C K;/~jLf-k .V]Iىn&̑(JHQJ㛽Gg,&N20'7g<ֻhs}'ٜt1pϮowd@3FNP.X_MAq@wЀص*Q{_Q]8_hj z#\ۑn1I0 EcCL'|`92Xf2xg5&h=b?/z-V E|0Y|jm7| ;?Y沬Z}Rأ"g>vLVj'*uFa,FO8*=?QZCij\eQ· gwx*Z ‘/x' `jƢ.y+?v"Q SהWN~ǝ P CJ3V .,  cp(XhxZxTA_!%/ZCQ0&u4'I)|Κ7"bqu dDy|ƓPIq/ΖK @=fuS 9bVlP"JǼ(-ǥ2; DOr59=<ɛ;dE6 &r:W,L`4ʀc[K=>@y>u 635CGP_5ntpG=9sXNS"\~ F%mDuV,vvwkc& uØVyNL*{\DZ=?94GhGA;,1"DTnHTe7ѾL6,Lv-}N nD!L3\A9JVoQk$WgR\᳤+ϟ?K0!%8;a}: I[&cHnRaE')}:Iyʠubm. 
vl0ױ=6.I #mjuzyJ]Zd., ᵤN;F# ֎T]nq}Co9,; EBpP$?2*nx$`xb,*䓱 ܳ)X'1g[ lìhD#O%LR~1e9TY1o=v# Ls;d⢑~sӦP 0N=r:=.2𱓮c9 hsxlvl,/7I}_286K^!@Eg|5}՝AFP2!twj+ՃfŢ1gǼ=yLj'3fITR_3*b?\e-Jrt59V Xxqg ]뼷x3 f&jU[g5ݏ s<_kk%H:2)ٶ~G;=^F P>/yӶk61^@7ڀEWY YާV'ֿ)#!wOEa\/*fvթ'vN "Jѯ '3Z^om ߓnz":z^^6[]@XoL xl:A5.)+p|m1td5PL3'Hv OWnQJY Lj\x4cR]ܩt鮺Uf.^$B责:5-6O\[~mZV Ӊ]j$ %"MUЇ=Ȓ,zJBSCk#F+3 Rf>w9gk#;_$niomlNh()AeSDeo/ܟOs^wa.35#$4[lHRʻ+<{<- bZN/'yŒofٔkΛ[J_qI8'Ŕ6fsZIΖ{HS%H9Ub UW(zٓP8\ոkuVaT[ҶC;S[W 8w&9}ޫRZ,9i?Yk`RVyfZV :]!eіF #mg 7IiVjL " XY#bw&kHm߻6Ea([ 9Ҵ_u -Z\B_VfB\YeLZu\:9V]^/ЃL/[iKAPj:U𴨧pRW1vyAժ*:ʚ6ob-:pnJF%`)Vm:n:nWn!׀_ xVp*,lN'1DӸ_sj[#Ng !9;6Iߐ멽x¼,(8E#b]Xo 7rLـ퀜f0q( ],O!}7{ = C ٙ, 9|vܱLG&̦`_'MZ$EAŀjֿ_E%8 T;F"%=$O~5buwuq2b21zeEITڙO%)'ͦa#x'PV9k5|mP~_}_|'O'l[Ayӏ6}qK|'̯ͬKni"dOj~=7hU߼ɬ gUhg*2k:'D#ϭR}j}$XȇkxXA&N.pN !{94&G68ǏشGM.?{h]Gƴ=&lX@*0cR"ٽKyVM]4ɔf=jF9rڐ%1mAubv D# :Y璫磵R>]I%$- leղu4b_k:akCibt~0[>{q6taqu'b yݼSeYf#QJ;3JA9`O@LGQh+yɠA+CPDRg}i1pRY ?oN~x)I":qW6'F޽ GᐼS" LDj O%m9Bb ;bl=W8 zu>B̝S7 N8y>޻O>Z!a7mľ^`w lٺ9]R nf2ǚڍkmFG T5`,8R4v(&=; Jsx oR_?'u,#w-b~{n ' 0 ruF$ONXb}H, J]\ãᛐa@zȹus> wGI[܀)4}9 3魀c{bT"m:k&Tb&t_˵嵰Y:sKwll#]dx'#caq?U_W wI$9lߍp/4/![Jxo8Ooy2nR99N'rqP5%׾'&2E:)0I* RĄrܢY ~H7\ۣ '~+Ij,.Őa1F]*t=<`U\+Dg'^'dER*d"ؓ۾zsDŠe;7`cd|.&PP)'0r[reH s;=ij>3EO#Ut$鲺w,tmqQkVa_wL @M'Ad|񑰂Jo{0,3~L4e,CctQW@Owٍ3sCxRmOT4H셣.!VLMkec4hoqbNIźX3y;(ʏ^z{ XBf~u)K +%H%.y/~YUā2 GEY4deo{ Q$h$}>ܬ[ fJhgG݌\TX3Ejr5MĀٵ:nAH,K;7g7+"K 0ڼȷQ(+M#-w&D&%EQz Vgl3yڽa6g#XVځxKKpy6aтD9֑qW"0PnE^j+B&6e[Ip&6"B r:0Z_-K}Oxyhl.ȑHh!l+h\#p8C;r)(܊濥l[g-i\u b`Zv3T8цA2{nʕJ{9679z7W+ݮLT)reHQ7ĉ$2DX#ڻkdAf l IITE !6]n\3k-1b Fx"wjڕM6 q4A*ف|ZϵKaH,HӜKȉ#=ۺܼ)3M~ װU|wm |sYgo] ^\?/;8 :-+t fo&O?qfO{>42 0S`!}lc9я͖>o''Ѝ(uajD1Y'38}Zpp<:*Eߓɔ5E77@')$|Ls$Orw(ُsOh>YWj7̦![9\؜gr,6HSzя/2ΕѿO{Ozh `v;Ҟx@.1F3dr*# zN6^^c Cw2A/Dc}gpp1[t}ٶe[7T8% 8(Qls=<OA@[:ì-IGW/H%'y}Dý6eπ0jd+JPV!ڊ/qyuYL ;5p/WsqyWm>ŚM"nɲԍh6(yd%woY`IH5ߍ:|[|w~2sȾSRPXȷt̑%ww`XN`iE?'x()p$K+tTwSdaC> sƆ@M)0|f%=5!$q:6;'UYz?}4p M@ɔubeG2'&v nˆ)6t7=n4U7ψ:nU7f!wwbs3z`o WPK1PJG/ųX>f~V[VxKcX`I z(^ 
2Hdn#P%R:#u>^Bvx.*sV!WU'8c'c]{4ϢphC} EF!nLp`?4-Ov^ӽL; u BzO'6rf &$6hH +pRsP4I_V H#/F,&2y5DZ/-3Ռ6>]]vd9:õ_l~jw^l%qZ(\Hߌ^磞6+|L` "+2CELB*m㨅L=笗1=Mߵ}С-Rqr:4[P/*p+ h?:Er @F;kg)I ٭X_W憱/~7ӣzKt͖Buv}|:[8]l6\.v6 ^Zh+:RegI(bAѪVby$ܺ*,Ogem?+249wU;p#My[;_2i0'>'< ۃN=W /]ʊWy&@Z]n X 4M~݈Y#OlźѨi#z/ms/8|Ssٶ$Q҇ g 쵉;5nhmjYqڛytyx_sgD;`l)%є206&z6lgn~ Sj)/~V|]E[ԥ߷HDPF-缥}:t 0yoXt DPqK)_MUHn.M|u `X['a֬G_|!TqyZyFdHűb}Wb/6zy&UKR{ `]FrNWO[`xRsGdO'x ztwWM7S㟴XQ&7^w|\y؃̩nFř Y)qe&oETN8뻵,^p:Mڕb'➴ iɾXEZóUE'{Eb\CY(ipeXQtUaOdrƻo@`6U2n0J°una`cE}b _{=wpVsMeȝڂa{­laUBN *#pnEXr!w¼/~ u'?7Jh$RrT7ejnßȊEsߺzpusxfu}Pd^]z1LX&8ܰO栻?$" !/׬m/bAW'1c6]O$!nn{Ǎg+ӓfﳊk߯wh!O`NĆ)#<2GGK1ṿ$KU[־`M6|[[/S8LUA.7]:ܲT[AaetוDBIA*X}V[b_%:B{k~bZ-닲zJeKF~B`%+_40UO+ϒWHJp*h? *)m۷`@ un>-+6sdC]X/Pf#s[/nⰉWwC Lߣ.' 楹[対3>>pnE޲>)-M[M D6f__`,UJGt t@s|H*A|FD)zn+ok$^|!v몘ӯ3p|?Qo Г#()y2ZS5^QTc>Rm.qK&V*yH 夭 D6qǞ&\ic4{( GOu[#֤>찮(?l~Gr7Hr JĢ3l3/\⃵#fNf*l*Һ>ߎsͤ8;Qi,ؔY>Q~)g;Q֦ ˜N16f8PD恶,\rNo&U|}#1^. " i87c|Ȯabvpo|zaJ_xK~'XGK+>o-LNDbna)Z5۩,Wv"k͎dPE/|~[ P([o]v"a"l>JI) \*jŖj೙lBfUCʑO34;]`sGrlU @/`QowZ-^$g'e8i "]Y`&ʥun3\Iud)!Lk"]"lJ671ԶL1ȧWxuLH%HH!O"Gdh4E^X*bp0?+% >fsbJ8Q_u,oȬHϮ5Wb l)QrVQ>Lw.j!^wiXő=G! ʅ!Fg>{ jvs4zwFGDt_Z>7bO/yBP03q68l#ڵӀ[Qp )wzۖZCQ0 Z 9[K-8ib',2])ݪg8 _M )p6\h& zp;)\Oz|ζa]A*scL{6x/OՕ]mfvry0M=f}KMspUj/c˿oaj@VJb#8&PyøΎT3rˬ9?Uq~ؔݖ y] " z1%1޲ֱ֠ѓʍ,"]W8;|B+5Jy/D Եʰ q. Q&iU E>D9 hi%:`5X~Kx 7\jM@%R{͊=)2ЌCGzR`g;콿~w{N <ĺ&6 (IXr tVr];\/"s5KdV)KI" ϔ1lz.Zs2`/ۓR QccZab$2x?`뻉Chdݟs#00,Mrd<^Q}l?=}a? w"4 4&"t]0ԋKo۞o5Kr$sm6eN]+T,HpOhA8?< P;0W"GCxGGmU/Py%S{F ,ʑ["pd! 
=]^7''F'&OaW3U()(oUci'I=hDk2iU\LbOF@#KFlAn%jsSʯeى+(v3LkR?%0ɭUM$Ձ"k˙>;Kǃi{ɐּ<Anۮ׈~]jmXtaZt@x]E'`+藽{&kIti]$/}=UwӪ{+MZU}BbMKY yLЯ&QĹXJgQ8ңʬ\uTW5c@)yf V$Tc,ͼz`!`<+ u!@A?9 3JC^ slX#?stqV#)C95-UV({X}o{hoeL[XWe-5_Ξq/IU%J'0#;EvtZ e%$Br d Rkk$b>q&\8OHZhb7yCmhɄ(&'}6Ik>#lbqN9@Ac*>G2"bi5}ʹTbV҇8[Lw;e+ @4JuV*Ȼ֒xy-' nu D  M[a叄uRa9bv($s[A73*оVd{`q3jo&FK\xF~^$Ls뀵.{YhcX6LgO"F˪-ynpm)y,5_:FcF=hr ۋ#-i}owI+H \3>M(K\Z\g׬&5t]N[L1`d'ү HE-b|*xۇv3{f룩 %a#RSH)52?{{՚Ξ'>'OO%S2 աG-#ҿEun~}G] A ?Ukz!NÍQکȲܿMƞg^< z勗xgLXd $::qP.т4E`A> g9ChEq>|L/[},C}C6ClsL 7c1odIGlFt7kW7 Ƃ.}G.|Gb#J-&wSs&YA WNBB'@yKsfS45Cq[>dܙ$- '^ ) $LMqx5pf9^Pјc &h]0ҼOY$|$9zZٖޭXDn7?uĞnRVk#}ý{f]QU{HHi'&I+-nun^oOp Ƹ6HhY g<~qc{~z.ګZ]?/E̾ Ksqθ}^׆M2#2'au,\d1kϬ5&_[jd+2 R=h G8Cv4ګE.m]I|ou! ]8OKliG ^ϴ~/hWRGX0/r^IZ%)jZi1{nJ~cYNP`yGy݃d,|L6*$Rg~gA/C3HHGm5$?'R5c悺A,JZ.A$l7;]1\*:fCGZ,2zt3:dp6 z/ugJ9Yxgw/fU_(r<)IE-FBRx+x3"ͻpMH΍.a11Ȗ{~|*[aM|Iw'abJ"5Jlb?C_2y] !iҎOܝ_FD"Jh1MW09c`62qB2Q0 ixcm?t^'wJ(=Zь%hƫ},k$fɗQQDaߘsL-i()֛0Ŭ~Nsk8؊2C2~!Hbd?\}!^^IuqZt^ƗՌ?Ϟ?R-g2{tzJGl`v# &&PI9'T6 UsԢF`YXnem EUJ |5&D'P \?5(QESTztT& y߇Գi #5l2bя2;"gomouփw= t>EZD看GO6}!N_ WJv6k &I0XPrj2*ט`#GV.G;h̐W5>&fo' [!iL?̍ #QgwOȏCjT;P=tBuԸ=^~ONO=Tb?w !)d"& s?`27J߆6q>2BI`vg <3IN.ˊn :0F̙bBm* EPBD?= ok/rpvmX lx̹D`-+C%74eb& }OZruRj} /f5-۳dF)V$*P3(2Z=QYHgHաL>JRqv9ނMo XC O+4]RQe_ ͭAOJFnpDg?3]~grѲ0l!f_`xOí1)lH$=8js8 "](m;(>w$)^mO6C.m ts}G-LO^5_k+ ǵ!?DP~ һ~TS{JDł)X_O8 ዇&+cR ʯB9\\l$}f:U~ Pm]Nkn:co+Cl ?$>şN/&P a˱34X$eR*VrYxt&ӯHiXk&n;qEiA\UÐwus>-9$m0QFqQ<ʶ%й)ham_,}v74t sl?jych%mtf::q!7 x\a&; %QTI ^9%?N)sјCjųؾ<&K쨥cǶ4JJ{cQiQ;2`7.rα H|*l %̭=] GFM`nE8T29R9O\̰bU˜ :5qUɊc[brϿ61_4O ?^NaQjnA_T"䌇{QўAcpn$/}[?3f-hّGӳk5~l23XL<蘙5W%Y,l|NX`q(!Pܐ6-N;>`jwݰ T>=yfQajTIuNMd_Id>3z.Dd%jyUN%^*2gA| uXngOvVH)pfۂOY}^[HFu#DDoD }U`x!KՁ9O?8_sS&S:>יϓկA oO_-G0~uABJ,1af?Q_D2o]jGe_SF(]+,:)]T+ a/-="uO~`?,P u݃siLxwtFtЧd=}64˥A$RH^'EE,ֆ8 &oXd~M" ^}kA&܀mՋHȇ / M[-"_#PB ѭho}]Ӕ 0O*/瓢b̿m"}ާ fA"UqwaIdy^Г-۳kh8} 8eIgÂ!T(᧐=ݽ׋fi +B7{uԎse>埍>Sf^ _"!.N`:=m~`IR[ٻ*gMq5c&0rVo l^.VΫe^ ZDgW甒z>)>L',س4n5H}k9Πī^҆N/JE!5Yř.Yc%\ 6wb3 [(Tn&7Y('ys_ h9 
K5WriSj>%ϾR+O+g7M=h&oF?ð ?.m;˅S _(- 2԰s+-3%4ˁ <]H HFi=r]lMVn[ҜX^eLu EOJ}nZS #HoaO>rk];׺L3ԽB#*! ǚ*^I!6a 7,KXeޠ;/ϮC#X<wig$~IMaY+FEynQ^xa/u`dꨫ()~j%Em ZsCcGo l婔Acd Mx(iJ2?+ظxΟN!סY]_4Q014gqc\T-UtQ5w}x),_*Ίj*X_SQePi>/Ƶ(++9rlx+3pu|bJl8QI*XR(&UXRf uc`+m~'rbvN Z}jj"pBE*$=%9Œ[ ϵҏ۞j_ $hKJM ,UQg)絽WQ!;&Z:ZHqY1]e m`;:ՓdزzPʸD kk(j|UXB>9k+X޾vt ^CuZq0I g^'x }%XJ;B+̋e,h0{d-bu)Q\3!GPjH !/M[8މ "C?- [0GgxrPs^!C}2Y`2v5L`-V^k!ph|oG%ڬMQ=xVQTDZA/wى]pqg>D**#dg9S JEl R; k_~ҁ;0u}r8\}&uWman(vߏ̾1zp$>&N^-8+/\3/{Ъb)Oȅk~uVi_dr0Wlbٛ.* :G  N _ҠdFW>Xe?jGip+4 /nA֙} uLd\HZ2qKbGԖどq 3O#ϠenS":UxN#RL}1+Fji$v`(")&BO_L]ik=< 07PLWx1-dn ldN6|)o-Mu 1w l#ale 'WPXo,ҫF(L!Lh5''ps},ʿy\HW8I2m "#O=e'}5^dyY}e>[Ǯɒͤ 1CBz=]F:s~81~U~R\Bv-onkѺN1[X2z/&"!'o,`hB*4:=F|+!kP}&~C0OL{W%g5NpPy2Uנj_rwTgur߱5lePdo>QkF G 6؜}kB!B֜XjvV|Xڅ!>6uH\Sslʼn8`~Rp4eSyyJ4k/1 b'361Xܵ|>ꌅZF{5"]d'sy|EÔ *tqSn edsTg{1(?.$rxm(Xg uD΂Z޲޽Y$42XyEqdK5"c3Ĭ!]rZXA>92;NKs*ٞwL:^[Ԙ Laj9Sg6S&g!}BYr୕~]6upiIzhPg"Q|*Z2&BGOZ5m3 *wٍHkˊ>{Oci+%IY<_͝Z=uAjr<7wo]s{9uo>UBI+6.z2EfP?4>]W+-w_0H>RWjBBHy1 k]#-5Z :ymH_eN"Ɔ9ty4W[͚nەPREe.^, XohD;oc@LNb ;Tq'`iX!A@Ry.1 1ɖ+b3/D/T}]Օ>, hu~1ݔW7FrbsA#i/ o[ .Z{YZo0Ht)Pb7NGS4M" McjM H3k?jh]I:qU- (Ho(>̀Wo|w)!v[\O'3ș:ӼZIܨt$ԩC#嶖mM-8G,^]̄xze Eʦ4' RԕkFԱvXTbS\ٞDk(V߰H}A3z= '"-PʁYu aV4jRXя&Jdں'ˡZ3I[1Iz0ߎdU(PЯ-k:'vKh%+!JSDoL`\KW)vu"4#_wE>z_#Mw/DpB!3='I_"$·%wSOxe N-|pn/t4<6"r[qdw$?@E7)}ָ^[z:=P] ojK;6@/&dqS{2(|tAX䝥ES`Ed PZ?fmniGi;[45;Yz6;oF,Q'IWHϑSu٢<v wkziʜp)JfWns5e:~-:+f P;}[+_`4pjMX 2O)avYNKb뛎5Js>`ux =|3=h%s,2u7һ7le)5{dYW3$0q[aA'RW\-:% FDZzLNQ!F }ʰjcu N`? 
N/ wnL\,Y!Z1OH;b.qbfZ[Y4\5&KGzR]B<8/:QǬJ6%1v](N2p'|;m"ׁՃfcjNqCpsjLʚh UjH*ΑFH勅Fqh~Wqpΰ?6+IJ谲ރaojwv(vT YJE֠R##.,+Sm^kR.@k]׎rEqRY[R qXOZ"(ޅ2oE}J;,~'i(@91 SF?jXh$%-k18?N7f6876;!K?Nڌ߃ݽݻvݿw@UQАuf7K4|{-pAw(A2Lƀ~Gh0r#VC_Cͺ/}hV7 *b$,9XTQN7ǫ` 7$ nj 7V] )P#9򸽾(e^XYoW8S%M~|xO"[|n}X2Z܂rď̓bshx\S=sJ,y@qjBqGs$ۻ|)" Lq$B]M$|bV J;)}jZCɗa"1aSً<*4Kץ'U[G$r*|Pm=)L:;^5W2'4A-Шjk̺U"3쉗ӚY˞y8f瘓XxE8ZLGDqb.Z hՌy+zL!spY*1sG/|홅 >Z:au\h0[$ 凉d~"8y6Ӣ9P4"0$D玄L# Bݘ¨XaMϡbm6˺$L3/Hdq*}vA+HNԿ'e^Z!w2ɮ$^L%>~JF kAݪцҫU(Js~-mii*oMt# <VQ3W_lHT> 4;p֞ ؤЦWgߟ.GgYߏz (y↭hM7ĕh8Cox g8 2WYwUtʎQƦ9a02zts'`P-v$QhX'> ?]ruRm՝w6%+i9)# iy)+3cj(y;3##b a0y'xo4fYdK,Yjze7/[!MCcSC 0.sSuW][IMpV΋!'sd\l4zOx @y/s3m^y *Ti_y 6l3n^^qՏғSRC;J"(vdeue}zQ uۖ:nѲJ= gBxwnCwﺤtv9cdU匄{J53z=lzh`вG޷Ǎ`pY\o=~N1m5AKrݽqg7v+eu9 }N$ n._ T4n3P饱jݦ2iWbaI+JIO3٥o'#)U ZN׊J#$G\"W:t{ rjA x |#f}}! @ɖ!R4ިw9Z='A([ԓa{hi`t.+ /֎>sjpZ>[eEz)'!_Hdrt)0# ȧy%f%ڦNZL]=fB Nziײ0y'-#tZ`q|:f/ũ~q7RUcپ-O)xjs<O u.-S!@Y,@m0Ѳ<Pg\~ӱtHn'|yӗ4&_-6f{ŏj &nBY6J;0[[MkC~itmq겏U) "Ś7pϓ @KTѕPususHblzOu"c5۠M@{VaRc,$ &1d=(ԑճslQQ!Łt `ӝĭ匑EƠ4DzOjcK ݨzf% ٺtwJó":Qɩ3NW}aȆ(4A7x>&&;^('xgq~xhYsG"ZZ'obH\-x%OMONΘA2E#zx\/D]y2xROU?P04{]XmL}l]q8Xk qZևg @lVLu\K ؃>$+Ɠ.)u1)r($(z/'k#c7xKm] -M"~BvP5e9YU%827xӛ]'JL[/&ZB( ʓp'4 ('}H) )YD YDr8x9a/11#9^5_M%ߒ7)8eP?|Q boǤn\'!fBStHlӫԏnnpt" |/m.<oc1k_-V7f_s=.dz s&~ kZD?GRJ+@|fU[xs|>}x(f aJ /Jϕ/U5[\6y9.E@ T< Y"ӱa0 Au0Et@ ѨDLkpv/3&gPfҔ Dӹ"Zm?*3F}} Z+b_,0Pm~5 SyLy:[ħ< D- Lv(ef ɒkzY!OhrUO=M 5cD\LD7Kّxzq>*?,jG ҇SQ 1q1Xu]ZX 'Ru)@yϞ[q_whQۿߣ rKeG\iiu?)S$BXC껠ՙM `^ [ܙƥ ѷi<#kY룪x؅ Lƫe&CV,GzX;p)_?E6ۀF>I#eh4@֓'̓oH>~K +:o?$'[hQ]+]VZF8(j$GFq=Su֮-[By=Y䙃9gS.$-JB/wbÚEjM!f|B9'j8!ϸh7[\ڹ0m6c]g%rhE{?w~:'M${21E#1Sg?3rN(g8l^w[p(V7H1{ȸ"P|rf1cW7 ە~.Y_z)e᧪ݳJb;phnǷ/Sɤk՝Pj1гeJIbrP;<ҋ?@Y$.Wh`% Jß;5q4#kC?ԉ33Yl[`k-XLVr yv"u#d۸FzfdŅÒz1n}3"MSs! 
/t_6ć6o$2δqIKO ګWn5cc+l!nnđ[i7DaS8}uuտ0zޣ%zw{L;w %qzyC,s0-K)ΰDؕ|v=O+׃ʜ]i[ۇ%r9>~O FyW̟\)o$,+ Ժ֮&EtQHv,@m2zELQ}W$JxI#[t+mnYgMP̖Ǿ$ev1)4^ 'ɬe%W@Z!s{ %o3|*Br={U|8ʬ';2\p8OI"MiIaO@ hޟr"o{s#epFDx!.at^rvB> ᠑IciR)*Tcx*rh?A"n"^^(6j(,5}`EJ݇ vv(|D>yYv {K"kA Ge*9%ѻb[j3(aa~Kz#r :.zO>{&SehDwügW{ӯO W%QA%-&q ƀ}:MձƂjcHHlAb{fF4Ez[j\VNanCƬ0-gcM*hkcǶecuți ~0/ Pؠ2,F`@ܔDƬΔ)0Ll{TXo3pKb &}(][Hз%z2ٹjQro^\,[2!if:_+ն͈ w9FX!nMqQ: |l"=R;*O'߱m=e <Ʋf0p'.8s2r4]6ʐ"}0CJjqLl|#DniMrihd@}Wm30QYzjіH8qݽ֧2UgD ,NR!1I}LHd㘠=SttM_31ܶu!co=vR=dAGH5{?R'?ǩ=x׃@l-zY `{zZOަ LɵWQQ_( lp C4y >D;UuֺH)!'S;w\pY[i?3-_a,t 1J Nj'Z/wQあ菁Cԫzsϖȷ/' ~^¬p9_˶h/V|wTV`fX FI94 Ë+;bҺkhnTVh3?-!u/ vb<% L{[ZFSұBv1x]xSr6OS&JeqVͯf`=gT`WZ a>W)6.q$ʆ+Uh/_<?s Ӊ3 R*YU\6QUY-5\:^?cbT}hkV!_kpw }h _r*w(URF ~.ǀ67bdM8kfki77)mV_p+.=h BQ"Cɝ_oYAQFKThۅЋT@9zi#؃j80;ERaED2%KFpf%+ݖjJU'*U LFvi2/z[^]~yV"GUC$,p{Ex;I.1|Q%U|P&U֟8c\'ślZ..;rizY 26b+Nu{ k6>`wۓKs<%Џ哫bޱO*KG#ezj,E E͐Nvik,G1řvC;m3_MVëL4vBfv}TK wVWSݓ.5KI0Q Nә2e_^,@ux* f=3M $`Њ=R_n7.N%E`"ݽqǦϾ\(J_+w]hAKQnoMjq¡ƋszPTix*0V~]{NS‹ [Be=p7 бwm'joEDWB9l{T,'D(cw>66vtV$5a\`:61P{r| td89%<0Jq!Mo}. 
8]d7Sx[JxO|>-5/Hdž<([l]-+5BN CŶI$df2@Mq`%W7Gb7ThluoT),<>zp_ ;W٘U_F%N3%9 '~'XsNqxU 2|^ݤFly$:w%"8<^+ι[[Hdb/Yk2lsG]22:(|kYLa2Yawʘ]2޴IӮlm1kCmڀ}l!HZSǰ n9I 9R À}< a@"{˻UQZ.$ [C{iU:1A(WQKZ..DL-b) *gQ.Y5Bka .Zھʄt2^u`F*#L|`ĜǷ1scѣ;#Ƿ!6/$y{5>K=d٤ѓƓ4'H.zj,αr]IUiPndӒۤ]Mg7ʮQwt9W""9ٜT #ًa5W1 Z|wQA+ʦ5exJf-2%l^aL\J,Z ݽQɣgójrf.$Y9z#` PmvlȆp!HRS0g5_horp} &9Q{B_ȝgp̈6د9+M'8Un?ha%B,HpTqEqp#C !?ACDHAG׭u{#I[{8ͨޢj!S݃Pkg76dԝxU Ҥ4vr1{; /#2|L%ӄ+]^^3~\'PW/Cy!O~ t~vk- MrSM}yť )d)!=Ο`{Q1nV,z)4̺ KJ/mኦhXOSv> vIz %kSO_%WG$&>IPxcO[Mdu{~Nɲ/NU~8\) Zrɧ;k\>4LkJ $tF3I V54'WŐ}gBs3r>jD=,ɈDj582ImT ʴFS+y_UL鐜z % hbI%tb[9$p<^\w1w Hߦ9,o'ǿ>Of&"zb\SHJjZ逋Uew$Gmˑ<ȉt|'K(m0ᖈ-auCqCw,+|:8o}a|_aA:R^\2ZS+%#kPȜդT,3{!ȝPʊYʑ Mw&!YnXLzcԥq$msm>+NorwL1Wؖ86;D5D1VVpbqݖb4c=3|7H|azZ @颃Ɛ+Aș.S4TBCM᪽pt|sdHh<~2տ0˕|h ic `H)5Yk e/8v#`z荵tq>ȫqA"I\*Y>c9[JkzJ8T"D{F bq})'PM\@j9үi3licˀAdmA` @,vu\tlkpqiFg<0L 8u>+6tI7tr!w1lBws5HD"f9sSʦ!E⩿sxUV>qj2 BUi 鉻&j[E?o˓u7-Gz'KԹԽBHycF'nPa%SX6Z|o9!nk7ɨK络 # v>G(%}S|D*Uh?0$;O Ý#0v(>Qe^Gʇv)O=Ҝ^((k)&xhѰ V)(эe]p',Ócr,k~jʤ5}H]7 bJ_knyqތH^OWyKB$G^ϔDBM;YӁh,84YA7ىս2CAY A=Q+:JKT o XpFSN UΘԼ׾jvdeze-{>|{;v\l_= D? e׈t&W%{S9I5J.NH 1z*Pci7x}LqgoF R6R_z[zlTЅp1n;i}mnTE]Ӑc喗E:as\:$bnC0u(ug 7=MoBcZG7_OWtp'cy]qx1ɀŭEcL/Qe^0\Qs{-'/P,`<w鰡koS |zU2|YS`W2P{kJY~6fibK{DU  0/9*8{5B7E=,p9 Yߏ'{S= 5;֎i?Rq`: Ml`ͿᏇhbIE P\F77+luhI] dߵuMQwSO3Iֲ(bw_F'>듃GV "6&PSK肃TK@Y?Y@CZwٖ\nj4f$l\IIW`V! mEX4';d2yO@|t18nĘRxs>P&i""IjQ;ط*Ks\&XXˣ.5k:tQ(FMigr2c7μ=I"U[& ؙ1뜣kQ|og!7? 
jt-`jgA$.[2FMWQ'[ UUD}qTLc'${E +tTb' O=A귐8.hw}IUprzۼ_=.q5ksBkNDh).]#J]xac5ȇB7vi;~Q7(2FөNj!9Lgڰ@Cήs~FPֿ%sZpk 4銡*eiz jo9[ƄJZ {1[EYp=JM(O X_J\snkSY{]~F&@ı B;nbϦ%P\31a c^׵PХlĊf6N8Xl츨Ckj|r["0Or.3IqgtNz-_{UZm97Xt0c77t5glo R2Hq+oz`,J} 9I A+˦t6?>{ `+՚1g1(L}/˜ѷҾ1smeT@ 덁R Lsu\1-53`{D BkGHawPa2} CoX'"v)kᾸ4-nC3=O !)/Ecbq|vdMY3OGk=2F0=y@W-_ohʦ@y̶sm60$j1,+'4ZF'Kǝh^ &NWOzzr88hӨʳA9q,]l^b.Cz9 ם!pKARq!o(c16oP $U#xF@<+  %50FUr'p=ϓ})9,E%աaƳOθ܇[XqIRsM=xiuz#> ݨ|E$BImz4bmWr%x艣+P۹X`ZҥnGv+YgN/ES}IVd2E)L7=Y$A~ 4DZg-p g3*:a BO:sZw\I:+Ky9BE;;yci "h;oc%'G sH}+E0P_$AmSպm!w#[`{tl( ۬&yBVS*Һ A5s Dӽ+2H6 !5fNEry8' 6J~~'MDl$nFKܿJ~zL[n2h@su|`~ xT.8{hcPwQeRF&Tؼ 콒O*F|\gU'? 3G} TQ&:c!%r֡;T\W՝H'=Ț8z`VeH8n 4k( 0Imo`K(h/jWPRaMpxD- x|JnPJjR ډ˭?0yn\(QJ;_X &`D)kR 8xN(VB%d]tc!h<{;rFF0|ZqsF>b9t`zQ=I]an8EŹG簘P)m` %tWŘ1X=2>[UՋ Ji, T }Y ig8^"҉VSxdwIaŧc>'F1*N#hz1ۦ۬Ff~Qosq[eO[_ [4^/+ʋHjxzA~%1XX^CL fZ+JD7sP)$0 oEǸUv26RT?ԏ\w۷әm4(IsyMUbSQNjHvw+xGÞ'p >;#cTRTC gM2,|3H7PKN)rrRhtml/_static/jquery.js]ywBD]0W4=@|7nS'$-E" DRl쉃ruD{ν;G\G>=#? zŻb:Wz77Gzn"֛H6?zxzRA?Ip]ҝ7(OO66^nzsCYƗmٸA3C>A1>A| f{jKLvw?CE|o>W-ꬄHni,3}.ј_ez`W, (>-pcGl/i&uC~6Ă=טInl=:j^/xe؟GJe3UYڟWl_~MIZM%dNNq1it"?ԯ`i4%sgډw|L?7Xy3j8x#1~2j4!ꔩ:u#1٨8|.3+΂v;1ΙPc^׿9zrzxQlN~vDN`'$z}$mץe{>GD*=\'c{"8*ԬgV$ߠU:-kwPUʏOplupBea͈TU!ZGNdU`DEa:~QhhqSw 3c4u*6!ٻx8acbNh/I]4zɠ|3CٝpV{Ż@C l F 7 "Q=ec8)>_@KIQM2اCbOy3hh< TwTз p@JHQf":BCr*v5""o.B<ʻq23*'A+%"'{jƩI dQ'rM+68糗l7[Rp3e´2n.tk(-```j0+ZÝx]da_m> H-<qVƦ"EkS< 0M$VK$-"2Œ vmzxvENMDb{}yITzzd5obob q6>B69zőL8~}OT^a)ogÈ( ^Dβgs _D{KA?|;-)v*9uAoM⏑"m!޾_e &Ŷ ߔY ̊,We2* bW\.r}2zx_JA"!ğ"ot~~|w~9?_Oƞ*axJd9g''y#_u%ea/Hķ5Oܫ'{rޠTk"/T \_uPS`ǥsC|Դ:,7>vⷪq[A2*Ad=O+Y#-}8~xV}|Ew/_G곯~]M@mʤrpLO߼ǤϿ'Bs_֙gf\z NCzrBJ6:˰^.;ϳ @+0VGLezEree†WU9Řc  cޢTzU.w Ӕԙ v^N 3zGo RwͩP?ătQ+śm']_frTxi8ZI;x>7!2 A Iɉ<{zloAZg].O>{߈٣O|:I#Bo'm D;?5d2ɼ0&N@@U]ȰT|} Ո]o ebZDoeB8˖L:_ITrгWOAmL!I@6(ї{H%=9F[ v ȭ \ꤾE5`le2ws:/``~oiL #뚈"bDD 1UynId':# qo/j(kc|T\m,]+0`IJ\Đ1 wHv"M&bsGOR(_f܎{|dgRmKU;0s~bh5eox=}:Fi53`-ŚHhk&'FC]E{ij~:iOIse6cKL\ '⁞*-AB!Njr+2Eta$ XlIo[5)f ҵuH3~u/hm <_+9?C:>DM@1;^'eJKi 
Wx#yGM$5I1 4I?:^(5$:Ѱm/ Bw643R\9spK߂ӟ kRC{'γ[R[:k}|(K ,rNn7k㾴m:X7nrHH|L9bIy@y4MDŽgCϠ7ۥ+Ka?n;h_{JE US‚ܸN ā$͗oU!9&7t%-"fc;#oSg_OhhUOQ*8Њ&?<$<"vDjN8˾x_]n  Y, tsB^@ 8W&;Nm4?@HmN\;M2-غ?z) &!DVM$nvD]NB,۾׶&-poEl-ZI2 jbU im[N_*lޤm(%~ds-S79~7"d3"ŽB|טFi$4> Zn咛ךP/Ȳ};lcNUA.&d~ 9:5&<:S߬\&V Dd8;Stl |jK:ZxC?31ka&[2G@jxɓǧRf?&/<חLh>; ??D\hoT-ʎ R%W#q8v| R[UYHoQ p]EH6 ңQ&(>l)eK"G>fks'#CT~ L7Kz,ߠD|0`憆Z$e_K2Pz5aC+j2(օW3o:+OʢL_ѝ ?;Bf_#;aw[K|Av=†\9wԒ]/VR2A&O/XhޱL Z܈TH"4/3;$^!A # =BLX_{isoO*Ð p`:3^`lI}> dL*e=跀~&v!XoqMMͣ ͎z<|,: CАrF!pz#vwL5n@:T̜0þB?}?s@ypz6NRݯ22p~^bh A/IEH ~=M~7~+QZk.6=gtZq.M}iL5]gڕ3Wy&2ط56FZ#z/P;!I7rJVHDڥ> ۯTjvZ@r*ـR{Q? D*&S9IZu0TBeB1qʟ$Zߴ>cNΪ` oqvN'--%޿L=eiQ`hjx%SGM]$2U頍}ST8N'7#xRJh2A>i7 SI==idϣ ['(~_PG"bR'WfZp[9~e|6qV!Jh$Y~/ެ7=xJBD,VNŎorPA+o-7O m-xi1%j"'lBVFZ(qY3PQ=s4ngR _/Č-E3UIVg{$)njҹ6NJS>*uI#6TZĔ\j4@:}U}ST0a~u֐*JgC+; >:0s}LT@#lSO,VYnG~'czkU8[ 9߷FQLV%fyI #Xѹ /l!Q Z5ξ |8 h˶SUkTMĻ4oES?~EƟ{Detn9=?{r|Vo0UOOyOJuFo_{b5J(ĄbiRʊq1[[-y*hqTH#LroQS3[i}Vw?]@u}@20PkP|J4h:YfgMnu2 YиR6fڿc0P9k D6GmzMr3B i2S";LN9"GiTkd1nMr^0\ ^,զ?!D*:ӱ>M!g9t8A?:`|D  i C/iV:F]FE>UJ] ip &bK ŪxY3^#Z3y"Irwc䨤d.K%vaߢS)bbb&{^ kAr$m(P2*`B9JhTj5A'fRB %Y](1F$9XC(mWL9R1c.%/ b|D_n- PnW-) lbZIhA–[DIAIL̠E)I`;N勲o{ϷK26&F GxJ 5>f &oVg5MPBeĴ`nj E䗫qlHB]c-_ +#s mVQIt_L{dӍ':ݯVOCɩii:&ŕ#J!bpͻf'}mW}&Pކe:~O dK/۫ʗu"$6wY~dֈ-b!~rUya(HƐiΌinX:ķ%|VÓXF}(r<4 dr.d 4»|B Q/ȩImNٴtU"09Y5%2Y>xm/ZP2kYDMsMkOpuQrF^`f*hZ-s%@I$MG}uE/"׫b+mdt1 Dܼ>`+(+.!5.Еb*ewRÜњed"]Z(5Tt.ç4mHG% `nII΄ Bhk/ddŖq^¢IqWiа0%/$^Nwm7d!!恕 P+^3'y(I yVs,q ^)od}yYNŮrEF62x~}@fIA{`\{#s>ꕻfVdv~gEm9ZݒXXý+]FapZ]M_8ۤ btWAebrx4 (&=:nS:}O/8N?#֋w$;* xߗt&tE'k%Ҭ*ar޵AZ`q߶mRԪ6Z}HxCeS35#P/e,6NEuPGbVO ~hԁׁxDf6Uw@vt δýl7竏l{z>mͨwS_f-j#1 ^'Idj*&vιX L#P\ 55ύ9wyr2@Uo L  W_㑂E3ZGZRWL4(SD{Yrz97M :!ѹ 3 @`i_װ_8H7qtOf;uKSr7ݵA2 m^\eHt aM;>iDc) :ABx_$#=/ %xECyH4F yx7hJEGpccӱ߉F ,`L"u I-^!dJY v¢R5,wi" f&,)OM哨q0:D6N9⥘:b%œm mDݾ !ioYHt΍rg50G-G#泖 J§ o8MC,a3BY|O÷ois OS _}0n9NKػ7p$Ez+r\gїWLKk8<)c#_蜑KTIФl YO2ЇgG}3̑@͍}ٿڢ+{Ï- *697=UߑT?9+ok;ڌY'GY5V._!45F|_eצ5GF;tmL~~M;,9;32G7-VЙL=6獅uғsFp]b}IiHUTvnI>_'7gouv 
kTr@8w)T. 5F>)qvN$ |2Q_*7q5upOz|ΪT hV r [(q bO(CDbza.h)v~V4Л[2|i}E5TbJ1q?| Xf" &LDZ-̎YQ*cAf&%k% ŏ;țN{ۖ.^v0 eե%I 1*w89w+Ba[#}t @WcEǓ)3ád!3 ov{A1j7_8d\`~O-pʁBJ$rwcCDeY_쇭7;`g p [tzתOyEDP18`^]@ȅ@ΈW5MhP\jHWt਷[ x.-*}SXnpg/s-}Z( ɹ "q,QEYRdI:Eק_D|HKEF1vH <G`g|̫notC+s8ECg!uur?IZuRQx&x{ϔĞ^Bj>DMZ_=?cX`X(ׅd]'m􀖁ZBY2TZ%dgͲ(8wF/tdEE6,R4^ ьKjn rcOY9Mc$KܤCth;<=W.AHrjWfԀ|njezUe4ǒJ~GH.U-YP)2OqM7Kl`o2Z³8D?}B#r WbRNpI|f}})7nS)(sc%+?-e~T `0xIg9O\: ҃1?ǜy?k6jL\ğ~m55o>6?Zj;W741˄SF5YԔO(;=&%7]xA-=%IitȻ$.9<Gh96ڗ@L#5]ή+v/`:+ ZrY}KY\L#Pv#; ubOH(4h/i5NC:Nl\|ɵE-|`W' h/+5PMq@HaFHNTXJs rRa/pDq#ȪiGE91v`uՓcm yttA}YU1Iē^f4 P΢ҡ, ?t;\~; U=Yߐ'Ӽ1&%R@te{N~yMqV,騢l1'OͳVk,PUs_Lyr-{SSyy.H^ңy ?ˠL!8\f`N0RܯbabMޖy_ECh-:ɦ,H"̬Qj?RN mH%)L+ߦP%DrgȭWec̦ ?!)QmIpVdBi!|S!|O˶PMFNJgb۵BbCo,;/٣vLٸJ?R YWdM Ɨ aoMaC@@\oq8o|Tf'ޛks=m4퉌1nk6.+J 'וnsbZ *F%DS9kaOz:I V^uN -p#)4 %zz$I,{Y b1+\ǥdB-0hB`B/u 7B?AXX}B4ӅGC__'"ft3qԃBCmY62J %`WWw1p åvq3T*君FI|V+ *܎+{OZompwf[ "Ev˜j土p[!,=ovħx$UCl^$Ut EOCYtKK = W̧w9zyp] 䖩 b-O4ѣbADv Fzl\+XFi{qo> @-*N% A]i_TܞlHGnnlR#a(\Yu )} *n}GKຨèMU[H`$m[b~ 96͗DRsA>Ո˲.'r<[7cR {@QJywa|(J |I$:6顣KzhS2 f,8 %3'( ߯l1/*3[@jHF((NríMʢex~)ƘLN5lnExGt׉^ߐrYT',C8 ˮP.+wj,7Uڠܺ p PF/iA 0?rIHQh"{`d^% /C<Ɂ 땜y'Vt+lU2`ڗl*D*ajV>#UجbRq'*Ld~Ru8]!Sت3y$jf"u'ϴ8M%2Ȣ߬|'CqE5!66 .Et{:S@=4gL>lͨ}%ruE粌04aײbmE*Y4I<:u汓Fhz6ep8ł/ɶTk$i>56/v”i++kv/tțxS΢-Y0PәJm"x1_΅7z`5oC3OyHϐERyIvhQ#7$̣/30w "|})8߭_wpM KNAIyqz"Q(3k??N(-~g*",G [qtlyC/lOęb'ED;SK~faff aXI m\,Jժ@xlnNʇ&ZOw^aa|Q)̘16)ʄX2_&bfXNK9KD( + 8e6y(XONxJ H%inTE>GR!&$eg3RNzT9}@ҥ=1=?Ab^ nCK\ 5M<21iSyHC'R |]}N1[LIte.{P@&-~ׅFF=h\IUVOtJ aJ<JɆV>ȱ/R4Lm%gxzDՅqӾ۱lbA"§ہa1b3bSxۚ2JPJ^9Sqr{,؝R㕿Erj$y!c+o>w;92cʷPE.v2597ГO- 򛻵BO )fOO>N>l_lh2-N,oIve=Lڄf2{0hِ$XbթswK;tjWq\ڍAYw Z[dcP>C%0kv#lט\=Y^ <ȶ><)|\kB}@yN}6z Kߎy}{W-W זP5dé "+5]xt){3r ~?sB Xe"GU 3yT+m2rA|jyq}"?sr ,Si0[Uf@2~gifxzxx,['|D%/ (1N[y$!w{{MC^,]ӕݽ-S.(BȚE4D s>h8_Sj t0zxUz~rAK/9mv[د鞒 eic CN3}\ϫۺ$٦uyZ ٲ>R xB֪Li!i>uY?#3 .oeB/yAWLF"mش:Ө$ d$4p7iM*DI {m2ȐtR`jԔP/a8P|39FuLŋbS_2K(,f3%/d>md97$cMُAx$,"娢kK3 l|SN:֐jiU]`yY1g&xތțZ+r0[le>0RP2>JN GOk6pgkiju*|vɄ&$g`=Zkbs|Kr]>pܡfmo{ 
wҩE43^Dǰ]:e! Zpbx}d37MFZn 9@T rM=5H;F95-bw70OA&ϻkcWoӹua<2cWwEs &]tVe1,܄Id-Ϫkby׻|zgL䂔ZCA{N-$ГG}{̭`rBAVX8_|2=y/СZaŞ=$\#DIh%bmuזvyS$Zc;#yrT\n9wSz,) k61 &ړbop, FTsi)P,Iea+YYV#>&*;!h(%'dǜI =Cwū—ch02cs.4_ ޏ1wU~ @{@@"H,JعL&;A?zL蚂V2͗5g]"%emuWD7rE|vħ7par{r?~<Eu( uA-GT]gGs7&puQwuc&蛦F~hrpw>pqWd-Ull| IOK"YR10X8&xSX_j snz!,HX~Zɫa:U3e cJϬm8P5KC\P($iRy;H";VOO WMg&a^TfQT{Wo%֊@`$:227vUUKn<^',]iB{w,]WfO/8z򙋏-I߂Y "0l\R&#}+!6[_挌WԻ;XsN[3@}HZbW1g* V*wVRvNBI[)H9~4sew| c cYcp W\{KQ ο̥&l$Hq_v{W3vCs3ka0\(kHG?-:LI@6RboKխO]fLmub}Iٰ,ڌ\! 19*͡۱ 5Y0Mj‡Z5Tބ&,BH! ׿EMi@h,|;G^?wyP9p놤K1;f&#AxJ%#0}p~F(ܺ;c^޽f 8}a4LCĹf},!jPyTQu=})t.d#[sE4?X`Wޝ_++d:ş7^6?OvfbxżVHzdf"Deօ{jBVkxs!rP9eGWjښ^e$Ŧk>i`4q^ϫ'І%Df 77}:!lB*V,xh֩.z!( ׽Ç} c:rEh[ڮ!BAP٭BU1S\uGI!*̣iKA\jưqRƁbJDƑՕ2 nl)n[Xdk|sq,CI1J(J"IFIy4vTq5Q`]ѩIhd?Nb(Hs*bNV'|yM7WDvq$?:-/C+Ru'swl¿T9'/P{U&^kgKkmT*ٯ܇])WMSsq!>%YKJ_] Ex>@YO5/ĴZ%lZFw.@97v|cT&I<cK r/ۏVlx;gjtVC)\E:a*pǤ 00O@T.g1ogMԙ"QŌzfWXd_ϯʟ~їKb^{AjM}'K ]./*κ5_-Nk@,u嗚{ٳԕ[jG[-C.ʧԤ!2DbC޸'D7Ѹ#rg?t'gwsj Mq ]$9z̘z`ǒĬ + bО n0 Z3<ۨbgvWz4DrFȾ^-S`TYY V-oiFl1p艽ʈ66M@򷅁X.AW 5N8Vړ:tv롰Q ^'"XC ]Gizz<؍it_p;A'5j8xEam䶊C=$M$s3CVRoI`+nu/ؖVO":riȒҕy28ͽ'wL^=sdU%QmM, y\H#<2pFFM2 rjLB`ho$rϡә5a Y}zfbh'キ*9nd4ėS'nI 3@15Bۊ0[p\JN:fqZFȣ$dŨ<`)vsvAs[?,ΡESP㊘&4mn]:P֞4;fuMEzF/&kUsCjL: EkVP`f]?m2]+&? 
ŇC {ADrÁwh (8 í\)l,;AfigurըJZ $*(nôŷr\4b(Qv/k::$@p Z)+fz6/f3}Hy'iKS1C ||5Q-kEHwv,6RlOgq#>n/͜";i1F gƈèWQjqnίiuuO` e[:]g=L+]|6>G]0PA Й^G'K71Ϣ\!G!`)z+ oi:6D݇eQ84r+jwTw`}fkPbЉqLJ &ó:(B0ke&T'- &Ɓ96]Iw|>9m|.?!HO !/&JZ0׍?T3aLvG.}vkU8ǣRdQ>i~`] 8#: x65װ+h d:QZK*,R=t`]5è&Å Frlgk9H?օ]DH.6뱏e_X:J=QVO?L@K`EI1UV 4&8%M_D[MGiю~ENRi i-27P=xU>ɣO[~Y s[ \%A8HFɠPpqCBU[󐌊2um55ܜia,:ػЁ:r}V D_OnWKɠ''fͻ[tF좜/~ȧ0cWyEo\CS)6 Q~I)wϟ̨+`m8#"6l)/~(ImLt^9AmM ('s#yQWw\˵.[6σ녪P UDKVVeC#'zd4ܰ8¿H0 yOѪ(Ω@$O?w&<~8-<3b3g?;zt8~hzޱ7z|ҖA銐nct-ߕ_+C{H75~?G5>ф*Tiv-ͯdžnXtqʾ3k B$i\U!(9Dbm:uhgvvfUhI ߋ?DLәɟ;.DwᕾGRY OOz݂/j`5 ,\qRҘ8=o4  =aa2nlغ ~ܟYiQ~ ߐA5 c]ˌFiaӍ{2sǯ<q29+獚 0;Νř;tI:KY ɕ5Pe"nϏH|y:N jIVƞ:ߡ$lުȂr[H{6jtkBcl)[c)})Ly 2U.?*g&2;jMwP0-r؛NFdwE 3N'q ,;|.,?M@Uy|VdBKh*a2-9/ÄoQN=4,&g*41V 03e7kC*֌s5ʨsj֔Kf t1M-H$=[:kMD\\'ݦ\@crESkd{&9ڈq8WJ-FZ\^_OO+jċEJF~ng 8L>JP TǗlRLJEE7{jbu /Y!:B(y1?*j51R[qƂT-!7Ȩ kx T(D !4tEM[^$pZJa-xwvaoHU y-O)_?;FlPw 7E3%M܈+ '{vPhd~̏ !qiz(d5J̔~x^O;8Gy2.SU(66L.(z|ꔼ[7 YcX vM|bI,v\L^ 䭓#[9:53$P#@GYK p ӲΗ ZE5T_יzCш]7sý9a@/iB7Zb#|I(ae.Q1Teĺb8Vwjf3gs#8-Q&bʙc+G2B,xcO+~ia4BY3aN+pm^;;}8)NRYW73XcWĈUy'%3H߲7vdi _ 8-9^Oa#M x*`jW?[O^ k)Ek1 [X9Wru2L[[f|]67MV(n_6F/x3HXpǤ^_ڎ_^i%?.i9\{v%6Uz&/ZTO#[["80WȗZ.խg;;/h4Ǡe1c_ Gy|2_[ޔvё=w=5vvg1QLl>++Zd2%3QN'(d }'YQҪٹeNw_,(Yt7WJ0Y^T҈>pGٹP|2S+O۾O*`cwHqն`ũ!s!,;3 }rXhlt-+1|+~-9\JXβIь]0BeT?a3ba#gމSo cY7kADj&+:7rǂfeYJNRXԐ>9 ty>`u z]8lpT%wV_#X7$oՄy *>Ӻ¸]n@4Nc޻^353Hs= ;F8}N}G(D[ᗤ~)}ҵW,::įaR[nCV$ l -#5hL5eiXY 7ekq gdXp7&^0?-U2Ēdp@NM/xEAڹn7٬KyO<΂r*+W ?1OL1úfƤZ ΡgBυԎۯP$ D/R2'MW/&a~z,G9' Jj]w=o;|W^fl$fJ3'evwhD<mTՉkE(hf 18wlcb3UjQ"t9w߁ڣ0׾X]X"yҺVPLdozS]!sAῆk(%P[*dJR@? 
6(4$w c=1 &NhTwRYn&q6=g11ɫגhuS^ʒiBP3%96(FR*ZU{EN셓34Ԛ M~uj_iR^a:,e!$H:O*H'KnnE^F%+]pA8ֽƉŅEg"n+3lz/|Tɭ"!ĜhЌjitX<ңVf6l%iPB qbɯXrdykxӼK):txpfwo^5"n%iL1;OLa[v݌D-O SuדG!Q{")FocUJ =Uk*Ҭgk,#tܱ;1%u% (Qy-L$eaԝĺ'S3=-Gg#Oџ#H6n5`׏.Ii &lÖ.fŅbY6>ႃB .~O_2Ǜ]'I"߿/5Oz0iNImmW`ډ%L]yG)7!;)ك,ٺv-1媽C.<⹼_Fg 8mB}ZɞQ6tP6|#<ـDgo|;Kn+Mrn`AJ}*̹S\8by,U\r܆ wUɆMKa,g,;?~y3V-].1Dq IrqpzT»,mxR4+ooMp*3_V3a;IbENz}Թ< *Q.J]xxgaF]Z4Mo{-jI s9  C}BऋSrw/5oT5&T2KЊFEmDNCs5gXa* WlcLIPxd1h. U Ͽzd{2(%׊@ɻk]^iUPhQٴ:i6z¿C.M *sy=D/ 2+NN x{Dأ`X*ư,Iw~4sEĀW0;;?\0J $6R8we|ePJ\w5P]!5蒖S9-p 'BQ/w hz4U֝./H\Qt3Bo`GAPW>,J7E|؊7Tbv Df~>QĥPȗo8a9TSi'ථrEno5{mI{W֧QLQOPK NZZhtml/_static/minus.pngPNG  IHDR (!IDATxc8 g>@;(!&]f2nNIENDB`PK N%ZZZhtml/_static/plus.pngPNG  IHDR (!IDATxc8 g>@;([[U @l-!a@IENDB`PKNIohtml/_static/pygments.css]O0'?DݤeiKlW ځhv±#)9T\,q#ݴe%ܟMJOPjJ*]߷on@ή}/:G,;>'knN;>9Wu &?5@. ̨Y%F ΒwEk4BYv_?BSl# ֏.Pi@IkN/r"KVB{+,\kPG譆F+W$"w\!-َd4Kie[} X`Bn*Lj/bC]^ِͷJ@%=3•4e'eMkbη$=jU7)gwVPݭ,wm^ xttqg59,zJϕ4f;\ nq%<\4\Q529}x?vNTcy2}uHuKΝ!;I2uMϬr-9fG4ޭй `ՙƦ[fزҷ[w~UXÌ9~ܮk|E Yxvh% cnO <3Tiw 엺'/=-\ WDó]-V 82#Ն]|aŹ (Vq ]*ȷfGuvՍXtޑA[>Nf ӝcGq bNNG0@yq:J1#}},B偟gKE_=2C}YGẹϳ5PKN$@J;html/_static/searchtools.jsksD{~5<,ENs SJhJ0d.9V#KI<%vvޒ;޾oo<`}dіeį1>Yq˾8e]Y˚ ]Udc;IjUgvžާ?c+DS/# ؓW'?!GJYYATr޴,-Vu p\Q# x\Q}J hE*UەOېfoǑ$5"E >T-/ڗ"h,5]GT.{P7 'kXQZK^U ts4a{Qϒ+R@ *a}֝,3{Yo+ovFdJ$~*.jϲ"l n'3"W]ވm>Z  ]/BHY^M42L/ԃ4CD 44yStlH|N"kJԐqRJH%O]+&A,oU. [n4h_4~"QZ++m 8)1n6z}uĆ+4R=T]GeaF3=9al˅!0nMQBU8]x`4وvHK6tL*1<5j+[Z?1_fs p5,XS15K&| -. 
/DĞ:|5!Xy[G.p-V5'T)2iΎJ!zgO=?;zo'/~w߾z㓣gh%d￯V"HXU@Ƌ'E:5 R=CMșO93!!i`߫u~YKЍKs?e0A鄩v|}ihODbQ >ԫcmLJn`IlZeVl;t<Ӕ{]#&JC@|n>+e0ҋ"W%pSbӥWe -3Yd=XV]N*%P7Ғ/% \{s^IJvX~ V|Iss: "Žx7ԏ22yVP k3pj~'EL XpCl~-:KM&!ŠAhj!ٲ IYphr%onҢ8.>((oI N`UMu$qΰ5x 5AU!auTe`"E,1 2i&jE%2\sl`2ss1ژScոV=!Bvo8-(=^E DqQe,f" az<6Pa]w%)_A o?d@U((4?hC^ e{Yii/d&0 U@Π1mdw%W3wg$w[.VčYy\jW|s+#t21[/eλ,>T`Wf]Z1`,EIfTAO@dP(l<0m [Fj޹FP,r_ 1hRlGj,Q}۟EtCL֮fgњ&5۽ 庭pGoaHO!yb;0vCSMES &T|]Qi_{tDwOώ^|lxtyv ~<|2J?%>|1 $s4j菍>F;Zv:O"/!VD}3醃 ,5?09@5C®`'9<{d>lvwK8YCsxۧHoF}_o:P2= 2t_u~PteeGWdJC^Α_w͜3"h%R{U# *,b iR*e󚘽!kCps&[4Rʩ}t-T`DQv .J\4u4q=ػƒ&:g[[ ACv׌"լ gC|joPwoa۞˸-6۔J>z{%zUк pG܇>:zjYA~~؛ x}%\2}FiÃS3ԏPg~P9CӸδ! TGeo>ؚ͸H./a(t 㭂fP9w>mѣmb. jn}?%WwÎiW|yݛLێ \<4Cz7RstĴ4z:ZN  :w7tt8gK`[>zc*! 1Ƕ2t.N3`:A HPўh&![r˟Y'F=XfV8nA&e+̈R2E{k@u!@C Yx*|/`qm'aDY3FnFQf\SzƇ /䥠VOrֿ36s ;A|޴_keIt9Ŀ~F_kIA7Yn֚8 YNnj6_;l_Djrh[m4ֻ_;ZYbq!r(nߎi ITЭ01/2G1+1E$7VK׀@HO)VP7+j~˩<.&+l}ٵ|(Wڟ (ZpyEQ)}m5mi+CzO} })K={"ߢ7sgF; y9 `3^ + $l\| =vuAt%N8CjaVDPGک֯ł4a/>97hßظ !͠HFrb蓂0K7ϼIPKN%` html/_static/underscore-1.3.1.jsks5;l'0 !ã@y-0Cٖ;s濳/is촼: j]V+LQNŒ~+G^tut[Gފ0YS0$KUy=ek-QRF˜t͒*I]œD5DD_?zdqˋ*ɳ2>f*),ʋhE~ E+*+3hMg 02Φ7NQ͢/E}o *NE~,Wi\%ٕ#S)Nhf8IKj9LjB.ju|"cFWI'i_Z.џE㣸4i4U>,I y^E73:fEPi!k|8G9A9g89<~ A ,Pr-Q i;%ԣ˓.pWѕʨj&~u]Q(LA|.ϟ7ueJˈe%'.ˡ?3Q}X PҗV7[-UI6} W&NOasЬlj+Ae`ʕbԔ%XPv%N;Q5r̫ R+5P`7򑭳:)uY|?|<-Ue00kV1T8-+YDM\ȑ4LV< J0.:=皡ue7zj i~\-& >MR.sa;)6X>Ηf@TȄk|Ⲓ Tk%G p_uiU{ >G@B&Pл8*BJc!JY41i~m5; `fExN5'э3zW7 at ӧq1+aYX'ITkl3E^â{4AOzZp%g32q %,= tl&!,'e ?js/=hdH642wzp'`{3!q&`R3TipZ-YY+\@:R+34&dhƼ.hkf:,9u אR.HK; v(U#- _"]RhWsﶡ%ъ!j9ϼqWet/jUj87jn rMa''@|/tVI[h aZHvZe[S"xn2tV)`ux5M뙙(>!/Lո)q$ 1d4% 8&0b+*Hp`g6vrb{l䛹y訔cd͹Qʃy=˟|g;OTɖ,at"q`xha*du4 HN#zfGs`D8-h<iM@E7ć>9 9w AYZ!jӧ\F &•pq'z5OTO_jllN͉q@u֓hBԠQ6=ں$C1+KK밝qvs]GqZ:,fp=v%"ðJDSd(; XS <%6f71}0*؉sKiyz8?]p&u@$G4#}| G`pj-2,os$7+%<(D3\,04ɖL ~-d>-)w#i[`9hŖc|6t0c:&PZ&tUt _>/>zYUü0d*kA\q^ ' 1BcJZb_`>ALrTA$%)>,/gL:@"D\ 31C-geTv8Q>vKݷ̝d?߇4hos",'* #<_tW5$J`Mb3*u^yBUI]3ȩ;s.KvmL,Blhpxivmy I,o,)]i$y_Ӷ0t#Sw 6KM<U 5 Lw"K&m?+~r 
q՗΢3RҸ۸Rs_^;wx4f1*,ܔtDTDL~)!Ѽagɜ74 w%g Z!^&7Z<8h!U7g`H*/`ˎ:K~&~!.,s򷓁龴wh*h[k( MABK*it&k 5 {H&Ŗ's0T$.ز]oe*Ve]gxUtvorNי&tv%J?z0 z8 &69fMj$yvXNa͐k ش`\FU?\A\E{`sWnb1`$IMZ2<&C^,ѷY)V@V 11%am6/";s}k9YKsʼn$KiwY20ވĝN y㹣JqIܗڸ)6!U$ {奞}M0_@R:(Z^e'Gx)'q 7GCхYБ7XebK|: Jv*0PS&;#2Ү9>S<)G/ =ސhDŽAq_nkvE@Cvހ#cV/ 05qD(iaﴑX\y"fc7!!# /bF^@m]w !WƉM{]T}>Ѻfv8Za^ys[p89<Q%݄+#Wϭ"jS%S<V0ke;q] U qfpg ъ Fyqu?M&h}-e:u"v Y6߬9oe=p:Joy\*!oE%rS>uuȈ}4`̞cZ@cm.j@mx)sVPqppac2Bn[tf0X0S{i0xx4W+ gU$>ƏHǸ~^z[P" +X Ypy]2]\2"q ̠1!8c1R.4P;~㷮\f"^9t{9I9KEGBo .艐Jŝrh\(5SyɳJ@@~y>.@Dd45f u:.PDosZp9:* ) Hrjs{L$VdcӝKjm\DlS<5a#DH=xHdsH69,*BÍ2T^nZqgБ[s+og6~~WT5ͱ0`PI5N/Lָj|*[ڍ$IK hlAPMO`>福SnWqwpkRRѦ˗ÑI<#88/2na>['kHh\`6E3~2ܳ-kng_qRm.\$K1ݾ-wEQ7 Hs2,4TQ+y2tVIR\#8e{ aB&5^y0 C^ 9y60Mѝ D '^tgb*.ꌬsa+KyUUjOnBa4yU0kFaGT!+;P6cn*'ݲ2/5 g E: z@:KMsCENڛD6S#Q[nbo'MkA'7;,߄IV91 Yr^%}yLTX3mrJ:m!&0 *;LC(ÿ_ϣ|`eJY]n1ʹd!L,.kّed㛄S'T@iu3imߚX虥Y;%{_}3ZgQscY0X-|os /]jcS^n7d!e@ m\Xx.UV֫7.4GF85j!W>}F+y8u>(0`a]Ə!L%3q[5y]LM9"kAp[S0x!ђK)/!,a3.mS/Dͳ yA0mF1Ji>9;lqցc'f?o$~վ\~)BPj$ŏ\G FLRO]4JՐ/+"[;u61q:G=<m1! GcëqקtLjqzl_N]%-tT7˟֣buN_lԃQZ8 >LN,=}i1 >PR21Ǩ~^=^V?INЃƻ.J,c=K]{o㉅{^¦2plD}N„g0Y3ƣMUpWԟ <7Q~A& TFk_%9 & UfB&ѱ(f~k}nfţ+;vQKX<~( +o^7D/B X< DR0$a;fWD({0{TAW͏`B+k?&?g L>wQ"Y~|Eo< ٓ.R>CU ^z{4ℇ&N@J ( Bm)xU_y8zg y[MP'xʥd?+刋pZK]k=3Abf_x(QxQI&~fJoE-~p16…FUlS#J1w\h>}N^hϋn)k@,ps";M|`\凮F,XܨŰ]xi~7:dē[^QX\WEb{1sFJ]sy}:fq >wv9)JƠ: | naݛ[dG-+aĂm\Nvܼ[>ϯ4W`L;W5ܔBKhXx7JD0G.k7ۼ`u-lnߋ^8-J⣤Uz\`1sEށT nphwW/W'=}.N. K:{\ T)Q8(oa :F&] E;f \4o>OΌYq `wO ܇s{?8şݥ W$.䁱s"RcMtK6>9+xi}{e>>:M{Q`FA r3xtf? 1/<Ƨ~aYx!M1Ng7@C{gm^!Ͻmض9*#7翔<88JA@NDY#09X5HiM `88:S!^eh e8W0E {>t̞AA ~r5@dʾƯ_1%T F! 
vt=Vږ:kDHlX%G_|[߰t*$ cKn@ BW M*#Z/i$Pa}ǑOeJxJ^+S00:#6\H4.DWP =|y:GWz]Vr KZ |zI7'@ iFSş7ڰKOaN478IkL{PU= Vgx'P#.}x ‰#  =/yM+PMN~J烓v:dT4UQ:qs2E>lC}%_@ $p4'A±ܿtx?T/(ȥ6Ԫ5jgMˏ=J2ӻ$֬o!6pKJ'1d#iWj裹oG $rHQB~ZjC&;W05|vVuvL?D خܣa'ܘM1^z8GMV5)(ۻCB7|7|mn|mC"XhBɐ St˄pt.8Z9wf -]JId^rN*A^ KuXl_4=6954W~(?/C^_@*aT;R7B m~@XȔHw2 R׼jʃ~fyLJc72~.[b~5]&ѼI9 GLcv`c|7]/:XmצX{l["YS fV[ǭ4Qꖷ ~Y"n.wPKN;l/html/_static/underscore.jsٖ6_c=5iN./7GI08Wdyn;yYEE'+g6z2ۋc'^ڼ9}vh2hrIJγ;'NL}Œ;{\z+.(:-)CdyKK;EEY9)GYE}Q5*>'ˈK^k5/eN`P`YļfiV XWmz7*aN>Droxc$"&F~};lHp]2 |ϲ!z)Wц(e}GZ96lGP}0/&Oqyռ$#O{G!bRp{'{xXU_BɐV%x*RQF&!"!Mkr1h xTC0Gt ,Y-}(h_0 4yECaNA8?"lℼsR:vJoᔛ͛A֩3k>QҽNIs~|<(6&lW:b"'g B/`_dp2 c܂bC5P %Wb.$r紥aEi6@e-3M'ĄK@ZJ!{8(x N@:3Ú{`I ϛ%$s/T8iaȚ 󲄭v_ pAT9 wAZ,sޱl]b1(}>H-5h) UR_!ׅ<6`8{p+w11[+yjH ![D;}|AYap뎾m#ԫPT&>z ,s^6{ {8|F9CuzN9߁r JSjjD G9yNC1$4@" 2ĤN1a93_ 9y(9U{?y<1!ZKMgY &vN¼ҋ[ *ļ˳pk;PЇ,S(Ne_ee q!es#x!φrFVi0qѼh.x;z/f'-uYwBӓ9?s~KŰp?mkX16[6¤,=%А-9+Zh77N=Q!r]viIYX sS[ U7 ]2}30}Ģ2s, /}1GEY\h!ȘdZi6;p),4sސ120hRּI ?X7StH") n\4(#YX3ri@8ܣ}g;JxHHq#cl)9Q--x1Ф. Nkb'1.@H8z}am^ 5luXj,=(m^mmjGգk/&k3iKoAl}BG4;S#\2gHYlj/N`BWˀ+w΅6 3H  jcn{y-OR؋2k+PIh $\3\y5CM R EѸ NS~q`{( wkԭ /lB\lqS^0Ym\h.&l.C O3 /͉Y+@z ]8i`xa1݁2G̃hTi+S3ζ}-xR(vkYq%?x_/,es XR# ?L^o-b ,{L7o:6(p;@gbvL%|[s2͠[h; ҆hzF]\##4ݵڊ[;N.`/+/m>5f3LHS %fsCײkp=CWd&%_D`3)#?X/6"W:0%&`3vE]M,n#vXj9:ij~9Ől2ɸui|{ؘ @GvE,է^0}uf]3 LƒNb7,m-ɲ4hmf|_mjau@8OEC΋ g)Ga6IN Q:=C6[.̠} B;Ԛt yz(cz=Cj +<V6>@}xՕU? M h*(^y(ys硠ru,B o}w),FC?׊ų"Ov ?Q K\HFLxYO3QO̥ "S ]..p+1խ'bpt}~$}MkRq6;]F]B`O*P&t:~^q8z5C'aE9d_^}-o?W%.C"S3-4.f.3Ú^N$ޭn>uG0sqի;p%-˄q ^r`sу#v8#wl񭸆+歱X&܈#}}D^%;!]K.䏂mDYޔ7MB&^R貵["ͽG|A\b%o .}z1DXW-jo rSGkZh̭U2ĶӚxױPíMem; -DPЁegz 6zEtfXčnX.yqce>wS:j-~l7B_J")zNE+ZpʼnjwFYA8Kchllql8 NtV}al|FPK NHhtml/_static/up-pressed.pngPNG  IHDRaIDATxc o+ Sb)SbG&W +H,Cق n;"VsT?.#&ھjFcgb ..NG`49l* Ŀ'q< *]Jcyjؖf b0Sԙµ|IENDB`PK N>uhtml/_static/up.pngPNG  IHDR7IDATx@ez $& 8:& :Kpwn}O<:!!{G@Dz?"̧ S{g<ݢ lMQwy|? 
0 pq8q` pL-'SBNAwTń|U VIENDB`PKNFo, chtml/_static/websupport.jsrD #gq(g TQ!,4$t$k`vSdHF====}HLxޭWMQeZa4`[U,Y``NҤND- ԋY$qźL;S[u}o Wp5*!wHHdzo?cQIZ' M!Fޟ<`aSe~<_)̀ D-a)BU*Б2-pU 6HZ7a #VRuڇ|s|kiS#Rz}\0~,!hl6$_,²0bUG%'ePT[ t6" PۡS[Q6́Ev7sͯ0c5ϛ3^H!)Y*zM#ﯿwfܥ͐CohJs$_oQT7VV&IOxIjvt ,jhTt ˰Kn} 0}YQ4 "X ƞ_aQ- DaLAQ'b6iM25(I쏂Wud÷FFewt/@JũPH0p)t}Bd! ыE8^2]@bM ;\wUo2/*LMby"Ē Fc_F@ , 10$G|{oG;c[W- ⬅TE=@G."يRdL]9$@8k@1v漂\1 6NB,@^#d\Jidǁ.]@@IkUV{H;vUP6|gr{:ZB&>f,P-#  ܓ2QP?O@3`ŬUzh5~[DgMġ8h4Lba+ȿfdP7'g09/?3| `h kq!VE fHX9tp`.p%f } o/gC"o;i}_lzeFODI >{#*4t5SY< giw0+@*r*[2j |u8D2ie檉TS&S M5V(!4AXVc}۽ -X$MzSF/mwT!46(V+1͌`^C$MNɍp֢N&HkۀK Ԙe#8Qs@V1E#o " K i3e"VˠR4/, 0l8Τ$-*j1zTe^ZJ*US2%) y,}Py .NqY&g} "Gql›$$,t1Ki'r5%h\J@~ Kζ3 K+t&~ 'Z۲na)#'Peu(na*vIR("LF7 kAjcNb2@V#| %7*2v2'/-oD[SVoP4VRKyӞuT_#9lR:-%ժdžDi{twC bguza=j%- +1f~ ݒLJs>(u4J! <": 3 `7Jc[gK,Pvw*Kfs6jwfܵ7:DԔUr#ҵ$+UFm&j\bL4P3P`{9-qhusWIrCeF^F<5sOSw J@oT޿TR0*>bj{{MXlΘUlCc5W\C!M|:D&M?*WŦR?O>q؛ebx3ZqvNк γӦ2t|L;[q?پ5d埙7Pc8+ѢnLT 3>0XkR0*9ή9O9p &f?e樯jϨVgӨMZݐ2f( ә8SC([g琜sL5{<:9g"p)M]ٕYe8oDm~;{-b>.mR/wU\ΡQf-|EM<:0ʭ}Թ r`YC].Sgum/ĝ('j*RXt{z ™ͽ0[{yHC&Ճwymb@Y!Z jJ+)saEi\/j` &'6s)a2] _5T!P6o:cļ}@7 ÊJAϻbX*ԪmlBu@E1P(/oHĕxpmb6P0Ӌe+{7z$FIR[>I9+ 7@'Ɂs0:ir'@],vi``pTa/d*mL73óFJRЋr5,M/sHGtU=%lN#+MF+J\Bܭ9~+, ߋ/T!l z¿}@V8ڪi  F2!bY,YezN~ (4!ӵv$=wgH0:[zfh=BQpST4԰U~RRtty3Mf_gGYo,9V,챺C=IH,n+\]NI)()E3!;Jo Te4ބ N bG {tokQ֝h|0bzRNAqX,ǒV5,L|5Q/&\HM}rCԘr)ыtB WO?WN$5xW]u5+su.> 3Ψ!p}NdPS4ܿH'#hP_OO1`'GFOb4ȇ<:I _BWL^F8VM؈9+Sf=8R Vng'9V|V\<Ӌ>#l:S0 Ά!LڍQlhzm*٣=1 JU̐wFdvM; N߱l?tP)I +*zzȾ Pr?8R=e![O´ZOYSq=7K[b-Șޞp5CDDM+(9h _';|H}b v*$ S^Gr5~)e M:אRE9qЎ/sؠ(2 R̟Si4m:߂sk?:Zj #8؁9ERswΧ`$$c: z:,;s| 'Bӯ,[⮝'C'یRSe X\O(k,8S l>u{ABOnR"C itiRZn8P)yao?mo@0p}rvIl_V3*J-5eAJ@y}}?F+j) QO16{q&qq1ϸᄑe\]Κ%'jIa3y?M~ #lm/N;ě:HX':q8: Sw_~F~CViU@("cbIXc:y]V`0OϨ]Cq1@LR+Jˈy{}nMsplw ms.=N{hًy{s/d˨ۅv<|B:܉,KHb:Yt*]qRcnVEԮT]C9uK_v#7Ouuٳjzn$lS:M.eg]ټ:pyclN 9WS^>TǣI)"#h#v"hӅE\k͝8 ՝BOU1<;wb(o8(MHdQn_ ~x؄g!珮˰XVt4ZC`Hg qzj N pT F'D |ۃ&}$!~Mwu ;o6+){&a*Bܷ-'z0B|gg}4|['&~-"s% JCmY@\2"BZ>%s>}ft`4^ð1"NPK?Ż$ 
html/searchindex.js '"Cv'"CvxtPK?ŻL$ html/_static/ajax-loader.gif cPK?N a$$ html/_static/doctools.js vdvd,cPK?NQ]q*%$ html/_static/documentation_options.js eeԝdPK? N'$ }+html/_static/down-pressed.png tete9ePK? N9.$ ,html/_static/down.png ffePK? NS[S$ -html/_static/file.png ff7fPK?Nص($ .html/_static/jquery-3.2.1.js սgսgfPK?N)rrR$ Whtml/_static/jquery.js hhegPK? NZZ$ yhtml/_static/minus.png ~i~ihPK? N%ZZZ$ html/_static/plus.png jjiPK?NIo$ html/_static/pygments.css RjRjAjPK?N$@J;$ Vhtml/_static/searchtools.js {k{kfjPK?N%` $ html/_static/underscore-1.3.1.js wlwlkPK?N;l/$ 2html/_static/underscore.js mmlPK? NH$ html/_static/up-pressed.png VmVm:mPK? N>u$ )html/_static/up.png ԆnԆn`mPK?NFo, c$ %html/_static/websupport.js "o"onPK..2luametatex-2.10.08/source/libraries/pplib/ppapi.h000066400000000000000000000343031442250314700217040ustar00rootroot00000000000000 #ifndef PP_API_H #define PP_API_H #include #include #include #include "ppconf.h" #define pplib_version "v2.1" #define pplib_author "p.jackowski@gust.org.pl" /* types */ typedef int64_t ppint; typedef size_t ppuint; // machine word typedef char ppbyte; typedef double ppnum; typedef struct ppname ppname; typedef struct ppstring ppstring; struct ppname { ppbyte *data; size_t size; ppname *alterego; int flags; }; struct ppstring { ppbyte *data; size_t size; ppstring *alterego; int flags; }; typedef struct ppobj ppobj; typedef struct ppref ppref; typedef struct { ppobj *data; size_t size; } pparray; typedef struct { ppobj *data; ppname **keys; size_t size; } ppdict; typedef enum { PPSTREAM_BASE16 = 0, PPSTREAM_BASE85, PPSTREAM_RUNLENGTH, PPSTREAM_FLATE, PPSTREAM_LZW, PPSTREAM_CCITT, PPSTREAM_DCT, PPSTREAM_JBIG2, PPSTREAM_JPX, PPSTREAM_CRYPT } ppstreamtp; typedef struct { ppstreamtp *filters; ppdict **params; size_t count; } ppstream_filter; typedef struct { ppdict *dict; void *input, *I; size_t offset; size_t length; ppstream_filter filter; ppobj *filespec; ppstring *cryptkey; int flags; } ppstream; PPDEF extern const char * 
ppstream_filter_name[]; PPAPI int ppstream_filter_type (ppname *filtername, ppstreamtp *filtertype); PPAPI void ppstream_filter_info (ppstream *stream, ppstream_filter *info, int decode); #define PPSTREAM_FILTER (1<<0) #define PPSTREAM_IMAGE (1<<1) #define PPSTREAM_ENCRYPTED_AES (1<<2) #define PPSTREAM_ENCRYPTED_RC4 (1<<3) #define PPSTREAM_ENCRYPTED (PPSTREAM_ENCRYPTED_AES|PPSTREAM_ENCRYPTED_RC4) #define PPSTREAM_ENCRYPTED_OWN (1<<4) #define PPSTREAM_NOT_SUPPORTED (1<<6) #define ppstream_compressed(stream) ((stream)->flags & (PPSTREAM_FILTER|PPSTREAM_IMAGE)) #define ppstream_filtered(stream) ((stream)->flags & PPSTREAM_FILTER) #define ppstream_image(stream) ((stream)->flags & PPSTREAM_IMAGE) #define ppstream_encrypted(stream) ((stream)->flags & PPSTREAM_ENCRYPTED) typedef enum { PPNONE = 0, PPNULL, PPBOOL, PPINT, PPNUM, PPNAME, PPSTRING, PPARRAY, PPDICT, PPSTREAM, PPREF } ppobjtp; PPDEF extern const char * ppobj_kind[]; struct ppobj { union { ppint integer; ppnum number; ppname *name; ppstring *string; pparray *array; ppdict *dict; ppstream *stream; ppref *ref; void *any; }; ppobjtp type; }; typedef struct ppxref ppxref; struct ppref { ppobj object; ppuint number, version; size_t offset; size_t length; ppxref *xref; }; typedef struct ppdoc ppdoc; /* object */ #define ppobj_get_null(o) ((o)->type == PPNULL ? 1 : 0) #define ppobj_get_bool(o, v) ((o)->type == PPBOOL ? ((v = ((o)->integer != 0)), 1) : 0) #define ppobj_get_int(o, v) ((o)->type == PPINT ? ((v = (o)->integer), 1) : 0) #define ppobj_get_uint(o, v) ((o)->type == PPINT && (o)->integer >= 0 ? ((v = (ppuint)((o)->integer)), 1) : 0) #define ppobj_get_num(o, v) ((o)->type == PPNUM ? ((v = (o)->number), 1) : (((o)->type == PPINT ? ((v = (ppnum)((o)->integer)), 1) : 0))) #define ppobj_get_name(o) ((o)->type == PPNAME ? (o)->name : NULL) #define ppobj_get_string(o) ((o)->type == PPSTRING ? (o)->string : NULL) #define ppobj_get_array(o) ((o)->type == PPARRAY ? 
(o)->array : NULL) #define ppobj_get_dict(o) ((o)->type == PPDICT ? (o)->dict : NULL) #define ppobj_get_stream(o) ((o)->type == PPSTREAM ? (o)->stream : NULL) #define ppobj_get_ref(o) ((o)->type == PPREF ? (o)->ref : NULL) #define ppobj_rget_obj(o) ((o)->type == PPREF ? ppref_obj((o)->ref) : o) #define ppobj_rget_null(o) ((o)->type == PPNULL ? 1 : ((o)->type == PPREF ? ppobj_get_null(ppref_obj((o)->ref)) : 0)) #define ppobj_rget_bool(o, v) ((o)->type == PPBOOL ? ((v = ((o)->integer != 0)), 1) : ((o)->type == PPREF ? ppobj_get_bool(ppref_obj((o)->ref), v) : 0)) #define ppobj_rget_int(o, v) ((o)->type == PPINT ? ((v = (o)->integer), 1) : ((o)->type == PPREF ? ppobj_get_int(ppref_obj((o)->ref), v) : 0)) #define ppobj_rget_uint(o, v) ((o)->type == PPINT && (o)->integer >= 0 ? ((v = (ppuint)((o)->integer)), 1) : ((o)->type == PPREF ? ppobj_get_uint(ppref_obj((o)->ref), v) : 0)) #define ppobj_rget_num(o, v) ((o)->type == PPNUM ? ((v = (o)->number), 1) : (((o)->type == PPINT ? ((v = (ppnum)((o)->integer)), 1) : ((o)->type == PPREF ? ppobj_get_num(ppref_obj((o)->ref), v) : 0)))) #define ppobj_rget_name(o) ((o)->type == PPNAME ? (o)->name : ((o)->type == PPREF ? ppobj_get_name(ppref_obj((o)->ref)) : NULL)) #define ppobj_rget_string(o) ((o)->type == PPSTRING ? (o)->string : ((o)->type == PPREF ? ppobj_get_string(ppref_obj((o)->ref)) : NULL)) #define ppobj_rget_array(o) ((o)->type == PPARRAY ? (o)->array : ((o)->type == PPREF ? ppobj_get_array(ppref_obj((o)->ref)) : NULL)) #define ppobj_rget_dict(o) ((o)->type == PPDICT ? (o)->dict : ((o)->type == PPREF ? ppobj_get_dict(ppref_obj((o)->ref)) : NULL)) #define ppobj_rget_stream(o) ((o)->type == PPSTREAM ? (o)->stream : ((o)->type == PPREF ? ppobj_get_stream(ppref_obj((o)->ref)) : NULL)) #define ppobj_rget_ref(o) ((o)->type == PPREF ? (o)->ref : ((o)->type == PPREF ? ppobj_get_ref(ppref_obj((o)->ref)) : NULL)) #define ppobj_get_bool_value(o) ((o)->type == PPBOOL ? 
((o)->integer != 0) : 0) #define ppobj_get_int_value(o) ((o)->type == PPINT ? (o)->integer : 0) #define ppobj_get_num_value(o) ((o)->type == PPNUM ? (o)->number : ((o)->type == PPINT ? (ppnum)((o)->integer) : 0.0)) /* name */ #define ppname_is(name, s) (memcmp((name)->data, s, sizeof("" s) - 1) == 0) #define ppname_eq(name, n) (memcmp((name)->data, s, (name)->size) == 0) #define ppname_size(name) ((name)->size) #define ppname_exec(name) ((name)->flags & PPNAME_EXEC) #define ppname_data(name) ((name)->data) #define PPNAME_ENCODED (1 << 0) #define PPNAME_DECODED (1 << 1) #define PPNAME_EXEC (1 << 1) PPAPI ppname * ppname_decoded (ppname *name); PPAPI ppname * ppname_encoded (ppname *name); PPAPI ppbyte * ppname_decoded_data (ppname *name); PPAPI ppbyte * ppname_encoded_data (ppname *name); /* string */ #define ppstring_size(string) ((string)->size) #define ppstring_data(string) ((string)->data) #define PPSTRING_ENCODED (1 << 0) #define PPSTRING_DECODED (1 << 1) //#define PPSTRING_EXEC (1 << 2) // postscript only #define PPSTRING_PLAIN 0 #define PPSTRING_BASE16 (1 << 3) #define PPSTRING_BASE85 (1 << 4) #define PPSTRING_UTF16BE (1 << 5) #define PPSTRING_UTF16LE (1 << 6) #define ppstring_type(string) ((string)->flags & (PPSTRING_BASE16|PPSTRING_BASE85)) #define ppstring_hex(string) ((string)->flags & PPSTRING_BASE16) #define ppstring_utf(string) ((string)->flags & (PPSTRING_UTF16BE|PPSTRING_UTF16LE)) PPAPI ppstring * ppstring_decoded (ppstring *string); PPAPI ppstring * ppstring_encoded (ppstring *string); PPAPI ppbyte * ppstring_decoded_data (ppstring *string); PPAPI ppbyte * ppstring_encoded_data (ppstring *string); /* array */ #define pparray_size(array) ((array)->size) #define pparray_at(array, index) ((array)->data + index) #define pparray_first(array, index, obj) ((index) = 0, (obj) = pparray_at(array, 0)) #define pparray_next(index, obj) (++(index), ++(obj)) #define pparray_get(array, index) (index < (array)->size ? 
pparray_at(array, index) : NULL) PPAPI ppobj * pparray_get_obj (pparray *array, size_t index); PPAPI int pparray_get_bool (pparray *array, size_t index, int *v); PPAPI int pparray_get_int (pparray *array, size_t index, ppint *v); PPAPI int pparray_get_uint (pparray *array, size_t index, ppuint *v); PPAPI int pparray_get_num (pparray *array, size_t index, ppnum *v); PPAPI ppname * pparray_get_name (pparray *array, size_t index); PPAPI ppstring * pparray_get_string (pparray *array, size_t index); PPAPI pparray * pparray_get_array (pparray *array, size_t index); PPAPI ppdict * pparray_get_dict (pparray *array, size_t index); //PPAPI ppstream * pparray_get_stream (pparray *array, size_t index); PPAPI ppref * pparray_get_ref (pparray *array, size_t index); PPAPI ppobj * pparray_rget_obj (pparray *array, size_t index); PPAPI int pparray_rget_bool (pparray *array, size_t index, int *v); PPAPI int pparray_rget_int (pparray *array, size_t index, ppint *v); PPAPI int pparray_rget_uint (pparray *array, size_t index, ppuint *v); PPAPI int pparray_rget_num (pparray *array, size_t index, ppnum *v); PPAPI ppname * pparray_rget_name (pparray *array, size_t index); PPAPI ppstring * pparray_rget_string (pparray *array, size_t index); PPAPI pparray * pparray_rget_array (pparray *array, size_t index); PPAPI ppdict * pparray_rget_dict (pparray *array, size_t index); PPAPI ppstream * pparray_rget_stream (pparray *array, size_t index); PPAPI ppref * pparray_rget_ref (pparray *array, size_t index); /* dict */ #define ppdict_size(dict) ((dict)->size) #define ppdict_at(dict, index) ((dict)->data + index) #define ppdict_key(dict, index) ((dict)->keys[index]) PPAPI ppobj * ppdict_get_obj (ppdict *dict, const char *name); PPAPI int ppdict_get_bool (ppdict *dict, const char *name, int *v); PPAPI int ppdict_get_int (ppdict *dict, const char *name, ppint *v); PPAPI int ppdict_get_uint (ppdict *dict, const char *name, ppuint *v); PPAPI int ppdict_get_num (ppdict *dict, const char *name, ppnum *v); 
PPAPI ppname * ppdict_get_name (ppdict *dict, const char *name); PPAPI ppstring * ppdict_get_string (ppdict *dict, const char *name); PPAPI pparray * ppdict_get_array (ppdict *dict, const char *name); PPAPI ppdict * ppdict_get_dict (ppdict *dict, const char *name); //PPAPI ppstream * ppdict_get_stream (ppdict *dict, const char *name); PPAPI ppref * ppdict_get_ref (ppdict *dict, const char *name); PPAPI ppobj * ppdict_rget_obj (ppdict *dict, const char *name); PPAPI int ppdict_rget_bool (ppdict *dict, const char *name, int *v); PPAPI int ppdict_rget_int (ppdict *dict, const char *name, ppint *v); PPAPI int ppdict_rget_uint (ppdict *dict, const char *name, ppuint *v); PPAPI int ppdict_rget_num (ppdict *dict, const char *name, ppnum *v); PPAPI ppname * ppdict_rget_name (ppdict *dict, const char *name); PPAPI ppstring * ppdict_rget_string (ppdict *dict, const char *name); PPAPI pparray * ppdict_rget_array (ppdict *dict, const char *name); PPAPI ppdict * ppdict_rget_dict (ppdict *dict, const char *name); PPAPI ppstream * ppdict_rget_stream (ppdict *dict, const char *name); PPAPI ppref * ppdict_rget_ref (ppdict *dict, const char *name); #define ppdict_first(dict, pkey, obj) (pkey = (dict)->keys, obj = (dict)->data) #define ppdict_next(pkey, obj) (++(pkey), ++(obj)) /* stream */ #define ppstream_dict(stream) ((stream)->dict) PPAPI uint8_t * ppstream_first (ppstream *stream, size_t *size, int decode); PPAPI uint8_t * ppstream_next (ppstream *stream, size_t *size); PPAPI uint8_t * ppstream_all (ppstream *stream, size_t *size, int decode); PPAPI void ppstream_done (ppstream *stream); PPAPI void ppstream_init_buffers (void); PPAPI void ppstream_free_buffers (void); /* ref */ #define ppref_obj(ref) (&(ref)->object) /* xref */ PPAPI ppxref * ppdoc_xref (ppdoc *pdf); PPAPI ppxref * ppxref_prev (ppxref *xref); PPAPI ppdict * ppxref_trailer (ppxref *xref); PPAPI ppdict * ppxref_catalog (ppxref *xref); PPAPI ppdict * ppxref_info (ppxref *xref); PPAPI ppref * ppxref_pages (ppxref 
*xref); PPAPI ppref * ppxref_find (ppxref *xref, ppuint refnumber); /* doc */ PPAPI ppdoc * ppdoc_load (const char *filename); PPAPI ppdoc * ppdoc_filehandle (FILE *file, int closefile); #define ppdoc_file(file) ppdoc_filehandle(file, 1) PPAPI ppdoc * ppdoc_mem (const void *data, size_t size); PPAPI void ppdoc_free (ppdoc *pdf); #define ppdoc_trailer(pdf) ppxref_trailer(ppdoc_xref(pdf)) #define ppdoc_catalog(pdf) ppxref_catalog(ppdoc_xref(pdf)) #define ppdoc_info(pdf) ppxref_info(ppdoc_xref(pdf)) #define ppdoc_pages(pdf) ppxref_pages(ppdoc_xref(pdf)) PPAPI ppuint ppdoc_page_count (ppdoc *pdf); PPAPI ppref * ppdoc_page (ppdoc *pdf, ppuint index); PPAPI ppref * ppdoc_first_page (ppdoc *pdf); PPAPI ppref * ppdoc_next_page (ppdoc *pdf); PPAPI ppstream * ppcontents_first (ppdict *dict); PPAPI ppstream * ppcontents_next (ppdict *dict, ppstream *stream); /* crypt */ typedef enum { PPCRYPT_NONE = 0, PPCRYPT_DONE = 1, PPCRYPT_FAIL = -1, PPCRYPT_PASS = -2 } ppcrypt_status; PPAPI ppcrypt_status ppdoc_crypt_status (ppdoc *pdf); PPAPI ppcrypt_status ppdoc_crypt_pass (ppdoc *pdf, const void *userpass, size_t userpasslength, const void *ownerpass, size_t ownerpasslength); /* permission flags, effect in Acrobat File -> Properties -> Security tab */ PPAPI ppint ppdoc_permissions (ppdoc *pdf); #define PPDOC_ALLOW_PRINT (1<<2) // printing #define PPDOC_ALLOW_MODIFY (1<<3) // filling form fields, signing, creating template pages #define PPDOC_ALLOW_COPY (1<<4) // copying, copying for accessibility #define PPDOC_ALLOW_ANNOTS (1<<5) // filling form fields, copying, signing #define PPDOC_ALLOW_EXTRACT (1<<9) // contents copying for accessibility #define PPDOC_ALLOW_ASSEMBLY (1<<10) // (no effect) #define PPDOC_ALLOW_PRINT_HIRES (1<<11) // (no effect) /* context */ typedef struct ppcontext ppcontext; PPAPI ppcontext * ppcontext_new (void); PPAPI void ppcontext_done (ppcontext *context); PPAPI void ppcontext_free (ppcontext *context); /* contents parser */ PPAPI ppobj * ppcontents_first_op 
(ppcontext *context, ppstream *stream, size_t *psize, ppname **pname); PPAPI ppobj * ppcontents_next_op (ppcontext *context, ppstream *stream, size_t *psize, ppname **pname); PPAPI ppobj * ppcontents_parse (ppcontext *context, ppstream *stream, size_t *psize); /* boxes and transforms */ typedef struct { ppnum lx, ly, rx, ry; } pprect; PPAPI pprect * pparray_to_rect (pparray *array, pprect *rect); PPAPI pprect * ppdict_get_rect (ppdict *dict, const char *name, pprect *rect); PPAPI pprect * ppdict_get_box (ppdict *dict, const char *name, pprect *rect); typedef struct { ppnum xx, xy, yx, yy, x, y; } ppmatrix; PPAPI ppmatrix * pparray_to_matrix (pparray *array, ppmatrix *matrix); PPAPI ppmatrix * ppdict_get_matrix (ppdict *dict, const char *name, ppmatrix *matrix); /* logger */ typedef void (*pplogger_callback) (const char *message, void *alien); PPAPI void pplog_callback (pplogger_callback logger, void *alien); PPAPI int pplog_prefix (const char *prefix); /* version */ PPAPI const char * ppdoc_version_string (ppdoc *pdf); PPAPI int ppdoc_version_number (ppdoc *pdf, int *minor); /* doc info */ PPAPI size_t ppdoc_file_size (ppdoc *pdf); PPAPI ppuint ppdoc_objects (ppdoc *pdf); PPAPI size_t ppdoc_memory (ppdoc *pdf, size_t *waste); #endif luametatex-2.10.08/source/libraries/pplib/pparray.c000066400000000000000000000074511442250314700222500ustar00rootroot00000000000000 #include "pplib.h" pparray * pparray_create (const ppobj *stackpos, size_t size, ppheap *heap) { pparray *array; array = (pparray *)ppstruct_take(heap, sizeof(pparray)); array->data = (ppobj *)ppstruct_take(heap, size * sizeof(ppobj)); // separate chunk, alignment requirements warning otherwise array->size = size; memcpy(array->data, stackpos, size * sizeof(ppobj)); return array; } ppobj * pparray_get_obj (pparray *array, size_t index) { return pparray_get(array, index); } ppobj * pparray_rget_obj (pparray *array, size_t index) { ppobj *obj; return (obj = pparray_get(array, index)) != NULL ? 
ppobj_rget_obj(obj) : NULL; } int pparray_get_bool (pparray *array, size_t index, int *v) { ppobj *obj; return (obj = pparray_get(array, index)) != NULL ? ppobj_get_bool(obj, *v) : 0; } int pparray_rget_bool (pparray *array, size_t index, int *v) { ppobj *obj; return (obj = pparray_get(array, index)) != NULL ? ppobj_rget_bool(obj, *v) : 0; } int pparray_get_int (pparray *array, size_t index, ppint *v) { ppobj *obj; return (obj = pparray_get(array, index)) != NULL ? ppobj_get_int(obj, *v) : 0; } int pparray_rget_int (pparray *array, size_t index, ppint *v) { ppobj *obj; return (obj = pparray_get(array, index)) != NULL ? ppobj_rget_int(obj, *v) : 0; } int pparray_get_uint (pparray *array, size_t index, ppuint *v) { ppobj *obj; return (obj = pparray_get(array, index)) != NULL ? ppobj_get_uint(obj, *v) : 0; } int pparray_rget_uint (pparray *array, size_t index, ppuint *v) { ppobj *obj; return (obj = pparray_get(array, index)) != NULL ? ppobj_rget_uint(obj, *v) : 0; } int pparray_get_num (pparray *array, size_t index, ppnum *v) { ppobj *obj; return (obj = pparray_get(array, index)) != NULL ? ppobj_get_num(obj, *v) : 0; } int pparray_rget_num (pparray *array, size_t index, ppnum *v) { ppobj *obj; return (obj = pparray_get(array, index)) != NULL ? ppobj_rget_num(obj, *v) : 0; } ppname * pparray_get_name (pparray *array, size_t index) { ppobj *obj; return (obj = pparray_get(array, index)) != NULL ? ppobj_get_name(obj) : NULL; } ppname * pparray_rget_name (pparray *array, size_t index) { ppobj *obj; return (obj = pparray_get(array, index)) != NULL ? ppobj_rget_name(obj) : NULL; } ppstring * pparray_get_string (pparray *array, size_t index) { ppobj *obj; return (obj = pparray_get(array, index)) != NULL ? ppobj_get_string(obj) : NULL; } ppstring * pparray_rget_string (pparray *array, size_t index) { ppobj *obj; return (obj = pparray_get(array, index)) != NULL ? 
ppobj_rget_string(obj) : NULL; } pparray * pparray_get_array (pparray *array, size_t index) { ppobj *obj; return (obj = pparray_get(array, index)) != NULL ? ppobj_get_array(obj) : NULL; } pparray * pparray_rget_array (pparray *array, size_t index) { ppobj *obj; return (obj = pparray_get(array, index)) != NULL ? ppobj_rget_array(obj) : NULL; } ppdict * pparray_get_dict (pparray *array, size_t index) { ppobj *obj; return (obj = pparray_get(array, index)) != NULL ? ppobj_get_dict(obj) : NULL; } ppdict * pparray_rget_dict (pparray *array, size_t index) { ppobj *obj; return (obj = pparray_get(array, index)) != NULL ? ppobj_rget_dict(obj) : NULL; } /* ppstream * pparray_get_stream (pparray *array, size_t index) { ppobj *obj; return (obj = pparray_get(array, index)) != NULL ? ppobj_get_stream(obj) : NULL; } */ ppstream * pparray_rget_stream (pparray *array, size_t index) { ppobj *obj; return (obj = pparray_get(array, index)) != NULL ? ppobj_rget_stream(obj) : NULL; } ppref * pparray_get_ref (pparray *array, size_t index) { ppobj *obj; return (obj = pparray_get(array, index)) != NULL ? ppobj_get_ref(obj) : NULL; } ppref * pparray_rget_ref (pparray *array, size_t index) { ppobj *obj; return (obj = pparray_get(array, index)) != NULL ? 
ppobj_rget_ref(obj) : NULL; } luametatex-2.10.08/source/libraries/pplib/pparray.h000066400000000000000000000001711442250314700222450ustar00rootroot00000000000000 #ifndef PP_ARRAY_H #define PP_ARRAY_H pparray * pparray_create (const ppobj *stack, size_t size, ppheap *heap); #endifluametatex-2.10.08/source/libraries/pplib/ppconf.h000066400000000000000000000027151442250314700220620ustar00rootroot00000000000000 #ifndef PP_CONF_H #define PP_CONF_H /* Aux flags: PPDLL -- indicates a part of a shared library PPEXE -- indicates a host program using shared library functions */ #if defined(_WIN32) || defined(_WIN64) # ifdef PPDLL # define PPAPI __declspec(dllexport) # define PPDEF __declspec(dllexport) # else # ifdef PPEXE # define PPAPI __declspec(dllimport) # define PPDEF # else # define PPAPI # define PPDEF # endif # endif #else # define PPAPI # define PPDEF #endif /* platform vs integers */ #if defined(_WIN32) || defined(WIN32) # ifdef _MSC_VER # if defined(_M_64) || defined(_WIN64) # define MSVC64 # else # define MSVC32 # endif # else # if defined(__MINGW64__) # define MINGW64 # else # if defined(__MINGW32__) # define MINGW32 # endif # endif # endif #endif #if defined(_WIN64) || defined(__MINGW32__) # define PPINT64F "%I64d" # define PPUINT64F "%I64u" #else # define PPINT64F "%lld" # define PPUINT64F "%llu" #endif #if defined(MSVC64) # define PPINT(N) N##I64 # define PPUINT(N) N##UI64 # define PPINTF PPINT64F # define PPUINTF PPUINT64F #elif defined(MINGW64) # define PPINT(N) N##LL # define PPUINT(N) N##ULL # define PPINTF PPINT64F # define PPUINTF PPUINT64F #else // 32bit or sane 64bit (LP64, where long is long indeed) # define PPINT(N) N##L # define PPUINT(N) N##UL # define PPINTF "%ld" # define PPUINTF "%lu" #endif #define PPSIZEF PPUINTF #endif luametatex-2.10.08/source/libraries/pplib/ppcrypt.c000066400000000000000000000647451442250314700223040ustar00rootroot00000000000000 #include "utilmd5.h" #include "utilsha.h" #include "pplib.h" /* crypt struct */ static 
ppcrypt * ppcrypt_create (ppheap *heap) { ppcrypt *crypt; crypt = (ppcrypt *)ppstruct_take(heap, sizeof(ppcrypt)); memset(crypt, 0, sizeof(ppcrypt)); return crypt; } int ppcrypt_type (ppcrypt *crypt, ppname *cryptname, ppuint *length, int *cryptflags) { ppdict *filterdict; ppname *filtertype; int cryptmd = 0, default256 = 0; if (crypt->map == NULL || (filterdict = ppdict_rget_dict(crypt->map, cryptname->data)) == NULL) return 0; if ((filtertype = ppdict_get_name(filterdict, "CFM")) == NULL) return 0; *cryptflags = 0; if (ppname_is(filtertype, "V2")) *cryptflags |= PPCRYPT_INFO_RC4; else if (ppname_is(filtertype, "AESV2")) *cryptflags |= PPCRYPT_INFO_AES; else if (ppname_is(filtertype, "AESV3")) *cryptflags |= PPCRYPT_INFO_AES, default256 = 1; else return 0; /* pdf spec page. 134: /Length is said to be optional bit-length of the key, but it seems to be a mistake, as Acrobat produces /Length key with bytes lengths, opposite to /Length key of the main encrypt dict. */ if (length != NULL) if (!ppdict_get_uint(filterdict, "Length", length)) *length = (*cryptflags & PPCRYPT_INFO_RC4) ? 5 : (default256 ? 32 : 16); /* one of metadata flags is set iff there is an explicit EncryptMetadata key */ if (ppdict_get_bool(filterdict, "EncryptMetadata", &cryptmd)) *cryptflags |= (cryptmd ? 
PPCRYPT_INFO_MD : PPCRYPT_INFO_NOMD); return 1; } /* V1..4 algorithms */ /* V1..4 unicode do PdfDocEncoding */ typedef struct { uint32_t unicode; uint32_t code; uint32_t count; } map_range_t; static const map_range_t unicode_to_pdf_doc_encoding_map[] = { { 32, 32, 95 }, { 161, 161, 12 }, { 174, 174, 82 }, { 305, 154, 1 }, { 321, 149, 1 }, { 322, 155, 1 }, { 338, 150, 1 }, { 339, 156, 1 }, { 352, 151, 1 }, { 353, 157, 1 }, { 376, 152, 1 }, { 381, 153, 1 }, { 382, 158, 1 }, { 402, 134, 1 }, { 710, 26, 1 }, { 711, 25, 1 }, { 728, 24, 1 }, { 729, 27, 1 }, { 730, 30, 1 }, { 731, 29, 1 }, { 732, 31, 1 }, { 733, 28, 1 }, { 8211, 133, 1 }, { 8212, 132, 1 }, { 8216, 143, 3 }, { 8220, 141, 2 }, { 8222, 140, 1 }, { 8224, 129, 2 }, { 8226, 128, 1 }, { 8230, 131, 1 }, { 8240, 139, 1 }, { 8249, 136, 2 }, { 8260, 135, 1 }, { 8364, 160, 1 }, { 8482, 146, 1 }, { 8722, 138, 1 }, { 64257, 147, 2 } }; #define unicode_to_pdf_doc_encoding_entries (sizeof(unicode_to_pdf_doc_encoding_map) / sizeof(map_range_t)) static int unicode_to_pdf_doc_encoding (uint32_t unicode, uint8_t *pcode) { const map_range_t *left, *right, *mid; left = &unicode_to_pdf_doc_encoding_map[0]; right = &unicode_to_pdf_doc_encoding_map[unicode_to_pdf_doc_encoding_entries - 1]; for ( ; left <= right; ) { mid = left + ((right - left) / 2); if (unicode > mid->unicode + mid->count - 1) left = mid + 1; else if (unicode < mid->unicode) right = mid - 1; else { *pcode = (uint8_t)(mid->code + (unicode - mid->unicode)); return 1; } } return 0; } #define utf8_unicode2(p) (((p[0]&31)<<6)|(p[1]&63)) #define utf8_unicode3(p) (((p[0]&15)<<12)|((p[1]&63)<<6)|(p[2]&63)) #define utf8_unicode4(p) (((p[0]&7)<<18)|((p[1]&63)<<12)|((p[2]&63)<<6)|(p[3]&63)) #define utf8_get1(p, e, unicode) ((unicode = p[0]), p + 1) #define utf8_get2(p, e, unicode) (p + 1 < e ? ((unicode = utf8_unicode2(p)), p + 2) : NULL) #define utf8_get3(p, e, unicode) (p + 2 < e ? 
((unicode = utf8_unicode3(p)), p + 3) : NULL) #define utf8_get4(p, e, unicode) (p + 4 < e ? ((unicode = utf8_unicode3(p)), p + 4) : NULL) #define utf8_get(p, e, unicode) \ (p[0] < 0x80 ? utf8_get1(p, e, unicode) : \ p[0] < 0xC0 ? NULL : \ p[0] < 0xE0 ? utf8_get2(p, e, unicode) : \ p[0] < 0xF0 ? utf8_get3(p, e, unicode) : utf8_get4(p, e, unicode)) static int ppcrypt_password_encoding (uint8_t *password, size_t *passwordlength) { uint8_t *p, newpassword[PPCRYPT_MAX_PASSWORD], *n; const uint8_t *e; uint32_t unicode; for (n = &newpassword[0], p = &password[0], e = p + *passwordlength; p < e; ++n) { p = utf8_get(p, e, unicode); if (p == NULL) return 0; if (unicode_to_pdf_doc_encoding(unicode, n) == 0) return 0; } *passwordlength = n - &newpassword[0]; memcpy(password, newpassword, *passwordlength); return 1; } /* setup passwords */ static const uint8_t password_padding[] = { 0x28, 0xBF, 0x4E, 0x5E, 0x4E, 0x75, 0x8A, 0x41, 0x64, 0x00, 0x4E, 0x56, 0xFF, 0xFA, 0x01, 0x08, 0x2E, 0x2E, 0x00, 0xB6, 0xD0, 0x68, 0x3E, 0x80, 0x2F, 0x0C, 0xA9, 0xFE, 0x64, 0x53, 0x69, 0x7A }; static void ppcrypt_set_user_password (ppcrypt *crypt, const void *userpass, size_t userpasslength) { crypt->userpasslength = userpasslength > PPCRYPT_MAX_PASSWORD ? PPCRYPT_MAX_PASSWORD : userpasslength; memcpy(crypt->userpass, userpass, crypt->userpasslength); if (crypt->algorithm_variant < 5) { if (ppcrypt_password_encoding(crypt->userpass, &crypt->userpasslength) == 0) return; if (crypt->userpasslength > 32) crypt->userpasslength = 32; else if (crypt->userpasslength < 32) memcpy(&crypt->userpass[crypt->userpasslength], password_padding, 32 - crypt->userpasslength); } crypt->flags |= PPCRYPT_USER_PASSWORD; } static void ppcrypt_set_owner_password (ppcrypt *crypt, const void *ownerpass, size_t ownerpasslength) { crypt->ownerpasslength = ownerpasslength > PPCRYPT_MAX_PASSWORD ? 
PPCRYPT_MAX_PASSWORD : ownerpasslength; memcpy(crypt->ownerpass, ownerpass, crypt->ownerpasslength); if (crypt->algorithm_variant < 5) { if (ppcrypt_password_encoding(crypt->ownerpass, &crypt->ownerpasslength) == 0) return; if (crypt->ownerpasslength > 32) crypt->ownerpasslength = 32; else if (crypt->ownerpasslength < 32) memcpy(&crypt->ownerpass[crypt->ownerpasslength], password_padding, 32 - crypt->ownerpasslength); } crypt->flags |= PPCRYPT_OWNER_PASSWORD; } /* V1..4 retrieving user password from owner password and owner key (variant < 5) */ static void ppcrypt_user_password_from_owner_key (ppcrypt *crypt, const void *ownerkey, size_t ownerkeysize) { uint8_t temp[16], rc4key[32], rc4key2[32]; uint8_t i; ppuint k; md5_state md5; md5_digest_init(&md5); md5_digest_add(&md5, crypt->ownerpass, 32); md5_digest_get(&md5, rc4key, MD5_BYTES); if (crypt->algorithm_revision >= 3) { for (i = 0; i < 50; ++i) { md5_digest(rc4key, 16, temp, MD5_BYTES); memcpy(rc4key, temp, 16); } } rc4_decode_data(ownerkey, ownerkeysize, crypt->userpass, rc4key, crypt->filekeylength); if (crypt->algorithm_revision >= 3) { for (i = 1; i <= 19; ++i) { for (k = 0; k < crypt->filekeylength; ++k) rc4key2[k] = rc4key[k] ^ i; rc4_decode_data(crypt->userpass, 32, crypt->userpass, rc4key2, crypt->filekeylength); } } //crypt->userpasslength = 32; for (crypt->userpasslength = 0; crypt->userpasslength < 32; ++crypt->userpasslength) if (memcmp(&crypt->userpass[crypt->userpasslength], password_padding, 32 - crypt->userpasslength) == 0) break; crypt->flags |= PPCRYPT_USER_PASSWORD; } /* V1..4 generating file key; pdf spec p. 
125 */ static void ppcrypt_compute_file_key (ppcrypt *crypt, const void *ownerkey, size_t ownerkeysize, const void *id, size_t idsize) { uint32_t p; uint8_t permissions[4], temp[16]; int i; md5_state md5; md5_digest_init(&md5); md5_digest_add(&md5, crypt->userpass, 32); md5_digest_add(&md5, ownerkey, ownerkeysize); p = (uint32_t)crypt->permissions; permissions[0] = get_number_byte1(p); permissions[1] = get_number_byte2(p); permissions[2] = get_number_byte3(p); permissions[3] = get_number_byte4(p); md5_digest_add(&md5, permissions, 4); md5_digest_add(&md5, id, idsize); if (crypt->algorithm_revision >= 4 && (crypt->flags & PPCRYPT_NO_METADATA)) md5_digest_add(&md5, "\xFF\xFF\xFF\xFF", 4); md5_digest_get(&md5, crypt->filekey, MD5_BYTES); if (crypt->algorithm_revision >= 3) { for (i = 0; i < 50; ++i) { md5_digest(crypt->filekey, (size_t)crypt->filekeylength, temp, MD5_BYTES); memcpy(crypt->filekey, temp, 16); } } } /* V1..4 generating userkey for comparison with /U; requires a general file key and id; pdf spec page 126-127 */ static void ppcrypt_compute_user_key (ppcrypt *crypt, const void *id, size_t idsize, uint8_t password_hash[32]) { uint8_t rc4key2[32]; uint8_t i; ppuint k; if (crypt->algorithm_revision <= 2) { rc4_encode_data(password_padding, 32, password_hash, crypt->filekey, crypt->filekeylength); } else { md5_state md5; md5_digest_init(&md5); md5_digest_add(&md5, password_padding, 32); md5_digest_add(&md5, id, idsize); md5_digest_get(&md5, password_hash, MD5_BYTES); rc4_encode_data(password_hash, 16, password_hash, crypt->filekey, crypt->filekeylength); for (i = 1; i <= 19; ++i) { for (k = 0; k < crypt->filekeylength; ++k) rc4key2[k] = crypt->filekey[k] ^ i; rc4_encode_data(password_hash, 16, password_hash, rc4key2, crypt->filekeylength); } for (i = 16; i < 32; ++i) password_hash[i] = password_hash[i - 16] ^ i; /* arbitrary 16-bytes padding */ } } static ppcrypt_status ppcrypt_authenticate_legacy (ppcrypt *crypt, ppstring *userkey, ppstring *ownerkey, 
ppstring *id) { uint8_t password_hash[32]; if ((crypt->flags & PPCRYPT_USER_PASSWORD) == 0 && (crypt->flags & PPCRYPT_OWNER_PASSWORD) != 0) ppcrypt_user_password_from_owner_key(crypt, ownerkey, ownerkey->size); ppcrypt_compute_file_key(crypt, ownerkey->data, ownerkey->size, id->data, id->size); ppcrypt_compute_user_key(crypt, id->data, id->size, password_hash); /* needs file key */ return memcmp(userkey->data, password_hash, (crypt->algorithm_revision >= 3 ? 16 : 32)) == 0 ? PPCRYPT_DONE : PPCRYPT_PASS; } /* V5 */ static const uint8_t nulliv[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; /* AES-256 initialization vector */ /* V5 R5..6 password hash */ #define PPCRYPT_MAX_MANGLED ((127+64+48)*64) // 127 password, 64 hash, 48 /U key static void ppcrypt_password_hash_indeed (const uint8_t *password, size_t passwordlength, const uint8_t *userkey, uint8_t hash[64]) { size_t hashlength, datalength; uint8_t data[PPCRYPT_MAX_MANGLED], *pdata; uint8_t *key, *iv; uint8_t round, i; uint32_t div3; hashlength = 32; /* initial hash is sha256 */ round = 0; do { /* concat password, hash, and /U value 64 times */ pdata = &data[0]; memcpy(pdata, password, passwordlength); pdata += passwordlength; memcpy(pdata, hash, hashlength); pdata += hashlength; if (userkey != NULL) { memcpy(pdata, userkey, 48); pdata += 48; } datalength = pdata - &data[0]; for (i = 1; i < 64; ++i, pdata += datalength) memcpy(pdata, &data[0], datalength); datalength *= 64; /* encrypt the data with aes128 using hash bytes 1..16 as key and bytes 17..32 as initialization vector encryption inplace, CBC, no-padding, no change to datalength */ key = &hash[0]; iv = &hash[16]; aes_encode_data(data, datalength, data, key, 16, iv, AES_NULL_PADDING); /* get modulo 3 of first 16 bytes number of encrypted data (sum of digits modulo 3) */ for (i = 0, div3 = 0; i < 16; ++i) div3 += data[i]; /* compute new hash using sha256/384/512 */ switch (div3 % 3) { case 0: sha256_digest(data, datalength, hash, SHA_BYTES); 
        hashlength = 32;
        break;
      case 1:
        sha384_digest(data, datalength, hash, SHA_BYTES);
        hashlength = 48;
        break;
      case 2:
        sha512_digest(data, datalength, hash, SHA_BYTES);
        hashlength = 64;
        break;
    }
    /* do 64 times, then keep going until the last byte of data <= round - 32 */
  } while (++round < 64 || round < data[datalength - 1] + 32);
}

/* V5 password hash: sha256 of password + 8-byte salt (+ 48-byte /U key for owner checks);
   for R6 the digest is additionally mangled by ppcrypt_password_hash_indeed(). */
static void ppcrypt_password_hash (ppcrypt *crypt, const uint8_t *password, size_t passwordlength, const uint8_t *salt, const uint8_t *userkey, uint8_t password_hash[32])
{
  sha256_state sha;
  uint8_t hash[64]; /* result password_hash is 32 bytes, but we need 64 for R6 procedure */
  /* take sha256 of password, salt and /U */
  sha256_digest_init(&sha);
  sha256_digest_add(&sha, password, passwordlength);
  sha256_digest_add(&sha, salt, 8);
  if (userkey != NULL)
    sha256_digest_add(&sha, userkey, 48);
  sha256_digest_get(&sha, hash, SHA_BYTES);
  /* V5 R5 - password_hash is the digest, V5 R6 - password_hash is mangled */
  if (crypt->algorithm_revision >= 6)
    ppcrypt_password_hash_indeed(password, passwordlength, userkey, hash);
  memcpy(password_hash, hash, 32);
}

/* V5 permissions */

/* Decrypt the /Perms string with the file key and verify its 'adb' marker bytes;
   also syncs the metadata-encryption flag from byte 8 ('T'/'F'). */
static ppcrypt_status ppcrypt_authenticate_permissions (ppcrypt *crypt, ppstring *perms)
{
  uint8_t permsdata[16];
  aes_decode_data(perms->data, perms->size, permsdata, crypt->filekey, crypt->filekeylength, nulliv, AES_NULL_PADDING);
  if (permsdata[9] != 'a' || permsdata[10] != 'd' || permsdata[11] != 'b')
    return PPCRYPT_FAIL;
  /* do not check/update permissions flags here; they might be different inside crypt string */
  if (0)
  { /* disabled: would overwrite crypt->permissions from the decrypted string */
    int64_t p;
    int i;
    for (p = 0, i = 0; i < 8; ++i)
      p = p + (permsdata[i] << (i << 3)); /* low order bytes first */
    crypt->permissions = (ppint)((int32_t)(p & 0x00000000FFFFFFFFLL)); /* unset bits 33..64, treat as 32-bit signed int */
  }
  if (permsdata[8] == 'T')
    crypt->flags &= ~PPCRYPT_NO_METADATA;
  else if (permsdata[8] == 'F')
    crypt->flags |= PPCRYPT_NO_METADATA;
  return PPCRYPT_DONE;
}

/* V5 authentication */

/* Validate the user password against /U, then decrypt the file key from /UE. */
static ppcrypt_status ppcrypt_authenticate_user
(ppcrypt *crypt, ppstring *u, ppstring *ue, ppstring *perms)
{
  uint8_t password_hash[32], *salt;
  /* /U layout: 32-byte hash, 8-byte validation salt, 8-byte key salt */
  salt = (uint8_t *)&u->data[32]; /* validation salt */
  ppcrypt_password_hash(crypt, crypt->userpass, crypt->userpasslength, salt, NULL, password_hash);
  if (memcmp(u->data, password_hash, 32) != 0)
    return PPCRYPT_PASS;
  salt = (uint8_t *)&u->data[40]; /* key salt */
  ppcrypt_password_hash(crypt, crypt->userpass, crypt->userpasslength, salt, NULL, password_hash);
  /* the real file key is /UE decrypted with the key-salt hash */
  aes_decode_data(ue->data, 32, crypt->filekey, password_hash, 32, nulliv, AES_NULL_PADDING);
  return ppcrypt_authenticate_permissions(crypt, perms);
}

/* Validate the owner password against /O (which also hashes the 48-byte /U key),
   then decrypt the file key from /OE. */
static ppcrypt_status ppcrypt_authenticate_owner (ppcrypt *crypt, ppstring *u, ppstring *o, ppstring *oe, ppstring *perms)
{
  uint8_t password_hash[32], *salt;
  salt = (uint8_t *)&o->data[32]; /* validation salt */
  ppcrypt_password_hash(crypt, crypt->ownerpass, crypt->ownerpasslength, salt, (uint8_t *)u->data, password_hash);
  if (memcmp(o->data, password_hash, 32) != 0)
    return PPCRYPT_PASS;
  salt = (uint8_t *)&o->data[40]; /* key salt */
  ppcrypt_password_hash(crypt, crypt->ownerpass, crypt->ownerpasslength, salt, (uint8_t *)u->data, password_hash);
  aes_decode_data(oe->data, 32, crypt->filekey, password_hash, 32, nulliv, AES_NULL_PADDING);
  return ppcrypt_authenticate_permissions(crypt, perms);
}

/* authentication */

/* Dispatch: legacy (V1..4) comparison against /U, or V5 user-then-owner checks. */
static ppcrypt_status ppcrypt_authenticate (ppcrypt *crypt, ppstring *u, ppstring *ue, ppstring *o, ppstring *oe, ppstring *id, ppstring *perms)
{
  /* V1..V4 */
  if (crypt->algorithm_variant < 5)
    return ppcrypt_authenticate_legacy(crypt, u, o, id);
  /* V5 */
  if (crypt->flags & PPCRYPT_USER_PASSWORD)
    if (ppcrypt_authenticate_user(crypt, u, ue, perms) == PPCRYPT_DONE)
      return PPCRYPT_DONE;
  if (crypt->flags & PPCRYPT_OWNER_PASSWORD)
    return ppcrypt_authenticate_owner(crypt, u, o, oe, perms);
  return PPCRYPT_PASS;
}

/**/

/* Parse the /Encrypt dictionary of a loaded document, collect algorithm version,
   revision, keys and crypt-filter flags, then attempt authentication with the
   given passwords (either may be NULL). Returns PPCRYPT_NONE for unencrypted
   documents, PPCRYPT_DONE on success, PPCRYPT_PASS/PPCRYPT_FAIL otherwise. */
ppcrypt_status ppdoc_crypt_init (ppdoc *pdf, const void *userpass, size_t userpasslength, const void *ownerpass, size_t ownerpasslength)
{
  ppcrypt
*crypt;
  ppdict *trailer, *encrypt;
  ppobj *obj;
  ppname *name, **pkey;
  ppstring *userkey, *ownerkey, *userkey_e = NULL, *ownerkey_e = NULL;
  size_t hashlength;
  pparray *idarray;
  ppstring *id = NULL, *perms = NULL;
  int cryptflags, encryptmd;
  size_t strkeylength, stmkeylength;

  trailer = ppxref_trailer(pdf->xref);
  if ((obj = ppdict_get_obj(trailer, "Encrypt")) == NULL)
    return PPCRYPT_NONE;
  /* this happens early, before loading body, so if /Encrypt is indirect reference, it points nothing */
  obj = ppobj_preloaded(pdf, obj);
  if (obj->type != PPDICT)
    return PPCRYPT_FAIL;
  encrypt = obj->dict;
  /* force-load all values of the encrypt dict */
  for (ppdict_first(encrypt, pkey, obj); *pkey != NULL; ppdict_next(pkey, obj))
    (void)ppobj_preloaded(pdf, obj);
  /* only the standard security handler is supported */
  if ((name = ppdict_get_name(encrypt, "Filter")) != NULL && !ppname_is(name, "Standard"))
    return PPCRYPT_FAIL;
  if ((crypt = pdf->crypt) == NULL)
    crypt = pdf->crypt = ppcrypt_create(&pdf->heap);
  /* get /V /R /P */
  if (!ppdict_get_uint(encrypt, "V", &crypt->algorithm_variant))
    crypt->algorithm_variant = 0;
  if (crypt->algorithm_variant < 1 || crypt->algorithm_variant > 5)
    return PPCRYPT_FAIL;
  if (!ppdict_get_uint(encrypt, "R", &crypt->algorithm_revision))
    return PPCRYPT_FAIL;
  if (!ppdict_get_int(encrypt, "P", &crypt->permissions))
    return PPCRYPT_FAIL;
  /* get /O /U /ID /OE /UE */
  if ((userkey = ppdict_get_string(encrypt, "U")) == NULL || (ownerkey = ppdict_get_string(encrypt, "O")) == NULL)
    return PPCRYPT_FAIL;
  userkey = ppstring_decoded(userkey);
  ownerkey = ppstring_decoded(ownerkey);
  /* for some reason acrobat pads /O and /U to 127 bytes with NULL, so we don't check the exact length but ensure the minimal */
  hashlength = crypt->algorithm_variant < 5 ? 32 : 48;
  if (userkey->size < hashlength || ownerkey->size < hashlength)
    return PPCRYPT_FAIL;
  if (crypt->algorithm_variant < 5)
  { // get first string from /ID (must not be ref)
    if ((idarray = ppdict_get_array(trailer, "ID")) == NULL || (id = pparray_get_string(idarray, 0)) == NULL)
      return PPCRYPT_FAIL;
    id = ppstring_decoded(id);
  }
  else
  { /* V5 needs /UE, /OE (encrypted file keys) and the 16-byte /Perms string */
    if ((userkey_e = ppdict_get_string(encrypt, "UE")) == NULL || (ownerkey_e = ppdict_get_string(encrypt, "OE")) == NULL)
      return PPCRYPT_FAIL;
    userkey_e = ppstring_decoded(userkey_e);
    ownerkey_e = ppstring_decoded(ownerkey_e);
    if (userkey_e->size < 32 || ownerkey_e->size < 32)
      return PPCRYPT_FAIL;
    if ((perms = ppdict_get_string(encrypt, "Perms")) == NULL)
      return PPCRYPT_FAIL;
    perms = ppstring_decoded(perms);
    if (perms->size != 16)
      return PPCRYPT_FAIL;
  }
  /* collect flags and keylength */
  switch (crypt->algorithm_revision)
  {
    case 1:
      crypt->filekeylength = 5;
      crypt->flags |= PPCRYPT_RC4;
      break;
    case 2:
    case 3:
      if (ppdict_get_uint(encrypt, "Length", &crypt->filekeylength))
        crypt->filekeylength >>= 3; /* 40..256 bits, 5..32 bytes*/
      else
        crypt->filekeylength = 5; /* 40 bits, 5 bytes */
      crypt->flags |= PPCRYPT_RC4;
      break;
    case 4:
    case 5:
    case 6:
      /* crypt filters: /CF map plus /StmF and /StrF filter names */
      if ((crypt->map = ppdict_rget_dict(encrypt, "CF")) == NULL)
        return PPCRYPT_FAIL;
      for (ppdict_first(crypt->map, pkey, obj); *pkey != NULL; ppdict_next(pkey, obj))
        (void)ppobj_preloaded(pdf, obj);
      /* /EncryptMetadata relevant only for version >=4, may be also provided in crypt filter dictionary; which takes a precedence then?
         we assume that if there is an explicit EncryptMetadata key, it overrides main encrypt dict flag or default flag (the default
         is true, meaning that Metadata stream is encrypted as others) */
      if (ppdict_get_bool(encrypt, "EncryptMetadata", &encryptmd) && !encryptmd)
        crypt->flags |= PPCRYPT_NO_METADATA;
      strkeylength = stmkeylength = 0;
      /* streams filter */
      if ((name = ppdict_get_name(encrypt, "StmF")) != NULL && ppcrypt_type(crypt, name, &stmkeylength, &cryptflags))
      {
        if (cryptflags & PPCRYPT_INFO_AES)
          crypt->flags |= PPCRYPT_STREAM_AES;
        else if (cryptflags & PPCRYPT_INFO_RC4)
          crypt->flags |= PPCRYPT_STREAM_RC4;
        if (cryptflags & PPCRYPT_INFO_NOMD)
          crypt->flags |= PPCRYPT_NO_METADATA;
        else if (cryptflags & PPCRYPT_INFO_MD)
          crypt->flags &= ~PPCRYPT_NO_METADATA;
      } /* else identity */
      /* strings filter */
      if ((name = ppdict_get_name(encrypt, "StrF")) != NULL && ppcrypt_type(crypt, name, &strkeylength, &cryptflags))
      {
        if (cryptflags & PPCRYPT_INFO_AES)
          crypt->flags |= PPCRYPT_STRING_AES;
        else if (cryptflags & PPCRYPT_INFO_RC4)
          crypt->flags |= PPCRYPT_STRING_RC4;
      } /* else identity */
      /* /Length of encrypt dict is irrelevant here, theoretically every crypt filter may have own length... It means that we
         should actually keep a different file key for streams and strings. But it leads to nonsense, as /U and /O entries refers
         to a single keylength, without a distinction for strings/streams. So we have to assume /Length is consistent. To expose
         the limitation: */
      if ((crypt->flags & PPCRYPT_STREAM) && (crypt->flags & PPCRYPT_STRING))
        if (strkeylength != stmkeylength)
          return PPCRYPT_FAIL;
      crypt->filekeylength = stmkeylength ? stmkeylength : strkeylength;
      if ((crypt->flags & PPCRYPT_STREAM) || (crypt->flags & PPCRYPT_STRING))
        if (crypt->filekeylength == 0)
          return PPCRYPT_FAIL;
      break;
    default:
      return PPCRYPT_FAIL;
  }
  /* setup passwords */
  if (userpass != NULL)
    ppcrypt_set_user_password(crypt, userpass, userpasslength);
  if (ownerpass != NULL)
    ppcrypt_set_owner_password(crypt, ownerpass, ownerpasslength);
  if ((crypt->flags & (PPCRYPT_USER_PASSWORD|PPCRYPT_OWNER_PASSWORD)) == 0)
    return PPCRYPT_PASS;
  return ppcrypt_authenticate(crypt, userkey, userkey_e, ownerkey, ownerkey_e, id, perms);
}

/* decrypting strings */

/* Since strings are generally rare, but might occur in mass (name trees). We generate decryption key when needed. All strings
within the same reference are crypted with the same key. Both RC4 and AES algorithms expands the crypt key in some way and the
result of expansion is the same for the same crypt key. Instead of recreating the key for every string, we backup the initial
decryption state. */

/* Prepare (or restore) the per-reference string decryption state for crypt->ref. */
static void ppcrypt_strkey (ppcrypt *crypt, ppref *ref, int aes)
{
  if (crypt->cryptkeylength > 0)
  { /* crypt key already generated, just reinitialize crypt states */
    if (aes)
    { /* aes codecs that works on c-strings do not modify aes_state flags at all, so we actually don't need to revitalize
         the state, we only rewrite an initialization vector, which is modified during crypt procedure */
    }
    else
    { /* rc4 crypt map is modified during crypt procedure, so here we reinitialize rc4 bytes map */
      rc4_map_restore(&crypt->rc4state, &crypt->rc4copy);
    }
    return;
  }
  if (crypt->algorithm_variant < 5)
  {
    /* V1..4: salt the file key with the object number/version (and sAlT for aes) */
    crypt->filekey[crypt->filekeylength + 0] = get_number_byte1(ref->number);
    crypt->filekey[crypt->filekeylength + 1] = get_number_byte2(ref->number);
    crypt->filekey[crypt->filekeylength + 2] = get_number_byte3(ref->number);
    crypt->filekey[crypt->filekeylength + 3] = get_number_byte1(ref->version);
    crypt->filekey[crypt->filekeylength + 4] = get_number_byte2(ref->version);
    if (aes)
    {
crypt->filekey[crypt->filekeylength + 5] = 0x73; // s
      crypt->filekey[crypt->filekeylength + 6] = 0x41; // A
      crypt->filekey[crypt->filekeylength + 7] = 0x6C; // l
      crypt->filekey[crypt->filekeylength + 8] = 0x54; // T
    }
    /* per-object key = md5 of salted file key, clipped to at most 16 bytes */
    md5_digest(crypt->filekey, crypt->filekeylength + (aes ? 9 : 5), crypt->cryptkey, MD5_BYTES);
    crypt->cryptkeylength = crypt->filekeylength + 5 >= 16 ? 16 : crypt->filekeylength + 5;
  }
  else
  { /* V5: the file key is used directly, no per-object salting */
    memcpy(crypt->cryptkey, crypt->filekey, 32);
    crypt->cryptkeylength = 32;
  }
  if (aes)
  {
    aes_decode_initialize(&crypt->aesstate, &crypt->aeskeyblock, crypt->cryptkey, crypt->cryptkeylength, NULL);
    aes_pdf_mode(&crypt->aesstate);
  }
  else
  {
    rc4_state_initialize(&crypt->rc4state, &crypt->rc4map, crypt->cryptkey, crypt->cryptkeylength);
    /* keep a pristine copy so subsequent strings can restore the map cheaply */
    rc4_map_save(&crypt->rc4state, &crypt->rc4copy);
  }
}

/* Decrypt a string for the current reference (crypt->ref). Returns 1 when output
   was produced (newsize set), 0 when encryption is identity and input is unchanged. */
int ppstring_decrypt (ppcrypt *crypt, const void *input, size_t size, void *output, size_t *newsize)
{
  int aes, rc4;
  aes = crypt->flags & PPCRYPT_STRING_AES;
  rc4 = crypt->flags & PPCRYPT_STRING_RC4;
  if (aes || rc4)
  {
    ppcrypt_strkey(crypt, crypt->ref, aes);
    if (aes)
      *newsize = aes_decode_state_data(&crypt->aesstate, input, size, output);
    else // if (rc4)
      *newsize = rc4_decode_state_data(&crypt->rc4state, input, size, output);
    return 1;
  }
  return 0; // identity crypt
}

/* decrypting streams */

/* Streams are decrypted everytime when accessing the stream data. We need to be able to get or make the key for decryption
as long as the stream is alive. And to get the key we need the reference number and version, plus document crypt info. First
thought was to keep the reference to which the stream belongs; stream->ref and accessing the crypt info stream->ref->xref->pdf->crypt.
It would be ok as long as absolutelly nothing happens with ref and crypt. At some point pplib may drift into rewriting support,
which would imply ref/xref/crypt/pdf structures modifications.
So I feel better with generating a crypt key for every stream in encrypted document, paying a cost of md5 for all streams, not
necessarily those actually read. Key generation is the same as for strings, but different for distinct encryption methods
(rc4 vs aes). Since streams and strings might theoretically be encrypted with different filters. No reason to cache decryption
state here. */

/* Build and return the per-stream crypt key for the given reference as a heap string. */
ppstring * ppcrypt_stmkey (ppcrypt *crypt, ppref *ref, int aes, ppheap *heap)
{
  ppstring *cryptkeystring;
  //if (crypt->cryptkeylength > 0)
  //  return;
  if (crypt->algorithm_variant < 5)
  {
    /* V1..4: same object-number/version salting as for strings */
    crypt->filekey[crypt->filekeylength + 0] = get_number_byte1(ref->number);
    crypt->filekey[crypt->filekeylength + 1] = get_number_byte2(ref->number);
    crypt->filekey[crypt->filekeylength + 2] = get_number_byte3(ref->number);
    crypt->filekey[crypt->filekeylength + 3] = get_number_byte1(ref->version);
    crypt->filekey[crypt->filekeylength + 4] = get_number_byte2(ref->version);
    if (aes)
    { /* extra 'sAlT' bytes for aes */
      crypt->filekey[crypt->filekeylength + 5] = 0x73;
      crypt->filekey[crypt->filekeylength + 6] = 0x41;
      crypt->filekey[crypt->filekeylength + 7] = 0x6C;
      crypt->filekey[crypt->filekeylength + 8] = 0x54;
    }
    md5_digest(crypt->filekey, crypt->filekeylength + (aes ? 9 : 5), crypt->cryptkey, MD5_BYTES);
    crypt->cryptkeylength = crypt->filekeylength + 5 >= 16 ? 16 : crypt->filekeylength + 5; // how about 256bits AES??
  }
  else
  { // we could actually generate this string once, but..
aes itself is way more expensive that we can earn here
    memcpy(crypt->cryptkey, crypt->filekey, 32); // just for the record
    crypt->cryptkeylength = 32;
  }
  /* hand out the key as an internal heap string owned by the caller's heap */
  cryptkeystring = ppstring_internal(crypt->cryptkey, crypt->cryptkeylength, heap);
  return ppstring_decoded(cryptkeystring);
}
luametatex-2.10.08/source/libraries/pplib/ppcrypt.h000066400000000000000000000055171442250314700223010ustar00rootroot00000000000000
#ifndef PP_CRYPT_H
#define PP_CRYPT_H

#include "ppfilter.h"
#include "utilcrypt.h"
#include "utilcryptdef.h"

#define PPCRYPT_MAX_PASSWORD 127
#define PPCRYPT_MAX_KEY 32

/* Decryption state of an encrypted document; one per ppdoc. */
typedef struct {
  ppuint algorithm_variant;                /* /V entry of encrypt dict */
  ppuint algorithm_revision;               /* /R entry of encrypt dict */
  ppint permissions;                       /* /P entry of encrypt dict */
  ppdict *map;                             /* /CF filters map of encrypt dict */
  uint8_t userpass[PPCRYPT_MAX_PASSWORD];  /* user password */
  size_t userpasslength;                   /* user password length */
  uint8_t ownerpass[PPCRYPT_MAX_PASSWORD]; /* owner password */
  size_t ownerpasslength;                  /* owner password length */
  uint8_t filekey[PPCRYPT_MAX_KEY+5+4];    /* file key with an extra space for salt */
  size_t filekeylength;                    /* key length; usually 5, 16 or 32 bytes */
  uint8_t cryptkey[PPCRYPT_MAX_KEY];       /* crypt key for a recent reference */
  size_t cryptkeylength;                   /* crypt key length; usually keylength + 5 */
  //ppstring *cryptkeystring;              /* todo: cached cryptkey string for V5, where all refs has the same */
  ppref *ref;                              /* recent reference */
  union {                                  /* cached crypt states for strings encrypted/decrypted with the same key */
    struct {
      rc4_state rc4state;
      rc4_map rc4map;
      rc4_map rc4copy;
    };
    struct {
      aes_state aesstate;
      aes_keyblock aeskeyblock;
      uint8_t ivcopy[16];
    };
  };
  int flags;
} ppcrypt;

#define PPCRYPT_NO_METADATA (1<<0)
#define PPCRYPT_USER_PASSWORD (1<<1)
#define PPCRYPT_OWNER_PASSWORD (1<<2)
#define PPCRYPT_STREAM_RC4 (1<<3)
#define PPCRYPT_STRING_RC4 (1<<4)
#define PPCRYPT_STREAM_AES (1<<5)
#define PPCRYPT_STRING_AES (1<<6)
#define PPCRYPT_STREAM
(PPCRYPT_STREAM_AES|PPCRYPT_STREAM_RC4)
#define PPCRYPT_STRING (PPCRYPT_STRING_AES|PPCRYPT_STRING_RC4)
#define PPCRYPT_RC4 (PPCRYPT_STREAM_RC4|PPCRYPT_STRING_RC4)
#define PPCRYPT_AES (PPCRYPT_STREAM_AES|PPCRYPT_STRING_AES)

/* flags reported by ppcrypt_type() */
#define PPCRYPT_INFO_AES (1<<0)
#define PPCRYPT_INFO_RC4 (1<<1)
#define PPCRYPT_INFO_MD (1<<2)
#define PPCRYPT_INFO_NOMD (1<<3)

ppcrypt_status ppdoc_crypt_init (ppdoc *pdf, const void *userpass, size_t userpasslength, const void *ownerpass, size_t ownerpasslength);
int ppstring_decrypt (ppcrypt *crypt, const void *input, size_t size, void *output, size_t *newsize);

/* begin/end decrypting objects of a given reference; resets the cached crypt key */
#define ppcrypt_start_ref(crypt, r) ((crypt)->ref = r, (crypt)->cryptkeylength = 0)
#define ppcrypt_end_ref(crypt) ((crypt)->ref = NULL, (crypt)->cryptkeylength = 0)
#define ppcrypt_ref(pdf, crypt) ((crypt = (pdf)->crypt) != NULL && crypt->ref != NULL)

int ppcrypt_type (ppcrypt *crypt, ppname *cryptname, ppuint *length, int *cryptflags);
ppstring * ppcrypt_stmkey (ppcrypt *crypt, ppref *ref, int aes, ppheap *heap);

#endif
luametatex-2.10.08/source/libraries/pplib/ppdict.c000066400000000000000000000105411442250314700220470ustar00rootroot00000000000000
#include "pplib.h"

/* Build a dict from size stack entries (key/value pairs); non-name keys are skipped. */
ppdict * ppdict_create (const ppobj *stackpos, size_t size, ppheap *heap)
{
  ppdict *dict;
  ppobj *data;
  ppname **pkey;
  size_t i;
  size >>= 1; // num of key-value pairs
  dict = (ppdict *)ppstruct_take(heap, sizeof(ppdict));
  dict->data = data = (ppobj *)ppstruct_take(heap, size * sizeof(ppobj));
  dict->keys = pkey = (ppname **)ppstruct_take(heap, (size + 1) * sizeof(ppname **));
  dict->size = 0;
  for (i = 0; i < size; ++i, stackpos += 2)
  {
    if (stackpos->type != PPNAME) // we need this check at lest for trailer hack
      continue;
    *pkey = stackpos->name;
    *data = *(stackpos + 1);
    ++pkey, ++data, ++dict->size;
  }
  *pkey = NULL; // sentinel for convinient iteration
  return dict;
}

/* Linear lookup of a key by its C-string name; returns the value object or NULL. */
ppobj * ppdict_get_obj (ppdict *dict, const char *name)
{
  ppname **pkey;
  ppobj *obj;
  for (ppdict_first(dict, pkey, obj); *pkey != NULL; ppdict_next(pkey, obj))
    if
(strcmp((*pkey)->data, name) == 0) // not ppname_eq() or ppname_is()!!
      return obj;
  return NULL;
}

/* Typed accessors; the rget variants resolve indirect references first.
   Numeric/bool getters return 1 and fill *v on success, 0 otherwise. */

ppobj * ppdict_rget_obj (ppdict *dict, const char *name)
{
  ppobj *obj;
  return (obj = ppdict_get_obj(dict, name)) != NULL ? ppobj_rget_obj(obj) : NULL;
}

int ppdict_get_bool (ppdict *dict, const char *name, int *v)
{
  ppobj *obj;
  return (obj = ppdict_get_obj(dict, name)) != NULL ? ppobj_get_bool(obj, *v) : 0;
}

int ppdict_rget_bool (ppdict *dict, const char *name, int *v)
{
  ppobj *obj;
  return (obj = ppdict_get_obj(dict, name)) != NULL ? ppobj_rget_bool(obj, *v) : 0;
}

int ppdict_get_int (ppdict *dict, const char *name, ppint *v)
{
  ppobj *obj;
  return (obj = ppdict_get_obj(dict, name)) != NULL ? ppobj_get_int(obj, *v) : 0;
}

int ppdict_rget_int (ppdict *dict, const char *name, ppint *v)
{
  ppobj *obj;
  return (obj = ppdict_get_obj(dict, name)) != NULL ? ppobj_rget_int(obj, *v) : 0;
}

int ppdict_get_uint (ppdict *dict, const char *name, ppuint *v)
{
  ppobj *obj;
  return (obj = ppdict_get_obj(dict, name)) != NULL ? ppobj_get_uint(obj, *v) : 0;
}

int ppdict_rget_uint (ppdict *dict, const char *name, ppuint *v)
{
  ppobj *obj;
  return (obj = ppdict_get_obj(dict, name)) != NULL ? ppobj_rget_uint(obj, *v) : 0;
}

int ppdict_get_num (ppdict *dict, const char *name, ppnum *v)
{
  ppobj *obj;
  return (obj = ppdict_get_obj(dict, name)) != NULL ? ppobj_get_num(obj, *v) : 0;
}

int ppdict_rget_num (ppdict *dict, const char *name, ppnum *v)
{
  ppobj *obj;
  return (obj = ppdict_get_obj(dict, name)) != NULL ? ppobj_rget_num(obj, *v) : 0;
}

ppname * ppdict_get_name (ppdict *dict, const char *name)
{
  ppobj *obj;
  return (obj = ppdict_get_obj(dict, name)) != NULL ? ppobj_get_name(obj) : NULL;
}

ppname * ppdict_rget_name (ppdict *dict, const char *name)
{
  ppobj *obj;
  return (obj = ppdict_get_obj(dict, name)) != NULL ? ppobj_rget_name(obj) : NULL;
}

ppstring * ppdict_get_string (ppdict *dict, const char *name)
{
  ppobj *obj;
  return (obj = ppdict_get_obj(dict, name)) != NULL ?
ppobj_get_string(obj) : NULL;
}

ppstring * ppdict_rget_string (ppdict *dict, const char *name)
{
  ppobj *obj;
  return (obj = ppdict_get_obj(dict, name)) != NULL ? ppobj_rget_string(obj) : NULL;
}

pparray * ppdict_get_array (ppdict *dict, const char *name)
{
  ppobj *obj;
  return (obj = ppdict_get_obj(dict, name)) != NULL ? ppobj_get_array(obj) : NULL;
}

pparray * ppdict_rget_array (ppdict *dict, const char *name)
{
  ppobj *obj;
  return (obj = ppdict_get_obj(dict, name)) != NULL ? ppobj_rget_array(obj) : NULL;
}

ppdict * ppdict_get_dict (ppdict *dict, const char *name)
{
  ppobj *obj;
  return (obj = ppdict_get_obj(dict, name)) != NULL ? ppobj_get_dict(obj) : NULL;
}

ppdict * ppdict_rget_dict (ppdict *dict, const char *name)
{
  ppobj *obj;
  return (obj = ppdict_get_obj(dict, name)) != NULL ? ppobj_rget_dict(obj) : NULL;
}

/* direct stream getter kept disabled; streams are normally reached via references */
/*
ppstream * ppdict_get_stream (ppdict *dict, const char *name)
{
  ppobj *obj;
  return (obj = ppdict_get_obj(dict, name)) != NULL ? ppobj_get_stream(obj) : NULL;
}
*/

ppstream * ppdict_rget_stream (ppdict *dict, const char *name)
{
  ppobj *obj;
  return (obj = ppdict_get_obj(dict, name)) != NULL ? ppobj_rget_stream(obj) : NULL;
}

ppref * ppdict_get_ref (ppdict *dict, const char *name)
{
  ppobj *obj;
  return (obj = ppdict_get_obj(dict, name)) != NULL ? ppobj_get_ref(obj) : NULL;
}

ppref * ppdict_rget_ref (ppdict *dict, const char *name)
{
  ppobj *obj;
  return (obj = ppdict_get_obj(dict, name)) != NULL ?
ppobj_rget_ref(obj) : NULL; } luametatex-2.10.08/source/libraries/pplib/ppdict.h000066400000000000000000000001651442250314700220550ustar00rootroot00000000000000 #ifndef PP_DICT_H #define PP_DICT_H ppdict * ppdict_create (const ppobj *stack, size_t size, ppheap *heap); #endifluametatex-2.10.08/source/libraries/pplib/ppfilter.h000066400000000000000000000002141442250314700224120ustar00rootroot00000000000000 #ifndef PP_FILTER_H #define PP_FILTER_H #include "utilbasexx.h" #include "utilflate.h" #include "utillzw.h" #include "utilfpred.h" #endifluametatex-2.10.08/source/libraries/pplib/ppheap.c000066400000000000000000000015641442250314700220460ustar00rootroot00000000000000 #include "pplib.h" #define PPBYTES_HEAP_BLOCK 0xFFF #define PPBYTES_HEAP_LARGE (PPBYTES_HEAP_BLOCK >> 2) #define PPSTRUCT_HEAP_BLOCK 0xFFF #define PPSTRUCT_HEAP_LARGE (PPSTRUCT_HEAP_BLOCK >> 2) void ppheap_init (ppheap *heap) { ppstruct_heap_init(heap, PPSTRUCT_HEAP_BLOCK, PPSTRUCT_HEAP_LARGE, 0); ppbytes_heap_init(heap, PPBYTES_HEAP_BLOCK, PPBYTES_HEAP_LARGE, 0); } void ppheap_free (ppheap *heap) { ppstruct_heap_free(heap); ppbytes_heap_free(heap); } void ppheap_renew (ppheap *heap) { ppstruct_heap_clear(heap); ppbytes_heap_clear(heap); ppbytes_buffer_init(heap); } ppbyte * ppbytes_flush (ppheap *heap, iof *O, size_t *psize) { ppbyte *data; size_t size; //ASSERT(&heap->bytesheap == O->link); iof_put(O, '\0'); data = (ppbyte *)O->buf; size = (size_t)iof_size(O); ppbytes_heap_done(heap, data, size); *psize = size - 1; return data; }luametatex-2.10.08/source/libraries/pplib/ppheap.h000066400000000000000000000034661442250314700220560ustar00rootroot00000000000000 #ifndef PP_HEAP_H #define PP_HEAP_H #include "utilmem.h" #define pp_malloc util_malloc //#define pp_callic util_calloc //#define pp_realloc util_realloc #define pp_free util_free #include "utilmemheapiof.h" //#include "utilmeminfo.h" #define ppbytes_heap heap16 #define ppbytes_heap_init(heap, space, large, flags) (heap16_init(&(heap)->bytesheap, 
space, large, flags), heap16_head(&(heap)->bytesheap)) //#define ppbytes_heap_some(heap, size, pspace) _heap16_some(&(heap)->bytesheap, size, pspace) #define ppbytes_heap_done(heap, data, written) heap16_done(&(heap)->bytesheap, data, written) #define ppbytes_heap_clear(heap) heap16_clear(&(heap)->bytesheap) #define ppbytes_heap_free(heap) heap16_free(&(heap)->bytesheap) #define ppbytes_heap_info(heap, info, append) heap16_stats(&(heap)->bytesheap, info, append) #define ppbytes_take(heap, size) _heap16_take(&(heap)->bytesheap, size) #define ppbytes_buffer_init(heap) heap16_buffer_init(&(heap)->bytesheap, &(heap)->bytesbuffer) #define ppbytes_buffer(heap, atleast) _heap16_buffer_some(&(heap)->bytesheap, &(heap)->bytesbuffer, atleast) #define ppstruct_heap heap64 #define ppstruct_heap_init(heap, space, large, flags) (heap64_init(&(heap)->structheap, space, large, flags), heap64_head(&(heap)->structheap)) #define ppstruct_heap_clear(heap) heap64_clear(&(heap)->structheap) #define ppstruct_heap_free(heap) heap64_free(&(heap)->structheap) #define ppstruct_heap_info(heap, info, append) heap64_stats(&(heap)->structheap, info, append) #define ppstruct_take(heap, size) _heap64_take(&(heap)->structheap, size) typedef struct { ppbytes_heap bytesheap; ppstruct_heap structheap; iof bytesbuffer; } ppheap; ppbyte * ppbytes_flush (ppheap *heap, iof *O, size_t *psize); void ppheap_init (ppheap *heap); void ppheap_free (ppheap *heap); void ppheap_renew (ppheap *heap); #endifluametatex-2.10.08/source/libraries/pplib/pplib.h000066400000000000000000000005111442250314700216730ustar00rootroot00000000000000 #ifndef PP_LIB_H #define PP_LIB_H #include #include #include #include #include "utiliof.h" #include "utillog.h" #include "ppapi.h" #include "ppheap.h" #include "ppdict.h" #include "ppstream.h" #include "pparray.h" #include "ppcrypt.h" #include "ppxref.h" #include "ppload.h" 
#endifluametatex-2.10.08/source/libraries/pplib/ppload.c000066400000000000000000002432601442250314700220510ustar00rootroot00000000000000 #include "pplib.h" const char * ppobj_kind[] = { "none", "null", "bool", "integer", "number", "name", "string", "array", "dict", "stream", "ref" }; #define ignored_char(c) (c == 0x20 || c == 0x0A || c == 0x0D || c == 0x09 || c == 0x00) #define newline_char(c) (c == 0x0A || c == 0x0D) #define IGNORED_CHAR_CASE 0x20: case 0x0A: case 0x0D: case 0x09: case 0x00 #define NEWLINE_CHAR_CASE 0x0A: case 0x0D #define DIGIT_CHAR_CASE '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9' #define OCTAL_CHAR_CASE '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7' #define MAX_INT_DIGITS 32 #define PP_LENGTH_UNKNOWN ((size_t)-1) static const char * ppref_str (ppuint refnumber, ppuint refversion) { static char buffer[MAX_INT_DIGITS + 1 + MAX_INT_DIGITS + 1 + 1 + 1]; sprintf(buffer, "%lu %lu R", (unsigned long)(refnumber), (unsigned long)(refversion)); return buffer; } /* name */ /* pdf spec page 57: "The name may include any regular characters, but not delimiter or white-space characters (see Section 3.1, Lexical Conventions)." "The token / (a slash followed by no regular characters) is a valid name" "Beginning with PDF 1.2, any character except null (character code 0) may be included in a name by writing its 2-digit hexadecimal code, preceded by the number sign character (#); see implementation notes 3 and 4 in Appendix H. This syntax is required to represent any of the delimiter or white-space characters or the number sign character itself; it is recommended but not required for characters whose codes are outside the range 33 (!) to 126 (~)." This suggests we should accept bytes 128..255 as a part of the name. */ // pdf name delimiters: 0..32, ()<>[]{}/% // # treated specially // .+- are valid part of name; keep in mind names such as -| | |- .notdef ABCDEF+Font etc. 
static const int8_t ppname_byte_lookup[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, '#', 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }; /* 20190827: The end of the name is any byte with 0 lookup value. When reading a ref object or objstm stream containing a single name, we may get input byte IOFEOF (-1), which must not be treated as 255. So a check for (c >= 0) is needed, otherwise we keep writing byte 255 to the output buffer until not enough memory. 
*/ #define ppnamebyte(c) (c >= 0 && ppname_byte_lookup[(uint8_t)(c)]) static const int8_t pphex_byte_lookup[] = { -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1, -1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 }; /* no need for (c >= 0) check here */ #define pphex(c) pphex_byte_lookup[(uint8_t)(c)] #define PPNAME_INIT (7 + 1) static ppname * ppscan_name (iof *I, ppheap *heap) { ppname *encoded, *decoded; iof *O; int decode, c; uint8_t *p, *e; int8_t h1, h2; O = ppbytes_buffer(heap, PPNAME_INIT); for (decode = 0, c = iof_char(I); ppnamebyte(c); c = iof_next(I)) { if (c == '#') decode = 1; iof_put(O, c); } encoded = (ppname *)ppstruct_take(heap, sizeof(ppname)); encoded->data = ppbytes_flush(heap, O, &encoded->size); if (decode) { O = ppbytes_buffer(heap, encoded->size); // decoded always a bit smaller for (p = (uint8_t *)encoded->data, e = p + encoded->size; p < e; ++p) { if (*p == '#' && p + 2 < e && (h1 = pphex(p[1])) >= 0 && (h2 = pphex(p[2])) >= 0) { iof_set(O, ((h1 << 4)|h2)); p += 2; } else iof_set(O, *p); } decoded = (ppname *)ppstruct_take(heap, sizeof(ppname)); decoded->data = ppbytes_flush(heap, O, &decoded->size); encoded->flags = PPNAME_ENCODED; decoded->flags = PPNAME_DECODED; encoded->alterego = decoded, decoded->alterego = encoded; } else { encoded->flags = 0; encoded->alterego 
= encoded; } return encoded; } static ppname * ppscan_exec (iof *I, ppheap *heap, uint8_t firstbyte) { ppname *encoded, *decoded; iof *O; int decode, c; uint8_t *p, *e; int8_t h1, h2; O = ppbytes_buffer(heap, PPNAME_INIT); iof_put(O, firstbyte); for (decode = 0, c = iof_char(I); ppnamebyte(c); c = iof_next(I)) { if (c == '#') decode = 1; iof_put(O, c); } encoded = (ppname *)ppstruct_take(heap, sizeof(ppname)); encoded->data = ppbytes_flush(heap, O, &encoded->size); if (decode) { O = ppbytes_buffer(heap, encoded->size); for (p = (uint8_t *)encoded->data, e = p + encoded->size; p < e; ++p) { if (*p == '#' && p + 2 < e && (h1 = pphex(p[1])) >= 0 && (h2 = pphex(p[2])) >= 0) { iof_set(O, ((h1 << 4)|h2)); p += 2; } else iof_set(O, *p); } decoded = (ppname *)ppstruct_take(heap, sizeof(ppname)); decoded->data = ppbytes_flush(heap, O, &decoded->size); encoded->flags = PPNAME_EXEC|PPNAME_ENCODED; decoded->flags = PPNAME_EXEC|PPNAME_DECODED; encoded->alterego = decoded, decoded->alterego = encoded; } else { encoded->flags = PPNAME_EXEC; encoded->alterego = encoded; } return encoded; } static ppname * ppname_internal (const void *data, size_t size, int flags, ppheap *heap) { // so far needed only for 'EI' operator ppname *encoded; encoded = (ppname *)ppstruct_take(heap, sizeof(ppname)); encoded->data = (ppbyte *)ppbytes_take(heap, size + 1); memcpy(encoded->data, data, size); encoded->data[size] = '\0'; encoded->size = size; encoded->alterego = encoded; encoded->flags = flags; return encoded; } #define ppexec_internal(data, size, heap) ppname_internal(data, size, PPNAME_EXEC, heap) ppname * ppname_decoded (ppname *name) { return (name->flags & PPNAME_ENCODED) ? name->alterego : name; } ppname * ppname_encoded (ppname *name) { return (name->flags & PPNAME_DECODED) ? name->alterego : name; } ppbyte * ppname_decoded_data (ppname *name) { return (name->flags & PPNAME_ENCODED) ? 
name->alterego->data : name->data; } ppbyte * ppname_encoded_data (ppname *name) { return (name->flags & PPNAME_DECODED) ? name->alterego->data : name->data; } /* string */ static const int8_t ppstring_byte_escape[] = { /* -1 escaped with octal, >0 escaped with \\, 0 left intact*/ -1,-1,-1,-1,-1,-1,-1,-1,'b','t','n',-1,'f','r',-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0, 0, 0, 0, 0, 0, 0, 0,'(',')', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,'\\', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 }; //// pp string #define PPSTRING_INIT (7 + 1) #define ppstring_check_bom(decoded) ((void)\ (decoded->size >= 2 ? (ppstring_utf16be_bom(decoded->data) ? (decoded->flags |= PPSTRING_UTF16BE) : \ (ppstring_utf16le_bom(decoded->data) ? (decoded->flags |= PPSTRING_UTF16LE) : 0)) : 0)) #define ppstring_check_bom2(decoded, encoded) ((void)\ (decoded->size >= 2 ? (ppstring_utf16be_bom(decoded->data) ? ((decoded->flags |= PPSTRING_UTF16BE), (encoded->flags |= PPSTRING_UTF16BE)) : \ (ppstring_utf16le_bom(decoded->data) ? 
((decoded->flags |= PPSTRING_UTF16LE), (encoded->flags |= PPSTRING_UTF16LE)) : 0)) : 0)) #define ppstring_utf16be_bom(data) (data[0] == '\xFE' && data[1] == '\xFF') #define ppstring_utf16le_bom(data) (data[0] == '\xFF' && data[1] == '\xFE') #define ppstringesc(c) ppstring_byte_escape[(uint8_t)(c)] static ppstring * ppscan_string (iof *I, ppheap *heap) { ppstring *encoded, *decoded; iof *O; int c, decode, balance; uint8_t *p, *e; O = ppbytes_buffer(heap, PPSTRING_INIT); for (decode = 0, balance = 0, c = iof_char(I); c >= 0; ) { switch (c) { case '\\': decode = 1; iof_put(O, '\\'); if ((c = iof_next(I)) >= 0) { iof_put(O, c); c = iof_next(I); } break; case '(': // may be unescaped if balanced ++balance; iof_put(O, '('); c = iof_next(I); break; case ')': if (balance == 0) { c = IOFEOF; ++I->pos; break; } --balance; iof_put(O, ')'); c = iof_next(I); break; default: iof_put(O, c); c = iof_next(I); } } encoded = (ppstring *)ppstruct_take(heap, sizeof(ppstring)); encoded->data = ppbytes_flush(heap, O, &encoded->size); if (decode) { O = ppbytes_buffer(heap, encoded->size); // decoded can only be smaller for (p = (uint8_t *)encoded->data, e = p + encoded->size; p < e; ++p) { if (*p == '\\') { if (++p >= e) break; switch (*p) { case OCTAL_CHAR_CASE: c = *p - '0'; if (++p < e && *p >= '0' && *p <= '7') { c = (c << 3) + *p - '0'; if (++p < e && *p >= '0' && *p <= '7') c = (c << 3) + *p - '0'; } iof_set(O, c); break; case 'n': iof_set(O, '\n'); break; case 'r': iof_set(O, '\r'); break; case 't': iof_set(O, '\t'); break; case 'b': iof_set(O, '\b'); break; case 'f': iof_set(O, '\f'); break; case NEWLINE_CHAR_CASE: // not a part of the string, ignore (pdf spec page 55) break; case '(': case ')': case '\\': default: // for anything else backslash is ignored (pdf spec page 54) iof_set(O, *p); break; } } else iof_set(O, *p); } decoded = (ppstring *)ppstruct_take(heap, sizeof(ppstring)); decoded->data = ppbytes_flush(heap, O, &decoded->size); encoded->flags = PPSTRING_ENCODED; 
decoded->flags = PPSTRING_DECODED; encoded->alterego = decoded, decoded->alterego = encoded; ppstring_check_bom2(decoded, encoded); } else { encoded->flags = 0; encoded->alterego = encoded; ppstring_check_bom(encoded); } return encoded; } static ppstring * ppscan_base16 (iof *I, ppheap *heap) { ppstring *encoded, *decoded; iof *O; int c; uint8_t *p, *e; int8_t h1, h2; O = ppbytes_buffer(heap, PPSTRING_INIT); for (c = iof_char(I); (pphex(c) >= 0 || ignored_char(c)); c = iof_next(I)) iof_put(O, c); if (c == '>') ++I->pos; encoded = (ppstring *)ppstruct_take(heap, sizeof(ppstring)); encoded->data = ppbytes_flush(heap, O, &encoded->size); O = ppbytes_buffer(heap, ((encoded->size + 1) >> 1) + 1); // decoded can only be smaller for (p = (uint8_t *)encoded->data, e = p + encoded->size; p < e; ++p) { if ((h1 = pphex(*p)) < 0) // ignored continue; for (h2 = 0, ++p; p < e && (h2 = pphex(*p)) < 0; ++p); iof_set(O, (h1 << 4)|h2); } decoded = (ppstring *)ppstruct_take(heap, sizeof(ppstring)); decoded->data = ppbytes_flush(heap, O, &decoded->size); encoded->flags = PPSTRING_BASE16|PPSTRING_ENCODED; decoded->flags = PPSTRING_BASE16|PPSTRING_DECODED; encoded->alterego = decoded, decoded->alterego = encoded; ppstring_check_bom2(decoded, encoded); return encoded; } static ppstring * ppstring_buffer (iof *O, ppheap *heap) { ppstring *encoded, *decoded; uint8_t *p, *e; decoded = (ppstring *)ppstruct_take(heap, sizeof(ppstring)); decoded->data = ppbytes_flush(heap, O, &decoded->size); O = ppbytes_buffer(heap, (decoded->size << 1) + 1); // the exact size known for (p = (uint8_t *)decoded->data, e = p + decoded->size; p < e; ++p) iof_set2(O, base16_uc_alphabet[(*p) >> 4], base16_uc_alphabet[(*p) & 0xF]); encoded = ppstruct_take(heap, sizeof(ppstring)); encoded->data = ppbytes_flush(heap, O, &encoded->size); encoded->flags = PPSTRING_BASE16|PPSTRING_ENCODED; decoded->flags = PPSTRING_BASE16|PPSTRING_DECODED; encoded->alterego = decoded, decoded->alterego = encoded; // 
ppstring_check_bom2(decoded, encoded); // ? return encoded; } ppstring * ppstring_internal (const void *data, size_t size, ppheap *heap) { // so far used only for crypt key iof *O; O = ppbytes_buffer(heap, size); memcpy(O->buf, data, size); O->pos = O->buf + size; return ppstring_buffer(O, heap); } /* base85; local function for that to make that part independent from utilbasexx */ static const int8_t ppstring_base85_lookup[] = { -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14, 15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30, 31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46, 47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62, 63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78, 79,80,81,82,83,84,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 }; #define base85_value(c) ppstring_base85_lookup[(uint8_t)(c)] #define base85_code(c1, c2, c3, c4, c5) ((((c1 * 85 + c2) * 85 + c3) * 85 + c4) * 85 + c5) #define base85_eof(c) (c == '~' || c < 0) static iof_status ppscan_base85_decode (iof *I, iof *O) { int c1, c2, c3, c4, c5; uint32_t code; while (iof_ensure(O, 4)) { do { c1 = iof_get(I); } while (ignored_char(c1)); if (base85_eof(c1)) return IOFEOF; switch (c1) { case 'z': iof_set4(O, '\0', '\0', '\0', '\0'); continue; case 'y': iof_set4(O, ' ', ' ', ' ', ' '); continue; } do { c2 = iof_get(I); } while (ignored_char(c2)); if (base85_eof(c2)) return IOFERR; do { c3 = iof_get(I); } while (ignored_char(c3)); if (base85_eof(c3)) { if ((c1 = base85_value(c1)) < 0 || (c2 = base85_value(c2)) < 0) return IOFERR; code = 
base85_code(c1, c2, 84, 84, 84); /* padding with 'u' (117); 117-33 = 84 */ iof_set(O, (code >> 24)); return IOFEOF; } do { c4 = iof_get(I); } while (ignored_char(c4)); if (base85_eof(c4)) { if ((c1 = base85_value(c1)) < 0 || (c2 = base85_value(c2)) < 0 || (c3 = base85_value(c3)) < 0) return IOFERR; code = base85_code(c1, c2, c3, 84, 84); iof_set2(O, code>>24, ((code>>16) & 0xff)); return IOFEOF; } do { c5 = iof_get(I); } while (ignored_char(c5)); if (base85_eof(c5)) { if ((c1 = base85_value(c1)) < 0 || (c2 = base85_value(c2)) < 0 || (c3 = base85_value(c3)) < 0 || (c4 = base85_value(c4)) < 0) return IOFERR; code = base85_code(c1, c2, c3, c4, 84); iof_set3(O, (code >> 24), ((code >> 16) & 0xff), ((code >> 8) & 0xff)); return IOFEOF; } if ((c1 = base85_value(c1)) < 0 || (c2 = base85_value(c2)) < 0 || (c3 = base85_value(c3)) < 0 || (c4 = base85_value(c4)) < 0 || (c5 = base85_value(c5)) < 0) return IOFERR; code = base85_code(c1, c2, c3, c4, c5); iof_set4(O, (code >> 24), ((code >> 16) & 0xff), ((code >> 8) & 0xff), (code & 0xff)); } return IOFFULL; } static ppstring * ppscan_base85 (iof *I, ppheap *heap) { // base85 alphabet is 33..117, adobe also hires 'z' and 'y' for compression ppstring *encoded, *decoded; iof *O, B; int c; O = ppbytes_buffer(heap, PPSTRING_INIT); for (c = iof_char(I); (c >= '!' 
&& c <= 'u') || c == 'z' || c == 'y'; c = iof_next(I)) iof_put(O, c); if (c == '~') if ((c = iof_next(I)) == '>') ++I->pos; encoded = (ppstring *)ppstruct_take(heap, sizeof(ppstring)); encoded->data = ppbytes_flush(heap, O, &encoded->size); iof_string_reader(&B, encoded->data, encoded->size); O = ppbytes_buffer(heap, (encoded->size * 5 / 4) + 1); // may be larger that that because of 'z' and 'y' ppscan_base85_decode(&B, O); decoded = (ppstring *)ppstruct_take(heap, sizeof(ppstring)); decoded->data = ppbytes_flush(heap, O, &decoded->size); encoded->flags = PPSTRING_BASE85|PPSTRING_ENCODED; decoded->flags = PPSTRING_BASE85|PPSTRING_DECODED; encoded->alterego = decoded, decoded->alterego = encoded; ppstring_check_bom2(decoded, encoded); return encoded; } ppstring * ppstring_decoded (ppstring *string) { return (string->flags & PPSTRING_ENCODED) ? string->alterego : string; } ppstring * ppstring_encoded (ppstring *string) { return (string->flags & PPSTRING_DECODED) ? string->alterego : string; } ppbyte * ppstring_decoded_data (ppstring *string) { return (string->flags & PPSTRING_ENCODED) ? string->alterego->data : string->data; } ppbyte * ppstring_encoded_data (ppstring *string) { return (string->flags & PPSTRING_DECODED) ? 
string->alterego->data : string->data; } /* encrypted string */ static ppstring * ppscan_crypt_string (iof *I, ppcrypt *crypt, ppheap *heap) { ppstring *encoded, *decoded; iof *O; int c, b, balance, encode; uint8_t *p, *e; size_t size; O = ppbytes_buffer(heap, PPSTRING_INIT); for (balance = 0, encode = 0, c = iof_char(I); c >= 0; ) { switch (c) { case '\\': if ((c = iof_next(I)) < 0) break; encode = 1; switch (c) { case OCTAL_CHAR_CASE: b = c - '0'; if ((c = iof_next(I)) >= 0 && c >= '0' && c <= '7') { b = (b << 3) + c - '0'; if ((c = iof_next(I)) >= 0 && c >= '0' && c <= '7') { b = (b << 3) + c - '0'; c = iof_next(I); } } iof_put(O, b); // c is set to the next char break; case 'n': iof_put(O, '\n'); c = iof_next(I); break; case 'r': iof_put(O, '\r'); c = iof_next(I); break; case 't': iof_put(O, '\t'); c = iof_next(I); break; case 'b': iof_put(O, '\b'); c = iof_next(I); break; case 'f': iof_put(O, '\f'); c = iof_next(I); break; case NEWLINE_CHAR_CASE: // not a part of the string, ignore (pdf spec page 55) c = iof_next(I); break; case '(': case ')': case '\\': default: // for enything else backslash is ignored (pdf spec page 54) iof_put(O, c); c = iof_next(I); break; } break; case '(': ++balance; encode = 1; iof_put(O, '('); c = iof_next(I); break; case ')': if (balance == 0) { c = IOFEOF; ++I->pos; } else { --balance; //encode = 1; iof_put(O, ')'); c = iof_next(I); } break; default: if (ppstringesc(c) != 0) encode = 1; iof_put(O, c); c = iof_next(I); } } /* decrypt the buffer in place, update size */ if (ppstring_decrypt(crypt, O->buf, iof_size(O), O->buf, &size)) O->pos = O->buf + size; decoded = (ppstring *)ppstruct_take(heap, sizeof(ppstring)); decoded->data = ppbytes_flush(heap, O, &decoded->size); /* make encoded counterpart */ if (encode) { O = ppbytes_buffer(heap, decoded->size + 1); // we don't know for (p = (uint8_t *)decoded->data, e = p + decoded->size; p < e; ++p) { b = ppstringesc(*p); switch (b) { case 0: iof_put(O, *p); break; case -1: iof_put4(O, 
'\\', ((*p) >> 6) + '0', (((*p) >> 3) & 7) + '0', ((*p) & 7) + '0'); break; default: iof_put2(O, '\\', b); break; } } encoded = (ppstring *)ppstruct_take(heap, sizeof(ppstring)); encoded->data = ppbytes_flush(heap, O, &encoded->size); encoded->flags = PPSTRING_ENCODED; decoded->flags = PPSTRING_DECODED; encoded->alterego = decoded, decoded->alterego = encoded; ppstring_check_bom2(decoded, encoded); } else { decoded->flags = 0; decoded->alterego = decoded; ppstring_check_bom(decoded); encoded = decoded; } return encoded; } static ppstring * ppscan_crypt_base16 (iof *I, ppcrypt *crypt, ppheap *heap) { ppstring *encoded, *decoded; iof *O; int c; uint8_t *p, *e; int8_t h1, h2; size_t size; O = ppbytes_buffer(heap, PPSTRING_INIT); for (c = iof_char(I); c != '>'; ) { if ((h1 = pphex(c)) < 0) { if (ignored_char(c)) { c = iof_next(I); continue; } break; } do { c = iof_next(I); if ((h2 = pphex(c)) >= 0) { c = iof_next(I); break; } if (!ignored_char(c)) // c == '>' || c < 0 or some crap { h2 = 0; break; } } while (1); iof_put(O, (h1 << 4)|h2); } if (c == '>') ++I->pos; /* decrypt the buffer in place, update size */ if (ppstring_decrypt(crypt, O->buf, iof_size(O), O->buf, &size)) O->pos = O->buf + size; decoded = (ppstring *)ppstruct_take(heap, sizeof(ppstring)); decoded->data = ppbytes_flush(heap, O, &decoded->size); O = ppbytes_buffer(heap, (decoded->size << 1) + 1); for (p = (uint8_t *)decoded->data, e = p + decoded->size; p < e; ++p) iof_set2(O, base16_uc_alphabet[(*p) >> 4], base16_uc_alphabet[(*p) & 0xF]); encoded = (ppstring *)ppstruct_take(heap, sizeof(ppstring)); encoded->data = ppbytes_flush(heap, O, &encoded->size); encoded->flags = PPSTRING_BASE16|PPSTRING_ENCODED; decoded->flags = PPSTRING_BASE16|PPSTRING_DECODED; encoded->alterego = decoded, decoded->alterego = encoded; ppstring_check_bom2(decoded, encoded); return encoded; } /* scanner stack */ #define PPSTACK_BUFFER 512 static void ppstack_init (ppstack *stack, ppheap *heap) { stack->buf = stack->pos = (ppobj 
*)pp_malloc(PPSTACK_BUFFER * sizeof(ppobj)); stack->size = 0; stack->space = PPSTACK_BUFFER; stack->heap = heap; } #define ppstack_free_buffer(stack) (pp_free((stack)->buf)) static void ppstack_resize (ppstack *stack) { ppobj *newbuffer; stack->space <<= 1; newbuffer = (ppobj *)pp_malloc(stack->space * sizeof(ppobj)); memcpy(newbuffer, stack->buf, stack->size * sizeof(ppobj)); ppstack_free_buffer(stack); stack->buf = newbuffer; stack->pos = newbuffer + stack->size; } #define ppstack_push(stack) ((void)((stack)->size < (stack)->space || (ppstack_resize(stack), 0)), ++(stack)->size, (stack)->pos++) #define ppstack_pop(stack, n) ((stack)->size -= (n), (stack)->pos -= (n)) #define ppstack_at(stack, i) ((stack)->buf + i) #define ppstack_clear(stack) ((stack)->pos = (stack)->buf, (stack)->size = 0) /* scanner commons */ #define ppscan_uint(I, u) iof_get_usize(I, u) #define ppread_uint(s, u) string_to_usize((const char *)(s), u) static ppobj * ppscan_numobj (iof *I, ppobj *obj, int negative) { ppint integer; ppnum number; int exponent; int c; c = iof_char(I); iof_scan_integer(I, c, integer); switch(c) { case '.': { number = (ppnum)integer; c = iof_next(I); iof_scan_fraction(I, c, number, exponent); double_negative_exp10(number, exponent); obj->type = PPNUM, obj->number = negative ? -number : number; break; } default: obj->type = PPINT, obj->integer = negative ? -integer : integer; break; } return obj; } static ppobj * ppscan_numobj_frac (iof *I, ppobj *obj, int negative) { ppnum number; int c, exponent; number = 0.0; c = iof_next(I); iof_scan_fraction(I, c, number, exponent); double_negative_exp10(number, exponent); obj->type = PPNUM, obj->number = negative ? 
-number : number; return obj; } static int ppscan_find (iof *I) { // skips whitechars and comments int c; for (c = iof_char(I); ; c = iof_next(I)) { switch (c) { case IGNORED_CHAR_CASE: break; case '%': { do { if ((c = iof_next(I)) < 0) return c; } while (!newline_char(c)); break; } default: return c; } } return c; // never reached } static int ppscan_keyword (iof *I, const char *keyword, size_t size) { size_t i; int c; if ((size_t)iof_left(I) >= size) { if (memcmp(I->pos, keyword, size) != 0) return 0; I->pos += size; return 1; } // sticky case, we can't go back for (i = 0, c = iof_char(I); i < size; ++i, ++keyword, c = iof_next(I)) if (c < 0 || *keyword != c) /* PJ20190503 bugfix: there was (i!=c), we actually never get here anyway */ return 0; return 1; } #define ppscan_key(I, literal) ppscan_keyword(I, "" literal, sizeof(literal) - 1) /* objects parser */ static ppref * ppref_unresolved (ppheap *heap, ppuint refnumber, ppuint refversion) { ppref *ref = (ppref *)ppstruct_take(heap, sizeof(ppref)); memset(ref, 0, sizeof(ppref)); ref->object.type = PPNONE; ref->number = refnumber; ref->version = refversion; return ref; } #define PPMARK PPNONE static ppobj * ppscan_obj (iof *I, ppdoc *pdf, ppxref *xref) { int c; ppobj *obj; size_t mark, size; ppuint refnumber, refversion; ppref *ref; ppstack *stack; ppcrypt *crypt; stack = &pdf->stack; c = iof_char(I); switch (c) { case DIGIT_CHAR_CASE: return ppscan_numobj(I, ppstack_push(stack), 0); case '.': return ppscan_numobj_frac(I, ppstack_push(stack), 0); case '+': ++I->pos; return ppscan_numobj(I, ppstack_push(stack), 0); case '-': ++I->pos; return ppscan_numobj(I, ppstack_push(stack), 1); case '/': ++I->pos; obj = ppstack_push(stack); obj->type = PPNAME; obj->name = ppscan_name(I, &pdf->heap); return obj; case '(': ++I->pos; obj = ppstack_push(stack); obj->type = PPSTRING; if (ppcrypt_ref(pdf, crypt)) obj->string = ppscan_crypt_string(I, crypt, &pdf->heap); else obj->string = ppscan_string(I, &pdf->heap); return obj; 
case '[': mark = stack->size; obj = ppstack_push(stack); obj->type = PPMARK; // ppscan_obj() checks types backward for 'R', so set the type immediatelly (reserved for PPARRAY) obj->any = NULL; ++I->pos; for (c = ppscan_find(I); c != ']'; c = ppscan_find(I)) { if (ppscan_obj(I, pdf, xref) == NULL) { // callers assume that NULL returns means nothing pushed size = stack->size - mark; // pop items AND the obj reserved for array ppstack_pop(stack, size); return NULL; } } ++I->pos; size = stack->size - mark - 1; obj = ppstack_at(stack, mark); // stack might have been realocated obj->type = PPARRAY; obj->array = pparray_create(ppstack_at(stack, mark + 1), size, &pdf->heap); ppstack_pop(stack, size); // pop array items, leave the array on top return obj; case '<': if ((c = iof_next(I)) == '<') { mark = stack->size; obj = ppstack_push(stack); obj->type = PPMARK; obj->any = NULL; ++I->pos; for (c = ppscan_find(I); c != '>'; c = ppscan_find(I)) { if (ppscan_obj(I, pdf, xref) == NULL) { size = stack->size - mark; ppstack_pop(stack, size); return NULL; } } if (iof_next(I) == '>') ++I->pos; size = stack->size - mark - 1; obj = ppstack_at(stack, mark); obj->type = PPDICT; obj->dict = ppdict_create(ppstack_at(stack, mark + 1), size, &pdf->heap); ppstack_pop(stack, size); return obj; } obj = ppstack_push(stack); obj->type = PPSTRING; if (ppcrypt_ref(pdf, crypt)) obj->string = ppscan_crypt_base16(I, crypt, &pdf->heap); else obj->string = ppscan_base16(I, &pdf->heap); return obj; case 'R': if (stack->size >= 2 && stack->pos[-1].type == PPINT && stack->pos[-2].type == PPINT) { ++I->pos; obj = &stack->pos[-2]; refnumber = (ppuint)obj->integer; ppstack_pop(stack, 1); // pop version number, retype obj to a reference if (xref == NULL || (ref = ppxref_find(xref, refnumber)) == NULL) { /* pdf spec page 64: unresolvable reference is not an error, should just be treated as a reference to null. 
we also need this to read trailer, where refs can't be resolved yet */ refversion = (obj + 1)->integer; //if (xref != NULL) // loggerf("unresolved reference %s", ppref_str(refnumber, refversion)); ref = ppref_unresolved(stack->heap, refnumber, refversion); } obj->type = PPREF; obj->ref = ref; return obj; } break; case 't': if (iof_next(I) == 'r' && iof_next(I) == 'u' && iof_next(I) == 'e') { ++I->pos; obj = ppstack_push(stack); obj->type = PPBOOL; obj->integer = 1; return obj; } break; case 'f': if (iof_next(I) == 'a' && iof_next(I) == 'l' && iof_next(I) == 's' && iof_next(I) == 'e') { ++I->pos; obj = ppstack_push(stack); obj->type = PPBOOL; obj->integer = 0; return obj; } break; case 'n': if (iof_next(I) == 'u' && iof_next(I) == 'l' && iof_next(I) == 'l') { ++I->pos; obj = ppstack_push(stack); obj->type = PPNULL; obj->any = NULL; return obj; } break; } return NULL; } /* A variant for contents streams (aka postscript); wise of operators, blind to references. We are still PDF, so we don't care about postscript specific stuff such as radix numbers and scientific numbers notation. It takes ppstack * as context (no ppdoc *) to be able to run contents parser beyond the scope of ppdoc heap. 
*/ static ppstring * ppstring_inline (iof *I, ppdict *imagedict, ppheap *heap); static ppobj * ppscan_psobj (iof *I, ppstack *stack) { int c; ppobj *obj, *op; size_t size, mark; ppname *exec; ppbyte *data; c = iof_char(I); switch (c) { case DIGIT_CHAR_CASE: return ppscan_numobj(I, ppstack_push(stack), 0); case '.': return ppscan_numobj_frac(I, ppstack_push(stack), 0); case '+': c = iof_next(I); if (base10_digit(c)) // '+.abc' is probably an executable name, but we are not in postscript return ppscan_numobj(I, ppstack_push(stack), 0); else if (c == '.') return ppscan_numobj_frac(I, ppstack_push(stack), 0); obj = ppstack_push(stack); obj->type = PPNAME; obj->name = ppscan_exec(I, stack->heap, '+'); return obj; case '-': c = iof_next(I); if (base10_digit(c)) // ditto, we would handle type1 '-|' '|-' operators though return ppscan_numobj(I, ppstack_push(stack), 1); else if (c == '.') return ppscan_numobj_frac(I, ppstack_push(stack), 1); obj = ppstack_push(stack); obj->type = PPNAME; obj->name = ppscan_exec(I, stack->heap, '-'); return obj; case '/': ++I->pos; obj = ppstack_push(stack); obj->type = PPNAME; obj->name = ppscan_name(I, stack->heap); return obj; case '(': ++I->pos; obj = ppstack_push(stack); obj->type = PPSTRING; obj->string = ppscan_string(I, stack->heap); return obj; case '[': mark = stack->size; obj = ppstack_push(stack); obj->type = PPMARK; obj->any = NULL; ++I->pos; for (c = ppscan_find(I); c != ']'; c = ppscan_find(I)) { if (ppscan_psobj(I, stack) == NULL) { size = stack->size - mark; ppstack_pop(stack, size); return NULL; } } ++I->pos; size = stack->size - mark - 1; obj = ppstack_at(stack, mark); obj->type = PPARRAY; obj->array = pparray_create(ppstack_at(stack, mark + 1), size, stack->heap); ppstack_pop(stack, size); return obj; case '<': if ((c = iof_next(I)) == '<') { mark = stack->size; obj = ppstack_push(stack); obj->type = PPMARK; obj->any = NULL; ++I->pos; for (c = ppscan_find(I); c != '>'; c = ppscan_find(I)) { if (ppscan_psobj(I, stack) == 
NULL) { size = stack->size - mark; ppstack_pop(stack, size); return NULL; } } if (iof_next(I) == '>') ++I->pos; size = stack->size - mark - 1; obj = ppstack_at(stack, mark); obj->type = PPDICT; obj->dict = ppdict_create(ppstack_at(stack, mark + 1), size, stack->heap); ppstack_pop(stack, size); return obj; } obj = ppstack_push(stack); obj->type = PPSTRING; if (c == '~') ++I->pos, obj->string = ppscan_base85(I, stack->heap); else obj->string = ppscan_base16(I, stack->heap); return obj; default: if (!ppnamebyte(c)) break; // forbid empty names; dead loop otherwise ++I->pos; /* true false null practically don't occur in streams so it makes sense to assume that we get an operator name here. If it happen to be a keyword we could give back those several bytes to the heap but.. heap buffer is tricky enough. */ exec = ppscan_exec(I, stack->heap, (uint8_t)c); data = exec->data; obj = ppstack_push(stack); switch (data[0]) { case 't': if (data[1] == 'r' && data[2] == 'u' && data[3] == 'e' && data[4] == '\0') { obj->type = PPBOOL; obj->integer = 1; return obj; } break; case 'f': if (data[1] == 'a' && data[2] == 'l' && data[3] == 's' && data[4] == 'e' && data[5] == '\0') { obj->type = PPBOOL; obj->integer = 0; return obj; } break; case 'n': if (data[1] == 'u' && data[2] == 'l' && data[3] == 'l' && data[4] == '\0') { obj->type = PPNULL; obj->any = NULL; return obj; } break; case 'B': /* Inline images break rules of operand/operator syntax, so 'BI/ID' operators need to be treated as special syntactic keywords. BI IDEI We treat the image as a single syntactic token; BI starts collecting a dict, ID is the beginning of the data. Effectively EI operator obtains two operands - dict and string. It is ok to put three items onto the stack, callers dont't assume there is just one. 
*/ if (data[1] == 'I' && data[2] == '\0') { ppdict *imagedict; ppname *name; /* key val pairs -> dict */ mark = stack->size - 1; obj->type = PPMARK; obj->any = NULL; for (c = ppscan_find(I); ; c = ppscan_find(I)) { if ((op = ppscan_psobj(I, stack)) == NULL) { size = stack->size - mark; ppstack_pop(stack, size); return NULL; } if (op->type == PPNAME) { name = op->name; if (name->flags & PPNAME_EXEC) { if (name->size != 2 || name->data[0] != 'I' || name->data[1] != 'D') { // weird size = stack->size - mark; ppstack_pop(stack, size); return NULL; } break; } } } size = stack->size - mark - 1; obj = ppstack_at(stack, mark); obj->type = PPDICT; obj->dict = imagedict = ppdict_create(ppstack_at(stack, mark + 1), size, stack->heap); ppstack_pop(stack, size); /* put image data string */ obj = ppstack_push(stack); obj->type = PPSTRING; obj->string = ppstring_inline(I, imagedict, stack->heap);; /* put EI operator name */ obj = ppstack_push(stack); obj->type = PPNAME; obj->name = ppexec_internal("EI", 2, stack->heap); return obj; } break; } obj->type = PPNAME; obj->name = exec; return obj; } return NULL; } /* We try to get the exact inline image length from its dict params. If cannot predict the length, we have to scan the input until 'EI'. I've checked on may examples that it gives the same results but one can never be sure, as 'EI' might happen to be a part of the data. Stripping white char is also very heuristic; \0 is a white char in PDF and very likely to be a data byte.. weak method (pdf spec page 352). Revision 20190327: inline images may be compressed, in which case we can't predict the length. 
*/ static size_t inline_image_length (ppdict *dict) { ppuint w, h, bpc, colors; ppname *cs; if (ppdict_get_name(dict, "F") == NULL) { if (ppdict_get_uint(dict, "W", &w) && ppdict_get_uint(dict, "H", &h) && ppdict_get_uint(dict, "BPC", &bpc) && (cs = ppdict_get_name(dict, "CS")) != NULL) { if (ppname_is(cs, "DeviceGray")) colors = 1; else if (ppname_is(cs, "DeviceRGB")) colors = 3; else if (ppname_is(cs, "DeviceCMYK")) colors = 4; else return PP_LENGTH_UNKNOWN; return (w * h * bpc * colors + 7) >> 3; } } return PP_LENGTH_UNKNOWN; } static ppstring * ppstring_inline (iof *I, ppdict *imagedict, ppheap *heap) { iof *O; int c, d, e; size_t length, leftin, leftout, bytes; c = iof_char(I); if (ignored_char(c)) c = iof_next(I); length = inline_image_length(imagedict); if (length != PP_LENGTH_UNKNOWN) { O = ppbytes_buffer(heap, length); while (length > 0 && iof_readable(I) && iof_writable(O)) { leftin = iof_left(I); leftout = iof_left(O); bytes = length; if (bytes > leftin) bytes = leftin; if (bytes > leftout) bytes = leftout; memcpy(O->pos, I->pos, bytes); I->pos += bytes; O->pos += bytes; length -= bytes; } // gobble EI if (ppscan_find(I) == 'E') if (iof_next(I) == 'I') ++I->pos; } else { O = ppbytes_buffer(heap, PPSTRING_INIT); // ? while (c >= 0) { if (c == 'E') { d = iof_next(I); if (d == 'I') { e = iof_next(I); if (!ppnamebyte(e)) { /* strip one newline from the end and stop */ if (O->pos - 2 >= O->buf) // sanity { c = *(O->pos - 1); if (ignored_char(c)) { if (c == 0x0A && *(O->pos - 2) == 0x0D) O->pos -= 2; else O->pos -= 1; } } break; } iof_put2(O, c, d); c = e; } else { iof_put(O, c); c = d; } } else { iof_put(O, c); c = iof_next(I); } } } return ppstring_buffer(O, heap); } /* input reader */ /* PDF input is a pseudo file that either keeps FILE * or data. Reader iof * is a proxy to input that provides byte-by-byte interface. 
Our iof structure is capable to link iof_file *input, but to avoid redundant checks on IOF_DATA
flag, here we link iof *I directly to FILE * or mem buffer. When reading from file we need an
internal buffer, which should be kept rather small, as it is only used to parse xrefs and objects
(no streams). We allocate the buffer from a private heap (not static) to avoid conflicts when
processing >1 pdfs at once. Besides, the input buffer may be needed after loading the document,
eg. to access references raw data.
*/

#define PPDOC_BUFFER 0xFFF // keep that small, it is only used to parse body objects

/* Bind pdf->reader to the given input: an in-memory string reader for IOF_DATA
   inputs, or a file-handle reader backed by a small private buffer otherwise. */

static void ppdoc_reader_init (ppdoc *pdf, iof_file *input)
{
  iof *I;
  pdf->input = *input;
  input = &pdf->input;
  input->refcount = 1;
  I = &pdf->reader;
  if (input->flags & IOF_DATA)
  {
    pdf->buffer = NULL; // input iof_file is the buffer
    iof_string_reader(I, NULL, 0);
  }
  else
  {
    pdf->buffer = (uint8_t *)ppbytes_take(&pdf->heap, PPDOC_BUFFER);
    iof_setup_file_handle_reader(I, NULL, 0, iof_file_get_fh(input)); // gets IOF_FILE_HANDLE flag and FILE *
    I->space = PPDOC_BUFFER; // used on refill
  }
}

/*
Whenever we need to read the input file, we fseek to the given offset and fread to the private
buffer. The length we need is not always predictable, in which case PPDOC_BUFFER bytes are read
(keep it small). I->buf = I->pos is set to the beginning, I->end set to the end (end is the
first byte one shouldn't read).
*/

static iof * ppdoc_reader (ppdoc *pdf, size_t offset, size_t length)
{
  iof_file *input;
  iof *I;
  input = &pdf->input;
  I = &pdf->reader;
  if (iof_file_seek(input, (long)offset, SEEK_SET) != 0)
    return NULL;
  I->flags &= ~IOF_STOPPED;
  if (input->flags & IOF_DATA)
  { // window directly into the memory buffer, clipped to the requested length
    I->buf = I->pos = input->pos;
    I->end = (length == PP_LENGTH_UNKNOWN || I->pos + length >= input->end) ? input->end : (I->pos + length);
  }
  else
  {
    I->buf = I->pos = pdf->buffer; // ->buf is actually permanently equal pdf->buffer but we might need some tricks
    if (length == PP_LENGTH_UNKNOWN || length > PPDOC_BUFFER)
      length = PPDOC_BUFFER;
    length = fread(I->buf, 1, length, I->file);
    I->end = I->buf + length;
  }
  return I;
}

/*
The position from the beginning of input
- for data buffer: (pdf->input.pos - pdf->input.buf) + (I->pos - I->buf)
  I->buf == pdf->input.pos, so this resolves to (I->pos - pdf->input.buf), independent from I->buf
- for file buffer: ftell(pdf->input.file) - (I->end - I->pos)
*/

#define ppdoc_reader_tell(pdf, I) ((size_t)(((pdf)->input.flags & IOF_DATA) ? ((I)->pos - (pdf)->input.buf) : (ftell(iof_file_get_fh(&(pdf)->input)) - ((I)->end - (I)->pos))))

/* pdf */

#define PPDOC_HEADER 10 // "%PDF-?.??\n"

/* Validate the "%PDF-" magic and copy the version chars (up to the first white
   char) into pdf->version. Returns 1 on success, 0 on a non-PDF header. */

static int ppdoc_header (ppdoc *pdf, uint8_t header[PPDOC_HEADER])
{
  size_t i;
  if (memcmp(header, "%PDF-", 5) != 0)
    return 0;
  for (i = 5; i < PPDOC_HEADER - 1 && !ignored_char(header[i]); ++i)
    pdf->version[i - 5] = header[i];
  pdf->version[i - 5] = '\0';
  return 1;
}

/* Locate the trailing "startxref <offset>" by scanning backwards from EOF and
   store the offset in *pxrefoffset. Also records pdf->filesize. Returns 1 on
   success, 0 when the tail doesn't look like a PDF trailer. */

static int ppdoc_tail (ppdoc *pdf, iof_file *input, size_t *pxrefoffset)
{
  int c;
  uint8_t tail[4*10], *p, back, tailbytes;
  if (iof_file_seek(input, 0, SEEK_END) != 0)
    return 0;
  pdf->filesize = (size_t)iof_file_tell(input);
  // simple heuristic to avoid fgetc() / fseek(-2) hiccup: keep seeking back by len(startxref) + 1 == 10
  // until a letter found (assuming liberal white characters and tail length)
  for (back = 1, tailbytes = 0; ; ++back)
  {
    if (iof_file_seek(input, -10, SEEK_CUR) != 0)
      return 0;
    tailbytes += 10;
    c = iof_file_getc(input);
    tailbytes -= 1;
    switch (c)
    {
      case IGNORED_CHAR_CASE:
      case DIGIT_CHAR_CASE:
      case '%': case 'E': case 'O': case 'F': // white, digits, or "%%EOF" bytes: keep backing up
        if (back > 4) // 2 should be enough
          return 0;
        continue;
      case 's': case 't': case 'a': case 'r': case 'x': case 'e': case 'f': // a letter of "startxref"
        if (iof_file_read(tail, 1, tailbytes, input) != tailbytes)
          return 0;
        tail[tailbytes] = '\0';
        for (p = &tail[0]; ; ++p)
        { // find the first digit after the keyword
          if (*p == '\0')
            return 0;
          if ((c = base10_value(*p)) >= 0)
            break;
        }
        ppread_uint(p, pxrefoffset);
        return 1;
      default:
        return 0;
    }
  }
  return 0; // never reached
}

/* xref/body */

/* Consume "<number> <version> obj" verifying both numbers against ref; leaves
   the reader positioned at the object itself. Returns 1 on match, 0 otherwise. */

static int ppscan_start_entry (iof *I, ppref *ref)
{
  ppuint u;
  ppscan_find(I);
  if (!ppscan_uint(I, &u) || u != ref->number)
    return 0;
  ppscan_find(I);
  if (!ppscan_uint(I, &u) || u != ref->version)
    return 0;
  ppscan_find(I);
  if (!ppscan_key(I, "obj"))
    return 0;
  ppscan_find(I);
  return 1;
}

/* Like ppscan_start_entry() but without checking the numbers. */

static int ppscan_skip_entry (iof *I)
{
  ppuint u;
  ppscan_find(I);
  if (!ppscan_uint(I, &u))
    return 0;
  ppscan_find(I);
  if (!ppscan_uint(I, &u))
    return 0;
  ppscan_find(I);
  if (!ppscan_key(I, "obj"))
    return 0;
  ppscan_find(I);
  return 1;
}

/* Consume the "stream" keyword and its single EOL; on success stores the data
   start position in *streamoffset and returns 1. */

static int ppscan_start_stream (iof *I, ppdoc *pdf, size_t *streamoffset)
{
  int c;
  ppscan_find(I);
  if (ppscan_key(I, "stream"))
  { // PJ20180912 bugfix: we were gobbling white characters (also null byte), while "stream" may be followed by EOL
    // pdf spec page 60: "CARRIAGE RETURN and a LINE FEED or just a LINE FEED, and not by a CARRIAGE RETURN alone"
    c = iof_char(I);
    if (c == 0x0D)
    {
      if (iof_next(I) == 0x0A) // should be
        ++I->pos;
    }
    else if (c == 0x0A)
    {
      ++I->pos;
    }
    *streamoffset = ppdoc_reader_tell(pdf, I);
    return 1;
  }
  return 0;
}

static ppxref * ppxref_load (ppdoc *pdf, size_t xrefoffset);
static ppxref * ppxref_load_chain (ppdoc *pdf, ppxref *xref);

/*
Parsing xref table

  1 10                  // first ref number and refs count
  0000000000 00000 n    // 10-digits offset, 5 digits version, type identifier
  0000000000 00000 n    // n states for normal I guess
  0000000000 00000 f    // f states for free (not used)
  ...

Free entries seem to be a relic of ancient times, completely useless for us. To avoid parsing
xref table twice, we waste some space on free entries by allocating one plane of refs for each
section. Later on we slice sections, so that effectively free entries are not involved in map.

Subsequent refs gets number, version and offset. Other fields initialized when parsing PDF body.

Having xref table loaded, we sort sections for future binary search (xref with objects count == 0
is considered invalid).

Then we have to deal with the trailer dict. In general, to load objects and resolve references
we need a complete chain of xrefs (not only the top). To load the previous xref, we need its
offset, which is given in trailer. So we have to parse the trailer ignoring references, which
might be unresolvable at this point (objects parser makes a dummy check for xref != NULL on refs
resolving ppscan_obj(), which irritates me but I don't want a separate parser for trailer..).
The same applies to xref streams, in which we have parse the trailer not having xref map at all.
So the procedure is:

  - load xref map, initialize references, make it ready to search
  - parse trailer ignoring references
  - get /Prev xref offset and load older xref (linked list via ->prev)
  - sort all refs in all xrefs by offset
  - parse refs in order resolving references in contained objects
  - fix trailer references

First created xref becomes a pdf->xref (top xref). We link that early to control offsets
already read (insane loops?).
*/

// Every xref table item "0000000000 00000 n" is said to be terminated with 2-byte EOL but we don't like relying on whites.
#define xref_item_length (10 + 1 + 5 + 1 + 1) static ppxref * ppxref_load_table (iof *I, ppdoc *pdf, size_t xrefoffset) { ppxref *xref; ppxsec *xrefsection; ppref *ref; ppuint first, count, refindex; uint8_t buffer[xref_item_length + 1]; const char *p; const ppobj *obj; buffer[xref_item_length] = '\0'; xref = ppxref_create(pdf, 0, xrefoffset); if (pdf->xref == NULL) pdf->xref = xref; for (ppscan_find(I); ppscan_uint(I, &first); ppscan_find(I)) { ppscan_find(I); if (!ppscan_uint(I, &count)) return NULL; if (count == 0) // weird continue; xref->count += count; xrefsection = NULL; ref = (ppref *)ppstruct_take(&pdf->heap, count * sizeof(ppref)); for (refindex = 0; refindex < count; ++refindex, ++ref) { ref->xref = xref; ref->number = first + refindex; ppscan_find(I); iof_read(I, buffer, xref_item_length); switch (buffer[xref_item_length - 1]) { case 'n': if (xrefsection == NULL) { xrefsection = ppxref_push_section(xref, &pdf->heap); xrefsection->first = ref->number; xrefsection->refs = ref; } xrefsection->last = ref->number; for (p = (const char *)buffer; *p == '0'; ++p); p = ppread_uint(p, &ref->offset); for ( ; *p == ' ' || *p == '0'; ++p); p = ppread_uint(p, &ref->version); ref->object.type = PPNONE; // init for sanity ref->object.any = NULL; ref->length = 0; break; case 'f': default: --ref; xrefsection = NULL; --xref->count; } } } /* sort section */ ppxref_sort(xref); // case of xref->size == 0 handled by ppxref_load_chain() /* get trailer ignoring refs */ if (!ppscan_key(I, "trailer")) return NULL; ppscan_find(I); if ((obj = ppscan_obj(I, pdf, NULL)) == NULL) return NULL; ppstack_pop(&pdf->stack, 1); if (obj->type != PPDICT) return NULL; xref->trailer = *obj; return ppxref_load_chain(pdf, xref); } /* Parsing xref stream First we load the trailer, ignoring references. Dict defines sections and fields lengths: /Size % max ref number plus 1 /Index [ first count first count ... 
] % a pair of numbers for every section, defaults to [0 Size] /W [w1 w2 w3] % fields lengths, 0 states for omitted field xref stream data is a continuous stream of binary number triplets. First number is a type: 0 - free entry (as 'f' in xref table) 1 - normal entry, followed by offset an version (as 'n' in xref table) 2 - compressed entry, followed by parent object stream number and entry index 0 and 1 are handled as 'n' and 'f' entries in xref table. For type 2 we normally initialize ref->number and ref->version (the later is implicitly 0). ref->offset is set to 0 (invalid offset), which is recognized by objects loader. */ #define XREF_STREAM_MAX_FIELD 4 static ppxref * ppxref_load_stream (iof *I, ppdoc *pdf, size_t xrefoffset) { ppxref *xref; ppxsec *xrefsection; ppref *ref; ppobj *obj; ppstream *xrefstream; size_t streamoffset; ppuint w1, w2, w3, w, bufferbytes; uint8_t buffer[3 * XREF_STREAM_MAX_FIELD], *b; ppuint first, count, f1, f2, f3; pparray *fieldwidths, *sectionindices; ppobj sectionmock[2], *sectionfirst, *sectioncount; size_t sections, sectionindex, refindex; if (!ppscan_skip_entry(I)) return NULL; if ((obj = ppscan_obj(I, pdf, NULL)) == NULL) return NULL; ppstack_pop(&pdf->stack, 1); if (obj->type != PPDICT || !ppscan_start_stream(I, pdf, &streamoffset)) return NULL; xrefstream = ppstream_create(pdf, obj->dict, streamoffset); ppstream_info(xrefstream, pdf); if ((fieldwidths = ppdict_get_array(xrefstream->dict, "W")) != NULL) { if (!pparray_get_uint(fieldwidths, 0, &w1)) w1 = 0; if (!pparray_get_uint(fieldwidths, 1, &w2)) w2 = 0; if (!pparray_get_uint(fieldwidths, 2, &w3)) w3 = 0; } else w1 = w2 = w3 = 0; if (w1 > XREF_STREAM_MAX_FIELD || w2 > XREF_STREAM_MAX_FIELD || w3 > XREF_STREAM_MAX_FIELD) return NULL; bufferbytes = w1 + w2 + w3; if ((sectionindices = ppdict_get_array(xrefstream->dict, "Index")) != NULL) { sections = sectionindices->size >> 1; sectionfirst = sectionindices->data; } else { sections = 1; sectionmock[0].type = PPINT; 
sectionmock[0].integer = 0; sectionmock[1].type = PPINT; if (!ppdict_get_int(xrefstream->dict, "Size", §ionmock[1].integer)) sectionmock[1].integer = 0; sectionfirst = §ionmock[0]; } if ((I = ppstream_read(xrefstream, 1, 0)) == NULL) return NULL; // we fseek() so original I is useless anyway xref = ppxref_create(pdf, sections, xrefoffset); if (pdf->xref == NULL) pdf->xref = xref; xref->trailer.type = PPSTREAM; xref->trailer.stream = xrefstream; for (sectionindex = 0; sectionindex < sections; ++sectionindex, sectionfirst += 2) { sectioncount = sectionfirst + 1; first = 0, count = 0; // warnings if (!ppobj_get_uint(sectionfirst, first) || !ppobj_get_uint(sectioncount, count)) goto xref_stream_error; if (count == 0) continue; xref->count += count; xrefsection = NULL; ref = (ppref *)ppstruct_take(&pdf->heap, count * sizeof(ppref)); for (refindex = 0; refindex < count; ++refindex, ++ref) { ref->xref = xref; ref->number = first + refindex; if (iof_read(I, buffer, bufferbytes) != bufferbytes) goto xref_stream_error; b = buffer; if (w1 == 0) f1 = 1; // default type is 1 else for (f1 = 0, w = 0; w < w1; f1 = (f1 << 8)|(*b), ++w, ++b); for (f2 = 0, w = 0; w < w2; f2 = (f2 << 8)|(*b), ++w, ++b); for (f3 = 0, w = 0; w < w3; f3 = (f3 << 8)|(*b), ++w, ++b); switch (f1) { case 0: //--ref; xrefsection = NULL; --xref->count; break; case 1: if (xrefsection == NULL) { xrefsection = ppxref_push_section(xref, &pdf->heap); xrefsection->first = ref->number; xrefsection->refs = ref; } xrefsection->last = ref->number; ref->offset = f2; ref->version = f3; ref->object.type = PPNONE; ref->object.any = NULL; ref->length = 0; break; case 2: if (xrefsection == NULL) { xrefsection = ppxref_push_section(xref, &pdf->heap); xrefsection->first = ref->number; xrefsection->refs = ref; } xrefsection->last = ref->number; ref->offset = 0; // f2 is parent objstm, f3 is index in parent, both useless ref->version = 0; // compressed objects has implicit version == 0 ref->object.type = PPNONE; ref->object.any 
= NULL; ref->length = 0; break; default: goto xref_stream_error; } } } /* sort sections */ ppxref_sort(xref); // case of xref->size == 0 handled by ppxref_load_chain() /* close the stream _before_ loading prev xref */ ppstream_done(xrefstream); /* load prev and return */ return ppxref_load_chain(pdf, xref); xref_stream_error: ppstream_done(xrefstream); return NULL; } /* The following procedure loads xref /Prev, links xref->prev and typically returns xref. Some docs contain empty xref (one section with zero objects) that is actually a proxy to xref stream referred as /XRefStm (genuine concept of xrefs old/new style xrefs in the same doc). In case of 0-length xref we ignore the proxy and return the target xref (otherwise we would need annoying sanity check for xref->size > 0 on every ref search). */ static ppxref * ppxref_load_chain (ppdoc *pdf, ppxref *xref) { ppdict *trailer; ppuint xrefoffset; ppxref *prevxref, *nextxref; trailer = ppxref_trailer(xref); if (!ppdict_get_uint(trailer, "Prev", &xrefoffset)) // XRefStm is useless return xref; for (nextxref = pdf->xref; nextxref != NULL; nextxref = nextxref->prev) if (nextxref->offset == xrefoffset) // insane return NULL; if ((prevxref = ppxref_load(pdf, (size_t)xrefoffset)) == NULL) return NULL; if (xref->size > 0) { xref->prev = prevxref; return xref; } if (pdf->xref == xref) pdf->xref = prevxref; return prevxref; } static ppxref * ppxref_load (ppdoc *pdf, size_t xrefoffset) { iof *I; if ((I = ppdoc_reader(pdf, xrefoffset, PP_LENGTH_UNKNOWN)) == NULL) return NULL; ppscan_find(I); if (ppscan_key(I, "xref")) return ppxref_load_table(I, pdf, xrefoffset); return ppxref_load_stream(I, pdf, xrefoffset); // iof_close(I) does nothing here } static void ppoffmap_sort (ppref **left, ppref **right) { ppref **l, **r, *t; ppuint pivot; l = left, r = right; pivot = (*(l + ((r - l) / 2)))->offset; do { // don't read from pointer! 
    while ((*l)->offset < pivot) ++l;
    while ((*r)->offset > pivot) --r;
    if (l <= r)
    {
      t = *l;
      *l = *r;
      *r = t;
      ++l, --r;
    }
  } while (l <= r);
  if (left < r) ppoffmap_sort(left, r);
  if (l < right) ppoffmap_sort(l, right);
}

/* Replace unresolved trailer references (offset == 0) with the ref of the same
   number found in the owning xref. Must run after all xrefs are loaded but
   before trailer refs (eg. /Encrypt) are accessed. */

static void fix_trailer_references (ppdoc *pdf)
{
  ppxref *xref;
  ppdict *trailer;
  ppname **pkey;
  ppobj *obj;
  ppref *ref;
  for (xref = pdf->xref; xref != NULL; xref = xref->prev)
  {
    if ((trailer = ppxref_trailer(xref)) == NULL)
      continue;
    for (ppdict_first(trailer, pkey, obj); *pkey != NULL; ppdict_next(pkey, obj))
    { // no need to go deeper in structs, all items in trailer except info and root must be direct refs
      if (obj->type != PPREF)
        continue;
      ref = obj->ref;
      if (ref->offset == 0) // unresolved?
        if ((ref = ppxref_find(xref, ref->number)) != NULL)
          obj->ref = ref; // at this moment the reference still points nothing, but should be the one with the proper offset
    }
  }
}

/*
Here comes a procedure that loads all entries from all document bodies. We resolve references
while parsing objects and to make resolving correct, we need a complete chain of xref maps,
and a knowledge about possible linearized dict (first offset). So loading refs sorted by offsets
makes sense (not sure if it matters nowadays but we also avoid fseek() by large offsets).
Here is the proc:

  - create a list of all refs in all bodies
  - sort the list by offsets
  - for every ref from the sorted list:
    - estimate object length to avoid fread-ing more than necessary (not perfect but enough)
    - fseek() to the proper offset, fread() entry data or its part
    - parse the object with ppscan_obj(I, pdf, xref), where xref is not necessarily top pdf->xref
      (since v0.98 xref actually no longer matters, see xref_find() notes)
    - save the actual ref->length (not used so far, but we keep that so..)
    - make a stream if a dict is followed by "stream" keyword, also save the stream offset
  - free the list

PJ2080916: Luigi and Hans fixed a bug (rev 6491); a document having a stream with /Length being
a reference, that was stored in /ObjStm, and therefore not yet resolved when caching /Length key
value as stream->offset (ppstream_info()). At the end, references were resolved properly, but
the stream was not readable; stream->offset == 0. In rev6491 ObjStm streams are loaded before
others streams.
*/

static int ppdoc_load_objstm (ppstream *stream, ppdoc *pdf, ppxref *xref);

#define ppref_is_objstm(ref, stream, type) \
  ((ref)->xref->trailer.type == PPSTREAM && (type = ppdict_get_name((stream)->dict, "Type")) != NULL && ppname_is(type, "ObjStm"))

/* Load every body entry of the document, offset-sorted; resolves redundant
   ref-to-ref chains, then initializes ObjStm streams (and unpacks them) before
   all other streams. */

static void ppdoc_load_entries (ppdoc *pdf)
{
  size_t objects, sectionindex, refnumber, offindex;
  size_t streams = 0, object_streams = 0, redundant_indirections = 0;
  ppnum linearized;
  ppref **offmap, **pref, *ref;
  ppxref *xref;
  ppxsec *xsec;
  ppobj *obj;
  ppname *type;
  ppcrypt *crypt;
  ppstream *stream;

  if ((objects = (size_t)ppdoc_objects(pdf)) == 0) // can't happen
    return;
  pref = offmap = (ppref **)pp_malloc(objects * sizeof(ppref *));
  objects = 0; // recount refs with offset > 0
  for (xref = pdf->xref; xref != NULL; xref = xref->prev)
    for (sectionindex = 0, xsec = xref->sects; sectionindex < xref->size; ++sectionindex, ++xsec)
      for (refnumber = xsec->first, ref = xsec->refs; refnumber <= xsec->last; ++refnumber, ++ref)
        if (ref->offset > 0) // 0 means compressed or insane
          *pref++ = ref, ++objects;
  ppoffmap_sort(offmap, offmap + objects - 1);
  crypt = pdf->crypt;
  for (offindex = 0, pref = offmap; offindex < objects; )
  {
    ref = *pref;
    ++pref;
    ++offindex;
    if (ref->object.type != PPNONE) // might be preloaded already (/Encrypt dict, stream filter dicts, stream /Length..)
      continue;
    // estimate the entry length from the distance to the next offset (or to EOF for the last one)
    if (offindex < objects)
      ref->length = (*pref)->offset - ref->offset;
    else
      ref->length = pdf->filesize > ref->offset ? pdf->filesize - ref->offset : 0;
    if (crypt != NULL)
    {
      ppcrypt_start_ref(crypt, ref);
      obj = ppdoc_load_entry(pdf, ref);
      ppcrypt_end_ref(crypt);
    }
    else
    {
      obj = ppdoc_load_entry(pdf, ref);
    }
    switch (obj->type)
    {
      case PPDICT: /* Check if the object at first offset is linearized dict. We need that to resolve all references properly. */
        if (offindex == 1 && ppdict_get_num(obj->dict, "Linearized", &linearized)) // /Linearized value is a version number, default 1.0
          pdf->flags |= PPDOC_LINEARIZED;
        break;
      case PPSTREAM:
        ++streams;
        if (ppref_is_objstm(ref, obj->stream, type))
          ++object_streams;
        break;
      case PPREF:
        ++redundant_indirections;
        break;
      default:
        break;
    }
  }
  /* cut references pointing to references (rare). doing for all effectively cuts all insane chains */
  for (pref = offmap; redundant_indirections > 0; )
  {
    ref = *pref++;
    if (ref->object.type == PPREF)
    {
      --redundant_indirections;
      ref->object = ref->object.ref->object;
    }
  }
  /* load pdf 1.5 object streams _before_ other streams */
  for (pref = offmap; object_streams > 0; )
  {
    ref = *pref++;
    obj = &ref->object;
    if (obj->type != PPSTREAM)
      continue;
    stream = obj->stream;
    if (ppref_is_objstm(ref, stream, type))
    {
      --object_streams;
      if (crypt != NULL)
      {
        ppcrypt_start_ref(crypt, ref);
        ppstream_info(stream, pdf);
        ppcrypt_end_ref(crypt);
      }
      else
      {
        ppstream_info(stream, pdf);
      }
      if (!ppdoc_load_objstm(stream, pdf, ref->xref))
        loggerf("invalid objects stream %s at offset " PPSIZEF, ppref_str(ref->number, ref->version), ref->offset);
    }
  }
  /* now handle other streams */
  for (pref = offmap; streams > 0; )
  {
    ref = *pref++;
    obj = &ref->object;
    if (obj->type != PPSTREAM)
      continue;
    --streams;
    stream = obj->stream;
    if (crypt != NULL)
    {
      ppcrypt_start_ref(crypt, ref);
      ppstream_info(stream, pdf);
      ppcrypt_end_ref(crypt);
    }
    else
    {
      ppstream_info(stream, pdf);
    }
  }
  pp_free(offmap);
}

/* Load a single body entry: verify "<num> <ver> obj", parse the object, then
   promote a dict followed by "stream" to PPSTREAM, or an "<int> <int> R" pair
   to PPREF (PPNONE when unresolvable). Always returns &ref->object. */

ppobj * ppdoc_load_entry (ppdoc *pdf, ppref *ref)
{
  iof *I;
  size_t length;
  ppxref *xref;
  ppobj *obj;
  ppstack *stack;
  size_t streamoffset;
  ppref *refref;
  ppuint refnumber, refversion;

  length = ref->length > 0 ? ref->length : PP_LENGTH_UNKNOWN; // estimated or unknown
  if ((I = ppdoc_reader(pdf, ref->offset, length)) == NULL || !ppscan_start_entry(I, ref))
  {
    loggerf("invalid %s offset " PPSIZEF, ppref_str(ref->number, ref->version), ref->offset);
    return &ref->object; // PPNONE
  }
  stack = &pdf->stack;
  xref = ref->xref;
  if ((obj = ppscan_obj(I, pdf, xref)) == NULL)
  {
    loggerf("invalid %s object at offset " PPSIZEF, ppref_str(ref->number, ref->version), ref->offset);
    return &ref->object; // PPNONE
  }
  ref->object = *obj;
  ppstack_pop(stack, 1);
  obj = &ref->object;
  ref->length = ppdoc_reader_tell(pdf, I) - ref->offset;
  if (obj->type == PPDICT)
  {
    if (ppscan_start_stream(I, pdf, &streamoffset))
    {
      obj->type = PPSTREAM;
      obj->stream = ppstream_create(pdf, obj->dict, streamoffset);
    }
  }
  else if (obj->type == PPINT)
  { // a bare "<int> <int> R" indirect reference as the entry body
    ppscan_find(I);
    if (ppscan_uint(I, &refversion) && ppscan_find(I) == 'R')
    {
      refnumber = (ppuint)obj->integer;
      if ((refref = ppxref_find(xref, refnumber)) != NULL)
      {
        obj->type = PPREF;
        obj->ref = refref;
      }
      else
      {
        obj->type = PPNONE; // as ppref_unresolved()
        obj->any = NULL;
      }
    }
  }
  return obj;
}

/*
Loading entries from object stream

  /N is the number of contained entries
  /First is the offset of the first item

The stream consists of N pairs of numbers ... Offsets are ascending (relative to the first),
but ref numbers order is arbitrary. PDF spec says there might be some additional data between
objects, so we should obey offsets. Which means we should basically load the stream at once
(may be needed anyway to grab the stream [...].
*/

/* Unpack a pdf 1.5 object stream: read N "<objnum> <offset>" index pairs, then
   parse each contained object at /First + offset and store it into the local
   xref's matching ref. Returns 1 when every entry parsed, 0 otherwise. */

static int ppdoc_load_objstm (ppstream *stream, ppdoc *pdf, ppxref *xref)
{
  ppdict *dict; // stream dict, actually still on stack
  ppref *ref;
  ppobj *obj;
  ppuint items, firstoffset, offset, objnum, i, invalid = 0;
  iof *I;
  uint8_t *firstdata, *indexdata;
  ppstack *stack;

  dict = stream->dict;
  if (!ppdict_rget_uint(dict, "N", &items) || !ppdict_rget_uint(dict, "First", &firstoffset))
    return 0;
  if ((I = ppstream_read(stream, 1, 1)) == NULL)
    return 0;
  firstdata = I->pos + firstoffset;
  if (firstdata >= I->end)
    goto invalid_objstm;
  stack = &pdf->stack;
  //if (pdf->crypt != NULL)
  //  ppcrypt_end_ref(pdf->crypt); // objects are not encrypted, pdf->crypt->ref ensured NULL
  for (i = 0; i < items; ++i)
  {
    ppscan_find(I);
    if (!ppscan_uint(I, &objnum))
      goto invalid_objstm;
    ppscan_find(I);
    if (!ppscan_uint(I, &offset))
      goto invalid_objstm;
    if ((ref = ppxref_find_local(xref, objnum)) == NULL || ref->object.type != PPNONE)
    {
      loggerf("invalid compressed object number " PPUINTF " at position " PPUINTF, objnum, i);
      ++invalid;
      continue;
    }
    if (firstdata + offset >= I->end)
    {
      loggerf("invalid compressed object offset " PPUINTF " at position " PPUINTF, offset, i);
      ++invalid;
      continue;
    }
    indexdata = I->pos; // save position
    I->pos = firstdata + offset; // go to the object
    ppscan_find(I);
    if ((obj = ppscan_obj(I, pdf, xref)) != NULL)
    {
      ref->object = *obj;
      ppstack_pop(stack, 1); // nothing more needed, as obj can never be indirect ref or stream
    }
    else
    {
      ++invalid;
      loggerf("invalid compressed object %s at stream offset " PPUINTF, ppref_str(objnum, 0), offset);
    }
    I->pos = indexdata; // restore position and read next from index
  }
  ppstream_done(stream);
  return invalid == 0;
invalid_objstm:
  ppstream_done(stream);
  return 0;
}

/* main PDF loader proc */

/* Attempt decryption with the given passwords; on success (or when the doc is
   not encrypted) loads all body entries. Idempotent once the status is final. */

ppcrypt_status ppdoc_crypt_pass (ppdoc *pdf, const void *userpass, size_t userpasslength, const void *ownerpass, size_t ownerpasslength)
{
  switch (pdf->cryptstatus)
  {
    case PPCRYPT_NONE:
    case PPCRYPT_DONE:
    case PPCRYPT_FAIL:
      break;
    case PPCRYPT_PASS: // initial status or really needs password
      pdf->cryptstatus = ppdoc_crypt_init(pdf, userpass, userpasslength, ownerpass, ownerpasslength);
      switch (pdf->cryptstatus)
      {
        case PPCRYPT_NONE:
        case PPCRYPT_DONE:
          ppdoc_load_entries(pdf);
          break;
        case PPCRYPT_PASS: // user needs to check ppdoc_crypt_status() and recall ppdoc_crypt_pass() with the proper password
        case PPCRYPT_FAIL: // hopeless..
          break;
      }
      break;
  }
  return pdf->cryptstatus;
}

/* Read the document skeleton: header, startxref tail, xref chain, trailer ref
   fixup, then a decryption attempt with an empty password. Returns pdf or NULL
   on structural failure. */

static ppdoc * ppdoc_read (ppdoc *pdf, iof_file *input)
{
  uint8_t header[PPDOC_HEADER];
  size_t xrefoffset;
  input = &pdf->input;
  if (iof_file_read(header, 1, PPDOC_HEADER, input) != PPDOC_HEADER || !ppdoc_header(pdf, header))
    return NULL;
  if (!ppdoc_tail(pdf, input, &xrefoffset))
    return NULL;
  if (ppxref_load(pdf, xrefoffset) == NULL)
    return NULL;
  fix_trailer_references(pdf); // after loading xrefs but before accessing trailer refs (/Encrypt might be a reference)
  // check encryption, if any, try empty password
  switch (ppdoc_crypt_pass(pdf, "", 0, NULL, 0))
  {
    case PPCRYPT_NONE: // no encryption
    case PPCRYPT_DONE: // encryption with an empty password
    case PPCRYPT_PASS: // the user needs to check ppdoc_crypt_status() and call ppdoc_crypt_pass()
      break;
    case PPCRYPT_FAIL: // hopeless
      //loggerf("decryption failed");
      //return NULL;
      break;
  }
  return pdf;
}

static void ppdoc_pages_init (ppdoc *pdf);

/*
20191214: We used to allocate ppdoc, as all other structs, from the internal heap:

  ppheap heap;
  ppheap_init(&heap);
  pdf = (ppdoc *)ppstruct_take(&heap, sizeof(ppdoc));
  pdf->heap = heap;
  ppbytes_buffer_init(&pdf->heap);
  ...

So ppdoc pdf was allocated from the heap owned by the pdf itself. Somewhat tricky, but should
work fine, as from that point nothing refered to a local heap variable addres. For some reason
that causes a crash on openbsd.
*/

/* Allocate and initialize a ppdoc over the given input; on any read failure the
   partially built doc is freed and NULL returned. */

static ppdoc * ppdoc_create (iof_file *input)
{
  ppdoc *pdf;
  pdf = (ppdoc *)pp_malloc(sizeof(ppdoc));
  ppheap_init(&pdf->heap);
  ppbytes_buffer_init(&pdf->heap);
  ppstack_init(&pdf->stack, &pdf->heap);
  ppdoc_reader_init(pdf, input);
  ppdoc_pages_init(pdf);
  pdf->xref = NULL;
  pdf->crypt = NULL;
  pdf->cryptstatus = PPCRYPT_PASS; // check on ppdoc_read() -> ppdoc_crypt_pass()
  pdf->flags = 0;
  pdf->version[0] = '\0';
  if (ppdoc_read(pdf, &pdf->input) != NULL)
    return pdf;
  ppdoc_free(pdf);
  return NULL;
}

/* Public loader: open a file by name; the FILE * is owned (closed) by the doc. */

ppdoc * ppdoc_load (const char *filename)
{
  FILE *file;
  iof_file input;
  if ((file = fopen(filename, "rb")) == NULL)
    return NULL;
  iof_file_init(&input, file);
  input.flags |= IOF_CLOSE_FILE;
  return ppdoc_create(&input);
}

/* Public loader: wrap an already opened FILE *; closefile decides ownership. */

ppdoc * ppdoc_filehandle (FILE *file, int closefile)
{
  iof_file input;
  if (file == NULL)
    return NULL;
  iof_file_init(&input, file);
  if (closefile)
    input.flags |= IOF_CLOSE_FILE;
  return ppdoc_create(&input);
}

/* Public loader: parse an in-memory buffer; the doc takes over the allocation. */

ppdoc * ppdoc_mem (const void *data, size_t size)
{
  iof_file input;
  iof_file_rdata_init(&input, data, size);
  input.flags |= IOF_BUFFER_ALLOC;
  return ppdoc_create(&input);
}

/* Release the document, its input, parse stack and private heap. */

void ppdoc_free (ppdoc *pdf)
{
  iof_file_decref(&pdf->input);
  ppstack_free_buffer(&pdf->stack);
  ppheap_free(&pdf->heap);
  pp_free(pdf);
}

/* Current encryption status (see ppcrypt_status values). */

ppcrypt_status ppdoc_crypt_status (ppdoc *pdf)
{
  return pdf->cryptstatus;
}

/* Permission flags from the encryption dict; all bits set when not encrypted. */

ppint ppdoc_permissions (ppdoc *pdf)
{
  return pdf->crypt != NULL ?
pdf->crypt->permissions : (ppint)0xFFFFFFFFFFFFFFFF;
}

/* pages access */

/* Extract /Type, /Count and /Kids from a page-tree node dict in a single pass;
   returns the kids array (NULL for a leaf /Page). */

static pparray * pppage_node (ppdict *dict, ppuint *count, ppname **type)
{
  ppname **pkey, *key;
  ppobj *obj;
  pparray *kids = NULL;
  *count = 0;
  *type = NULL;
  for (ppdict_first(dict, pkey, obj); (key = *pkey) != NULL; ppdict_next(pkey, obj))
  {
    switch (key->data[0])
    { // dispatch on the first key byte, confirm with full compare
      case 'T':
        if (ppname_is(key, "Type"))
          *type = ppobj_get_name(obj);
        break;
      case 'C':
        if (ppname_is(key, "Count"))
          ppobj_rget_uint(obj, *count);
        break;
      case 'K':
        if (ppname_is(key, "Kids"))
          kids = ppobj_rget_array(obj);
        break;
    }
  }
  return kids;
}

#define ppname_is_page(type) (type != NULL && ppname_is(type, "Page"))

/* Number of pages in the document (0 when the page tree is missing/broken). */

ppuint ppdoc_page_count (ppdoc *pdf)
{
  ppref *ref;
  ppname *type;
  ppuint count;
  if ((ref = ppxref_pages(pdf->xref)) == NULL)
    return 0;
  if (pppage_node(ref->object.dict, &count, &type) == NULL)
    return ppname_is_page(type) ? 1 : 0; // acrobat and ghostscript accept documents with root /Pages entry being a reference to a sole /Page object
  return count;
}

/* Find the 1-based index-th page by descending the page tree, walking each
   /Kids array from whichever end is closer (using /Count to skip subtrees). */

ppref * ppdoc_page (ppdoc *pdf, ppuint index)
{
  ppdict *dict;
  ppuint count;
  pparray *kids;
  size_t size, i;
  ppobj *r, *o;
  ppref *ref;
  ppname *type;

  if ((ref = ppxref_pages(pdf->xref)) == NULL)
    return NULL;
  dict = ref->object.dict;
  if ((kids = pppage_node(dict, &count, &type)) != NULL)
  {
    if (index < 1 || index > count)
      return NULL;
  }
  else
  {
    return index == 1 && ppname_is_page(type) ? ref : NULL;
  }
scan_array:
  if (index <= count / 2)
  { // probably shorter way from the beginning
    for (i = 0, size = kids->size, r = pparray_at(kids, 0); i < size; ++i, ++r)
    {
      if (r->type != PPREF)
        return NULL;
      o = &r->ref->object;
      if (o->type != PPDICT)
        return NULL;
      dict = o->dict;
      if ((kids = pppage_node(dict, &count, &type)) != NULL)
      {
        if (index <= count)
          goto scan_array; // descend into this subtree
        index -= count;    // otherwise skip its pages entirely
        continue;
      }
      if (index == 1 && ppname_is_page(type))
        return r->ref;
      --index;
    }
  }
  else if ((size = kids->size) > 0) // for safe (size-1)
  { // probably shorter way from the end
    index = count - index + 1; // index counted from the tail of this subtree
    for (i = 0, r = pparray_at(kids, size - 1); i < size; ++i, --r)
    {
      if (r->type != PPREF)
        return NULL;
      o = &r->ref->object;
      if (o->type != PPDICT)
        return NULL;
      dict = o->dict;
      if ((kids = pppage_node(dict, &count, &type)) != NULL)
      {
        if (index <= count)
        {
          index = count - index + 1; // back to head-relative before descending
          goto scan_array;
        }
        index -= count;
        continue;
      }
      if (index == 1 && ppname_is_page(type))
        return r->ref;
      --index;
    }
  }
  return NULL;
}

/*
Through pages iterator. Iterating over pages tree just on the base of /Kids and /Parent keys
is ineffective, as to get next pageref we need to take parent, find the pageref in /Kids, take
next (or go upper).. Annoying. We use a dedicated stack for pages iterator. This could actually
be done with pdf->stack, but some operations may clear it, so safer to keep it independent.
Besides, its depth is constant (set on first use), so no need for allocs.
*/

/* Reset the pages iterator stack to its static buffer. */

static void ppdoc_pages_init (ppdoc *pdf)
{
  pppages *pages;
  pages = &pdf->pages;
  pages->root = pages->parent = &pages->buffer[0];
  pages->depth = 0;
  pages->space = PPPAGES_STACK_DEPTH;
}

/* Push a /Kids array frame on the iterator stack, growing the stack (heap copy)
   when the static buffer is exhausted. */

static ppkids * pppages_push (ppdoc *pdf, pparray *kids)
{
  ppkids *newroot, *bounds;
  pppages *pages;
  pages = &pdf->pages;
  if (pages->depth == pages->space)
  {
    pages->space <<= 1;
    newroot = (ppkids *)ppstruct_take(&pdf->heap, pages->space * sizeof(ppkids));
    memcpy(newroot, pages->root, pages->depth * sizeof(ppkids));
    pages->root = newroot;
  }
  bounds = pages->parent = &pages->root[pages->depth++];
  bounds->current = pparray_at(kids, 0);
  bounds->sentinel = pparray_at(kids, kids->size);
  return bounds;
}

#define pppages_pop(pages) (--((pages)->parent), --((pages)->depth))

/* Descend from a tree node to its leftmost leaf /Page, pushing each visited
   /Kids array; an empty /Kids falls through to the next sibling. */

static ppref * ppdoc_pages_group_first (ppdoc *pdf, ppref *ref)
{
  ppdict *dict;
  pparray *kids;
  ppuint count;
  ppname *type;
  ppobj *o;
  dict = ref->object.dict; // typecheck made by callers
  while ((kids = pppage_node(dict, &count, &type)) != NULL)
  {
    if ((o = pparray_get_obj(kids, 0)) == NULL) // empty /Kids
      return ppdoc_next_page(pdf);
    if ((ref = ppobj_get_ref(o)) == NULL || ref->object.type != PPDICT)
      return NULL;
    pppages_push(pdf, kids);
    dict = ref->object.dict;
  }
  return ppname_is_page(type) ?
ppcontents_first (ppdict *dict) { ppobj *contentsobj; if ((contentsobj = ppdict_rget_obj(dict, "Contents")) == NULL) return NULL; switch (contentsobj->type) { case PPARRAY: return ppcontents_first_stream(contentsobj->array); case PPSTREAM: return contentsobj->stream; default: break; } return NULL; } ppstream * ppcontents_next (ppdict *dict, ppstream *stream) { ppobj *contentsobj; if ((contentsobj = ppdict_rget_obj(dict, "Contents")) == NULL) return NULL; switch (contentsobj->type) { case PPARRAY: return ppcontents_next_stream(contentsobj->array, stream); case PPSTREAM: break; default: break; } return NULL; } static ppobj * ppcontents_op (iof *I, ppstack *stack, size_t *psize, ppname **pname) { ppobj *obj; ppstack_clear(stack); do { if (ppscan_find(I) < 0) return NULL; if ((obj = ppscan_psobj(I, stack)) == NULL) return NULL; } while (obj->type != PPNAME || !ppname_exec(obj->name)); *pname = obj->name; *psize = stack->size - 1; return stack->buf; } ppobj * ppcontents_first_op (ppcontext *context, ppstream *stream, size_t *psize, ppname **pname) { iof *I; if ((I = ppstream_read(stream, 1, 0)) == NULL) return NULL; return ppcontents_op(I, &context->stack, psize, pname); } ppobj * ppcontents_next_op (ppcontext *context, ppstream *stream, size_t *psize, ppname **pname) { return ppcontents_op(ppstream_iof(stream), &context->stack, psize, pname); } ppobj * ppcontents_parse (ppcontext *context, ppstream *stream, size_t *psize) { iof *I; ppstack *stack; ppobj *obj; stack = &context->stack; ppstack_clear(stack); if ((I = ppstream_read(stream, 1, 0)) == NULL) return NULL; while (ppscan_find(I) >= 0) if ((obj = ppscan_psobj(I, stack)) == NULL) goto error; *psize = stack->size; ppstream_done(stream); return stack->buf; error: ppstream_done(stream); return NULL; } /* boxes */ pprect * pparray_to_rect (pparray *array, pprect *rect) { ppobj *obj; if (array->size != 4) return NULL; obj = pparray_at(array, 0); if (!ppobj_get_num(obj, rect->lx)) return NULL; obj = pparray_at(array, 
1); if (!ppobj_get_num(obj, rect->ly)) return NULL; obj = pparray_at(array, 2); if (!ppobj_get_num(obj, rect->rx)) return NULL; obj = pparray_at(array, 3); if (!ppobj_get_num(obj, rect->ry)) return NULL; return rect; } pprect * ppdict_get_rect (ppdict *dict, const char *name, pprect *rect) { pparray *array; return (array = ppdict_rget_array(dict, name)) != NULL ? pparray_to_rect(array, rect) : NULL; } pprect * ppdict_get_box (ppdict *dict, const char *name, pprect *rect) { do { if (ppdict_get_rect(dict, name, rect) != NULL) return rect; dict = ppdict_rget_dict(dict, "Parent"); } while (dict != NULL); return NULL; } ppmatrix * pparray_to_matrix (pparray *array, ppmatrix *matrix) { ppobj *obj; if (array->size != 6) return NULL; obj = pparray_at(array, 0); if (!ppobj_get_num(obj, matrix->xx)) return NULL; obj = pparray_at(array, 1); if (!ppobj_get_num(obj, matrix->xy)) return NULL; obj = pparray_at(array, 2); if (!ppobj_get_num(obj, matrix->yx)) return NULL; obj = pparray_at(array, 3); if (!ppobj_get_num(obj, matrix->yy)) return NULL; obj = pparray_at(array, 4); if (!ppobj_get_num(obj, matrix->x)) return NULL; obj = pparray_at(array, 5); if (!ppobj_get_num(obj, matrix->y)) return NULL; return matrix; } ppmatrix * ppdict_get_matrix (ppdict *dict, const char *name, ppmatrix *matrix) { pparray *array; return (array = ppdict_rget_array(dict, name)) != NULL ? 
pparray_to_matrix(array, matrix) : NULL; } /* logger */ void pplog_callback (pplogger_callback logger, void *alien) { logger_callback((logger_function)logger, alien); } int pplog_prefix (const char *prefix) { return logger_prefix(prefix); } /* version */ const char * ppdoc_version_string (ppdoc *pdf) { return pdf->version; } int ppdoc_version_number (ppdoc *pdf, int *minor) { *minor = pdf->version[2] - '0'; return pdf->version[0] - '0'; } /* doc info */ size_t ppdoc_file_size (ppdoc *pdf) { return pdf->filesize; } ppuint ppdoc_objects (ppdoc *pdf) { ppuint count; ppxref *xref; for (count = 0, xref = pdf->xref; xref != NULL; xref = xref->prev) count += xref->count; return count; } size_t ppdoc_memory (ppdoc *pdf, size_t *waste) { mem_info info; size_t used; ppbytes_heap_info(&pdf->heap, &info, 0); ppstruct_heap_info(&pdf->heap, &info, 1); *waste = info.ghosts + info.blockghosts + info.left; // info.ghosts == 0 used = info.used + *waste; used += pdf->stack.space * sizeof(ppobj); return used; } luametatex-2.10.08/source/libraries/pplib/ppload.h000066400000000000000000000022761442250314700220560ustar00rootroot00000000000000 #ifndef PP_LOAD_H #define PP_LOAD_H typedef struct { ppobj *buf; // ppobjects buffer (allocated, not from our heap) ppobj *pos; // current ppobj * size_t size; // stack size size_t space; // available space ppheap *heap; // allocator (parent pdf->stack->heap or parent context) } ppstack; typedef struct { ppobj *current; ppobj *sentinel; } ppkids; #define PPPAGES_STACK_DEPTH 4 typedef struct { ppkids buffer[PPPAGES_STACK_DEPTH]; ppkids *root; ppkids *parent; ppuint depth; ppuint space; } pppages; struct ppdoc { /* input */ iof_file input; iof reader; uint8_t *buffer; size_t filesize; /* heap */ ppheap heap; ppstack stack; /* output struct */ ppxref *xref; pppages pages; ppcrypt *crypt; ppcrypt_status cryptstatus; int flags; char version[5]; }; #define PPDOC_LINEARIZED (1 << 0) ppobj * ppdoc_load_entry (ppdoc *pdf, ppref *ref); #define 
ppobj_preloaded(pdf, obj) ((obj)->type != PPREF ? (obj) : ((obj)->ref->object.type == PPNONE ? ppdoc_load_entry(pdf, (obj)->ref) : &(obj)->ref->object)) ppstring * ppstring_internal (const void *data, size_t size, ppheap *heap); struct ppcontext { ppheap heap; ppstack stack; }; #endifluametatex-2.10.08/source/libraries/pplib/ppstream.c000066400000000000000000000340401442250314700224170ustar00rootroot00000000000000 #include "ppfilter.h" #include "pplib.h" ppstream * ppstream_create (ppdoc *pdf, ppdict *dict, size_t offset) { ppstream *stream; stream = (ppstream *)ppstruct_take(&pdf->heap, sizeof(ppstream)); stream->dict = dict; stream->offset = offset; //if (!ppdict_rget_uint(dict, "Length", &stream->length)) // may be indirect pointing PPNONE at this moment // stream->length = 0; stream->length = 0; stream->filespec = NULL; stream->filter.filters = NULL; stream->filter.params = NULL; stream->filter.count = 0; stream->input = &pdf->input; stream->I = NULL; stream->cryptkey = NULL; stream->flags = 0; return stream; } static iof * ppstream_predictor (ppdict *params, iof *N) { ppint predictor, rowsamples, components, samplebits; if (!ppdict_get_int(params, "Predictor", &predictor) || predictor <= 1) return N; if (!ppdict_get_int(params, "Columns", &rowsamples) || rowsamples == 0) // sanity, filter probably expects >0 rowsamples = 1;; if (!ppdict_get_int(params, "Colors", &components) || components == 0) // ditto components = 1; if (!ppdict_get_int(params, "BitsPerComponent", &samplebits) || samplebits == 0) samplebits = 8; return iof_filter_predictor_decoder(N, (int)predictor, (int)rowsamples, (int)components, (int)samplebits); } static iof * ppstream_decoder (ppstream *stream, ppstreamtp filtertype, ppdict *params, iof *N) { int flags; iof *F, *P; ppint earlychange; ppstring *cryptkey; switch (filtertype) { case PPSTREAM_BASE16: return iof_filter_base16_decoder(N); case PPSTREAM_BASE85: return iof_filter_base85_decoder(N); case PPSTREAM_RUNLENGTH: return 
iof_filter_runlength_decoder(N); case PPSTREAM_FLATE: if ((F = iof_filter_flate_decoder(N)) != NULL) { if (params != NULL) { if ((P = ppstream_predictor(params, F)) != NULL) return P; iof_close(F); break; } return F; } break; case PPSTREAM_LZW: flags = LZW_DECODER_DEFAULTS; if (params != NULL && ppdict_get_int(params, "EarlyChange", &earlychange) && earlychange == 0) // integer, not boolean flags &= ~LZW_EARLY_INDEX; if ((F = iof_filter_lzw_decoder(N, flags)) != NULL) { if (params != NULL) { if ((P = ppstream_predictor(params, F)) != NULL) return P; iof_close(F); break; } return F; } break; case PPSTREAM_CRYPT: if ((cryptkey = stream->cryptkey) == NULL) return N; // /Identity crypt if (stream->flags & PPSTREAM_ENCRYPTED_AES) return iof_filter_aes_decoder(N, cryptkey->data, cryptkey->size); if (stream->flags & PPSTREAM_ENCRYPTED_RC4) return iof_filter_rc4_decoder(N, cryptkey->data, cryptkey->size); return NULL; // if neither AES or RC4 but cryptkey present, something went wrong; see ppstream_info() case PPSTREAM_CCITT: case PPSTREAM_DCT: case PPSTREAM_JBIG2: case PPSTREAM_JPX: break; } return NULL; } #define ppstream_source(stream) iof_filter_stream_coreader((iof_file *)((stream)->input), (size_t)((stream)->offset), (size_t)((stream)->length)) #define ppstream_auxsource(filename) iof_filter_file_reader(filename) static ppname * ppstream_get_filter_name (ppobj *filterobj, size_t index) { if (filterobj->type == PPNAME) return index == 0 ? filterobj->name : NULL; if (filterobj->type == PPARRAY) return pparray_get_name(filterobj->array, index); return NULL; } static ppdict * ppstream_get_filter_params (ppobj *paramsobj, size_t index) { if (paramsobj->type == PPDICT) return index == 0 ? 
paramsobj->dict : NULL; if (paramsobj->type == PPARRAY) return pparray_rget_dict(paramsobj->array, index); return NULL; } static const char * ppstream_aux_filename (ppobj *filespec) { // mockup, here we should decode the string if (filespec->type == PPSTRING) { return (const char *)(filespec->string); } // else might be a dict - todo return NULL; } #define ppstream_image_filter(fcode) (fcode == PPSTREAM_DCT || fcode == PPSTREAM_CCITT || fcode == PPSTREAM_JBIG2 || fcode == PPSTREAM_JPX) iof * ppstream_read (ppstream *stream, int decode, int all) { iof *I, *F; ppstreamtp *filtertypes, filtertype; int owncrypt; ppdict **filterparams, *fparams; size_t index, filtercount; const char *filename; if (ppstream_iof(stream) != NULL) return NULL; // usage error if (stream->filespec != NULL) { filename = ppstream_aux_filename(stream->filespec); // mockup, basic support I = filename != NULL ? ppstream_auxsource(filename) : NULL; } else { I = ppstream_source(stream); } if (I == NULL) return NULL; /* If the stream is encrypted, decipher is the first to be applied */ owncrypt = (stream->flags & PPSTREAM_ENCRYPTED_OWN) != 0; if (!owncrypt) { if (stream->cryptkey != NULL && stream->filespec == NULL) { /* implied global crypt; does not apply to external files (pdf psec page 115), except for embedded file streams (not supported so far) */ if ((F = ppstream_decoder(stream, PPSTREAM_CRYPT, NULL, I)) == NULL) goto stream_error; I = F; } /* otherwise no crypt at all or /Identity */ } if (decode || owncrypt) { if ((filtercount = stream->filter.count) > 0) { filtertypes = stream->filter.filters; filterparams = stream->filter.params; for (index = 0; index < filtercount; ++index) { fparams = filterparams != NULL ? 
filterparams[index] : NULL; filtertype = filtertypes[index]; if ((F = ppstream_decoder(stream, filtertype, fparams, I)) != NULL) { I = F; if (owncrypt && !decode && filtertype == PPSTREAM_CRYPT) break; // /Crypt filter should always be first, so in practise we return decrypted but compressed continue; } if (!ppstream_image_filter(filtertype)) goto stream_error; // failed to create non-image filter, something unexpected break; } } } if (all) iof_load(I); else iof_input(I); stream->I = I; return I; stream_error: iof_close(I); return NULL; } uint8_t * ppstream_first (ppstream *stream, size_t *size, int decode) { iof *I; if ((I = ppstream_read(stream, decode, 0)) != NULL) { *size = (size_t)iof_left(I); return I->pos; } *size = 0; return NULL; } uint8_t * ppstream_next (ppstream *stream, size_t *size) { iof *I; if ((I = ppstream_iof(stream)) != NULL) { I->pos = I->end; if ((*size = iof_input(I)) > 0) return I->pos; } *size = 0; return NULL; } uint8_t * ppstream_all (ppstream *stream, size_t *size, int decode) { iof *I; if ((I = ppstream_read(stream, decode, 1)) != NULL) { *size = (size_t)iof_left(I); return I->pos; } *size = 0; return NULL; } void ppstream_done (ppstream *stream) { iof *I; if ((I = ppstream_iof(stream)) != NULL) { iof_close(I); stream->I = NULL; } } /* fetching stream info PJ20180916: revealed it makes sense to do a lilbit more just after parsing stream entry to simplify stream operations and extend ppstream api */ /* stream filters */ const char * ppstream_filter_name[] = { "ASCIIHexDecode", "ASCII85Decode", "RunLengthDecode", "FlateDecode", "LZWDecode", "CCITTFaxDecode", "DCTDecode", "JBIG2Decode", "JPXDecode", "Crypt" }; int ppstream_filter_type (ppname *name, ppstreamtp *filtertype) { switch (name->data[0]) { case 'A': if (ppname_is(name, "ASCIIHexDecode")) { *filtertype = PPSTREAM_BASE16; return 1; } if (ppname_is(name, "ASCII85Decode")) { *filtertype = PPSTREAM_BASE85; return 1; } break; case 'R': if (ppname_is(name, "RunLengthDecode")) { 
*filtertype = PPSTREAM_RUNLENGTH; return 1; } break; case 'F': if (ppname_is(name, "FlateDecode")) { *filtertype = PPSTREAM_FLATE; return 1; } break; case 'L': if (ppname_is(name, "LZWDecode")) { *filtertype = PPSTREAM_LZW; return 1; } break; case 'D': if (ppname_is(name, "DCTDecode")) { *filtertype = PPSTREAM_DCT; return 1; } break; case 'C': if (ppname_is(name, "CCITTFaxDecode")) { *filtertype = PPSTREAM_CCITT; return 1; } if (ppname_is(name, "Crypt")) { *filtertype = PPSTREAM_CRYPT; return 1; } break; case 'J': if (ppname_is(name, "JPXDecode")) { *filtertype = PPSTREAM_JPX; return 1; } if (ppname_is(name, "JBIG2Decode")) { *filtertype = PPSTREAM_JBIG2; return 1; } break; } return 0; } void ppstream_info (ppstream *stream, ppdoc *pdf) { // called in ppdoc_load_entries() for every stream, but after loading non-stream objects (eg. /Length..) ppdict *dict, *fparams; ppobj *fobj, *pobj; ppname *fname, *tname, *owncryptfilter = NULL; ppcrypt *crypt; ppref *ref; size_t i; int cflags; ppstreamtp *filtertypes = NULL, filtertype; ppdict **filterparams = NULL; size_t filtercount = 0, farraysize = 0; const char *filterkey, *paramskey; dict = stream->dict; ppdict_rget_uint(dict, "Length", &stream->length); if ((stream->filespec = ppdict_get_obj(dict, "F")) != NULL) { stream->flags |= PPSTREAM_NOT_SUPPORTED; filterkey = "FFilter", paramskey = "FDecodeParms"; } else filterkey = "Filter", paramskey = "DecodeParms"; if ((fobj = ppdict_rget_obj(dict, filterkey)) != NULL) { switch (fobj->type) { case PPNAME: farraysize = 1; break; case PPARRAY: farraysize = fobj->array->size; break; default: break; } if (farraysize > 0) { filtertypes = (ppstreamtp *)ppstruct_take(&pdf->heap, farraysize * sizeof(ppstreamtp)); if ((pobj = ppdict_rget_obj(dict, paramskey)) != NULL) { filterparams = (ppdict **)ppstruct_take(&pdf->heap, farraysize * sizeof(ppdict *)); } for (i = 0; i < farraysize; ++i) { if ((fname = ppstream_get_filter_name(fobj, i)) != NULL && ppstream_filter_type(fname, 
&filtertype)) { filtertypes[filtercount] = filtertype; if (pobj != NULL) { fparams = ppstream_get_filter_params(pobj, i); filterparams[filtercount] = fparams; } else fparams = NULL; switch (filtertype) { case PPSTREAM_BASE16: case PPSTREAM_BASE85: case PPSTREAM_RUNLENGTH: case PPSTREAM_FLATE: case PPSTREAM_LZW: stream->flags |= PPSTREAM_FILTER; break; case PPSTREAM_CCITT: case PPSTREAM_DCT: case PPSTREAM_JBIG2: case PPSTREAM_JPX: stream->flags |= PPSTREAM_IMAGE; break; case PPSTREAM_CRYPT: stream->flags |= PPSTREAM_ENCRYPTED_OWN; owncryptfilter = fparams != NULL ? ppdict_get_name(fparams, "Name") : NULL; // /Type /CryptFilterDecodeParms /Name ... if (i != 0) // we assume it is first stream->flags |= PPSTREAM_NOT_SUPPORTED; break; } ++filtercount; } else { stream->flags |= PPSTREAM_NOT_SUPPORTED; } } } } stream->filter.filters = filtertypes; stream->filter.params = filterparams; stream->filter.count = filtercount; if ((crypt = pdf->crypt) == NULL || (ref = crypt->ref) == NULL) return; if (stream->flags & PPSTREAM_ENCRYPTED_OWN) { /* Seems a common habit to use just /Crypt filter name with no params, which defaults to /Identity. A real example with uncompressed metadata: <> */ if (owncryptfilter != NULL && !ppname_is(owncryptfilter, "Identity") && stream->filespec == NULL) // ? { if (crypt->map != NULL && ppcrypt_type(crypt, owncryptfilter, NULL, &cflags)) { if (cflags & PPCRYPT_INFO_AES) stream->flags |= PPSTREAM_ENCRYPTED_AES; else if (cflags & PPCRYPT_INFO_RC4) stream->flags |= PPSTREAM_ENCRYPTED_RC4; } } } else { if ((crypt->flags & PPCRYPT_NO_METADATA) && (tname = ppdict_get_name(dict, "Type")) != NULL && ppname_is(tname, "Metadata")) ; /* special treatment of metadata stream; we assume that explicit /Filter /Crypt setup overrides document level setup of EncryptMetadata. 
*/ else if (stream->filespec == NULL) /* external files are not encrypted, expect embedded files (not supported yet) */ { if (crypt->flags & PPCRYPT_STREAM_RC4) stream->flags |= PPSTREAM_ENCRYPTED_RC4; else if (crypt->flags & PPCRYPT_STREAM_AES) stream->flags |= PPSTREAM_ENCRYPTED_AES; } } /* finally, if the stream is encrypted with non-identity crypt (implicit or explicit), make and save the crypt key */ if (stream->flags & PPSTREAM_ENCRYPTED) stream->cryptkey = ppcrypt_stmkey(crypt, ref, ((stream->flags & PPSTREAM_ENCRYPTED_AES) != 0), &pdf->heap); } void ppstream_filter_info (ppstream *stream, ppstream_filter *info, int decode) { size_t from, index; ppstreamtp filtertype; ppdict *params; *info = stream->filter; if (info->count > 0) { from = (stream->flags & PPSTREAM_ENCRYPTED_OWN) && info->filters[0] == PPSTREAM_CRYPT ? 1 : 0; if (decode) { for (index = from; index < info->count; ++index) { filtertype = info->filters[index]; if (ppstream_image_filter(filtertype)) { break; } } } else { index = from; } if (index > 0) { info->count -= index; if (info->count > 0) { info->filters += index; if (info->params != NULL) { info->params += index; for (index = 0, params = NULL; index < info->count; ++index) if ((params = info->params[index]) != NULL) break; if (params == NULL) info->params = NULL; } } else { info->filters = NULL; info->params = NULL; } } } } /* */ void ppstream_init_buffers (void) { iof_filters_init(); } void ppstream_free_buffers (void) { iof_filters_free(); } luametatex-2.10.08/source/libraries/pplib/ppstream.h000066400000000000000000000004331442250314700224230ustar00rootroot00000000000000 #ifndef PP_STREAM_H #define PP_STREAM_H ppstream * ppstream_create (ppdoc *pdf, ppdict *dict, size_t offset); iof * ppstream_read (ppstream *stream, int decode, int all); #define ppstream_iof(stream) ((iof *)((stream)->I)) void ppstream_info (ppstream *stream, ppdoc *pdf); 
#endifluametatex-2.10.08/source/libraries/pplib/pptest1.c000066400000000000000000000047231442250314700221710ustar00rootroot00000000000000 #include #include "ppapi.h" #include "util/utiliof.h" static const char * sizenum (size_t s) { static char buffer[32]; if (s < 1000) sprintf(buffer, "%uB", (unsigned)s); else if (s < 1000000) sprintf(buffer, "%.2fkB", (double)(s) / 1000); else sprintf(buffer, "%.2fMB", (double)(s) / 1000000); return buffer; } static const char * crypt_info (ppdoc *pdf) { switch (ppdoc_crypt_status(pdf)) { case PPCRYPT_NONE: return "none"; case PPCRYPT_DONE: return "empty password"; case PPCRYPT_PASS: return "nonempty password"; default: break; } return "this shouldn't happen"; } static void print_info (ppdoc *pdf) { ppdict *info; ppstring *creator, *producer; size_t memused, memwaste; if ((info = ppdoc_info(pdf)) != NULL) { if ((creator = ppdict_rget_string(info, "Creator")) != NULL) printf(" creator: %s\n", ppstring_decoded_data(creator)); if ((producer = ppdict_rget_string(info, "Producer")) != NULL) printf(" producer: %s\n", ppstring_decoded_data(producer)); } printf(" version: %s\n", ppdoc_version_string(pdf)); printf(" protection: %s\n", crypt_info(pdf)); printf(" filesize: %s\n", sizenum(ppdoc_file_size(pdf))); printf(" objects: %lu\n", (unsigned long)ppdoc_objects(pdf)); printf(" pagecount: %lu\n", (unsigned long)ppdoc_page_count(pdf)); memused = ppdoc_memory(pdf, &memwaste); printf(" memused: %s\n", sizenum(memused)); printf(" memwaste: %s\n", sizenum(memwaste)); } static int usage (const char *argv0) { printf("pplib " pplib_version ", " pplib_author "\n"); printf("usage: %s file1.pdf file2.pdf ...\n", argv0); return 0; } int main (int argc, const char **argv) { const char *filepath; int a; ppdoc *pdf; const void *data; size_t size; if (argc < 2) return usage(argv[0]); for (a = 1; a < argc; ++a) { filepath = argv[a]; printf("loading %s... 
", filepath); pdf = ppdoc_load(filepath); if (pdf == NULL) { printf("failed\n"); continue; } printf("done.\n"); print_info(pdf); ppdoc_free(pdf); /* now loading from memory buffer */ printf("loading %s from mem buffer... ", filepath); data = iof_copy_file_data(filepath, &size); if (data != NULL) { pdf = ppdoc_mem(data, size); if (pdf == NULL) { printf("failed\n"); continue; } printf("done.\n"); //print_info(pdf); ppdoc_free(pdf); } } return 0; } luametatex-2.10.08/source/libraries/pplib/pptest2.c000066400000000000000000000110511442250314700221620ustar00rootroot00000000000000 #include #include #include "ppapi.h" /* static const char * get_file_name (const char *path) { const char *fn, *p; for (fn = p = path; *p != '\0'; ++p) if (*p == '\\' || *p == '/') fn = p + 1; return fn; } */ static void box_info (ppdict *pagedict, FILE *fh) { const char *boxes[] = {"MediaBox", "CropBox", "BleedBox", "TrimBox", "ArtBox"}; pprect rect; size_t i; for (i = 0; i < sizeof(boxes) / sizeof(const char *); ++i) if (ppdict_get_box(pagedict, boxes[i], &rect)) fprintf(fh, "%%%% %s [%f %f %f %f]\n", boxes[i], rect.lx, rect.ly, rect.rx, rect.ry); } static int usage (const char *argv0) { printf("pplib " pplib_version ", " pplib_author "\n"); printf("usage: %s file1.pdf file2.pdf ...\n", argv0); printf(" %s file.pdf -u userpassword\n", argv0); printf(" %s file.pdf -o ownerpassword\n", argv0); printf(" %s file.pdf -p bothpasswords\n", argv0); return 0; } static void log_callback (const char *message, void *alien) { fprintf((FILE *)alien, "\nooops: %s\n", message); } static const char * get_next_argument (const char *opt, int *a, int argc, const char **argv) { const char *next; if ((*a) + 2 < argc) { next = argv[*a + 1]; if (strcmp(next, opt) == 0) { *a += 2; return argv[*a]; } } return NULL; } int main (int argc, const char **argv) { const char *filepath, *password; int a; ppdoc *pdf; ppcrypt_status cryptstatus; ppref *pageref; ppdict *pagedict; int pageno; char outname[1024]; FILE *fh; 
ppstream *stream; uint8_t *data; size_t size; ppcontext *context; ppobj *obj; ppname *op; size_t operators; if (argc < 2) return usage(argv[0]); ppstream_init_buffers(); pplog_callback(log_callback, stderr); context = ppcontext_new(); for (a = 1; a < argc; ++a) { /* load */ filepath = argv[a]; printf("loading %s... ", filepath); pdf = ppdoc_load(filepath); if (pdf == NULL) { printf("failed\n"); continue; } printf("done\n"); /* decrypt */ if ((password = get_next_argument("-u", &a, argc, argv)) != NULL) cryptstatus = ppdoc_crypt_pass(pdf, password, strlen(password), NULL, 0); else if ((password = get_next_argument("-o", &a, argc, argv)) != NULL) cryptstatus = ppdoc_crypt_pass(pdf, NULL, 0, password, strlen(password)); else if ((password = get_next_argument("-p", &a, argc, argv)) != NULL) cryptstatus = ppdoc_crypt_pass(pdf, password, strlen(password), password, strlen(password)); else cryptstatus = ppdoc_crypt_status(pdf); switch (cryptstatus) { case PPCRYPT_NONE: break; case PPCRYPT_DONE: printf("opened with password '%s'\n", password != NULL ? 
password : ""); break; case PPCRYPT_PASS: printf("invalid password\n"); ppdoc_free(pdf); continue; case PPCRYPT_FAIL: printf("invalid encryption\n"); ppdoc_free(pdf); continue; } /* process */ sprintf(outname, "%s.out", filepath); fh = fopen(outname, "wb"); if (fh == NULL) { printf("can't open %s for writing\n", outname); continue; } for (pageref = ppdoc_first_page(pdf), pageno = 1; pageref != NULL; pageref = ppdoc_next_page(pdf), ++pageno) { pagedict = pageref->object.dict; /* decompress contents data */ fprintf(fh, "%%%% PAGE %d\n", pageno); box_info(pagedict, fh); for (stream = ppcontents_first(pagedict); stream != NULL; stream = ppcontents_next(pagedict, stream)) { for (data = ppstream_first(stream, &size, 1); data != NULL; data = ppstream_next(stream, &size)) fwrite(data, size, 1, fh); ppstream_done(stream); } /* now parse contents */ for (stream = ppcontents_first(pagedict); stream != NULL; stream = ppcontents_next(pagedict, stream)) { operators = 0; for (obj = ppcontents_first_op(context, stream, &size, &op); obj != NULL; obj = ppcontents_next_op(context, stream, &size, &op)) ++operators; fprintf(fh, "%%%% OPERATORS count %lu\n", (unsigned long)operators); ppstream_done(stream); //obj = ppcontents_parse(context, stream, &size); //fprintf(fh, "%%%% items count %lu\n", (unsigned long)size); fprintf(fh, "\n"); } ppcontext_done(context); } fclose(fh); ppdoc_free(pdf); } ppcontext_free(context); ppstream_free_buffers(); return 0; } luametatex-2.10.08/source/libraries/pplib/pptest3.c000066400000000000000000000061051442250314700221670ustar00rootroot00000000000000 #include //#include "ppapi.h" #include "pplib.h" #include "assert.h" static int usage (const char *argv0) { printf("pplib " pplib_version ", " pplib_author "\n"); printf("usage: %s file1.pdf file2.pdf ...\n", argv0); return 0; } static void print_result_filter (ppstream *stream, int decode) { ppstream_filter info; size_t i; ppstream_filter_info(stream, &info, decode); printf(" when %s: /Filter [", decode ? 
"uncompressed" : "compressed"); for (i = 0; i < info.count; ++i) printf(" /%s", ppstream_filter_name[info.filters[i]]); printf(" ]"); if (info.params != NULL) { printf(" /DecodeParms ["); for (i = 0; i < info.count; ++i) printf(" %s", info.params[i] != NULL ? "<<...>>" : "null"); printf(" ]"); } printf("\n"); } static void print_stream_info (ppref *ref, ppstream *stream) { size_t length; printf("object %lu %lu R\n", (unsigned long)ref->number, (unsigned long)ref->version); if (stream->flags & PPSTREAM_FILTER) printf(" filtered "); else printf(" plain "); if (stream->flags & PPSTREAM_IMAGE) printf("image "); else printf("stream "); if (stream->flags & PPSTREAM_ENCRYPTED) printf("encrypted "); if (stream->flags & PPSTREAM_NOT_SUPPORTED) printf("invalid "); if (!ppdict_rget_uint(stream->dict, "Length", &length)) length = 0; assert(stream->length == length); printf("length %lu (/Length %lu)\n", (unsigned long)stream->length, (unsigned long)length); print_result_filter(stream, 0); print_result_filter(stream, 1); } static void check_stream_chunks (ppstream *stream) { size_t sum, size; uint8_t *data; const int decode[2] = {0, 1}; int d; for (d = 0; d < 2; ++d) { for (sum = 0, data = ppstream_first(stream, &size, decode[d]); data != NULL; data = ppstream_next(stream, &size)) sum += size; ppstream_done(stream); ppstream_all(stream, &size, decode[d]); ppstream_done(stream); assert(sum == size); printf(" %s chunks size [%lu]\n", (decode[d] ? "decoded" : "raw"), (unsigned long)size); } } #define USE_BUFFERS_POOL 1 int main (int argc, const char **argv) { const char *filepath; int a; ppdoc *pdf; ppxref *xref; ppxsec *xsec; size_t xi; ppuint refnum; ppref *ref; if (argc < 2) return usage(argv[0]); if (USE_BUFFERS_POOL) ppstream_init_buffers(); for (a = 1; a < argc; ++a) { filepath = argv[a]; printf("loading %s... 
", filepath); pdf = ppdoc_load(filepath); if (pdf == NULL) { printf("failed\n"); continue; } printf("done.\n"); for (xref = ppdoc_xref(pdf); xref != NULL; xref = ppxref_prev(xref)) { for (xi = 0, xsec = xref->sects; xi < xref->size; ++xi, ++xsec) { for (refnum = xsec->first, ref = xsec->refs; refnum <= xsec->last; ++refnum, ++ref) { if (ref->object.type != PPSTREAM) continue; print_stream_info(ref, ref->object.stream); check_stream_chunks(ref->object.stream); } } } ppdoc_free(pdf); } if (USE_BUFFERS_POOL) ppstream_free_buffers(); return 0; } luametatex-2.10.08/source/libraries/pplib/ppxref.c000066400000000000000000000132631442250314700220740ustar00rootroot00000000000000 #include "pplib.h" #define PPXREF_MAP_INIT 16 // number of xref sections ppxref * ppxref_create (ppdoc *pdf, size_t initsize, size_t xrefoffset) { ppxref *xref; if (initsize == 0) // unknown initsize = PPXREF_MAP_INIT; xref = (ppxref *)ppstruct_take(&pdf->heap, sizeof(ppxref)); xref->sects = (ppxsec *)ppstruct_take(&pdf->heap, initsize * sizeof(ppxsec)); xref->size = 0; xref->space = initsize; xref->count = 0; xref->trailer.type = PPNONE; xref->trailer.dict = NULL; xref->prev = NULL; xref->pdf = pdf; xref->offset = xrefoffset; return xref; } ppxsec * ppxref_push_section (ppxref *xref, ppheap *heap) { ppxsec *sects; if (xref->size < xref->space) return &xref->sects[xref->size++]; xref->space <<= 1; sects = xref->sects; xref->sects = (ppxsec *)ppstruct_take(heap, xref->space * sizeof(ppxsec)); // waste but rare memcpy(xref->sects, sects, xref->size * sizeof(ppxsec)); return &xref->sects[xref->size++]; } /* When loading xref table, we don't know how many sections is there. We assume 16, which is more than usual (waste). But if there is more, we double the size, wasting again. This could be made better with a dedicated allocator for xref sections (heap or generic malloc). Or an ephemeric malloced c-array stored in heap once ready (analogical to stack used for dicts/arrays). 
For xref streams we have explicit num of sections. */ /* void ppxref_done_sections (ppxref *xref, ppheap *heap) { // if xref->sects was initialized with mallocted array we could do ppxsec *sects; size_t size; sects = xref->sects; size = xref->size * sizeof(ppxsec); xref->sects = (ppxsec *)ppstruct_take(heap, size); memcpy(xref->sects, sects, size); pp_free(sects); xref->space = xref->size; } */ static void ppxref_sort_sects (ppxsec *left, ppxsec *right) { ppxsec *l, *r, *m, t; ppuint first, last; l = left, r = right, m = l + ((r - l) / 2); first = m->first, last = m->last; do { // don't take first/last from pointer while (l->first < first) ++l; while (r->first > last) --r; if (l <= r) { t = *l; *l = *r; *r = t; ++l, --r; } } while (l <= r); if (l < right) ppxref_sort_sects(l, right); if (r > left) ppxref_sort_sects(left, r); } int ppxref_sort (ppxref *xref) { if (xref->size == 0) return 0; ppxref_sort_sects(xref->sects, xref->sects + xref->size - 1); return 1; } ppref * ppxref_find_local (ppxref *xref, ppuint refnumber) { ppxsec *left, *right, *mid; //if (xref->size == 0) // we don't allow that // return NULL; left = xref->sects; right = xref->sects + xref->size - 1; do { mid = left + ((right - left) / 2); if (refnumber > mid->last) left = mid + 1; else if (refnumber < mid->first) right = mid - 1; else return &mid->refs[refnumber - mid->first]; } while (left <= right); return NULL; } /* PJ 20180910 So far we were resolving references in the context of the current xref: - if a given object is found in this xref, than this is the object - otherwise older xrefs are queried in order - only in linearized documents older body may refer to object from newer xref Hans sent a document where an incremental update (newer body) has only an updated page object (plus /Metadata and /Info), but /Root (catalog) and /Pages dict refs are defined only in the older body. 
If we resolve references using the approach so far, we actually drop the update; newer objects are parsed and linked to the newest xref, but never linked to objects tree. Assuming we will never need to interpret older versions, makes sense to assume, that the newest object version is always the correct version. */ #if 0 ppref * ppxref_find (ppxref *xref, ppuint refnumber) { ppref *ref; ppxref *other; if ((ref = ppxref_find_local(xref, refnumber)) != NULL) return ref; if (xref->pdf->flags & PPDOC_LINEARIZED) { for (other = xref->pdf->xref; other != NULL; other = other->prev) if (other != xref && (ref = ppxref_find_local(other, refnumber)) != NULL) return ref; } else { for (other = xref->prev; other != NULL; other = other->prev) if ((ref = ppxref_find_local(other, refnumber)) != NULL) return ref; /* This shouldn't happen, but I've met documents that have no linearized dict, but their xrefs are prepared as for linearized; with "older" xrefs referring to "newer". */ for (other = xref->pdf->xref; other != NULL && other != xref; other = other->prev) if ((ref = ppxref_find_local(other, refnumber)) != NULL) return ref; } return NULL; } #else ppref * ppxref_find (ppxref *xref, ppuint refnumber) { ppref *ref; ppxref *other; for (other = xref->pdf->xref; other != NULL; other = other->prev) if ((ref = ppxref_find_local(other, refnumber)) != NULL) return ref; return NULL; } #endif ppdict * ppxref_trailer (ppxref *xref) { switch (xref->trailer.type) { case PPDICT: return xref->trailer.dict; case PPSTREAM: return xref->trailer.stream->dict; default: break; } return NULL; } ppxref * ppdoc_xref (ppdoc *pdf) { return pdf->xref; } ppxref * ppxref_prev (ppxref *xref) { return xref->prev; } ppdict * ppxref_catalog (ppxref *xref) { ppdict *trailer; return (trailer = ppxref_trailer(xref)) != NULL ? ppdict_rget_dict(trailer, "Root") : NULL; } ppdict * ppxref_info (ppxref *xref) { ppdict *trailer; return (trailer = ppxref_trailer(xref)) != NULL ? 
ppdict_rget_dict(trailer, "Info") : NULL; } ppref * ppxref_pages (ppxref *xref) { ppdict *dict; ppref *ref; if ((dict = ppxref_catalog(xref)) == NULL || (ref = ppdict_get_ref(dict, "Pages")) == NULL) return NULL; return ref->object.type == PPDICT ? ref : NULL; } luametatex-2.10.08/source/libraries/pplib/ppxref.h000066400000000000000000000024551442250314700221020ustar00rootroot00000000000000 #ifndef PP_XREF_H #define PP_XREF_H /* What we call xref is actually "xref section" in PDF spec and what we call section is "xref subsection". Our ppxref is a list of sections, sorted by xrefsection->first and xrefsection->last bounds. Every section keeps a list of ppref *refs, enumerated from xrefsection->first to xrefsection->last. To find a reference by number we make a binary search over sections bounds, then jump to the proper ppref *ref. */ typedef struct { ppuint first; // first reference number in section ppuint last; // last reference number in section ppref *refs; // references list } ppxsec; struct ppxref { ppxsec *sects; // subsections list size_t size; // actual sections size size_t space; // available sections space ppobj trailer; // trailer dict or stream ppuint count; // count of references in all sections ppxref *prev; // previous xref ppdoc *pdf; // parent pdf to access entries in linearized docs size_t offset; // file offset of xref //ppcrypt *crypt; // per xref encryption state? 
}; ppxref * ppxref_create (ppdoc *pdf, size_t initsize, size_t xrefoffset); ppxsec * ppxref_push_section (ppxref *xref, ppheap *heap); int ppxref_sort (ppxref *xref); ppref * ppxref_find_local (ppxref *xref, ppuint refnumber); #endifluametatex-2.10.08/source/libraries/pplib/readme.txt000066400000000000000000000001221442250314700224100ustar00rootroot00000000000000This is (to be) added to util/utilflate.c: # include "../../utilities/auxzlib.h" luametatex-2.10.08/source/libraries/pplib/util/000077500000000000000000000000001442250314700213745ustar00rootroot00000000000000luametatex-2.10.08/source/libraries/pplib/util/README.md000066400000000000000000000006261442250314700226570ustar00rootroot00000000000000# pplib util This part is a toolbox. Contains utilities that are used by `pplib` but aren't tightly related to `PDF`. I use the toolbox in different projects and repos. It is important to me to keep this part in a perfect sync, at the cost of some redundant code (not used in `pplib`). `pplib` is hopefully not a subject for eternal development, so once it become final, we will make some cleanups here. luametatex-2.10.08/source/libraries/pplib/util/utilbasexx.c000066400000000000000000001265001442250314700237340ustar00rootroot00000000000000 #include "utilnumber.h" #include "utilmem.h" #include "utilbasexx.h" /* filters state structs */ struct basexx_state { size_t line, maxline; size_t left; int tail[5]; int flush; }; struct runlength_state { int run; int flush; int c1, c2; uint8_t *pos; }; typedef union { basexx_state *basexxstate; runlength_state *runlengthstate; void *voidstate; } basexx_state_pointer; // to avoid 'dereferencing type-puned ...' 
warnings /* config */ #if defined(BASEXX_PDF) # define ignored(c) (c == 0x20 || c == 0x0A || c == 0x0C || c == 0x0D || c == 0x09 || c == 0x00) # define base16_eof(c) (c == '>' || c < 0) # define base85_eof(c) (c == '~' || c < 0) #else # define ignored(c) (c == 0x20 || c == 0x0A || c == 0x0D || c == 0x09) # define base16_eof(c) (c < 0) # define base85_eof(c) (c < 0) #endif #define base64_eof(c) (c == '=' || c < 0) #define basexx_nl '\x0A' //#define put_nl(O, line, maxline, n) ((void)((line += n) > maxline && ((line = n), iof_set(O, basexx_nl)))) // assignment in conditional warning #define put_nl(O, line, maxline, n) do { line += n; if (line > maxline) { line = n; iof_set(O, basexx_nl); }} while (0) /* tail macros */ #define set_tail1(state, c1) (state->left = 1, state->tail[0] = c1) #define set_tail2(state, c1, c2) (state->left = 2, state->tail[0] = c1, state->tail[1] = c2) #define set_tail3(state, c1, c2, c3) (state->left = 3, state->tail[0] = c1, state->tail[1] = c2, state->tail[2] = c3) #define set_tail4(state, c1, c2, c3, c4) (state->left = 4, state->tail[0] = c1, state->tail[1] = c2, state->tail[2] = c3, state->tail[3] = c4) #define set_tail5(state, c1, c2, c3, c4, c5) \ (state->left = 5, state->tail[0] = c1, state->tail[1] = c2, state->tail[2] = c3, state->tail[3] = c4, state->tail[4] = c5) #define get_tail1(state, c1) (state->left = 0, c1 = state->tail[0]) #define get_tail2(state, c1, c2) (state->left = 0, c1 = state->tail[0], c2 = state->tail[1]) #define get_tail3(state, c1, c2, c3) (state->left = 0, c1 = state->tail[0], c2 = state->tail[1], c3 = state->tail[2]) #define get_tail4(state, c1, c2, c3, c4) (state->left = 0, c1 = state->tail[0], c2 = state->tail[1], c3 = state->tail[2], c4 = state->tail[3]) /* basexx state initialization */ void basexx_state_init_ln (basexx_state *state, size_t line, size_t maxline) { state->line = line; state->maxline = maxline; state->left = 0; state->flush = 0; } /* base 16; xxxx|xxxx */ iof_status base16_encoded_uc (const 
void *data, size_t size, iof *O) { const uint8_t *s, *e; for (s = (const uint8_t *)data, e = s + size; s < e; ++s) { if (!iof_ensure(O, 2)) return IOFFULL; iof_set_uc_hex(O, *s); } return IOFEOF; } iof_status base16_encoded_lc (const void *data, size_t size, iof *O) { const uint8_t *s, *e; for (s = (const uint8_t *)data, e = s + size; s < e; ++s) { if (!iof_ensure(O, 2)) return IOFFULL; iof_set_lc_hex(O, *s); } return IOFEOF; } iof_status base16_encoded_uc_ln (const void *data, size_t size, iof *O, size_t line, size_t maxline) { const uint8_t *s, *e; for (s = (const uint8_t *)data, e = s + size; s < e; ++s) { if (!iof_ensure(O, 3)) return IOFFULL; put_nl(O, line, maxline, 2); iof_set_uc_hex(O, *s); } return IOFFULL; } iof_status base16_encoded_lc_ln (const void *data, size_t size, iof *O, size_t line, size_t maxline) { const uint8_t *s, *e; for (s = (const uint8_t *)data, e = s + size; s < e; ++s) { if (!iof_ensure(O, 3)) return IOFFULL; put_nl(O, line, maxline, 2); iof_set_lc_hex(O, *s); } return IOFFULL; } iof_status base16_encode_uc (iof *I, iof *O) { register int c; while (iof_ensure(O, 2)) { if ((c = iof_get(I)) < 0) return IOFEOF; iof_set_uc_hex(O, c); } return IOFFULL; } iof_status base16_encode_state_uc (iof *I, iof *O, basexx_state *state) { register int c; while (iof_ensure(O, 2)) { if ((c = iof_get(I)) < 0) return (state->flush ? IOFEOF : IOFEMPTY); iof_set_uc_hex(O, c); } return IOFFULL; } iof_status base16_encode_lc (iof *I, iof *O) { register int c; while (iof_ensure(O, 2)) { if ((c = iof_get(I)) < 0) return IOFEOF; iof_set_lc_hex(O, c); } return IOFFULL; } iof_status base16_encode_state_lc (iof *I, iof *O, basexx_state *state) { register int c; while (iof_ensure(O, 2)) { if ((c = iof_get(I)) < 0) return (state->flush ? 
IOFEOF : IOFEMPTY); iof_set_lc_hex(O, c); } return IOFFULL; } iof_status base16_encode_uc_ln (iof *I, iof *O, size_t line, size_t maxline) { register int c; while (iof_ensure(O, 3)) { if ((c = iof_get(I)) < 0) return IOFEOF; put_nl(O, line, maxline, 2); iof_set_uc_hex(O, c); } return IOFFULL; } iof_status base16_encode_state_uc_ln (iof *I, iof *O, basexx_state *state) { register int c; while (iof_ensure(O, 3)) { if ((c = iof_get(I)) < 0) return (state->flush ? IOFEOF : IOFEMPTY); put_nl(O, state->line, state->maxline, 2); iof_set_uc_hex(O, c); } return IOFFULL; } iof_status base16_encode_lc_ln (iof *I, iof *O, size_t line, size_t maxline) { register int c; while (iof_ensure(O, 3)) { if ((c = iof_get(I)) < 0) return IOFEOF; put_nl(O, line, maxline, 2); iof_set_lc_hex(O, c); } return IOFFULL; } iof_status base16_encode_state_lc_ln (iof *I, iof *O, basexx_state *state) { register int c; while (iof_ensure(O, 3)) { if ((c = iof_get(I)) < 0) return (state->flush ? IOFEOF : IOFEMPTY); put_nl(O, state->line, state->maxline, 2); iof_set_lc_hex(O, c); } return IOFFULL; } int base16_getc (iof *I) { register int c1, c2; do { c1 = iof_get(I); } while (ignored(c1)); if (base16_eof(c1)) return IOFEOF; do { c2 = iof_get(I); } while (ignored(c2)); if (base16_eof(c2)) { if ((c1 = base16_value(c1)) < 0) return IOFERR; return c1<<4; } if ((c1 = base16_value(c1)) < 0 || (c2 = base16_value(c2)) < 0) return IOFERR; return (c1<<4)|c2; } int base16_lc_putc (iof *O, int c) { if (iof_ensure(O, 2)) iof_set_lc_hex(O, c); return IOFFULL; } int base16_uc_putc (iof *O, int c) { if (iof_ensure(O, 2)) iof_set_uc_hex(O, c); return IOFFULL; } iof_status base16_decode (iof *I, iof *O) { register int c1, c2; while (iof_ensure(O, 1)) { do { c1 = iof_get(I); } while (ignored(c1)); if (base16_eof(c1)) return IOFEOF; do { c2 = iof_get(I); } while (ignored(c2)); if (base16_eof(c2)) { if ((c1 = base16_value(c1)) < 0) return IOFERR; iof_set(O, c1<<4); // c2 := '0' return IOFEOF; } if ((c1 = base16_value(c1)) 
< 0 || (c2 = base16_value(c2)) < 0) return IOFERR; iof_set(O, (c1<<4)|c2); } return IOFFULL; } iof_status base16_decode_state (iof *I, iof *O, basexx_state *state) { register int c1, c2, d1, d2; if (!(iof_ensure(O, 1))) return IOFFULL; switch(state->left) { case 0: goto byte0; case 1: get_tail1(state, c1); goto byte1; } while (iof_ensure(O, 1)) { byte0: do { c1 = iof_get(I); } while (ignored(c1)); if (base16_eof(c1)) return (state->flush ? IOFEOF : IOFEMPTY); byte1: do { c2 = iof_get(I); } while (ignored(c2)); if (base16_eof(c2)) { set_tail1(state, c1); /* set tail to let the caller display invalid chars */ if (state->flush) { if ((c1 = base16_value(c1)) < 0) return IOFERR; iof_set(O, c1<<4); // c2 := '0' return IOFEOF; } return IOFEMPTY; } if ((d1 = base16_value(c1)) < 0 || (d2 = base16_value(c2)) < 0) { set_tail2(state, c1, c2); return IOFERR; } iof_set(O, (d1<<4)|d2); } return IOFFULL; } /* base 64; xxxxxx|xx xxxx|xxxx xx|xxxxxx */ const char base64_alphabet[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; const int base64_lookup[] = { -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,62,-1,-1,-1,63, 52,53,54,55,56,57,58,59,60,61,-1,-1,-1,-1,-1,-1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8,9 ,10,11,12,13,14, 15,16,17,18,19,20,21,22,23,24,25,-1,-1,-1,-1,-1, -1,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40, 41,42,43,44,45,46,47,48,49,50,51,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 }; #define base64_value(c) base64_lookup[(uint8_t)(c)] #define base64_digit1(c1) base64_alphabet[c1>>2] #define base64_digit2(c1, c2) 
base64_alphabet[((c1&3)<<4)|(c2>>4)] #define base64_digit3(c2, c3) base64_alphabet[((c2&15)<<2)|(c3>>6)] #define base64_digit4(c3) base64_alphabet[c3&63] #define base64_encode_word(O, c1, c2, c3) \ iof_set4(O, base64_digit1(c1), base64_digit2(c1, c2), base64_digit3(c2, c3), base64_digit4(c3)) #define base64_encode_tail2(O, c1, c2) \ iof_set3(O, base64_digit1(c1), base64_digit2(c1, c2), base64_digit3(c2, 0)) #define base64_encode_tail1(O, c1) \ iof_set2(O, base64_digit1(c1), base64_digit2(c1, 0)) iof_status base64_encoded (const void *data, size_t size, iof *O) { const uint8_t *s, *e; uint8_t c1, c2, c3; for (s = (const uint8_t *)data, e = s + size; s + 2 < e; ) { if (!iof_ensure(O, 4)) return IOFFULL; c1 = *s++; c2 = *s++; c3 = *s++; base64_encode_word(O, c1, c2, c3); } switch (e - s) { case 0: break; case 1: if (!iof_ensure(O, 2)) return IOFFULL; c1 = *s; base64_encode_tail1(O, c1); break; case 2: if (!iof_ensure(O, 3)) return IOFFULL; c1 = *s++; c2 = *s; base64_encode_tail2(O, c1, c2); break; } return IOFEOF; } iof_status base64_encoded_ln (const void *data, size_t size, iof *O, size_t line, size_t maxline) { const uint8_t *s, *e; uint8_t c1, c2, c3; for (s = (const uint8_t *)data, e = s + size; s + 2 < e; ) { if (!iof_ensure(O, 5)) return IOFFULL; c1 = *s++; c2 = *s++; c3 = *s++; put_nl(O, line, maxline, 4); base64_encode_word(O, c1, c2, c3); } switch (e - s) { case 0: break; case 1: if (!iof_ensure(O, 3)) return IOFFULL; c1 = *s; put_nl(O, line, maxline, 2); base64_encode_tail1(O, c1); break; case 2: if (!iof_ensure(O, 4)) return IOFFULL; c1 = *s++; c2 = *s; put_nl(O, line, maxline, 3); base64_encode_tail2(O, c1, c2); break; } return IOFEOF; } iof_status base64_encode (iof *I, iof *O) { register int c1, c2, c3; while(iof_ensure(O, 4)) { if ((c1 = iof_get(I)) < 0) return IOFEOF; if ((c2 = iof_get(I)) < 0) { base64_encode_tail1(O, c1); return IOFEOF; } if ((c3 = iof_get(I)) < 0) { base64_encode_tail2(O, c1, c2); return IOFEOF; } base64_encode_word(O, c1, c2, c3); 
} return IOFFULL; } iof_status base64_encode_state (iof *I, iof *O, basexx_state *state) { register int c1, c2, c3; if (!(iof_ensure(O, 4))) return IOFFULL; switch(state->left) { case 0: goto byte0; case 1: get_tail1(state, c1); goto byte1; case 2: get_tail2(state, c1, c2); goto byte2; } while(iof_ensure(O, 4)) { byte0: if ((c1 = iof_get(I)) < 0) return (state->flush ? IOFEOF : IOFEMPTY); byte1: if ((c2 = iof_get(I)) < 0) return (state->flush ? (base64_encode_tail1(O, c1), IOFEOF) : (set_tail1(state, c1), IOFEMPTY)); byte2: if ((c3 = iof_get(I)) < 0) return (state->flush ? (base64_encode_tail2(O, c1, c2), IOFEOF) : (set_tail2(state, c1, c2), IOFEMPTY)); base64_encode_word(O, c1, c2, c3); } return IOFFULL; } iof_status base64_encode_ln (iof *I, iof *O, size_t line, size_t maxline) { register int c1, c2, c3; while(iof_ensure(O, 5)) { if ((c1 = iof_get(I)) < 0) return IOFEOF; if ((c2 = iof_get(I)) < 0) { put_nl(O, line, maxline, 2); base64_encode_tail1(O, c1); return IOFEOF; } if ((c3 = iof_get(I)) < 0) { put_nl(O, line, maxline, 3); base64_encode_tail2(O, c1, c2); return IOFEOF; } put_nl(O, line, maxline, 4); base64_encode_word(O, c1, c2, c3); } return IOFFULL; } iof_status base64_encode_state_ln (iof *I, iof *O, basexx_state *state) { register int c1, c2, c3; if (!(iof_ensure(O, 5))) return IOFFULL; switch(state->left) { case 0: goto byte0; case 1: get_tail1(state, c1); goto byte1; case 2: get_tail2(state, c1, c2); goto byte2; } while(iof_ensure(O, 5)) { byte0: if ((c1 = iof_get(I)) < 0) return (state->flush ? 
IOFEOF : IOFEMPTY); byte1: if ((c2 = iof_get(I)) < 0) { if (state->flush) { put_nl(O, state->line, state->maxline, 2); base64_encode_tail1(O, c1); return IOFEOF; } set_tail1(state, c1); return IOFEMPTY; } byte2: if ((c3 = iof_get(I)) < 0) { if (state->flush) { put_nl(O, state->line, state->maxline, 3); base64_encode_tail2(O, c1, c2); return IOFEOF; } set_tail2(state, c1, c2); return IOFEMPTY; } put_nl(O, state->line, state->maxline, 4); base64_encode_word(O, c1, c2, c3); } return IOFFULL; } // #define base64_code(c1, c2, c3, c4) ((c1<<18)|(c2<<12)|(c3<<6)|c4) #define base64_decode_word(O, c1, c2, c3, c4) \ iof_set3(O, (c1<<2)|(c2>>4), ((c2&15)<<4)|(c3>>2), ((c3&3)<<6)|c4) #define base64_decode_tail3(O, c1, c2, c3) \ iof_set2(O, (c1<<2)|(c2>>4), ((c2&15)<<4)|(c3>>2)) #define base64_decode_tail2(O, c1, c2) \ iof_set(O, (c1<<2)|(c2>>4)) iof_status base64_decode (iof *I, iof *O) { register int c1, c2, c3, c4; while(iof_ensure(O, 3)) { do { c1 = iof_get(I); } while (ignored(c1)); if (base64_eof(c1)) return IOFEOF; do { c2 = iof_get(I); } while (ignored(c2)); if (base64_eof(c2)) return IOFERR; do { c3 = iof_get(I); } while (ignored(c3)); if (base64_eof(c3)) { if ((c1 = base64_value(c1)) < 0 || (c2 = base64_value(c2)) < 0) return IOFERR; base64_decode_tail2(O, c1, c2); return IOFEOF; } do { c4 = iof_get(I); } while (ignored(c4)); if (base64_eof(c4)) { if ((c1 = base64_value(c1)) < 0 || (c2 = base64_value(c2)) < 0 || (c3 = base64_value(c3)) < 0) return IOFERR; base64_decode_tail3(O, c1, c2, c3); return IOFEOF; } if ((c1 = base64_value(c1)) < 0 || (c2 = base64_value(c2)) < 0 || (c3 = base64_value(c3)) < 0 || (c4 = base64_value(c4)) < 0) return IOFERR; base64_decode_word(O, c1, c2, c3, c4); } return IOFFULL; } iof_status base64_decode_state (iof *I, iof *O, basexx_state *state) { register int c1, c2, c3, c4; register int d1, d2, d3, d4; switch(state->left) { case 0: goto byte0; case 1: get_tail1(state, c1); goto byte1; case 2: get_tail2(state, c1, c2); goto byte2; case 3: 
get_tail3(state, c1, c2, c3); goto byte3; } while(iof_ensure(O, 3)) { byte0: do { c1 = iof_get(I); } while (ignored(c1)); if (base64_eof(c1)) return (state->flush ? IOFEOF : IOFEMPTY); byte1: do { c2 = iof_get(I); } while (ignored(c2)); if (base64_eof(c2)) { set_tail1(state, c1); /* set tail to let the caller make padding or display invalid char in case of error */ return (state->flush ? IOFERR : IOFEMPTY); /* if state->flush then error; tail must have at least two bytes */ } byte2: do { c3 = iof_get(I); } while (ignored(c3)); if (base64_eof(c3)) { set_tail2(state, c1, c2); if (state->flush) { if ((c1 = base64_value(c1)) < 0 || (c2 = base64_value(c2)) < 0) return IOFERR; base64_decode_tail2(O, c1, c2); return IOFEOF; } else return IOFEMPTY; } byte3: do { c4 = iof_get(I); } while (ignored(c4)); if (base64_eof(c4)) { set_tail3(state, c1, c2, c3); if (state->flush) { if ((c1 = base64_value(c1)) < 0 || (c2 = base64_value(c2)) < 0 || (c3 = base64_value(c3)) < 0) return IOFERR; base64_decode_tail3(O, c1, c2, c3); return IOFEOF; } else return IOFEMPTY; } if ((d1 = base64_value(c1)) < 0 || (d2 = base64_value(c2)) < 0 || (d3 = base64_value(c3)) < 0 || (d4 = base64_value(c4)) < 0) { set_tail4(state, c1, c2, c3, c4); return IOFERR; } base64_decode_word(O, d1, d2, d3, d4); } return IOFFULL; } /* base85 */ const char base85_alphabet[] = "!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstu"; /* for completness, not used below */ const int base85_lookup[] = { -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14, 15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30, 31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46, 47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62, 63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78, 79,80,81,82,83,84,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 }; #define base85_value(c) base85_lookup[(uint8_t)(c)] #define base85_encode_word(O, code) \ (*(O->pos+4) = '!' + code%85, code /= 85, *(O->pos+3) = '!' + code%85, code /= 85, \ *(O->pos+2) = '!' + code%85, code /= 85, *(O->pos+1) = '!' + code%85, code /= 85, \ *(O->pos) = '!' + code, \ O->pos += 5) #define base85_encode_tail3(O, code) \ (*(O->pos+3) = '!' + code%85, code /= 85, *(O->pos+2) = '!' + code%85, code /= 85, \ *(O->pos+1) = '!' + code%85, code /= 85, *(O->pos) = '!' + code, \ O->pos += 4) #define base85_encode_tail2(O, code) \ (*(O->pos+2) = '!' + code%85, code /= 85, *(O->pos+1) = '!' + code%85, code /= 85, \ *(O->pos) = '!' + code, \ O->pos += 3) #define base85_encode_tail1(O, code) \ (*(O->pos+1) = '!' + code%85, code /= 85, *(O->pos) = '!' 
+ code, \ O->pos += 2) iof_status base85_encoded (const void *data, size_t size, iof *O) { unsigned int code; const uint8_t *s, *e; uint8_t c1, c2, c3, c4; for (s = (const uint8_t *)data, e = s + size; s + 3 < e; ) { if (!iof_ensure(O, 5)) return IOFFULL; c1 = *s++; c2 = *s++; c3 = *s++; c4 = *s++; code = (c1<<24)|(c2<<16)|(c3<<8)|c4; if (code == 0) { iof_set(O, 'z'); continue; } base85_encode_word(O, code); } switch (e - s) { case 0: break; case 1: if (!iof_ensure(O, 2)) return IOFFULL; c1 = *s; code = (c1<<24)/85/85/85; base85_encode_tail1(O, code); break; case 2: if (!iof_ensure(O, 3)) return IOFFULL; c1 = *s++; c2 = *s; code = ((c1<<24)|(c2<<16))/85/85; base85_encode_tail2(O, code); break; case 3: if (!iof_ensure(O, 4)) return IOFFULL; c1 = *s++; c2 = *s++; c3 = *s; code = ((c1<<24)|(c2<<16)|(c3<<8))/85; base85_encode_tail3(O, code); break; } return IOFEOF; } iof_status base85_encoded_ln (const void *data, size_t size, iof *O, size_t line, size_t maxline) { unsigned int code; const uint8_t *s, *e; uint8_t c1, c2, c3, c4; for (s = (const uint8_t *)data, e = s + size; s + 3 < e; ) { if (!iof_ensure(O, 6)) return IOFFULL; c1 = *s++; c2 = *s++; c3 = *s++; c4 = *s++; code = (c1<<24)|(c2<<16)|(c3<<8)|c4; if (code == 0) { put_nl(O, line, maxline, 1); iof_set(O, 'z'); continue; } put_nl(O, line, maxline, 5); base85_encode_word(O, code); } switch (e - s) { case 0: break; case 1: if (!iof_ensure(O, 3)) return IOFFULL; c1 = *s; code = (c1<<24)/85/85/85; put_nl(O, line, maxline, 2); base85_encode_tail1(O, code); break; case 2: if (!iof_ensure(O, 4)) return IOFFULL; c1 = *s++; c2 = *s; code = ((c1<<24)|(c2<<16))/85/85; put_nl(O, line, maxline, 3); base85_encode_tail2(O, code); break; case 3: if (!iof_ensure(O, 5)) return IOFFULL; c1 = *s++; c2 = *s++; c3 = *s; code = ((c1<<24)|(c2<<16)|(c3<<8))/85; put_nl(O, line, maxline, 4); base85_encode_tail3(O, code); break; } return IOFEOF; } iof_status base85_encode (iof *I, iof *O) { register int c1, c2, c3, c4; register unsigned 
int code; while(iof_ensure(O, 5)) { if ((c1 = iof_get(I)) < 0) return IOFEOF; if ((c2 = iof_get(I)) < 0) { code = (c1<<24)/85/85/85; base85_encode_tail1(O, code); return IOFEOF; } if ((c3 = iof_get(I)) < 0) { code = ((c1<<24)|(c2<<16))/85/85; base85_encode_tail2(O, code); return IOFEOF; } if ((c4 = iof_get(I)) < 0) { code = ((c1<<24)|(c2<<16)|(c3<<8))/85; base85_encode_tail3(O, code); return IOFEOF; } code = (c1<<24)|(c2<<16)|(c3<<8)|c4; if (code == 0) { iof_set(O, 'z'); continue; } /* in btoa 'y' character stays for 0x20202020, but pdf does not support this */ /* if (code == 0x20202020) { iof_set(O, 'y'); continue; } */ base85_encode_word(O, code); } return IOFFULL; } iof_status base85_encode_state (iof *I, iof *O, basexx_state *state) { register int c1, c2, c3, c4; register unsigned int code; if (!(iof_ensure(O, 5))) return IOFFULL; switch(state->left) { case 0: goto byte0; case 1: get_tail1(state, c1); goto byte1; case 2: get_tail2(state, c1, c2); goto byte2; case 3: get_tail3(state, c1, c2, c3); goto byte3; } while(iof_ensure(O, 5)) { byte0: if ((c1 = iof_get(I)) < 0) return (state->flush ? 
IOFEOF : IOFEMPTY); byte1: if ((c2 = iof_get(I)) < 0) { set_tail1(state, c1); if (state->flush) { code = (c1<<24)/85/85/85; base85_encode_tail1(O, code); return IOFEOF; } return IOFEMPTY; } byte2: if ((c3 = iof_get(I)) < 0) { set_tail2(state, c1, c2); if (state->flush) { code = ((c1<<24)|(c2<<16))/85/85; base85_encode_tail2(O, code); return IOFEOF; } return IOFEMPTY; } byte3: if ((c4 = iof_get(I)) < 0) { set_tail3(state, c1, c2, c3); if (state->flush) { code = ((c1<<24)|(c2<<16)|(c3<<8))/85; base85_encode_tail3(O, code); return IOFEOF; } return IOFEMPTY; } code = (c1<<24)|(c2<<16)|(c3<<8)|c4; if (code == 0) { iof_set(O, 'z'); continue; } base85_encode_word(O, code); } return IOFFULL; } iof_status base85_encode_ln (iof *I, iof *O, size_t line, size_t maxline) { register int c1, c2, c3, c4; register unsigned int code; while(iof_ensure(O, 6)) { if ((c1 = iof_get(I)) < 0) return IOFEOF; if ((c2 = iof_get(I)) < 0) { code = (c1<<24)/85/85/85; put_nl(O, line, maxline, 2); base85_encode_tail1(O, code); return IOFEOF; } if ((c3 = iof_get(I)) < 0) { code = ((c1<<24)|(c2<<16))/85/85; put_nl(O, line, maxline, 3); base85_encode_tail2(O, code); return IOFEOF; } if ((c4 = iof_get(I)) < 0) { code = ((c1<<24)|(c2<<16)|(c3<<8))/85; put_nl(O, line, maxline, 4); base85_encode_tail3(O, code); return IOFEOF; } code = (c1<<24)|(c2<<16)|(c3<<8)|c4; if (code == 0) { put_nl(O, line, maxline, 1); iof_set(O, 'z'); continue; } put_nl(O, line, maxline, 5); base85_encode_word(O, code); } return IOFFULL; } iof_status base85_encode_state_ln (iof *I, iof *O, basexx_state *state) { register int c1, c2, c3, c4; register unsigned int code; if (!(iof_ensure(O, 6))) return IOFFULL; switch(state->left) { case 0: goto byte0; case 1: get_tail1(state, c1); goto byte1; case 2: get_tail2(state, c1, c2); goto byte2; case 3: get_tail3(state, c1, c2, c3); goto byte3; } while(iof_ensure(O, 6)) { byte0: if ((c1 = iof_get(I)) < 0) return (state->flush ? 
IOFEOF : IOFEMPTY); byte1: if ((c2 = iof_get(I)) < 0) { set_tail1(state, c1); if (state->flush) { code = (c1<<24)/85/85/85; put_nl(O, state->line, state->maxline, 2); base85_encode_tail1(O, code); return IOFEOF; } return IOFEMPTY; } byte2: if ((c3 = iof_get(I)) < 0) { set_tail2(state, c1, c2); if (state->flush) { code = ((c1<<24)|(c2<<16))/85/85; put_nl(O, state->line, state->maxline, 3); base85_encode_tail2(O, code); return IOFEOF; } return IOFEMPTY; } byte3: if ((c4 = iof_get(I)) < 0) { set_tail3(state, c1, c2, c3); if (state->flush) { code = ((c1<<24)|(c2<<16)|(c3<<8))/85; put_nl(O, state->line, state->maxline, 4); base85_encode_tail3(O, code); return IOFEOF; } return IOFEMPTY; } code = (c1<<24)|(c2<<16)|(c3<<8)|c4; if (code == 0) { put_nl(O, state->line, state->maxline, 1); iof_set(O, 'z'); continue; } put_nl(O, state->line, state->maxline, 5); base85_encode_word(O, code); } return IOFFULL; } #define base85_code(c1, c2, c3, c4, c5) ((((c1*85+c2)*85+c3)*85+c4)*85+c5) iof_status base85_decode (iof *I, iof *O) { register int c1, c2, c3, c4, c5; register unsigned int code; while (iof_ensure(O, 4)) { do { c1 = iof_get(I); } while (ignored(c1)); if (base85_eof(c1)) return IOFEOF; switch (c1) { case 'z': iof_set4(O, '\0', '\0', '\0', '\0'); continue; case 'y': iof_set4(O, ' ', ' ', ' ', ' '); continue; } do { c2 = iof_get(I); } while (ignored(c2)); if (base85_eof(c2)) return IOFERR; do { c3 = iof_get(I); } while (ignored(c3)); if (base85_eof(c3)) { if ((c1 = base85_value(c1)) < 0 || (c2 = base85_value(c2)) < 0) return IOFERR; code = base85_code(c1, c2, 84, 84, 84); /* padding with 'u' (117); 117-33 = 84 */ iof_set(O, code>>24); return IOFEOF; } do { c4 = iof_get(I); } while (ignored(c4)); if (base85_eof(c4)) { if ((c1 = base85_value(c1)) < 0 || (c2 = base85_value(c2)) < 0 || (c3 = base85_value(c3)) < 0) return IOFERR; code = base85_code(c1, c2, c3, 84, 84); iof_set2(O, code>>24, (code>>16)&255); return IOFEOF; } do { c5 = iof_get(I); } while (ignored(c5)); if 
(base85_eof(c5)) { if ((c1 = base85_value(c1)) < 0 || (c2 = base85_value(c2)) < 0 || (c3 = base85_value(c3)) < 0 || (c4 = base85_value(c4)) < 0) return IOFERR; code = base85_code(c1, c2, c3, c4, 84); iof_set3(O, code>>24, (code>>16)&255, (code>>8)&255); return IOFEOF; } if ((c1 = base85_value(c1)) < 0 || (c2 = base85_value(c2)) < 0 || (c3 = base85_value(c3)) < 0 || (c4 = base85_value(c4)) < 0 || (c5 = base85_value(c5)) < 0) return IOFERR; code = base85_code(c1, c2, c3, c4, c5); iof_set4(O, code>>24, (code>>16)&255, (code>>8)&255, code&255); } return IOFFULL; } iof_status base85_decode_state (iof *I, iof *O, basexx_state *state) { register int c1, c2, c3, c4, c5; register int d1, d2, d3, d4, d5; register unsigned int code; if (!(iof_ensure(O, 4))) return IOFFULL; switch(state->left) { case 0: goto byte0; case 1: get_tail1(state, c1); goto byte1; case 2: get_tail2(state, c1, c2); goto byte2; case 3: get_tail3(state, c1, c2, c3); goto byte3; case 4: get_tail4(state, c1, c2, c3, c4); goto byte4; } while (iof_ensure(O, 4)) { byte0: do { c1 = iof_get(I); } while (ignored(c1)); if (base85_eof(c1)) return (state->flush ? IOFEOF : IOFEMPTY); switch (c1) { case 'z': iof_set4(O, '\0', '\0', '\0', '\0'); continue; case 'y': iof_set4(O, ' ', ' ', ' ', ' '); continue; } byte1: do { c2 = iof_get(I); } while (ignored(c2)); if (base85_eof(c2)) { set_tail1(state, c1); return (state->flush ? 
IOFERR : IOFEMPTY); /* if state->flush then error; tail must have at least two bytes */ } byte2: do { c3 = iof_get(I); } while (ignored(c3)); if (base85_eof(c3)) { set_tail2(state, c1, c2); if (state->flush) { if ((c1 = base85_value(c1)) < 0 || (c2 = base85_value(c2)) < 0) return IOFERR; code = base85_code(c1, c2, 84, 84, 84); iof_set(O, code>>24); return IOFEOF; } return IOFEMPTY; } byte3: do { c4 = iof_get(I); } while (ignored(c4)); if (base85_eof(c4)) { set_tail3(state, c1, c2, c3); if (state->flush) { if ((c1 = base85_value(c1)) < 0 || (c2 = base85_value(c2)) < 0 || (c3 = base85_value(c3)) < 0) return IOFERR; code = base85_code(c1, c2, c3, 84, 84); iof_set2(O, code>>24, (code>>16)&255); return IOFEOF; } return IOFEMPTY; } byte4: do { c5 = iof_get(I); } while (ignored(c5)); if (base85_eof(c5)) { set_tail4(state, c1, c2, c3, c4); if (state->flush) { if ((c1 = base85_value(c1)) < 0 || (c2 = base85_value(c2)) < 0 || (c3 = base85_value(c3)) < 0 || (c4 = base85_value(c4)) < 0) return IOFERR; code = base85_code(c1, c2, c3, c4, 84); iof_set3(O, code>>24, (code>>16)&255, (code>>8)&255); return IOFEOF; } return IOFEMPTY; } if ((d1 = base85_value(c1)) < 0 || (d2 = base85_value(c2)) < 0 || (d3 = base85_value(c3)) < 0 || (d4 = base85_value(c4)) < 0 || (d5 = base85_value(c5)) < 0) { set_tail5(state, c1, c2, c3, c4, c5); return IOFERR; } code = base85_code(d1, d2, d3, d4, d5); iof_set4(O, code>>24, (code>>16)&255, (code>>8)&255, code&255); } return IOFFULL; } /* postscript run length */ void runlength_state_init (runlength_state *state) { state->run = -1; state->flush = 0; state->c1 = 0; state->c2 = 0; state->pos = NULL; } iof_status runlength_encode (iof *I, iof *O) { register int c1, c2, run = -1; uint8_t *pos; c1 = 0, c2 = 0; /* avoid warning */ while (iof_ensure(O, 1+128+1)) { /* ensured space for single length byte, up to 128 bytes to be copied, possible eod marker */ pos = O->pos++; switch (run) { case -1: /* initial state; get first byte */ if ((c1 = iof_get(I)) < 0) 
return (*pos = 128, IOFEOF); run = 0; FALLTHRU // fall through case 0: /* `repeat' state; get another byte and compare */ if ((c2 = iof_get(I)) < 0) return (*pos = 0, iof_set2(O, c1, 128), IOFEOF); run = (c1 == c2 ? 257-2 : 0); break; } if (run < 128) { /* single length byte, up to 128 bytes to be copied, possible eod marker */ iof_set(O, c1); for (c1 = c2, c2 = iof_char(I); c1 != c2 && run < 127; c1 = c2, c2 = iof_next(I)) { if (c2 < 0) /* O->pos must not change until next call to calling encoder!!! */ return (*pos = (uint8_t)run+1, iof_set2(O, c1, 128), IOFEOF); iof_set(O, c1); ++run; } } else // if run > 128 { for (c2 = iof_get(I); c1 == c2 && run > 129; c2 = iof_get(I)) --run; if (c2 < 0) return (*pos = (uint8_t)run, iof_set2(O, c1, 128), IOFEOF); iof_set(O, c1); } *pos = (uint8_t)run; c1 = c2; run = 0; } return IOFFULL; } iof_status runlength_encode_state (iof *I, iof *O, runlength_state *state) { while (iof_ensure(O, 3)) /* single length byte, the byte to be repeated and eod */ { state->pos = O->pos++; switch (state->run) { case -1: /* initial state; get first byte */ if ((state->c1 = iof_get(I)) < 0) return (state->flush ? (*state->pos = 128, IOFEOF) : IOFEMPTY); state->run = 0; FALLTHRU // fall through case 0: /* `repeat' state; get another byte and compare */ if ((state->c2 = iof_get(I)) < 0) return (state->flush ? (*state->pos = 0, iof_set2(O, state->c1, 128), IOFEOF) : IOFEMPTY); state->run = (state->c1 == state->c2 ? 257-2 : 0); break; } if (state->run < 128) { /* ensure space for single length byte, up to 128 bytes to be copied, plus possible eod marker, minus those already copied */ if (!iof_ensure(O, 1+128+1-state->run)) return IOFFULL; iof_set(O, state->c1); for (state->c1 = state->c2, state->c2 = iof_char(I); state->c1 != state->c2 && state->run < 127; state->c1 = state->c2, state->c2 = iof_next(I)) { if (state->c2 < 0) /* O->pos must not change until next call to calling encoder!!! */ return (state->flush ? 
(*state->pos = (uint8_t)state->run+1, iof_set2(O, state->c1, 128), IOFEOF) : IOFEMPTY); iof_set(O, state->c1); ++state->run; } } else // if run > 128 { for (state->c2 = iof_get(I); state->c1 == state->c2 && state->run > 129; state->c2 = iof_get(I)) --state->run; if (state->c2 < 0) return (state->flush ? (*state->pos = (uint8_t)state->run, iof_set2(O, state->c1, 128), IOFEOF) : IOFEMPTY); iof_set(O, state->c1); } *state->pos = (uint8_t)state->run; state->c1 = state->c2; state->run = 0; } return IOFFULL; } iof_status runlength_decode (iof *I, iof *O) { register int c, run = -1; while (1) { if (run == -1) /* initial state */ { if ((run = iof_get(I)) < 0) { run = -1; /* don't assume IOFEOF == -1 */ return IOFEOF; } } if (run < 128) { /* copy (run + 1) following bytes */ while (run > -1) { if (iof_ensure(O, 1)) { if ((c = iof_get(I)) < 0) return IOFERR; iof_set(O, c); --run; continue; } return IOFFULL; } } else if (run > 128) { /* replicate the following byte (257 - run) times */ if ((c = iof_get(I)) < 0) /* cf. state-wise version; don't change input position until we got this byte */ return IOFERR; while (run < 257) { if (iof_ensure(O, 1)) { iof_set(O, c); ++run; continue; } return IOFFULL; } run = -1; } else // c == 128 return IOFEOF; } // return IOFFULL; } iof_status runlength_decode_state (iof *I, iof *O, runlength_state *state) { register int c; while (1) { if (state->run == -1) /* initial state */ { if ((state->run = iof_char(I)) < 0) { state->run = -1; /* don't assume IOFEOF == -1 */ return (state->flush ? IOFEOF : IOFEMPTY); } ++I->pos; } if (state->run < 128) { /* copy (state->run + 1) following bytes */ while (state->run > -1) { if (iof_ensure(O, 1)) { if ((c = iof_char(I)) < 0) return (state->flush ? IOFERR : IOFEMPTY); ++I->pos; iof_set(O, c); --state->run; continue; } return IOFFULL; } } else if (state->run > 128) { /* replicate the following byte (257 - state->run) times */ if ((c = iof_char(I)) < 0) return (state->flush ? 
IOFERR : IOFEMPTY); ++I->pos; while (state->run < 257) { if (iof_ensure(O, 1)) { iof_set(O, c); ++state->run; continue; } return IOFFULL; } state->run = -1; } else // c == 128 return IOFEOF; } // return IOFFULL; } /* filters */ // base16 decoder function static size_t base16_decoder (iof *F, iof_mode mode) { basexx_state *state; iof_status status; size_t tail; switch(mode) { case IOFLOAD: case IOFREAD: if (F->flags & IOF_STOPPED) return 0; tail = iof_tail(F); F->pos = F->buf + tail; F->end = F->buf + F->space; state = iof_filter_state(basexx_state *, F); do { status = base16_decode_state(F->next, F, state); } while (mode == IOFLOAD && status == IOFFULL && iof_resize_buffer(F)); return iof_decoder_retval(F, "base16", status); case IOFCLOSE: iof_free(F); return 0; default: break; } return 0; } // base16 encoder function static size_t base16_encoder (iof *F, iof_mode mode) { basexx_state *state; iof_status status; state = iof_filter_state(basexx_state *, F); switch (mode) { case IOFFLUSH: state->flush = 1; FALLTHRU // fall through case IOFWRITE: F->end = F->pos; F->pos = F->buf; status = base16_encode_state_ln(F, F->next, state); return iof_encoder_retval(F, "base16", status); case IOFCLOSE: if (!state->flush) base16_encoder(F, IOFFLUSH); iof_free(F); return 0; default: break; } return 0; } // base64 decoder function static size_t base64_decoder (iof *F, iof_mode mode) { basexx_state *state; iof_status status; size_t tail; switch(mode) { case IOFLOAD: case IOFREAD: if (F->flags & IOF_STOPPED) return 0; tail = iof_tail(F); F->pos = F->buf + tail; F->end = F->buf + F->space; state = iof_filter_state(basexx_state *, F); do { status = base64_decode_state(F->next, F, state); } while (mode == IOFLOAD && status == IOFFULL && iof_resize_buffer(F)); return iof_decoder_retval(F, "base64", status); case IOFCLOSE: iof_free(F); return 0; default: break; } return 0; } // base64 encoder function static size_t base64_encoder (iof *F, iof_mode mode) { basexx_state *state; iof_status 
status; state = iof_filter_state(basexx_state *, F); switch (mode) { case IOFFLUSH: state->flush = 1; FALLTHRU // fall through case IOFWRITE: F->end = F->pos; F->pos = F->buf; status = base64_encode_state_ln(F, F->next, state); return iof_encoder_retval(F, "base64", status); case IOFCLOSE: if (!state->flush) base64_encoder(F, IOFFLUSH); iof_free(F); return 0; default: break; } return 0; } // base85 decoder function static size_t base85_decoder (iof *F, iof_mode mode) { basexx_state *state; iof_status status; size_t tail; switch(mode) { case IOFLOAD: case IOFREAD: if (F->flags & IOF_STOPPED) return 0; tail = iof_tail(F); F->pos = F->buf + tail; F->end = F->buf + F->space; state = iof_filter_state(basexx_state *, F); do { status = base85_decode_state(F->next, F, state); } while (mode == IOFLOAD && status == IOFFULL && iof_resize_buffer(F)); return iof_decoder_retval(F, "base85", status); case IOFCLOSE: iof_free(F); return 0; default: break; } return 0; } // base85 encoder function static size_t base85_encoder (iof *F, iof_mode mode) { basexx_state *state; iof_status status; state = iof_filter_state(basexx_state *, F); switch (mode) { case IOFFLUSH: state->flush = 1; FALLTHRU // fall through case IOFWRITE: F->end = F->pos; F->pos = F->buf; status = base85_encode_state_ln(F, F->next, state); return iof_encoder_retval(F, "base85", status); case IOFCLOSE: if (!state->flush) base85_encoder(F, IOFFLUSH); iof_free(F); return 0; default: break; } return 0; } // runlength decoder function static size_t runlength_decoder (iof *F, iof_mode mode) { runlength_state *state; iof_status status; size_t tail; switch(mode) { case IOFLOAD: case IOFREAD: if (F->flags & IOF_STOPPED) return 0; tail = iof_tail(F); F->pos = F->buf + tail; F->end = F->buf + F->space; state = iof_filter_state(runlength_state *, F); do { status = runlength_decode_state(F->next, F, state); } while (mode == IOFLOAD && status == IOFFULL && iof_resize_buffer(F)); return iof_decoder_retval(F, "runlength", status); 
case IOFCLOSE: iof_free(F); return 0; default: break; } return 0; } // runlength encoder function static size_t runlength_encoder (iof *F, iof_mode mode) { runlength_state *state; iof_status status; state = iof_filter_state(runlength_state *, F); switch (mode) { case IOFFLUSH: state->flush = 1; FALLTHRU // fall through case IOFWRITE: F->end = F->pos; F->pos = F->buf; status = runlength_encode_state(F, F->next, state); return iof_encoder_retval(F, "runlength", status); case IOFCLOSE: if (!state->flush) runlength_encoder(F, IOFFLUSH); iof_free(F); return 0; default: break; } return 0; } // int iof_filter_basexx_encoder_ln (iof *F, size_t line, size_t maxline) { basexx_state *state; if (maxline > 8 && line < maxline) { state = iof_filter_state(basexx_state *, F); state->line = line; state->maxline = maxline; return 1; } return 0; } /* base 16 */ iof * iof_filter_base16_decoder (iof *N) { iof *I; basexx_state_pointer P; I = iof_filter_reader(base16_decoder, sizeof(basexx_state), &P.voidstate); iof_setup_next(I, N); basexx_state_init(P.basexxstate); P.basexxstate->flush = 1; // means N is supposed to be continuous input return I; } iof * iof_filter_base16_encoder (iof *N) { iof *O; basexx_state_pointer P; O = iof_filter_writer(base16_encoder, sizeof(basexx_state), &P.voidstate); iof_setup_next(O, N); basexx_state_init(P.basexxstate); return O; } /* base 64 */ iof * iof_filter_base64_decoder (iof *N) { iof *I; basexx_state_pointer P; I = iof_filter_reader(base64_decoder, sizeof(basexx_state), &P.voidstate); iof_setup_next(I, N); basexx_state_init(P.basexxstate); P.basexxstate->flush = 1; return I; } iof * iof_filter_base64_encoder (iof *N) { iof *O; basexx_state_pointer P; O = iof_filter_writer(base64_encoder, sizeof(basexx_state), &P.voidstate); iof_setup_next(O, N); basexx_state_init(P.basexxstate); return O; } /* base 85 */ iof * iof_filter_base85_decoder (iof *N) { iof *I; basexx_state_pointer P; I = iof_filter_reader(base85_decoder, sizeof(basexx_state), 
&P.voidstate); iof_setup_next(I, N); basexx_state_init(P.basexxstate); P.basexxstate->flush = 1; return I; } iof * iof_filter_base85_encoder (iof *N) { iof *O; basexx_state_pointer P; O = iof_filter_writer(base85_encoder, sizeof(basexx_state), &P.voidstate); iof_setup_next(O, N); basexx_state_init(P.basexxstate); return O; } /* runlength stream filter */ iof * iof_filter_runlength_decoder (iof *N) { iof *I; basexx_state_pointer P; I = iof_filter_reader(runlength_decoder, sizeof(runlength_state), &P.voidstate); iof_setup_next(I, N); runlength_state_init(P.runlengthstate); P.runlengthstate->flush = 1; return I; } iof * iof_filter_runlength_encoder (iof *N) { iof *O; basexx_state_pointer P; O = iof_filter_writer(runlength_encoder, sizeof(runlength_state), &P.voidstate); iof_setup_next(O, N); runlength_state_init(P.runlengthstate); return O; } luametatex-2.10.08/source/libraries/pplib/util/utilbasexx.h000066400000000000000000000074301442250314700237410ustar00rootroot00000000000000 /* base encodings */ #ifndef UTIL_BASEXX_H #define UTIL_BASEXX_H #include "utiliof.h" /* base codecs state */ typedef struct basexx_state basexx_state; #define BASEXX_MAXLINE 80 #define BASEXX_PDF void basexx_state_init_ln (basexx_state *state, size_t line, size_t maxline); #define basexx_state_init(state) basexx_state_init_ln(state, 0, BASEXX_MAXLINE) /* base16 */ int base16_getc (iof *I); int base16_uc_putc (iof *I, int c); int base16_lc_putc (iof *I, int c); #define base16_putc base16_uc_putc iof_status base16_encoded_uc (const void *data, size_t size, iof *O); iof_status base16_encoded_lc (const void *data, size_t size, iof *O); iof_status base16_encoded_uc_ln (const void *data, size_t size, iof *O, size_t line, size_t maxline); iof_status base16_encoded_lc_ln (const void *data, size_t size, iof *O, size_t line, size_t maxline); iof_status base16_encode_uc (iof *I, iof *O); iof_status base16_encode_lc (iof *I, iof *O); iof_status base16_encode_uc_ln (iof *I, iof *O, size_t line, size_t 
maxline); iof_status base16_encode_lc_ln (iof *I, iof *O, size_t line, size_t maxline); iof_status base16_decode (iof *I, iof *O); #define base16_encoded base16_encoded_uc #define base16_encoded_ln base16_encoded_uc_ln #define base16_encode base16_encode_uc #define base16_encode_ln base16_encode_uc_ln iof_status base16_encode_state_uc (iof *I, iof *O, basexx_state *state); iof_status base16_encode_state_lc (iof *I, iof *O, basexx_state *state); iof_status base16_encode_state_uc_ln (iof *I, iof *O, basexx_state *state); iof_status base16_encode_state_lc_ln (iof *I, iof *O, basexx_state *state); iof_status base16_decode_state (iof *I, iof *O, basexx_state *state); #define base16_encode_state base16_encode_state_uc #define base16_encode_state_ln base16_encode_state_uc_ln /* base64 */ extern const char base64_alphabet[]; extern const int base64_lookup[]; iof_status base64_encoded (const void *data, size_t size, iof *O); iof_status base64_encoded_ln (const void *data, size_t size, iof *O, size_t line, size_t maxline); iof_status base64_encode (iof *I, iof *O); iof_status base64_encode_ln (iof *I, iof *O, size_t line, size_t maxline); iof_status base64_decode (iof *I, iof *O); iof_status base64_encode_state (iof *I, iof *O, basexx_state *state); iof_status base64_encode_state_ln (iof *I, iof *O, basexx_state *state); iof_status base64_decode_state (iof *I, iof *O, basexx_state *state); /* base85 */ extern const char base85_alphabet[]; extern const int base85_lookup[]; iof_status base85_encoded (const void *data, size_t size, iof *O); iof_status base85_encoded_ln (const void *data, size_t size, iof *O, size_t line, size_t maxline); iof_status base85_encode (iof *I, iof *O); iof_status base85_encode_ln (iof *I, iof *O, size_t line, size_t maxline); iof_status base85_decode (iof *I, iof *O); iof_status base85_encode_state (iof *I, iof *O, basexx_state *state); iof_status base85_encode_state_ln (iof *I, iof *O, basexx_state *state); iof_status base85_decode_state (iof *I, 
iof *O, basexx_state *state); /* run length */ typedef struct runlength_state runlength_state; void runlength_state_init (runlength_state *state); iof_status runlength_encode (iof *I, iof *O); iof_status runlength_encode_state (iof *I, iof *O, runlength_state *state); iof_status runlength_decode (iof *I, iof *O); iof_status runlength_decode_state (iof *I, iof *O, runlength_state *state); /* filters */ int iof_filter_basexx_encoder_ln (iof *N, size_t line, size_t maxline); iof * iof_filter_base16_decoder (iof *N); iof * iof_filter_base16_encoder (iof *N); iof * iof_filter_base64_decoder (iof *N); iof * iof_filter_base64_encoder (iof *N); iof * iof_filter_base85_decoder (iof *N); iof * iof_filter_base85_encoder (iof *N); iof * iof_filter_runlength_decoder (iof *N); iof * iof_filter_runlength_encoder (iof *N); #endif luametatex-2.10.08/source/libraries/pplib/util/utilcrypt.c000066400000000000000000001123531442250314700236040ustar00rootroot00000000000000 #include "utilmem.h" #include "utilcrypt.h" #include "utilcryptdef.h" #include "utilmd5.h" /* rc4 */ /* Initializer arguments: - state - crypt state - map - a space for rc4 bytes map; may be left NULL in which case will be allocated - vkey - crypt key; may be left NULL iff map is provided and properly initialized - keylength - the length of crypt key (from 5 to 16 bytes) */ rc4_state * rc4_state_initialize (rc4_state *state, rc4_map *map, const void *vkey, size_t keylength) { int i, j; uint8_t tmp; const uint8_t *key; key = (const uint8_t *)vkey; if (keylength == 0 || keylength > 256) return NULL; state->flags = 0; if (map != NULL) { state->map = map; } else { state->map = (rc4_map *)util_malloc(sizeof(rc4_map)); state->flags |= RC4_STATE_ALLOC; } if (key != NULL) { for (i = 0; i < 256; ++i) state->smap[i] = (uint8_t)i; for (i = 0, j = 0; i < 256; ++i) { j = (j + state->smap[i] + key[i % keylength]) & 255; tmp = state->smap[i]; state->smap[i] = state->smap[j]; state->smap[j] = tmp; } } state->i = 0; state->j = 0; 
state->flush = 0; /* caller is responsible to override if necessary */ return state; } void rc4_map_save (rc4_state *state, rc4_map *map) { memcpy(map, state->map, sizeof(rc4_map)); } void rc4_map_restore (rc4_state *state, rc4_map *map) { memcpy(state->map, map, sizeof(rc4_map)); //state->flags = 0; //state->flush = 0; state->i = 0; state->j = 0; } static uint8_t rc4_next_random_byte (rc4_state *state) { uint8_t tmp; state->i = (state->i + 1) & 255; state->j = (state->j + state->smap[state->i]) & 255; tmp = state->smap[state->i]; state->smap[state->i] = state->smap[state->j]; state->smap[state->j] = tmp; return state->smap[(state->smap[state->i] + state->smap[state->j]) & 255]; } iof_status rc4_crypt_state (iof *I, iof *O, rc4_state *state) { uint8_t r; int c; while (iof_ensure(O, 1)) { if ((c = iof_get(I)) < 0) return c == IOFERR ? IOFERR : (state->flush ? IOFEOF : IOFEMPTY); r = rc4_next_random_byte(state); //r = r ^ ((uint8_t)c); //iof_set(O, r); iof_set(O, r ^ ((uint8_t)c)); } return IOFFULL; } iof_status rc4_crypt (iof *I, iof *O, const void *key, size_t keylength) { int ret; rc4_state state; rc4_map map; if (rc4_state_initialize(&state, &map, key, keylength) == NULL) return IOFERR; state.flush = 1; ret = rc4_crypt_state(I, O, &state); rc4_state_close(&state); return ret; } /* Variants that operates on c-strings can worn inplace, so output and input can be the same address. Variant that takes rc4_state pointer expects the state properly initialized. Keep in mind the crypt procedure modifies rc4 bytes map. All returns the size of encrypted/decrypted data, which is the same as input data length for rc4. 
*/ size_t rc4_crypt_data (const void *input, size_t length, void *output, const void *key, size_t keylength) { rc4_state state; rc4_map map; if (rc4_state_initialize(&state, &map, key, keylength) == NULL) return 0; return rc4_crypt_state_data(&state, input, length, output); // no need to call rc4_state_close() } size_t rc4_crypt_state_data (rc4_state *state, const void *input, size_t length, void *output) { /* state assumed to be initialized and with the proper state of smap */ const uint8_t *inp; uint8_t r, *out; size_t size; inp = (const uint8_t *)input; out = (uint8_t *)output; for (size = 0; size < length; ++size, ++inp, ++out) { r = rc4_next_random_byte(state); *out = r ^ *inp; } return length; } void rc4_state_close (rc4_state *state) { if (state->smap != NULL && (state->flags & RC4_STATE_ALLOC)) { util_free(state->smap); state->smap = NULL; } } /* aes; parts of code excerpted from https://github.com/kokke/tiny-AES128-C */ static const uint8_t sbox[256] = { 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76, 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0, 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15, 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75, 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84, 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf, 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8, 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2, 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73, 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb, 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 
0xac, 0x62, 0x91, 0x95, 0xe4, 0x79, 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08, 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a, 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e, 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf, 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 }; static const uint8_t rsbox[256] = { 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb, 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb, 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e, 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25, 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92, 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84, 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06, 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b, 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73, 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e, 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b, 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4, 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f, 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef, 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61, 0x17, 
0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d }; /* The round constant word array, rcon[i], contains the values given by x to th e power (i-1) being powers of x (x is denoted as {02}) in the field GF(2^8) Note that i starts at 1, not 0). */ static const uint8_t rcon[255] = { 0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36, 0x6c, 0xd8, 0xab, 0x4d, 0x9a, 0x2f, 0x5e, 0xbc, 0x63, 0xc6, 0x97, 0x35, 0x6a, 0xd4, 0xb3, 0x7d, 0xfa, 0xef, 0xc5, 0x91, 0x39, 0x72, 0xe4, 0xd3, 0xbd, 0x61, 0xc2, 0x9f, 0x25, 0x4a, 0x94, 0x33, 0x66, 0xcc, 0x83, 0x1d, 0x3a, 0x74, 0xe8, 0xcb, 0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36, 0x6c, 0xd8, 0xab, 0x4d, 0x9a, 0x2f, 0x5e, 0xbc, 0x63, 0xc6, 0x97, 0x35, 0x6a, 0xd4, 0xb3, 0x7d, 0xfa, 0xef, 0xc5, 0x91, 0x39, 0x72, 0xe4, 0xd3, 0xbd, 0x61, 0xc2, 0x9f, 0x25, 0x4a, 0x94, 0x33, 0x66, 0xcc, 0x83, 0x1d, 0x3a, 0x74, 0xe8, 0xcb, 0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36, 0x6c, 0xd8, 0xab, 0x4d, 0x9a, 0x2f, 0x5e, 0xbc, 0x63, 0xc6, 0x97, 0x35, 0x6a, 0xd4, 0xb3, 0x7d, 0xfa, 0xef, 0xc5, 0x91, 0x39, 0x72, 0xe4, 0xd3, 0xbd, 0x61, 0xc2, 0x9f, 0x25, 0x4a, 0x94, 0x33, 0x66, 0xcc, 0x83, 0x1d, 0x3a, 0x74, 0xe8, 0xcb, 0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36, 0x6c, 0xd8, 0xab, 0x4d, 0x9a, 0x2f, 0x5e, 0xbc, 0x63, 0xc6, 0x97, 0x35, 0x6a, 0xd4, 0xb3, 0x7d, 0xfa, 0xef, 0xc5, 0x91, 0x39, 0x72, 0xe4, 0xd3, 0xbd, 0x61, 0xc2, 0x9f, 0x25, 0x4a, 0x94, 0x33, 0x66, 0xcc, 0x83, 0x1d, 0x3a, 0x74, 0xe8, 0xcb, 0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36, 0x6c, 0xd8, 0xab, 0x4d, 0x9a, 0x2f, 0x5e, 0xbc, 0x63, 0xc6, 0x97, 0x35, 0x6a, 0xd4, 0xb3, 0x7d, 0xfa, 0xef, 0xc5, 0x91, 0x39, 0x72, 0xe4, 0xd3, 0xbd, 0x61, 0xc2, 0x9f, 0x25, 0x4a, 0x94, 0x33, 0x66, 0xcc, 0x83, 0x1d, 0x3a, 0x74, 0xe8, 0xcb }; /* block copying */ #define aes_copy_block(output, input) memcpy(output, input, 16) static void aes_copy_cbc (uint8_t *data, const uint8_t *input) { 
uint8_t i; for (i = 0; i < 16; ++i) data[i] ^= input[i]; } static void aes_copy_xor (uint8_t *data, const uint8_t *input, const uint8_t *iv) { uint8_t i; for (i = 0; i < 16; ++i) data[i] = input[i] ^ iv[i]; } /* key expansion */ #define AES_COLUMNS 4 // constant in aes static void key_expansion (aes_state *state, const uint8_t *key) { uint32_t i, j; uint8_t t[4], temp; uint8_t *keydata, keywords, columns; keywords = (uint8_t)(state->keylength >> 2); keydata = (uint8_t *)state->keyblock; /* the first round key is the key itself */ for(i = 0; i < keywords; ++i) { keydata[(i * 4) + 0] = key[(i * 4) + 0]; keydata[(i * 4) + 1] = key[(i * 4) + 1]; keydata[(i * 4) + 2] = key[(i * 4) + 2]; keydata[(i * 4) + 3] = key[(i * 4) + 3]; } /* others derived from the first */ for(columns = AES_COLUMNS * (state->rounds + 1); i < columns; ++i) { for(j = 0; j < 4; ++j) t[j] = keydata[(i - 1) * 4 + j]; if (i % keywords == 0) { /* rotate the 4 bytes in a word to the left once; [a0,a1,a2,a3] becomes [a1,a2,a3,a0] */ temp = t[0]; t[0] = t[1]; t[1] = t[2]; t[2] = t[3]; t[3] = temp; /* take a four-byte input word and apply the S-box to each of the four bytes to produce an output word */ t[0] = sbox[t[0]]; t[1] = sbox[t[1]]; t[2] = sbox[t[2]]; t[3] = sbox[t[3]]; t[0] = t[0] ^ rcon[i / keywords]; } else if (keywords > 6 && i % keywords == 4) { t[0] = sbox[t[0]]; t[1] = sbox[t[1]]; t[2] = sbox[t[2]]; t[3] = sbox[t[3]]; } keydata[i * 4 + 0] = keydata[(i - keywords) * 4 + 0] ^ t[0]; keydata[i * 4 + 1] = keydata[(i - keywords) * 4 + 1] ^ t[1]; keydata[i * 4 + 2] = keydata[(i - keywords) * 4 + 2] ^ t[2]; keydata[i * 4 + 3] = keydata[(i - keywords) * 4 + 3] ^ t[3]; } } /* An original implementation uses no private buffers except a keyblock. We need private buffers to keep a CBC vector between calls and to be able to read input data not necessarily in 16-bytes blocks. Encrypter would actually require only one such buffer, as CBC vector is applied on input data before the actual cipher procedure. 
And CBC for the next chunk is simply the output from the previous. Decrypter, however, applies the cipher first, then applies CBC to the output with a buffered init vector, and the vector for the next call is the row input before cipher. Hence we need two 16-bytes buffers for decrypter. */ /* aes_state * aes_state_initialize_ecb (aes_state *State, uint8_t *keyblock, const uint8_t *key) { state->flags = 0; state->flags |= AES_ECB_MODE; if (keyblock == NULL) { keyblock = util_malloc(sizeof(aes_keyblock)); state->flags |= AES_STATE_ALLOC; } state->keyblock = keyblock; key_expansion(state, key); state->flush = 0; return state; } */ void aes_pdf_mode (aes_state *state) { state->flags |= AES_INLINE_IV; state->flags &= ~AES_NULL_PADDING; } /* Initialize arguments: - state - crypt state - keyblock - a space for aes key expansion; can be left NULL in which case will be allocated - key - crypt key; can be left NULL iff keyblock is given and properly initialized - keylength - the length of the key (16 or 32 bytes) - iv - 16-bytes CBC initialization vector; - if left NULL for encoder, one is generated and stored as state->iv - can also be left NULL for decorer, but then AES_INLINE_IV must be set, as this informs decoder to take an initialization vector from the beginning of the encrypted stream At the first approach, an initialization vector was copied to state block during initialization and encoders assumed that the state block is the current initialization vector. This simplifies encrypting procedure, as the output from every 16-bytes chunk encryption is an initialization vector for the next chunk. However, it makes api usage cumbersome, as the user has to know that iv may need to be copied to state block before each call. 
*/ static int aes_key_length (aes_state *state, size_t keylength) { state->keylength = keylength; switch (keylength) { case 16: state->rounds = 10; break; case 24: state->rounds = 12; break; case 32: state->rounds = 14; break; default: return 0; } return 1; } aes_state * aes_encode_initialize (aes_state *state, aes_keyblock *keyblock, const void *key, size_t keylength, const void *iv) { state->flags = 0; if (!aes_key_length(state, keylength)) return NULL; if (iv != NULL) aes_copy_block(state->iv, iv); else aes_generate_iv(state->iv); state->flags |= AES_HAS_IV; if (keyblock == NULL) { keyblock = (aes_keyblock *)util_malloc(sizeof(aes_keyblock)); state->flags |= AES_STATE_ALLOC; } state->keyblock = keyblock; if (key != NULL) /* if NULL we assume keyblock is given and already expanded */ key_expansion(state, (const uint8_t *)key); state->flush = 0; return state; } aes_state * aes_decode_initialize (aes_state *state, aes_keyblock *keyblock, const void *key, size_t keylength, const void *iv) { state->flags = 0; if (!aes_key_length(state, keylength)) return NULL; if (iv != NULL) { aes_copy_block(state->iv, iv); state->flags |= AES_HAS_IV; } /* else if AES_INLINE_IV flag is set will be read from input */ if (keyblock == NULL) { keyblock = (aes_keyblock *)util_malloc(sizeof(aes_keyblock)); state->flags |= AES_STATE_ALLOC; } state->keyblock = keyblock; if (key != NULL) /* otherwise keyblock is assumed present and properly initialized */ key_expansion(state, (const uint8_t *)key); state->flush = 0; return state; } void aes_state_close (aes_state *state) { if (state->keyblock != NULL && (state->flags & AES_STATE_ALLOC)) util_free(state->keyblock); } /* add round key */ static void aes_round_key (aes_block block, aes_block keyblock) { uint8_t i, j; for(i = 0; i < 4; ++i) for(j = 0; j < 4; ++j) block[i][j] ^= keyblock[i][j]; } #define aes_add_key(block, keyblock, round) aes_round_key(block, (*keyblock)[round]) /* substitution */ static void aes_encode_sub (aes_block block) { 
uint8_t i, j, v; for(i = 0; i < 4; ++i) for(j = 0; j < 4; ++j) v = block[i][j], block[i][j] = sbox[v]; } /* rows shift; the row index is the shift offset, the first order is not shifted */ static void aes_encode_shift (aes_block block) { uint8_t tmp; /* 1st row rotated once */ tmp = block[0][1]; block[0][1] = block[1][1]; block[1][1] = block[2][1]; block[2][1] = block[3][1]; block[3][1] = tmp; /* 2nd row rotated twice */ tmp = block[0][2]; block[0][2] = block[2][2]; block[2][2] = tmp; tmp = block[1][2]; block[1][2] = block[3][2]; block[3][2] = tmp; /* 3rd row rotated 3 times */ tmp = block[0][3]; block[0][3] = block[3][3]; block[3][3] = block[2][3]; block[2][3] = block[1][3]; block[1][3] = tmp; } static uint8_t xtime (uint8_t x) { return ((x << 1) ^ (((x >> 7) & 1) * 0x1b)); } /* mix columns */ static void aes_encode_mix (aes_block block) { uint8_t i, tmp, tm, t; for(i = 0; i < 4; ++i) { t = block[i][0]; tmp = block[i][0] ^ block[i][1] ^ block[i][2] ^ block[i][3] ; tm = block[i][0] ^ block[i][1]; tm = xtime(tm); block[i][0] ^= tm ^ tmp; tm = block[i][1] ^ block[i][2]; tm = xtime(tm); block[i][1] ^= tm ^ tmp; tm = block[i][2] ^ block[i][3]; tm = xtime(tm); block[i][2] ^= tm ^ tmp; tm = block[i][3] ^ t ; tm = xtime(tm); block[i][3] ^= tm ^ tmp; } } /* multiply is used to multiply numbers in the field GF(2^8) */ #define multiply(x, y) \ ( ((y & 1) * x) ^ \ ((y>>1 & 1) * xtime(x)) ^ \ ((y>>2 & 1) * xtime(xtime(x))) ^ \ ((y>>3 & 1) * xtime(xtime(xtime(x)))) ^ \ ((y>>4 & 1) * xtime(xtime(xtime(xtime(x)))))) \ /* mix columns */ static void aes_decode_mix (aes_block block) { int i; uint8_t a, b, c, d; for(i = 0; i < 4; ++i) { a = block[i][0]; b = block[i][1]; c = block[i][2]; d = block[i][3]; block[i][0] = multiply(a, 0x0e) ^ multiply(b, 0x0b) ^ multiply(c, 0x0d) ^ multiply(d, 0x09); block[i][1] = multiply(a, 0x09) ^ multiply(b, 0x0e) ^ multiply(c, 0x0b) ^ multiply(d, 0x0d); block[i][2] = multiply(a, 0x0d) ^ multiply(b, 0x09) ^ multiply(c, 0x0e) ^ multiply(d, 0x0b); 
block[i][3] = multiply(a, 0x0b) ^ multiply(b, 0x0d) ^ multiply(c, 0x09) ^ multiply(d, 0x0e); } } /* inverse substitution */ static void aes_decode_sub (aes_block block) { uint8_t i, j, v; for(i = 0; i < 4; ++i) for(j = 0; j < 4; ++j) v = block[i][j], block[i][j] = rsbox[v]; } /* inverse shift rows */ static void aes_decode_shift (aes_block block) { uint8_t tmp; /* 1st row rotated once right */ tmp = block[3][1]; block[3][1] = block[2][1]; block[2][1] = block[1][1]; block[1][1] = block[0][1]; block[0][1] = tmp; /* 2st row rotated twice right */ tmp = block[0][2]; block[0][2] = block[2][2]; block[2][2] = tmp; tmp = block[1][2]; block[1][2] = block[3][2]; block[3][2] = tmp; /* 3rd row rotated 3 times right */ tmp = block[0][3]; block[0][3] = block[1][3]; block[1][3] = block[2][3]; block[2][3] = block[3][3]; block[3][3] = tmp; } /* aes block encoder */ static void aes_encode_cipher (aes_state *state) { uint8_t round; aes_add_key(state->block, state->keyblock, 0); for (round = 1; round < state->rounds; ++round) { aes_encode_sub(state->block); aes_encode_shift(state->block); aes_encode_mix(state->block); aes_add_key(state->block, state->keyblock, round); } aes_encode_sub(state->block); aes_encode_shift(state->block); aes_add_key(state->block, state->keyblock, state->rounds); } /* aes block decoder */ static void aes_decode_cipher (aes_state *state) { uint8_t round; aes_add_key(state->block, state->keyblock, state->rounds); for(round = state->rounds - 1; round > 0; --round) { aes_decode_shift(state->block); aes_decode_sub(state->block); aes_add_key(state->block, state->keyblock, round); aes_decode_mix(state->block); } aes_decode_shift(state->block); aes_decode_sub(state->block); aes_add_key(state->block, state->keyblock, 0); } /* tail block padding; RFC 2898, PKCS #5: Password-Based Cryptography Specification Version 2.0; pdf spec p. 
119 */ #define aes_padding(state) ((state->flags & AES_NULL_PADDING) == 0) static void aes_put_padding (aes_state *state, uint8_t length) { uint8_t pad; pad = (aes_padding(state)) ? 16 - length : 0; for (; length < 16; ++length) state->data[length] = state->iv[length] ^ pad; } static int aes_remove_padding (aes_state *state, uint8_t *data, uint8_t *length) { uint8_t pad; *length = 16; /* block length 16 means leave intact */ if (aes_padding(state)) { pad = data[16 - 1]; if (pad > 16) return IOFERR; for ( ; *length > 16 - pad; --(*length)) if (data[*length - 1] != pad) return IOFERR; } else { for ( ; *length > 0; --(*length)) if (data[*length - 1] != '\0') break; } return IOFEOF; } /* aes codec */ /* make the cipher on input xor-ed with iv, save the output as a new iv, write the output */ #define aes_encode_output(state, output) \ (aes_encode_cipher(state), aes_copy_block(state->iv, state->data), aes_copy_block(output, state->data), output += 16) iof_status aes_encode_state (iof *I, iof *O, aes_state *state) { int c; if (!(state->flags & AES_HAS_IV)) // weird return IOFERR; if ((state->flags & AES_INLINE_IV) && !(state->flags & AES_CONTINUE)) { /* write iv at the beginning of encrypted data */ if (!iof_ensure(O, 16)) return IOFFULL; aes_copy_block(O->pos, state->iv); O->pos += 16; state->flags |= AES_CONTINUE; } while (iof_ensure(O, 16)) { while (state->buffered < 16) { if ((c = iof_get(I)) != IOFEOF) { /* get input byte XORed with iv */ state->data[state->buffered] = state->iv[state->buffered] ^ ((uint8_t)c); ++state->buffered; } else { if (state->flush) { if (state->buffered > 0 || aes_padding(state)) { /* pad the last input chunk; for input divisable by 16, add 16 bytes 0x0f */ aes_put_padding(state, state->buffered); state->buffered = 16; aes_encode_output(state, O->pos); } return IOFEOF; } else return IOFEMPTY; } } aes_encode_output(state, O->pos); state->buffered = 0; } return IOFFULL; } /* write iv to the output, save the raw input just buffered as iv for the 
next chunk, make the cipher, write out xoring with iv */ #define aes_decode_output(state, output) \ (aes_copy_block(output, state->iv), aes_copy_block(state->iv, state->data), aes_decode_cipher(state), aes_copy_cbc(output, state->data), output += 16) iof_status aes_decode_state (iof *I, iof *O, aes_state *state) { int c, ret; uint8_t lastlength; if ((state->flags & AES_INLINE_IV) && !(state->flags & AES_CONTINUE)) { while (state->buffered < 16) { if ((c = iof_get(I)) != IOFEOF) state->iv[state->buffered++] = (uint8_t)c; else return state->flush ? IOFERR : IOFEMPTY; } state->flags |= AES_CONTINUE|AES_HAS_IV; state->buffered = 0; } while (iof_ensure(O, 16)) { while (state->buffered < 16) { if ((c = iof_get(I)) != IOFEOF) state->data[state->buffered++] = (uint8_t)c; else return state->flush ? IOFERR : IOFEMPTY; } aes_decode_output(state, O->pos); if (state->flush) { /* we have to check for EOF here, to remove eventual padding */ if ((c = iof_get(I)) < 0) { /* end of input at 16-bytes boundary; remove padding and quit */ ret = aes_remove_padding(state, O->pos - 16, &lastlength); O->pos -= 16 - lastlength; return ret; } else { /* beginning of the next block */ state->buffered = 1; state->data[0] = (uint8_t)c; } } else state->buffered = 0; } return IOFFULL; } /* variants that works on c-strings; can work inplace (output==input) except encoder in pdf flavour */ /* Codecs operating on c-string can generally work inplace (output==input), except encoder with AES_INLINE_IV flag set, which outputs 16 bytes of initialization vector at the beginning of encrypted data. All return the size of encrypted/decrypted data. Encoders output is the original length padded to a complete 16 bytes (plus eventual 16 bytes of initialization vector, if AES_INLINE_IV is used). Default padding is unambiguously removed during decryption. AES_NULL_PADDING flag forces using (ambiguous) NULL-byte padding, only if input length module 16 is greater then zero. 
An input data is supposed to be a complete data to be encrypted or decrypted. It is possible, however, to use those codecs for scaterred data chunks by manipulating AES_INLINE_IV, AES_NULL_PADDING, AES_CONTINUE flags and data length. Caller may assume that c-string codecs do not modify state flags. Encoder could actually be optimized by writing an initialization vector to a state block once. After every chunk encryption, the output is the initialization vector for the next chunk. Since we use c-string codec variants on short strings, the gain is neglectable in comparison with the weight of the aes crypt procedure. */ size_t aes_encode_data (const void *input, size_t length, void *output, const void *key, size_t keylength, const void *iv, int flags) { aes_state state; aes_keyblock keyblock; if (aes_encode_initialize(&state, &keyblock, key, keylength, iv) == NULL) return 0; state.flags |= flags; return aes_encode_state_data(&state, input, length, output); // aes_state_close(&state); } size_t aes_encode_state_data (aes_state *state, const void *input, size_t length, void *output) { const uint8_t *inp; uint8_t *out, tail, t; size_t size; inp = (const uint8_t *)input; out = (uint8_t *)output; if (!(state->flags & AES_HAS_IV)) return 0; if ((state->flags & AES_INLINE_IV) && !(state->flags & AES_CONTINUE)) { aes_copy_block(out, state->iv); out += 16; } // state->flags |= AES_CONTINUE; // do not modify state flags for (size = 0; size + 16 <= length; size += 16) { aes_copy_xor(state->data, inp, state->iv); aes_encode_output(state, out); inp += 16; } if ((tail = (length % 16)) > 0 || aes_padding(state)) { for (t = 0; t < tail; ++t) state->data[t] = inp[t] ^ state->iv[t]; aes_put_padding(state, tail); aes_encode_output(state, out); size += 16; } if (state->flags & AES_INLINE_IV) size += 16; /* iv written at the beginning of encoded data */ return size; } size_t aes_decode_data (const void *input, size_t length, void *output, const void *key, size_t keylength, const void *iv, 
int flags) { aes_state state; aes_keyblock keyblock; if (aes_decode_initialize(&state, &keyblock, key, keylength, iv) == NULL) return 0; state.flags |= flags; return aes_decode_state_data(&state, input, length, output); // aes_state_close(&state); } size_t aes_decode_state_data (aes_state *state, const void *input, size_t length, void *output) { const uint8_t *inp; uint8_t *out, lastlength; size_t size; inp = (const uint8_t *)input; out = (uint8_t *)output; if ((state->flags & AES_INLINE_IV) && !(state->flags & AES_CONTINUE)) { aes_copy_block(state->iv, inp); // state->flags |= AES_HAS_IV; // do not modify state flags inp += 16; length = length >= 16 ? length - 16 : 0; } else if (!(state->flags & AES_HAS_IV)) return 0; // state->flags |= AES_CONTINUE; // do not modify state flags for (size = 0; size + 16 <= length; size += 16) { aes_copy_block(state->data, inp); aes_decode_output(state, out); inp += 16; } if (size >= 16) { aes_remove_padding(state, out - 16, &lastlength); size = size - 16 + lastlength; } return size; } /* pseudo-random bytes chain exceprted from eexec; not expected to have strong cryptographic properties we only expect that it is (reasonably) unique and different for each call (not only function call, but also a program call). A current trick with mangling pointer value gives satisfactory results, generally different for every function call and a programm call. Note that the pseudo-input bytes starts from some inner address bits, as they vary better; without that, the first byte tends to be "lazy". 
*/ void random_bytes (uint8_t *output, size_t size) { size_t i; uint8_t p; static uint16_t k = 55665; for (i = 0; i < size; ++i) { p = ((uint8_t *)(&output))[(i + 2) % sizeof(uint8_t *)] ^ (uint8_t)size; // pseudo input byte ;) k = (((p + k) * 52845 + 22719) & 65535); // xor-ed with pseudo-random sequence (kept between calls) output[i] = p ^ (k >> 8); } } void aes_generate_iv (uint8_t output[16]) { random_bytes(output, 16); } /* filters */ // rc4 decoder function static size_t rc4_decoder (iof *F, iof_mode mode) { rc4_state *state; iof_status status; size_t tail; state = iof_filter_state(rc4_state *, F); switch(mode) { case IOFLOAD: case IOFREAD: if (F->flags & IOF_STOPPED) return 0; tail = iof_tail(F); F->pos = F->buf + tail; F->end = F->buf + F->space; do { status = rc4_decode_state(F->next, F, state); } while (mode == IOFLOAD && status == IOFFULL && iof_resize_buffer(F)); return iof_decoder_retval(F, "rc4", status); case IOFCLOSE: rc4_state_close(state); iof_free(F); return 0; default: break; } return 0; } // rc4 encoder function static size_t rc4_encoder (iof *F, iof_mode mode) { rc4_state *state; iof_status status; state = iof_filter_state(rc4_state *, F); switch (mode) { case IOFFLUSH: state->flush = 1; FALLTHRU // fall through case IOFWRITE: F->end = F->pos; F->pos = F->buf; status = rc4_encode_state(F, F->next, state); return iof_encoder_retval(F, "rc4", status); case IOFCLOSE: if (!state->flush) rc4_encoder(F, IOFFLUSH); rc4_state_close(state); iof_free(F); return 0; default: break; } return 0; } // aes decoder function static size_t aes_decoder (iof *F, iof_mode mode) { aes_state *state; iof_status status; size_t tail; state = iof_filter_state(aes_state *, F); switch(mode) { case IOFLOAD: case IOFREAD: if (F->flags & IOF_STOPPED) return 0; tail = iof_tail(F); F->pos = F->buf + tail; F->end = F->buf + F->space; do { status = aes_decode_state(F->next, F, state); } while (mode == IOFLOAD && status == IOFFULL && iof_resize_buffer(F)); return 
iof_decoder_retval(F, "aes", status); case IOFCLOSE: aes_state_close(state); iof_free(F); return 0; default: break; } return 0; } // aes encoder function static size_t aes_encoder (iof *F, iof_mode mode) { aes_state *state; iof_status status; state = iof_filter_state(aes_state *, F); switch (mode) { case IOFFLUSH: state->flush = 1; FALLTHRU // fall through case IOFWRITE: F->end = F->pos; F->pos = F->buf; status = aes_encode_state(F, F->next, state); return iof_encoder_retval(F, "aes", status); case IOFCLOSE: if (!state->flush) aes_encoder(F, IOFFLUSH); aes_state_close(state); iof_free(F); return 0; default: break; } return 0; } iof * iof_filter_rc4_decoder (iof *N, const void *key, size_t keylength) { iof *I; crypt_state_pointer P; I = iof_filter_reader(rc4_decoder, sizeof(rc4_state), &P.voidstate); iof_setup_next(I, N); if (rc4_state_init(P.rc4state, key, keylength) == NULL) { iof_discard(I); return NULL; } P.rc4state->flush = 1; return I; } iof * iof_filter_rc4_encoder (iof *N, const void *key, size_t keylength) { iof *O; crypt_state_pointer P; O = iof_filter_writer(rc4_encoder, sizeof(rc4_state), &P.voidstate); iof_setup_next(O, N); if (rc4_state_init(P.rc4state, key, keylength) == NULL) { iof_discard(O); return NULL; } // P.rc4state->flush = 1; return O; } /* aes crypt filters */ iof * iof_filter_aes_decoder (iof *N, const void *key, size_t keylength) { iof *I; crypt_state_pointer P; I = iof_filter_reader(aes_decoder, sizeof(aes_state), &P.voidstate); iof_setup_next(I, N); if (aes_decode_init(P.aesstate, key, keylength) == NULL) { iof_discard(I); return NULL; } aes_pdf_mode(P.aesstate); P.aesstate->flush = 1; return I; } iof * iof_filter_aes_encoder (iof *N, const void *key, size_t keylength) { iof *O; crypt_state_pointer P; O = iof_filter_writer(aes_encoder, sizeof(aes_state), &P.voidstate); iof_setup_next(O, N); if (aes_encode_init(P.aesstate, key, keylength) == NULL) { iof_discard(O); return NULL; } aes_pdf_mode(P.aesstate); // P.aesstate->flush = 1; return 
O; } /* test */ /* static void show (void *p, size_t size, uint8_t round, uint8_t sym) { uint8_t i; printf("%c%c:", round, sym); for (i = 0; i < size; ++i) printf("%02x", ((uint8_t *)p)[i]); printf("\n"); } void aes_test (void) { const uint8_t key[] = { 0x2b, 0x7e, 0x15, 0x16, 0x28, 0xae, 0xd2, 0xa6, 0xab, 0xf7, 0x15, 0x88, 0x09, 0xcf, 0x4f, 0x3c }; const uint8_t iv[] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f }; const uint8_t inp[] = { 0x6b, 0xc1, 0xbe, 0xe2, 0x2e, 0x40, 0x9f, 0x96, 0xe9, 0x3d, 0x7e, 0x11, 0x73, 0x93, 0x17, 0x2a, 0xae, 0x2d, 0x8a, 0x57, 0x1e, 0x03, 0xac, 0x9c, 0x9e, 0xb7, 0x6f, 0xac, 0x45, 0xaf, 0x8e, 0x51, 0x30, 0xc8, 0x1c, 0x46, 0xa3, 0x5c, 0xe4, 0x11, 0xe5, 0xfb, 0xc1, 0x19, 0x1a, 0x0a, 0x52, 0xef, 0xf6, 0x9f, 0x24, 0x45, 0xdf, 0x4f, 0x9b, 0x17, 0xad, 0x2b, 0x41, 0x7b, 0xe6, 0x6c, 0x37, 0x10 }; const uint8_t out[] = { 0x76, 0x49, 0xab, 0xac, 0x81, 0x19, 0xb2, 0x46, 0xce, 0xe9, 0x8e, 0x9b, 0x12, 0xe9, 0x19, 0x7d, 0x50, 0x86, 0xcb, 0x9b, 0x50, 0x72, 0x19, 0xee, 0x95, 0xdb, 0x11, 0x3a, 0x91, 0x76, 0x78, 0xb2, 0x73, 0xbe, 0xd6, 0xb8, 0xe3, 0xc1, 0x74, 0x3b, 0x71, 0x16, 0xe6, 0x9e, 0x22, 0x22, 0x95, 0x16, 0x3f, 0xf1, 0xca, 0xa1, 0x68, 0x1f, 0xac, 0x09, 0x12, 0x0e, 0xca, 0x30, 0x75, 0x86, 0xe1, 0xa7 }; uint8_t input[64], output[64]; size_t inpsize, outsize; int flags = AES_NULL_PADDING; //////////////////////////////////////////////////////////////////////////// //#define ENCODETO output #define ENCODETO input // inplace inpsize = 64; memcpy(input, inp, inpsize); show(input, inpsize, '>', '>'); outsize = aes_encode_data(input, inpsize, ENCODETO, key, 16, iv, flags); show(ENCODETO, outsize, '<', '<'); if (outsize == inpsize && memcmp(ENCODETO, out, outsize) == 0) printf("ENCODER SUCCESS\n"); else printf("ENCODER FAILURE\n"); //////////////////////////////////////////////////////////////////////////// //#define DECODETO input #define DECODETO output // in place outsize = 64; memcpy(output, out, 
outsize); show(output, outsize, '<', '<'); inpsize = aes_decode_data(output, outsize, DECODETO, key, 16, iv, flags); show(DECODETO, inpsize, '>', '>'); if (inpsize == outsize && memcmp(DECODETO, inp, inpsize) == 0) printf("DECODER SUCCESS\n"); else printf("DECODER FAILURE\n"); } */ /* Some example vectors ================================ AES ECB 128-bit encryption mode ================================ Encryption key: 2b7e151628aed2a6abf7158809cf4f3c Test vector Cipher text 6bc1bee22e409f96e93d7e117393172a 3ad77bb40d7a3660a89ecaf32466ef97 ae2d8a571e03ac9c9eb76fac45af8e51 f5d3d58503b9699de785895a96fdbaaf 30c81c46a35ce411e5fbc1191a0a52ef 43b1cd7f598ece23881b00e3ed030688 f69f2445df4f9b17ad2b417be66c3710 7b0c785e27e8ad3f8223207104725dd4 ================================ AES ECB 192-bit encryption mode ================================ Encryption key: 8e73b0f7da0e6452c810f32b809079e562f8ead2522c6b7b Test vector Cipher text 6bc1bee22e409f96e93d7e117393172a bd334f1d6e45f25ff712a214571fa5cc ae2d8a571e03ac9c9eb76fac45af8e51 974104846d0ad3ad7734ecb3ecee4eef 30c81c46a35ce411e5fbc1191a0a52ef ef7afd2270e2e60adce0ba2face6444e f69f2445df4f9b17ad2b417be66c3710 9a4b41ba738d6c72fb16691603c18e0e ================================ AES ECB 256-bit encryption mode ================================ Encryption key: 603deb1015ca71be2b73aef0857d77811f352c073b6108d72d9810a30914dff4 Test vector Cipher text 6bc1bee22e409f96e93d7e117393172a f3eed1bdb5d2a03c064b5a7e3db181f8 ae2d8a571e03ac9c9eb76fac45af8e51 591ccb10d410ed26dc5ba74a31362870 30c81c46a35ce411e5fbc1191a0a52ef b6ed21b99ca6f4f9f153e7b1beafed1d f69f2445df4f9b17ad2b417be66c3710 23304b7a39f9f3ff067d8d8f9e24ecc7 ================================ AES CBC 128-bit encryption mode ================================ Encryption key: 2b7e151628aed2a6abf7158809cf4f3c Initialization vector Test vector Cipher text 000102030405060708090A0B0C0D0E0F 6bc1bee22e409f96e93d7e117393172a 7649abac8119b246cee98e9b12e9197d 7649ABAC8119B246CEE98E9B12E9197D 
ae2d8a571e03ac9c9eb76fac45af8e51 5086cb9b507219ee95db113a917678b2 5086CB9B507219EE95DB113A917678B2 30c81c46a35ce411e5fbc1191a0a52ef 73bed6b8e3c1743b7116e69e22229516 73BED6B8E3C1743B7116E69E22229516 f69f2445df4f9b17ad2b417be66c3710 3ff1caa1681fac09120eca307586e1a7 ================================ AES CBC 192-bit encryption mode ================================ Encryption key: 8e73b0f7da0e6452c810f32b809079e562f8ead2522c6b7b Initialization vector Test vector Cipher text 000102030405060708090A0B0C0D0E0F 6bc1bee22e409f96e93d7e117393172a 4f021db243bc633d7178183a9fa071e8 4F021DB243BC633D7178183A9FA071E8 ae2d8a571e03ac9c9eb76fac45af8e51 b4d9ada9ad7dedf4e5e738763f69145a B4D9ADA9AD7DEDF4E5E738763F69145A 30c81c46a35ce411e5fbc1191a0a52ef 571b242012fb7ae07fa9baac3df102e0 571B242012FB7AE07FA9BAAC3DF102E0 f69f2445df4f9b17ad2b417be66c3710 08b0e27988598881d920a9e64f5615cd ================================ AES CBC 256-bit encryption mode ================================ Encryption key: 603deb1015ca71be2b73aef0857d77811f352c073b6108d72d9810a30914dff4 Initialization vector Test vector Cipher text 000102030405060708090A0B0C0D0E0F 6bc1bee22e409f96e93d7e117393172a f58c4c04d6e5f1ba779eabfb5f7bfbd6 F58C4C04D6E5F1BA779EABFB5F7BFBD6 ae2d8a571e03ac9c9eb76fac45af8e51 9cfc4e967edb808d679f777bc6702c7d 9CFC4E967EDB808D679F777BC6702C7D 30c81c46a35ce411e5fbc1191a0a52ef 39f23369a9d9bacfa530e26304231461 39F23369A9D9BACFA530E26304231461 f69f2445df4f9b17ad2b417be66c3710 b2eb05e2c39be9fcda6c19078c6a9d1b */luametatex-2.10.08/source/libraries/pplib/util/utilcrypt.h000066400000000000000000000075171442250314700236160ustar00rootroot00000000000000#ifndef UTIL_CRYPT_H #define UTIL_CRYPT_H #include #include #include "utiliof.h" #ifndef UTIL_CRYPT_TIME # define UTIL_CRYPT_TIME 0 #endif /* RC4 */ typedef uint8_t rc4_map[256]; typedef struct rc4_state rc4_state; #define RC4_STATE_ALLOC (1<<0) UTILAPI rc4_state * rc4_state_initialize (rc4_state *state, rc4_map *map, const void *vkey, size_t keylength); #define 
rc4_state_init(state, vkey, keylength) rc4_state_initialize(state, NULL, vkey, keylength) UTILAPI void rc4_map_save (rc4_state *state, rc4_map *map); UTILAPI void rc4_map_restore (rc4_state *state, rc4_map *map); /* Codecs operating on iof */ UTILAPI iof_status rc4_crypt_state (iof *I, iof *O, rc4_state *state); #define rc4_encode_state(I, O, state) rc4_crypt_state(I, O, state) #define rc4_decode_state(I, O, state) rc4_crypt_state(I, O, state) UTILAPI iof_status rc4_crypt (iof *I, iof *O, const void *key, size_t length); #define rc4_encode(I, O) rc4_crypt(I, O, key, length) #define rc4_decode(I, O) rc4_crypt(I, O, key, length) UTILAPI size_t rc4_crypt_data (const void *input, size_t length, void *output, const void *key, size_t keylength); UTILAPI size_t rc4_crypt_state_data (rc4_state *state, const void *input, size_t length, void *output); #define rc4_encode_data(input, length, output, key, keylength) rc4_crypt_data(input, length, output, key, keylength) #define rc4_decode_data(input, length, output, key, keylength) rc4_crypt_data(input, length, output, key, keylength) #define rc4_encode_state_data(state, input, length, output) rc4_crypt_state_data(state, input, length, output) #define rc4_decode_state_data(state, input, length, output) rc4_crypt_state_data(state, input, length, output) UTILAPI void rc4_state_close (rc4_state *state); /* AES */ typedef uint8_t aes_block[4][4]; typedef aes_block aes_keyblock[15]; // aes128 - 10+1, aes192 - 12+1, aes256 - 14+1 typedef struct aes_state aes_state; #define AES_STATE_ALLOC (1<<0) //#define AES_ECB_MODE (1<<2) #define AES_HAS_IV (1<<3) #define AES_INLINE_IV (1<<4) #define AES_CONTINUE (1<<5) #define AES_NULL_PADDING (1<<6) UTILAPI void aes_pdf_mode (aes_state *state); //UTILAPI aes_state * aes_state_initialize_ecb (aes_state *State, uint8_t *roundkey, const uint8_t *key); UTILAPI aes_state * aes_encode_initialize (aes_state *state, aes_keyblock *keyblock, const void *key, size_t keylength, const void *iv); UTILAPI 
aes_state * aes_decode_initialize (aes_state *state, aes_keyblock *keyblock, const void *key, size_t keylength, const void *iv); #define aes_encode_init(state, key, keylength) aes_encode_initialize(state, NULL, key, keylength, NULL) #define aes_decode_init(state, key, keylength) aes_decode_initialize(state, NULL, key, keylength, NULL) UTILAPI void aes_state_close (aes_state *state); /* Codecs operating on iof */ UTILAPI iof_status aes_encode_state (iof *I, iof *O, aes_state *state); UTILAPI iof_status aes_decode_state (iof *I, iof *O, aes_state *state); UTILAPI size_t aes_encode_data (const void *input, size_t length, void *output, const void *key, size_t keylength, const void *iv, int flags); UTILAPI size_t aes_encode_state_data (aes_state *state, const void *input, size_t length, void *output); UTILAPI size_t aes_decode_data (const void *input, size_t length, void *output, const void *key, size_t keylength, const void *iv, int flags); UTILAPI size_t aes_decode_state_data (aes_state *state, const void *input, size_t length, void *output); /* random bytes generator */ UTILAPI void random_bytes (uint8_t *output, size_t size); UTILAPI void aes_generate_iv (uint8_t output[16]); /* filters */ iof * iof_filter_rc4_decoder (iof *N, const void *key, size_t keylength); iof * iof_filter_rc4_encoder (iof *N, const void *key, size_t keylength); iof * iof_filter_aes_decoder (iof *N, const void *key, size_t keylength); iof * iof_filter_aes_encoder (iof *N, const void *key, size_t keylength); #endifluametatex-2.10.08/source/libraries/pplib/util/utilcryptdef.h000066400000000000000000000010371442250314700242640ustar00rootroot00000000000000 #ifndef UTIL_CRYPTDEF_H #define UTIL_CRYPTDEF_H struct rc4_state { union { rc4_map *map; uint8_t *smap; }; int i, j; int flush; int flags; }; struct aes_state { size_t keylength; int rounds; //int keywords; union { aes_block block; uint8_t data[16]; }; aes_keyblock *keyblock; uint8_t iv[16]; uint8_t buffered; int flush; int flags; }; typedef 
union { rc4_state *rc4state; aes_state *aesstate; void *voidstate; } crypt_state_pointer; // to avoid 'dereferencing type-puned ...' warnings #endifluametatex-2.10.08/source/libraries/pplib/util/utildecl.h000066400000000000000000000007631442250314700233600ustar00rootroot00000000000000 #ifndef UTIL_DECL_H #define UTIL_DECL_H /* UTILDLL - when building .dll UTILEXE - when building .exe to import symbols from .dll */ #if defined (_WIN32) || defined(_WIN64) # ifdef UTILDLL # define UTILAPI __declspec(dllexport) # define UTILDEF __declspec(dllexport) # else # ifdef UTILEXE # define UTILAPI __declspec(dllimport) # define UTILDEF # else # define UTILAPI # define UTILDEF # endif # endif #else # define UTILAPI # define UTILDEF #endif #endifluametatex-2.10.08/source/libraries/pplib/util/utilflate.c000066400000000000000000000226061442250314700235370ustar00rootroot00000000000000 #include "../../utilities/auxzlib.h" #include "utilmem.h" #include "utillog.h" #include "utilflate.h" /* flate codec */ /* Flate codec example provided at http://www.zlib.net/zpipe.c (http://www.zlib.net/zlib_how.html) uses the following scheme: - provide input data buffer - keep providing output until codec function uses it For encoder: z->zalloc = z->zfree = z->zopaque = NULL; deflateInit(z, compression_level); do { z->next_in = z->avail_in = do { z->next_out = z->avail_out = deflate(z, flush); // write obtained output from deflate } while (z->avail_out == 0); assert(z->avail_in == 0); } while (flush != Z_FINISH); deflateEnd(z); 'z' is an internal codec state of type z_stream, 'flush' is either Z_NO_FLUSH or Z_FINISH at the end of data. deflate() ensures to consume the entire input if there are no obstackles to write an output. The inner loop provides an output space as long as it is used by deflate(). When deflate() wrote everything it could, it leaves z->avail_out > 0, which breaks the inner loop. At this point z->avail_in should also be zero. 
The example documentation claims that the return codes from deflate() doesn't really need to be checked, as checking z->avail_out for zero is enough. The scheme for decoder is pretty similar, but with substantial differences: - the end of stream is automatically found by decoder, so using Z_FINISH flag to indicate an end of stream is not necessary, but if provided, it MUST be given only if the EOF marker actually occurs in the input chunk, and subsequent calls to inflate() must consequently use Z_FINISH - calling inflate() as long as it uses the output buffer provided still works for decoder, but inflate() does not ensure to consume the entire input, as it will read until end of stream marker - the return code from inflate() must be checked to ensure the proper reaction on invalid data stream and end of stream signals - initialization must set an input buffer to NULL or to some existing chunk (the later helps zlib to perform better on inflate(), but inflate() does the research on the first call anyway) z->zalloc = z->zfree = z->zopaque = NULL; z->next_in = NULL, z->avail_in = 0; inflateInit(z); do { z->next_in = z->avail_in = do { z->next_out = z->avail_out = status = inflate(z, flush); // check return status // write obtained output from inflate } while (z->avail_out == 0); } while (status != Z_STREAM_END); inflateEnd(z); Our wrapper generally follows "prepare input, keep pomping output" scheme, but we need to support handler function breaks on IOFEMPTY and IOFFULL. 
For a consistent come back from those on subsequent calls to the handler function, we use 3 states: - FLATE_IN - get input, when got something then goto FALTE_OUT - FLATE_OUT - set z_stream buffers and keep writing output until enything to write, then goto FLATE_IN or FLATE_DONE - FLATE_DONE - we are done, no return from that state Distinction of FLATE_IN and FLATE_OUT states guarantees that we will not get more input until zlib consumes the stuff from the previous feed, possibly interrupted by IOFFULL return on filling the output buffer. This distinction is not critical, but makes the filter running according to the scheme described above. Note that we set zlib input buffer (z->next_in, z->avail_in) at the beginning of FLATE_OUT state. Also note that we always update our buffers according to updated avail_in / avail_out values, just after a call to inflate() / deflate(). So no matter what have happens between handler calls, zlib input buffer is in sync with ours. */ struct flate_state { z_stream z; int flush; int status; int level; /* encoder compression level -1..9 */ }; typedef union { flate_state *flatestate; void *voidstate; } flate_state_pointer; // to avoid 'dereferencing type-puned ...' 
warnings enum { FLATE_IN, FLATE_OUT, FLATE_DONE }; flate_state * flate_decoder_init (flate_state *state) { /* initialize zlib */ z_stream *z = &state->z; z->zalloc = &lmt_zlib_alloc; /* Z_NULL */ z->zfree = &lmt_zlib_free; /* Z_NULL */ z->opaque = Z_NULL; z->avail_in = 0; /* must be initialized before inflateInit() */ z->next_in = Z_NULL; /* ditto */ if (inflateInit(z) != Z_OK) return NULL; state->status = FLATE_IN; return state; } flate_state * flate_encoder_init (flate_state *state) { z_stream *z = &state->z; z->zalloc = &lmt_zlib_alloc; /* Z_NULL */ z->zfree = &lmt_zlib_free; /* Z_NULL */ z->opaque = Z_NULL; z->avail_in = 0; z->next_in = Z_NULL; state->level = Z_DEFAULT_COMPRESSION; // will probably be moved upward if (deflateInit(z, state->level) != Z_OK) return NULL; state->status = FLATE_IN; return state; } static const char * zmess (int zstatus) { switch (zstatus) { case Z_OK: return "ok"; case Z_STREAM_END: return "end of stream"; case Z_BUF_ERROR: return "buffer error"; case Z_STREAM_ERROR: return "stream error"; case Z_NEED_DICT: return "need dict"; case Z_DATA_ERROR: return "data error"; case Z_MEM_ERROR: return "memory error"; case Z_VERSION_ERROR: return "version error"; case Z_ERRNO: return "io error"; default: break; } return "unknown error"; } iof_status flate_decode_state (iof *I, iof *O, flate_state *state) { z_stream *z; int zstatus = Z_OK; z = &state->z; while (state->status != FLATE_DONE) { if (state->status == FLATE_IN) { if (!iof_readable(I)) return state->flush ? 
IOFERR : IOFEMPTY; state->status = FLATE_OUT; } z->next_in = (Bytef *)I->pos; z->avail_in = (uInt)iof_left(I); do { if (!iof_writable(O)) return IOFFULL; z->next_out = (Bytef *)O->pos; z->avail_out = (uInt)iof_left(O); zstatus = inflate(z, Z_NO_FLUSH); I->pos += iof_left(I) - z->avail_in; O->pos += iof_left(O) - z->avail_out; switch (zstatus) { case Z_OK: case Z_STREAM_END: break; default: loggerf("flate decoder %s (%d)", zmess(zstatus), zstatus); return IOFERR; } } while (z->avail_out == 0); state->status = zstatus == Z_STREAM_END ? FLATE_DONE : FLATE_IN; } return IOFEOF; } iof_status flate_encode_state (iof *I, iof *O, flate_state *state) { z_stream *z; int zstatus; z = &state->z; while (state->status != FLATE_DONE) { if (state->status == FLATE_IN) { if (!iof_readable(I)) if (!state->flush) return IOFEMPTY; state->status = FLATE_OUT; } z->next_in = (Bytef *)I->pos; z->avail_in = (uInt)iof_left(I); do { if (!iof_writable(O)) return IOFFULL; z->next_out = (Bytef *)O->pos; z->avail_out = (uInt)iof_left(O); zstatus = deflate(z, state->flush ? Z_FINISH : Z_NO_FLUSH); I->pos += iof_left(I) - z->avail_in; O->pos += iof_left(O) - z->avail_out; switch (zstatus) { case Z_OK: case Z_STREAM_END: break; default: loggerf("flate encoder %s (%d)", zmess(zstatus), zstatus); return IOFERR; } } while (z->avail_out == 0); state->status = state->flush ? 
FLATE_DONE : FLATE_IN; } return IOFEOF; } void flate_decoder_close (flate_state *state) { inflateEnd(&state->z); } void flate_encoder_close (flate_state *state) { deflateEnd(&state->z); } /* filter */ // flate decoder function static size_t flate_decoder (iof *F, iof_mode mode) { flate_state *state; iof_status status; size_t tail; state = iof_filter_state(flate_state *, F); switch(mode) { case IOFLOAD: case IOFREAD: if (F->flags & IOF_STOPPED) return 0; tail = iof_tail(F); F->pos = F->buf + tail; F->end = F->buf + F->space; do { status = flate_decode_state(F->next, F, state); } while (mode == IOFLOAD && status == IOFFULL && iof_resize_buffer(F)); return iof_decoder_retval(F, "flate", status); case IOFCLOSE: flate_decoder_close(state); iof_free(F); return 0; default: break; } return 0; } // flate encoder function static size_t flate_encoder (iof *F, iof_mode mode) { flate_state *state; iof_status status; state = iof_filter_state(flate_state *, F); switch (mode) { case IOFFLUSH: state->flush = 1; FALLTHRU // fall through case IOFWRITE: F->end = F->pos; F->pos = F->buf; status = flate_encode_state(F, F->next, state); return iof_encoder_retval(F, "flate", status); case IOFCLOSE: if (!state->flush) flate_encoder(F, IOFFLUSH); flate_encoder_close(state); iof_free(F); return 0; default: break; } return 0; } iof * iof_filter_flate_decoder (iof *N) { iof *I; flate_state_pointer P; I = iof_filter_reader(flate_decoder, sizeof(flate_state), &P.voidstate); iof_setup_next(I, N); if (flate_decoder_init(P.flatestate) == NULL) { iof_discard(I); return NULL; } P.flatestate->flush = 1; return I; } iof * iof_filter_flate_encoder (iof *N) { iof *O; flate_state_pointer P; O = iof_filter_writer(flate_encoder, sizeof(flate_state), &P.voidstate); iof_setup_next(O, N); if (flate_encoder_init(P.flatestate) == NULL) { iof_discard(O); return NULL; } return O; } 
luametatex-2.10.08/source/libraries/pplib/util/utilflate.h000066400000000000000000000010331442250314700235330ustar00rootroot00000000000000#ifndef UTIL_FLATE_H #define UTIL_FLATE_H #include "utiliof.h" typedef struct flate_state flate_state; flate_state * flate_decoder_init (flate_state *state); flate_state * flate_encoder_init (flate_state *state); iof_status flate_decode_state (iof *I, iof *O, flate_state *state); iof_status flate_encode_state (iof *I, iof *O, flate_state *state); void flate_decoder_close (flate_state *state); void flate_encoder_close (flate_state *state); iof * iof_filter_flate_decoder (iof *N); iof * iof_filter_flate_encoder (iof *N); #endifluametatex-2.10.08/source/libraries/pplib/util/utilfpred.c000066400000000000000000000715301442250314700235440ustar00rootroot00000000000000/* predictor filters; common for flate and lzw */ #include "utilmem.h" #include "utillog.h" #include "utilfpred.h" /* Here we implement predictor filters used with flate and lzw compressions in PDF streams. The main idea of data prediction is to compute and output the differences between data records instead of those records. Adjacent pixels in images are usually similar, so differences between pixel values tends to be zero. And both Flate and LZW performs better when the input is rather smooth. Although a preliminary use of predictors is related to bitmap data, The actual need for predictor filter came from the fact that xref streams may also be predicted (usually with PNG up-predictor). PDF specification allows to use several predictor algorithms, specified by /Predictor key in /DecodeParms dictionary: 1 - no predictor (default) 2 - TIFF horizontal predictor 10 - PNG none predictor 11 - PNG sub predictor 12 - PNG up predictor 13 - PNG average predictor 14 - PNG paeth predictor All PNG predictors works on bytes, regardless the image color-depth. While encoding, every input data byte is decreased by the appropriate byte of the previous pixel. 
Even if the pixel does not fit a full byte, PNG predictors use an artificial pixel size rounded up to a full byte. PNG predictors utilizes previous (left) pixel, pixel above and previous to above pixel. In case of PNG, the type of the predictor is written on a dedicated byte at the beginning of every scanline. It means all predictor functions must maintain and information about left, above and left-above pixels. Despite the same differencing idea, TIFF predictors are different. The prediction process bases on pixel components, which are not necessarily bytes (component of a pixel is added/substracted from a relevant component of a previous pixel). In TIFF predictor 2, only the previous (the left) pixel is taken into account, there is no need to keep an information about other surrounding pixels. Also there is no expicit algorithm marker in data; the same prediction method is applied to all input rows. Not surprisingly, predictor encoders and decoders are pretty similar. Encoders take some input value and the previous input value (or 0 at the beginning of the scanline) and output a difference between them. Decoders takes an input value, previously decoded value (or zero) and outputs their sum. When encoding, the result is cast to the proper unsigned integer, when decoding, modulo 256 (or appropriate) is used, which makes encoding and decoding looseless. Some extra bits trickery is involved in TIFF predictor function, when components don't fit bytes boundary. In that case, an input is treated as a bits stream. Every input byte is "buffered" in a larger integer, as its lower bits (from right). Every output value is taken from its higher (left) bits. In a special case of bits-per-component equal 1, we buffer all pixel bits and use XOR to compute bits difference between pixels. I've excerpted that trick from poppler, but I'm not really sure if it works any better, especially when the number of components per pixel is 1. In that case we do a hard bit-by-bit work anyway. 
In PNG prediction, we record every pixel byte (in decoded form) in state->rowsave. At the end of a scanline we copy state->rowsave to state->rowup, so that in the next scanline we can access up-pixel byte. Left pixel byte is accessed as state->rowsave (the byte recently stored or virtual left edge byte \0). Up-left pixel byte is accessed via state->rowup, but with state->pixelsize offset (same as left byte, possibly \0 at the left edge of the row). Both state->rowup and state->rowsave has a safe span of pixelsize bytes on the left, that are permanently \0. */ #define predictor_component_t uint16_t #define predictor_pixel1b_t uint32_t #define MAX_COMPONENTS 8 struct predictor_state { int default_predictor; /* default predictor indicator */ int current_predictor; /* current predictor, possibly taken from algorithm marker in PNG data */ int rowsamples; /* number of pixels in a scanline (/DecodeParms << /Columns ... >>) */ int compbits; /* number of bits per component (/DecodeParms << /BitsPerComponent ... >>) */ int components; /* number of components (/DecodeParms << /Colors ... 
>>) */ uint8_t *buffer; /* temporary private buffer area */ uint8_t *rowin; /* an input row buffer position */ int rowsize; /* size of a current scanline in bytes (rounded up) */ int rowend; /* an input buffer end position */ int rowindex; /* an output buffer position */ union { struct { /* used by PNG predictor codecs */ uint8_t *rowup, *rowsave; /* previous scanline buffers */ int predictorbyte; /* flag indicating that algorithm byte is read/written */ int pixelsize; /* number of bytes per pixel (rounded up) */ }; struct { /* used by TIFF predictor codecs */ predictor_component_t compbuffer[MAX_COMPONENTS]; union { predictor_component_t *prevcomp; /* an array of left pixel components, typically eq ->compbuffer */ predictor_pixel1b_t *prevpixel; /* left pixel value stored on a single integer (for 1bit color-depth) */ }; int compin, compout; /* bit stream buffers */ int bitsin, bitsout; /* bit stream counters */ int sampleindex; /* pixel counter */ int compindex; /* component counter */ int pixbufsize; /* size of pixel buffer in bytes */ }; }; int flush; int status; }; typedef union { predictor_state *predictorstate; void *voidstate; } predictor_state_pointer; // to avoid 'dereferencing type-puned ...' warnings enum { STATUS_LAST = 0, STATUS_CONTINUE = 1 // any value different then IOFEOF, IOFERR, ... which are < 0 }; /* Predictor type identifiers (pdf spec 76). lpdf doesn't hire the codec if predictor is 1. Predictor 15 indicates that the type of PNG prediction algorithm may change in subsequent lines. We always check algorithm marker anyway. 
*/ enum predictor_code { NONE_PREDICTOR = 1, TIFF_PREDICTOR = 2, PNG_NONE_PREDICTOR = 10, PNG_SUB_PREDICTOR = 11, PNG_UP_PREDICTOR = 12, PNG_AVERAGE_PREDICTOR = 13, PNG_PAETH_PREDICTOR = 14, PNG_OPTIMUM_PREDICTOR = 15 }; predictor_state * predictor_decoder_init (predictor_state *state, int predictor, int rowsamples, int components, int compbits) { int rowsize, pixelsize; #define storage_pos(b, p, size) ((b = p), (p += size)) uint8_t *buffer, *p; size_t buffersize; pixelsize = (components * compbits + 7) >> 3; // to bytes, rounded up rowsize = (rowsamples * components * compbits + 7) >> 3; state->default_predictor = state->current_predictor = predictor; state->rowsamples = rowsamples; state->components = components; state->compbits = compbits; if (predictor == TIFF_PREDICTOR) { /* tiff predictor */ size_t compbuf, pixbuf; compbuf = components * sizeof(predictor_component_t); pixbuf = 1 * sizeof(predictor_pixel1b_t); state->pixbufsize = (int)(compbuf > pixbuf ? compbuf : pixbuf); buffersize = rowsize * sizeof(uint8_t); buffer = (uint8_t *)util_calloc(buffersize, 1); if ((size_t)state->pixbufsize > sizeof(state->compbuffer)) // components > MAX_COMPONENTS state->prevcomp = (predictor_component_t *)util_calloc(state->pixbufsize, 1); else state->prevcomp = state->compbuffer; // &state->prevcomp == &state->prevpixel state->sampleindex = state->compindex = 0; state->bitsin = state->bitsout = 0; state->compin = state->compout = 0; } else { /* png predictors */ buffersize = (3 * rowsize + 2 * pixelsize + 1) * sizeof(uint8_t); p = buffer = (uint8_t *)util_calloc(buffersize, 1); storage_pos(state->rowin, p, 1 + rowsize); // one extra byte for prediction algorithm tag p += pixelsize; // pixelsize extra bytes for virtual left pixel at the edge, eg. 
rowup[-1] (permanently \0) storage_pos(state->rowup, p, rowsize); // actual row byte p += pixelsize; // ditto storage_pos(state->rowsave, p, rowsize); state->pixelsize = pixelsize; state->predictorbyte = 0; } state->buffer = buffer; state->rowsize = rowsize; state->rowindex = 0; state->rowend = 0; state->status = STATUS_CONTINUE; return state; } predictor_state * predictor_encoder_init (predictor_state *state, int predictor, int rowsamples, int components, int compbits) { return predictor_decoder_init(state, predictor, rowsamples, components, compbits); } void predictor_decoder_close (predictor_state *state) { util_free(state->buffer); if (state->default_predictor == TIFF_PREDICTOR && state->prevcomp != NULL && state->prevcomp != state->compbuffer) util_free(state->prevcomp); } void predictor_encoder_close (predictor_state *state) { predictor_decoder_close(state); } /* All predoctor codecs first read the entire data row into a buffer. This is not crucial for the process, but allows to separate read/write states. In particular, there is one place in which codec functions may return on EOD. */ #define start_row(state) (state->rowindex = 0, state->rowin = state->buffer) static int read_scanline (predictor_state *state, iof *I, int size) { int rowtail, left; while ((rowtail = size - state->rowend) > 0) { left = (int)iof_left(I); if (left >= rowtail) { memcpy(state->buffer + state->rowend, I->pos, (size_t)rowtail); state->rowend += rowtail; I->pos += rowtail; start_row(state); break; } else { if ((rowtail = left) > 0) { memcpy(state->buffer + state->rowend, I->pos, (size_t)rowtail); state->rowend += rowtail; I->pos += rowtail; } if (iof_input(I) == 0) { if (state->rowend == 0) // no scanline to process, no more input return state->flush ? IOFEOF : IOFEMPTY; /* If we are here, there is an incomplete scanline in buffer: - if there is a chance for more (state->flush == 0), than wait for more - otherwise encode/decode the last incomplete line? pdf spec p. 
76 says that "A row occupies a whole number of bytes", so this situation should be considered abnormal (not found so far). */ if (!state->flush) return IOFEMPTY; loggerf("incomplete scanline in predictor filter"); //return IOFERR; state->status = STATUS_LAST; state->rowsize -= size - state->rowend; start_row(state); break; } } } return STATUS_CONTINUE; } #define read_row(state, I, size, status) if ((status = read_scanline(state, I, size)) != STATUS_CONTINUE) return status #define ensure_output_bytes(O, n) if (!iof_ensure(O, n)) return IOFFULL #define tobyte(c) ((uint8_t)(c)) #define tocomp(c) ((uint16_t)(c)) #define row_byte(state) (state->rowin[state->rowindex]) /* png predictor macros; on bytes */ #define up_pixel_byte(state) (state->rowup[state->rowindex]) #define upleft_pixel_byte(state) (state->rowup[state->rowindex - state->pixelsize]) #define left_pixel_byte(state) (state->rowsave[state->rowindex - state->pixelsize]) #define save_pixel_byte(state, c) (state->rowsave[state->rowindex] = (uint8_t)(c)) /* tiff predictor macros; on components */ #define left_pixel_component(state) (state->prevcomp[state->compindex]) // tiff predictor with 2, 4, 8, 16 components #define left_pixel_value(state) (state->prevpixel[0]) // tiff predictor with 1bit components /* assignment in conditional #define save_pixel_component(state, c) ((void)\ ((state->prevcomp[state->compindex] = (predictor_component_t)(c)), \ ++state->compindex, (state->compindex < state->components || (state->compindex = 0)))) */ #define save_pixel_component(state, c) \ do { state->prevcomp[state->compindex] = (predictor_component_t)(c); if (++state->compindex >= state->components) state->compindex = 0; } while (0) #define save_pixel_value(state, c) (state->prevpixel[0] = (predictor_pixel1b_t)(c)) /* Once the codec function is done with the scanline, we set imaginary left pixel data to zero, and reset row counters to zero in order to allow buffering another input scanline. 
*/ #define reset_row(state) state->rowend = 0 #define reset_png_row(state) (memcpy(state->rowup, state->rowsave, state->rowsize), state->predictorbyte = 0, reset_row(state)) #define reset_tiff_row(state) \ memset(state->prevcomp, 0, state->pixbufsize), \ state->bitsin = state->bitsout = 0, \ state->compin = state->compout = 0, \ reset_row(state), \ state->sampleindex = state->compindex = 0 /* PNG paeth predictor function; http://www.libpng.org/pub/png/book/chapter09.html Compute the base value p := left + up - upleft, then choose that byte the closest (of the smallest absolute difference) to the base value. Left byte has a precedence. */ static int paeth (predictor_state *state) { int p, p1, p2, p3; p = left_pixel_byte(state) + up_pixel_byte(state) - upleft_pixel_byte(state); p1 = p >= left_pixel_byte(state) ? (p - left_pixel_byte(state)) : (left_pixel_byte(state) - p); p2 = p >= up_pixel_byte(state) ? (p - up_pixel_byte(state)) : (up_pixel_byte(state) - p); p3 = p >= upleft_pixel_byte(state) ? (p - upleft_pixel_byte(state)) : (upleft_pixel_byte(state) - p); return (p1 <= p2 && p1 <= p3) ? left_pixel_byte(state) : (p2 <= p3 ? up_pixel_byte(state) : upleft_pixel_byte(state)); } /* predictor decoder */ iof_status predictor_decode_state (iof *I, iof *O, predictor_state *state) { int status, c, d, outbytes; while (state->status == STATUS_CONTINUE) { if (state->default_predictor >= 10) // PNG predictor? 
{ read_row(state, I, state->rowsize + 1, status); if (state->predictorbyte == 0) { // we could actually check state->rowin <> state->buffer, but we need this flag for encoder anyway state->current_predictor = row_byte(state) + 10; state->predictorbyte = 1; ++state->rowin; } } else { read_row(state, I, state->rowsize, status); } switch (state->current_predictor) { case NONE_PREDICTOR: for ( ; state->rowindex < state->rowsize; ++state->rowindex) { ensure_output_bytes(O, 1); c = row_byte(state); iof_set(O, c); } reset_row(state); break; case TIFF_PREDICTOR: switch (state->compbits) { case 1: outbytes = (state->components + 7) >> 3; for ( ; state->sampleindex < state->rowsamples; ++state->sampleindex) { ensure_output_bytes(O, outbytes); while (state->bitsin < state->components) { state->compin = (state->compin << 8) | row_byte(state); state->bitsin += 8; ++state->rowindex; } state->bitsin -= state->components; d = state->compin >> state->bitsin; state->compin &= (1 << state->bitsin) - 1; c = d ^ left_pixel_value(state); save_pixel_value(state, c); state->compout = (state->compout << state->components) | c; state->bitsout += state->components; while (state->bitsout >= 8) { state->bitsout -= 8; iof_set(O, state->compout >> state->bitsout); state->compout &= (1 << state->bitsout) - 1; } } if (state->bitsout > 0) { ensure_output_bytes(O, 1); iof_set(O, state->compin << (8 - state->bitsout)); } break; case 2: case 4: for ( ; state->sampleindex < state->rowsamples; ++state->sampleindex) { for ( ; state->compindex < state->components; ) // state->compindex is ++ed by save_pixel_component() { ensure_output_bytes(O, 1); if (state->bitsin < state->compbits) { state->compin = (state->compin << 8) | row_byte(state); state->bitsin += 8; ++state->rowindex; } state->bitsin -= state->compbits; d = state->compin >> state->bitsin; state->compin &= (1 << state->bitsin) - 1; c = (d + left_pixel_component(state)) & 0xff; save_pixel_component(state, c); state->compout = (state->compout << 
state->compbits) | c; state->bitsout += state->compbits; if (state->bitsout >= 8) { state->bitsout -= 8; iof_set(O, state->compout >> state->bitsout); state->compout &= (1 << state->bitsout) - 1; } } } if (state->bitsout > 0) { ensure_output_bytes(O, 1); iof_set(O, state->compin << (8 - state->bitsout)); } break; case 8: for ( ; state->rowindex < state->rowsize; ++state->rowindex) { ensure_output_bytes(O, 1); c = (row_byte(state) + left_pixel_component(state)) & 0xff; save_pixel_component(state, c); iof_set(O, c); } break; case 16: for ( ; state->rowindex < state->rowsize - 1; ++state->rowindex) { ensure_output_bytes(O, 2); d = row_byte(state) << 8; ++state->rowindex; d |= row_byte(state); c = (d + left_pixel_component(state)) & 0xffff; save_pixel_component(state, c); iof_set2(O, c >> 8, c & 0xff); } break; default: return IOFERR; } reset_tiff_row(state); break; case PNG_NONE_PREDICTOR: for ( ; state->rowindex < state->rowsize; ++state->rowindex) { ensure_output_bytes(O, 1); c = row_byte(state); save_pixel_byte(state, c); // next row may need it iof_set(O, c); } reset_png_row(state); break; case PNG_SUB_PREDICTOR: for ( ; state->rowindex < state->rowsize; ++state->rowindex) { ensure_output_bytes(O, 1); c = (row_byte(state) + left_pixel_byte(state)) & 0xff; save_pixel_byte(state, c); iof_set(O, c); } reset_png_row(state); break; case PNG_UP_PREDICTOR: for ( ; state->rowindex < state->rowsize; ++state->rowindex) { ensure_output_bytes(O, 1); c = (row_byte(state) + up_pixel_byte(state)) & 0xff; save_pixel_byte(state, c); iof_set(O, c); } reset_png_row(state); break; case PNG_AVERAGE_PREDICTOR: for ( ; state->rowindex < state->rowsize; ++state->rowindex) { ensure_output_bytes(O, 1); c = (row_byte(state) + ((up_pixel_byte(state) + left_pixel_byte(state)) / 2)) & 0xff; save_pixel_byte(state, c); iof_set(O, c); } reset_png_row(state); break; case PNG_PAETH_PREDICTOR: for ( ; state->rowindex < state->rowsize; ++state->rowindex) { ensure_output_bytes(O, 1); c = 
(row_byte(state) + paeth(state)) & 0xff; save_pixel_byte(state, c); iof_set(O, c); } reset_png_row(state); break; //case PNG_OPTIMUM_PREDICTOR: // valid as default_redictor, but not as algorithm identifier byte default: return IOFERR; } } return state->status == STATUS_LAST ? IOFERR : IOFEOF; } /* predictor encoder */ iof_status predictor_encode_state (iof *I, iof *O, predictor_state *state) { int status, c, d, outbytes; while (state->status == STATUS_CONTINUE) { read_row(state, I, state->rowsize, status); if (state->current_predictor >= 10 && state->predictorbyte == 0) { ensure_output_bytes(O, 1); iof_set(O, state->current_predictor - 10); state->predictorbyte = 1; } switch (state->current_predictor) { case NONE_PREDICTOR: for ( ; state->rowindex < state->rowsize; ++state->rowindex) { ensure_output_bytes(O, 1); c = row_byte(state); iof_set(O, c); } reset_row(state); break; case TIFF_PREDICTOR: switch (state->compbits) { case 1: outbytes = (state->components + 7) >> 3; for ( ; state->sampleindex < state->rowsamples; ++state->sampleindex) { ensure_output_bytes(O, outbytes); while (state->bitsin < state->components) { state->compin = (state->compin << 8) | row_byte(state); state->bitsin += 8; ++state->rowindex; } state->bitsin -= state->components; c = state->compin >> state->bitsin; state->compin &= (1 << state->bitsin) - 1; d = c ^ left_pixel_value(state); save_pixel_value(state, c); state->compout = (state->compout << state->components) | d; state->bitsout += state->components; while (state->bitsout >= 8) { state->bitsout -= 8; iof_set(O, state->compout >> state->bitsout); state->compout &= (1 << state->bitsout) - 1; } } if (state->bitsout > 0) { ensure_output_bytes(O, 1); iof_set(O, state->compin << (8 - state->bitsout)); } break; case 2: case 4: for ( ; state->sampleindex < state->rowsamples; ++state->sampleindex) { for ( ; state->compindex < state->components; ) { ensure_output_bytes(O, 1); if (state->bitsin < state->compbits) { state->compin = (state->compin 
<< 8) | row_byte(state); state->bitsin += 8; ++state->rowindex; } state->bitsin -= state->compbits; c = state->compin >> state->bitsin; state->compin &= (1 << state->bitsin) - 1; d = tocomp(c - left_pixel_component(state)); save_pixel_component(state, c); state->compout = (state->compout << state->compbits) | d; state->bitsout += state->compbits; if (state->bitsout >= 8) { state->bitsout -= 8; iof_set(O, state->compout >> state->bitsout); state->compout &= (1 << state->bitsout) - 1; } } } if (state->bitsout > 0) { ensure_output_bytes(O, 1); iof_set(O, state->compin << (8 - state->bitsout)); } break; case 8: for ( ; state->rowindex < state->rowsize; ++state->rowindex) { ensure_output_bytes(O, 1); c = row_byte(state); d = tobyte(c - left_pixel_component(state)); save_pixel_component(state, c); iof_set(O, d); } break; case 16: for ( ; state->rowindex < state->rowsize - 1; ++state->rowindex) { ensure_output_bytes(O, 2); c = row_byte(state) << 8; ++state->rowindex; c |= row_byte(state); d = tocomp(c - left_pixel_component(state)); save_pixel_component(state, c); iof_set2(O, d >> 8, d & 0xff); } break; default: return IOFERR; } reset_tiff_row(state); break; case PNG_NONE_PREDICTOR: for ( ; state->rowindex < state->rowsize; ++state->rowindex) { ensure_output_bytes(O, 1); c = row_byte(state); save_pixel_byte(state, c); // next row may need it iof_set(O, c); } reset_png_row(state); break; case PNG_SUB_PREDICTOR: for ( ; state->rowindex < state->rowsize; ++state->rowindex) { ensure_output_bytes(O, 1); c = row_byte(state); d = tobyte(c - left_pixel_byte(state)); save_pixel_byte(state, c); iof_set(O, d); } reset_png_row(state); break; case PNG_OPTIMUM_PREDICTOR: // not worthy to perform optimization case PNG_UP_PREDICTOR: for ( ; state->rowindex < state->rowsize; ++state->rowindex) { ensure_output_bytes(O, 1); c = row_byte(state); d = tobyte(c - up_pixel_byte(state)); save_pixel_byte(state, c); iof_set(O, d); } reset_png_row(state); break; case PNG_AVERAGE_PREDICTOR: for ( ; 
state->rowindex < state->rowsize; ++state->rowindex) { ensure_output_bytes(O, 1); c = row_byte(state); d = tobyte(c - ((up_pixel_byte(state) + left_pixel_byte(state)) >> 1)); save_pixel_byte(state, c); iof_set(O, d); } reset_png_row(state); break; case PNG_PAETH_PREDICTOR: for ( ; state->rowindex < state->rowsize; ++state->rowindex) { ensure_output_bytes(O, 1); c = row_byte(state); d = tobyte(c - paeth(state)); save_pixel_byte(state, c); iof_set(O, d); } reset_png_row(state); break; default: return IOFERR; } } return state->status == STATUS_LAST ? IOFERR : IOFEOF; } iof_status predictor_decode (iof *I, iof *O, int predictor, int rowsamples, int components, int compbits) { predictor_state state; int ret; predictor_decoder_init(&state, predictor, rowsamples, components, compbits); state.flush = 1; ret = predictor_decode_state(I, O, &state); predictor_decoder_close(&state); return ret; } iof_status predictor_encode (iof *I, iof *O, int predictor, int rowsamples, int components, int compbits) { predictor_state state; int ret; predictor_encoder_init(&state, predictor, rowsamples, components, compbits); state.flush = 1; ret = predictor_encode_state(I, O, &state); predictor_encoder_close(&state); return ret; } /* filters */ // predictor decoder function static size_t predictor_decoder (iof *F, iof_mode mode) { predictor_state *state; iof_status status; size_t tail; state = iof_filter_state(predictor_state *, F); switch(mode) { case IOFLOAD: case IOFREAD: if (F->flags & IOF_STOPPED) return 0; tail = iof_tail(F); F->pos = F->buf + tail; F->end = F->buf + F->space; do { status = predictor_decode_state(F->next, F, state); } while (mode == IOFLOAD && status == IOFFULL && iof_resize_buffer(F)); return iof_decoder_retval(F, "predictor", status); case IOFCLOSE: predictor_decoder_close(state); iof_free(F); return 0; default: break; } return 0; } // predictor encoder function static size_t predictor_encoder (iof *F, iof_mode mode) { predictor_state *state; iof_status status; state 
= iof_filter_state(predictor_state *, F); switch (mode) { case IOFFLUSH: state->flush = 1; FALLTHRU // fall through case IOFWRITE: F->end = F->pos; F->pos = F->buf; status = predictor_encode_state(F, F->next, state); return iof_encoder_retval(F, "predictor", status); case IOFCLOSE: if (!state->flush) predictor_encoder(F, IOFFLUSH); predictor_encoder_close(state); iof_free(F); return 0; default: break; } return 0; } iof * iof_filter_predictor_decoder (iof *N, int predictor, int rowsamples, int components, int compbits) { iof *I; predictor_state_pointer P; I = iof_filter_reader(predictor_decoder, sizeof(predictor_state), &P.voidstate); iof_setup_next(I, N); if (predictor_decoder_init(P.predictorstate, predictor, rowsamples, components, compbits) == NULL) { iof_discard(I); return NULL; } P.predictorstate->flush = 1; return I; } iof * iof_filter_predictor_encoder (iof *N, int predictor, int rowsamples, int components, int compbits) { iof *O; predictor_state_pointer P; O = iof_filter_writer(predictor_encoder, sizeof(predictor_state), &P.voidstate); iof_setup_next(O, N); if (predictor_encoder_init(P.predictorstate, predictor, rowsamples, components, compbits) == NULL) { iof_discard(O); return NULL; } return O; } luametatex-2.10.08/source/libraries/pplib/util/utilfpred.h000066400000000000000000000021031442250314700235370ustar00rootroot00000000000000#ifndef UTIL_FILTER_PREDICTOR_H #define UTIL_FILTER_PREDICTOR_H #include "utiliof.h" typedef struct predictor_state predictor_state; predictor_state * predictor_decoder_init (predictor_state *state, int predictor, int rowsamples, int components, int compbits); predictor_state * predictor_encoder_init (predictor_state *state, int predictor, int rowsamples, int components, int compbits); void predictor_decoder_close (predictor_state *state); void predictor_encoder_close (predictor_state *state); iof_status predictor_decode_state (iof *I, iof *O, predictor_state *state); iof_status predictor_encode_state (iof *I, iof *O, 
predictor_state *state); iof_status predictor_decode (iof *I, iof *O, int predictor, int rowsamples, int components, int compbits); iof_status predictor_encode (iof *I, iof *O, int predictor, int rowsamples, int components, int compbits); iof * iof_filter_predictor_decoder (iof *N, int predictor, int rowsamples, int components, int compbits); iof * iof_filter_predictor_encoder (iof *N, int predictor, int rowsamples, int components, int compbits); #endifluametatex-2.10.08/source/libraries/pplib/util/utiliof.c000066400000000000000000002151701442250314700232210ustar00rootroot00000000000000/* input/output stream */ #include #include #include #include "utilmem.h" #include "utillog.h" #include "utiliof.h" /* commons */ void * iof_copy_data (const void *data, size_t size) { return memcpy(util_malloc(size), data, size); } uint8_t * iof_copy_file_data (const char *filename, size_t *psize) { FILE *file; size_t size; uint8_t *data; if ((file = fopen(filename, "rb")) == NULL) return NULL; fseek(file, 0, SEEK_END); size = (size_t)ftell(file); data = (uint8_t *)util_malloc(size); fseek(file, 0, SEEK_SET); if ((*psize = fread(data, 1, size, file)) != size) { util_free(data); data = NULL; } fclose(file); return data; } uint8_t * iof_copy_file_handle_data (FILE *file, size_t *psize) { size_t size; uint8_t *data; //long offset = ftell(file); // keep offset intact? 
fseek(file, 0, SEEK_END); size = (size_t)ftell(file); data = (uint8_t *)util_malloc(size); fseek(file, 0, SEEK_SET); if ((*psize = fread(data, 1, size, file)) != size) { util_free(data); data = NULL; } //fseek(file, offset, SEEK_SET) return data; } FILE * iof_get_file (iof *F) { if (F->flags & IOF_FILE) return iof_file_get_file(F->iofile); if (F->flags & IOF_FILE_HANDLE) return F->file; return NULL; } const char * iof_status_kind (iof_status status) { switch (status) { case IOFEOF: return "IOFEOF"; case IOFERR: return "IOFERR"; case IOFEMPTY: return "IOFEMPTY"; case IOFFULL: return "IOFFULL"; default: break; } return "(unknown)"; } /* shared pseudofile */ #define IOF_FILE_DEFAULTS 0 iof_file * iof_file_new (FILE *file) { iof_file *iofile = (iof_file *)util_malloc(sizeof(iof_file)); iof_file_set_fh(iofile, file); iofile->offset = NULL; iofile->size = 0; iofile->name = NULL; iofile->refcount = 0; iofile->flags = IOF_FILE_DEFAULTS|IOF_ALLOC; return iofile; } iof_file * iof_file_init (iof_file *iofile, FILE *file) { iof_file_set_fh(iofile, file); iofile->offset = NULL; iofile->size = 0; iofile->name = NULL; iofile->refcount = 0; iofile->flags = IOF_FILE_DEFAULTS; return iofile; } iof_file * iof_file_rdata (const void *data, size_t size) { iof_file *iofile = (iof_file *)util_malloc(sizeof(iof_file)); iofile->rbuf = iofile->rpos = (const uint8_t *)data; iofile->rend = iofile->rbuf + size; iofile->offset = NULL; iofile->size = 0; iofile->name = NULL; iofile->refcount = 0; iofile->flags = IOF_FILE_DEFAULTS|IOF_ALLOC|IOF_DATA; return iofile; } iof_file * iof_file_rdata_init (iof_file *iofile, const void *data, size_t size) { iofile->rbuf = iofile->rpos = (const uint8_t *)data; iofile->rend = iofile->rbuf + size; iofile->offset = NULL; iofile->size = 0; // lets keep it consequently set to zero (only for user disposition) iofile->name = NULL; iofile->refcount = 0; iofile->flags = IOF_FILE_DEFAULTS|IOF_DATA; return iofile; } iof_file * iof_file_wdata (void *data, size_t size) 
{ return iof_file_rdata((const void *)data, size); } iof_file * iof_file_wdata_init (iof_file *iofile, void *data, size_t size) { return iof_file_rdata_init(iofile, (const void *)data, size); } /* typical uses so far */ iof_file * iof_file_reader_from_file_handle (iof_file *iofile, const char *filename, FILE *file, int preload, int closefile) { uint8_t *data; size_t size; if (preload) { if ((data = iof_copy_file_handle_data(file, &size)) == NULL) { if (closefile) // callers expect close also on failure fclose(file); return NULL; } if (iofile == NULL) iofile = iof_file_rdata(data, size); else iof_file_rdata_init(iofile, data, size); iofile->flags |= IOF_BUFFER_ALLOC; if (closefile) fclose(file); } else { if (iofile == NULL) iofile = iof_file_new(file); else iof_file_init(iofile, file); if (closefile) iofile->flags |= IOF_CLOSE_FILE; } if (filename != NULL) iof_file_set_name(iofile, filename); return iofile; } iof_file * iof_file_reader_from_file (iof_file *iofile, const char *filename, int preload) { FILE *file; if ((file = fopen(filename, "rb")) == NULL) return NULL; return iof_file_reader_from_file_handle(iofile, filename, file, preload, 1); // takes care to fclose() on failure } iof_file * iof_file_reader_from_data (iof_file *iofile, const void *data, size_t size, int preload, int freedata) { void *newdata; if (data == NULL) return NULL; if (preload) { newdata = iof_copy_data(data, size); if (iofile == NULL) iofile = iof_file_rdata(newdata, size); else iof_file_rdata_init(iofile, newdata, size); iofile->flags |= IOF_BUFFER_ALLOC; //if (freedata) // hardly makes sense... 
we can't free const void * // util_free((void *)data); } else { if (iofile == NULL) iofile = iof_file_rdata(data, size); else iof_file_rdata_init(iofile, data, size); if (freedata) iofile->flags |= IOF_BUFFER_ALLOC; } return iofile; } /* iof_file * iof_file_writer_from_file (iof_file *iofile, const char *filename) { FILE *file; if ((file = fopen(filename, "wb")) == NULL) return NULL; if (iofile == NULL) iofile = iof_file_new(file); else iof_file_init(iofile, file); iofile->flags |= IOF_CLOSE_FILE; iof_file_set_name(iofile, filename); return iofile; } */ /* Because of limited number of FILE* handles available, we may need to close/reopen a file handle when accessing it. In applications so far (fonts, images) we typically need the entire source to parse the file on object creation and to rewrite or reload the data on dump. All iof_file API functions assume that iofile has FILE* opened. Reopening it on every access (ftell, fseek, read/write) makes no sense. So if the caller invalidates iofile by closing and NULLing its file handle, it is also responsible to reopen when necessary. 
*/ int iof_file_reclose_input (iof_file *iofile) { FILE *file; if (iofile->flags & IOF_DATA) return 0; if ((file = iof_file_get_fh(iofile)) == NULL) return 0; fclose(file); iof_file_set_fh(iofile, NULL); iofile->flags &= ~IOF_RECLOSE_FILE; iofile->flags |= IOF_REOPEN_FILE; return 1; } int iof_file_reopen_input (iof_file *iofile) { // returns true if iofile readable FILE *file; const char *filename; if (iofile->flags & IOF_DATA) return 1; if ((file = iof_file_get_fh(iofile)) != NULL) return 1; // if present, assumed readable if ((filename = iofile->name) == NULL || (file = fopen(filename, "rb")) == NULL) return 0; iof_file_set_fh(iofile, file); iofile->flags &= ~IOF_REOPEN_FILE; iofile->flags |= IOF_RECLOSE_FILE; return 1; } /* freeing iof_file */ void iof_file_free (iof_file *iofile) { FILE *file; if (iofile->flags & IOF_DATA) { if (iofile->flags & IOF_BUFFER_ALLOC) { iofile->flags &= ~IOF_BUFFER_ALLOC; if (iofile->buf != NULL) { util_free(iofile->buf); iofile->buf = iofile->pos = iofile->end = NULL; } } } else if ((file = iof_file_get_fh(iofile)) != NULL) { if (iofile->flags & IOF_CLOSE_FILE) fclose(file); iof_file_set_fh(iofile, NULL); } iof_file_set_name(iofile, NULL); if (iofile->flags & IOF_ALLOC) util_free(iofile); } /* An attempt to close iofile input keeping things safe. In bindings we sometimes we need to force closing the file handle, otherwise it is closed when garbage collector graciously calls destroyer. Eg. we are done with an object representing pdf/image/font, but we can't move/replace it, as the host language keeps the garbage that keeps a file handle. When we call fclose(), we also have to set the handle to NULL. In many places we assume, that if the iofile wraps FILE *, than the handle is operable (no NULL checks). To close the handle keeping iofile alive safe, we can silently convert it dummy IOF_DATA buffer. 
*/ void iof_file_close_input (iof_file *iofile) { FILE *file; if (iofile->flags & IOF_DATA) { if (iofile->flags & IOF_BUFFER_ALLOC) { iofile->flags &= ~IOF_BUFFER_ALLOC; if (iofile->buf != NULL) { util_free(iofile->buf); //iofile->buf = iofile->pos = iofile->end = NULL; } } } else if ((file = iof_file_get_fh(iofile)) != NULL) { iof_file_set_fh(iofile, NULL); fclose(file); } iof_file_set_name(iofile, NULL); /* now make it a dummy string iofile */ iofile->buf = iofile->pos = iofile->end = NULL; iofile->flags |= IOF_DATA; } /* set filename for reopen */ void iof_file_set_name (iof_file *iofile, const char *name) { if (iofile->name != NULL) util_free(iofile->name); if (name != NULL) iofile->name = iof_copy_data(name, strlen(name) + 1); else iofile->name = NULL; } /* seek */ int iof_file_seek (iof_file *iofile, long offset, int whence) { if (iofile->flags & IOF_DATA) { switch (whence) { case SEEK_SET: if (offset >= 0 && iofile->buf + offset <= iofile->end) { iofile->pos = iofile->buf + offset; return 0; } return -1; case SEEK_CUR: if ((offset >= 0 && iofile->pos + offset <= iofile->end) || (offset < 0 && iofile->pos + offset >= iofile->buf)) { iofile->pos += offset; return 0; } return -1; case SEEK_END: if (offset <= 0 && iofile->end + offset >= iofile->buf) { iofile->pos = iofile->end + offset; return 0; } return -1; } return -1; } return fseek(iof_file_get_fh(iofile), offset, whence); } /* */ long iof_file_tell (iof_file *iofile) { return (iofile->flags & IOF_DATA) ? (long)(iofile->pos - iofile->buf) : ftell(iof_file_get_fh(iofile)); } size_t iof_file_size (iof_file *iofile) { long pos, size; FILE *file; if (iofile->flags & IOF_DATA) return (size_t)iof_space(iofile); file = iof_file_get_fh(iofile); pos = ftell(file); fseek(file, 0, SEEK_END); size = ftell(file); fseek(file, pos, SEEK_SET); return size; } int iof_file_eof (iof_file *iofile) { if (iofile->flags & IOF_DATA) return iofile->pos == iofile->end ? 
-1 : 0; return feof(iof_file_get_fh(iofile)); } int iof_file_flush (iof_file *iofile) { if (iofile->flags & IOF_DATA) return 0; return fflush(iof_file_get_fh(iofile)); } size_t iof_file_read (void *ptr, size_t size, size_t items, iof_file *iofile) { if (iofile->flags & IOF_DATA) { size_t bytes = size * items; if (bytes > (size_t)iof_left(iofile)) bytes = (size_t)iof_left(iofile); memcpy(ptr, iofile->pos, bytes); iofile->pos += bytes; return bytes / size; // number of elements read } return fread(ptr, size, items, iof_file_get_fh(iofile)); } static size_t iof_file_data_resizeto (iof_file *iofile, size_t space) { uint8_t *newbuf; size_t size; size = iof_size(iofile); if (iofile->flags & IOF_BUFFER_ALLOC) { newbuf = (uint8_t *)util_realloc(iofile->buf, space); } else { newbuf = (uint8_t *)util_malloc(space); if (size > 0) memcpy(newbuf, iofile->buf, size); iofile->flags |= IOF_BUFFER_ALLOC; } iofile->buf = newbuf; iofile->pos = newbuf + size; iofile->end = newbuf + space; return space - size; } #define iof_file_data_resize(iofile) iof_file_data_resizeto(iofile, iof_space(iofile) << 1) size_t iof_file_write (const void *ptr, size_t size, size_t items, iof_file *iofile) { if (iofile->flags & IOF_DATA) { size_t space, sizesofar, bytes; bytes = size * items; if (bytes > (size_t)iof_left(iofile)) { if ((space = iof_space(iofile)) == 0) // allow iofile->buf/end initially NULL space = BUFSIZ; for (sizesofar = iof_size(iofile), space <<= 1; sizesofar + bytes > space; space <<= 1) ; if (iof_file_data_resizeto(iofile, space) == 0) return 0; } memcpy(iofile->pos, ptr, bytes); iofile->pos += bytes; return bytes / size; } return fwrite(ptr, size, items, iof_file_get_fh(iofile)); } size_t iof_file_ensure (iof_file *iofile, size_t bytes) { if (iofile->flags & IOF_DATA) { size_t space, sizesofar, left; left = (size_t)iof_left(iofile); if (bytes > left) { if ((space = iof_space(iofile)) == 0) // allow iofile->buf/end initially NULL space = BUFSIZ; for (sizesofar = iof_size(iofile), 
space <<= 1; sizesofar + bytes > space; space <<= 1); return iof_file_data_resizeto(iofile, space); } return left; } return 0; } int iof_file_getc (iof_file *iofile) { if (iofile->flags & IOF_DATA) return iofile->pos < iofile->end ? *iofile->pos++ : IOFEOF; return fgetc(iof_file_get_fh(iofile)); } int iof_file_putc (iof_file *iofile, int c) { if (iofile->flags & IOF_DATA) { if (iofile->pos >= iofile->end) if (iof_file_data_resize(iofile) == 0) return IOFEOF; *iofile->pos++ = (uint8_t)c; return c; } return fputc(c, iof_file_get_fh(iofile)); } static int iof_file_sync (iof_file *iofile, size_t *offset) { if (iofile->offset != offset) { if (iofile->offset != NULL) *iofile->offset = iof_file_tell(iofile); iofile->offset = offset; if (offset) // let offset be NULL return iof_file_seek(iofile, (long)*offset, SEEK_SET); } return 0; } //#define iof_file_unsync(iofile, poffset) (void)((iofile)->offset == poffset && (((iofile)->offset = NULL), 0)) #define iof_file_unsync(iofile, poffset) ((void)poffset, (iofile)->offset = NULL) /* iof seek */ #define iof_reader_reset(I) ((I)->pos = (I)->end = (I)->buf) #define iof_reader_reseek_file(I, offset, whence) (fseek((I)->file, offset, whence) == 0 ? (iof_reader_reset(I), 0) : -1) #define iof_reader_reseek_iofile(I, offset, whence) (iof_file_seek((I)->iofile, offset, whence) == 0 ? (iof_reader_reset(I), 0) : -1) #define iof_writer_reset(O) ((O)->pos = (O)->buf) #define iof_writer_reseek_file(O, offset, whence) (iof_flush(O), (fseek((O)->file, offset, whence) == 0 ? (iof_writer_reset(O), 0) : -1)) #define iof_writer_reseek_iofile(O, offset, whence) (iof_flush(O), (iof_file_seek((O)->iofile, offset, whence) == 0 ? 
(iof_writer_reset(O), 0) : -1)) static int iof_reader_seek_data (iof *I, long offset, int whence) { switch (whence) { case SEEK_SET: if (offset >= 0 && I->buf + offset <= I->end) { I->pos = I->buf + offset; return 0; } return -1; case SEEK_CUR: if ((offset >= 0 && I->pos + offset <= I->end) || (offset < 0 && I->pos + offset >= I->buf)) { I->pos += offset; return 0; } return -1; case SEEK_END: if (offset <= 0 && I->end + offset >= I->buf) { I->pos = I->end + offset; return 0; } return -1; } return -1; } static int iof_reader_seek_iofile (iof *I, long offset, int whence) { long fileoffset; switch (whence) { case SEEK_SET: fileoffset = iof_file_tell(I->iofile); if (offset <= fileoffset && offset >= fileoffset - iof_space(I)) { I->pos = I->end - (fileoffset - offset); return 0; } return iof_reader_reseek_iofile(I, offset, SEEK_SET); case SEEK_CUR: if ((offset >= 0 && I->pos + offset <= I->end) || (offset < 0 && I->pos + offset >= I->buf)) { I->pos += offset; return 0; } return iof_reader_reseek_iofile(I, offset, SEEK_CUR); case SEEK_END: return iof_reader_reseek_iofile(I, offset, SEEK_END); // can we do better? } return -1; } static int iof_reader_seek_file (iof *I, long offset, int whence) { long fileoffset; switch (whence) { case SEEK_SET: fileoffset = ftell(I->file); if (offset <= fileoffset && offset >= fileoffset - iof_space(I)) { I->pos = I->end - (fileoffset - offset); return 0; } return iof_reader_reseek_file(I, offset, SEEK_SET); case SEEK_CUR: if ((offset >= 0 && I->pos + offset <= I->end) || (offset < 0 && I->pos + offset >= I->buf)) { I->pos += offset; return 0; } return iof_reader_reseek_file(I, offset, SEEK_CUR); case SEEK_END: return iof_reader_reseek_file(I, offset, SEEK_END); // can we do better? 
} return -1; } int iof_reader_seek (iof *I, long offset, int whence) { I->flags &= ~IOF_STOPPED; if (I->flags & IOF_FILE) return iof_reader_seek_iofile(I, offset, whence); if (I->flags & IOF_FILE_HANDLE) return iof_reader_seek_file(I, offset, whence); if (I->flags & IOF_DATA) return iof_reader_seek_data(I, offset, whence); return -1; } int iof_reader_reseek (iof *I, long offset, int whence) { I->flags &= ~IOF_STOPPED; if (I->flags & IOF_FILE) return iof_reader_reseek_iofile(I, offset, whence); if (I->flags & IOF_FILE_HANDLE) return iof_reader_reseek_file(I, offset, whence); if (I->flags & IOF_DATA) return iof_reader_seek_data(I, offset, whence); return -1; } static int iof_writer_seek_data (iof *O, long offset, int whence) { /* fseek() allows to seek after the end of file. Seeking does not increase the output file. No byte is written before fwirte(). It seems to fill the gap with zeros. Until we really need that, no seeking out of bounds for writers. */ O->flags &= ~IOF_STOPPED; return iof_reader_seek_data(O, offset, whence); } static int iof_writer_seek_iofile (iof *O, long offset, int whence) { long fileoffset; switch (whence) { case SEEK_SET: fileoffset = iof_file_tell(O->iofile); if (offset >= fileoffset && offset <= fileoffset + iof_space(O)) { O->pos = O->buf + (offset - fileoffset); return 0; } return iof_writer_reseek_iofile(O, offset, SEEK_SET); case SEEK_CUR: if ((offset >=0 && O->pos + offset <= O->end) || (offset < 0 && O->pos + offset >= O->buf)) { O->pos += offset; return 0; } return iof_writer_reseek_iofile(O, offset, SEEK_CUR); case SEEK_END: return iof_writer_reseek_iofile(O, offset, SEEK_END); } return -1; } static int iof_writer_seek_file (iof *O, long offset, int whence) { long fileoffset; switch (whence) { case SEEK_SET: fileoffset = ftell(O->file); if (offset >= fileoffset && offset <= fileoffset + iof_space(O)) { O->pos = O->buf + (offset - fileoffset); return 0; } return iof_writer_reseek_file(O, offset, SEEK_SET); case SEEK_CUR: if ((offset 
>=0 && O->pos + offset <= O->end) || (offset < 0 && O->pos + offset >= O->buf))
      {
        O->pos += offset;
        return 0;
      }
      // target position is outside the buffered window: flush and really fseek()
      return iof_writer_reseek_file(O, offset, SEEK_CUR);
    case SEEK_END:
      return iof_writer_reseek_file(O, offset, SEEK_END);
  }
  return -1;
}

/*
Seek for writers: dispatch on the storage kind recorded in the flags
(iof_file wrapper, raw FILE handle, or in-memory data). Clearing
IOF_STOPPED makes a previously stopped stream usable again.
Returns 0 on success, -1 on failure or unknown storage kind.
*/

int iof_writer_seek (iof *I, long offset, int whence)
{
  I->flags &= ~IOF_STOPPED;
  if (I->flags & IOF_FILE)
    return iof_writer_seek_iofile(I, offset, whence);
  if (I->flags & IOF_FILE_HANDLE)
    return iof_writer_seek_file(I, offset, whence);
  if (I->flags & IOF_DATA)
    return iof_writer_seek_data(I, offset, whence);
  return -1;
}

/* As iof_writer_seek(), but the reseek variants unconditionally flush and reposition the underlying file. */

int iof_writer_reseek (iof *I, long offset, int whence)
{
  I->flags &= ~IOF_STOPPED;
  if (I->flags & IOF_FILE)
    return iof_writer_reseek_iofile(I, offset, whence);
  if (I->flags & IOF_FILE_HANDLE)
    return iof_writer_reseek_file(I, offset, whence);
  if (I->flags & IOF_DATA)
    return iof_writer_seek_data(I, offset, whence); // in-memory data has no seek/reseek distinction
  return -1;
}

/* Generic seek/reseek: route to the writer or reader variant based on IOF_WRITER. */

int iof_seek (iof *F, long offset, int whence)
{
  return (F->flags & IOF_WRITER) ? iof_writer_seek(F, offset, whence) : iof_reader_seek(F, offset, whence);
}

int iof_reseek (iof *F, long offset, int whence)
{
  return (F->flags & IOF_WRITER) ? iof_writer_reseek(F, offset, whence) : iof_reader_reseek(F, offset, whence);
}

/* tell */

/*
Logical position of a reader: the underlying file position minus the bytes
still buffered. Note the commented-out IOF_DATA test: the final return is
unconditional, so any non-file storage falls through to iof_size() (bytes
consumed from the in-memory window).
*/

long iof_reader_tell (iof *I)
{
  if (I->flags & IOF_FILE)
    return iof_file_tell(I->iofile) - (long)iof_left(I);
  if (I->flags & IOF_FILE_HANDLE)
    return ftell(I->file) - (long)iof_left(I);
  //if (I->flags & IOF_DATA)
    return (long)iof_size(I);
}

/* Logical position of a writer: underlying position plus bytes buffered but not yet flushed. */

long iof_writer_tell (iof *O)
{
  if (O->flags & IOF_FILE)
    return iof_file_tell(O->iofile) + (long)iof_size(O);
  if (O->flags & IOF_FILE_HANDLE)
    return ftell(O->file) + (long)iof_size(O);
  //if (I->flags & IOF_DATA)
    return (long)iof_size(O);
}

/* Generic tell: route to the writer or reader variant. */

long iof_tell (iof *I)
{
  return (I->flags & IOF_WRITER) ?
iof_writer_tell(I) : iof_reader_tell(I); } size_t iof_fsize (iof *I) { size_t pos, size; if (I->flags & IOF_FILE) return iof_file_size(I->iofile); if (I->flags & IOF_FILE_HANDLE) { pos = (size_t)ftell(I->file); fseek(I->file, 0, SEEK_END); size = (size_t)ftell(I->file); fseek(I->file, (long)pos, SEEK_SET); return size; } //if (I->flags & IOF_DATA) return (size_t)iof_space(I); } /* save reader tail */ size_t iof_save_tail (iof *I) { size_t size, left; size = iof_size(I); left = iof_left(I); if (size >= left) memcpy(I->buf, I->pos, left); else memmove(I->buf, I->pos, left); return left; } size_t iof_input_save_tail (iof *I, size_t back) { size_t size; I->flags |= IOF_TAIL; I->pos -= back; size = iof_input(I); I->pos += back; I->flags &= ~IOF_TAIL; return size; // + back - back } /* read from file */ /* iof free*/ static size_t file_read (iof *I); static size_t file_load (iof *I); static size_t file_reader (iof *I, iof_mode mode) { switch (mode) { case IOFREAD: return file_read(I); case IOFLOAD: return file_load(I); case IOFCLOSE: iof_free(I); return 0; default: return 0; } } iof * iof_setup_file_handle_reader (iof *I, void *buffer, size_t space, FILE *f) { iof_setup_reader(I, buffer, space); iof_setup_file(I, f); I->more = file_reader; return I; } iof * iof_setup_file_reader (iof *I, void *buffer, size_t space, const char *filename) { FILE *f; if ((f = fopen(filename, "rb")) == NULL) return NULL; iof_setup_reader(I, buffer, space); iof_setup_file(I, f); I->flags |= IOF_CLOSE_FILE; I->more = file_reader; return I; } /* write to file */ static size_t file_write (iof *O, int flush); static size_t file_writer (iof *O, iof_mode mode) { switch (mode) { case IOFWRITE: return file_write(O, 0); case IOFFLUSH: return file_write(O, 1); case IOFCLOSE: file_write(O, 1); iof_free(O); return 0; default: return 0; } } iof * iof_setup_file_handle_writer (iof *O, void *buffer, size_t space, FILE *f) { iof_setup_writer(O, buffer, space); iof_setup_file(O, f); O->more = file_writer; 
return O; } iof * iof_setup_file_writer (iof *O, void *buffer, size_t space, const char *filename) { FILE *f; if ((f = fopen(filename, "wb")) == NULL) return NULL; iof_setup_writer(O, buffer, space); iof_setup_file(O, f); O->flags |= IOF_CLOSE_FILE; O->more = file_writer; return O; } /* a dedicated handler for stdout/stderr */ static size_t stdout_writer (iof *O, iof_mode mode) { switch(mode) { case IOFWRITE: { fwrite(O->buf, sizeof(uint8_t), iof_size(O), stdout); O->pos = O->buf; return O->space; } case IOFCLOSE: case IOFFLUSH: { fwrite(O->buf, sizeof(uint8_t), iof_size(O), stdout); fflush(stdout); O->pos = O->buf; return 0; } default: break; } return 0; } static size_t stderr_writer (iof *O, iof_mode mode) { switch(mode) { case IOFWRITE: { fwrite(O->buf, sizeof(uint8_t), iof_size(O), stderr); O->pos = O->buf; return O->space; } case IOFCLOSE: case IOFFLUSH: { fwrite(O->buf, sizeof(uint8_t), iof_size(O), stderr); fflush(stderr); O->pos = O->buf; return 0; } default: break; } return 0; } static uint8_t iof_stdout_buffer[BUFSIZ]; iof iof_stdout = IOF_WRITER_INIT(stdout_writer, NULL, iof_stdout_buffer, BUFSIZ, 0); static uint8_t iof_stderr_buffer[BUFSIZ]; iof iof_stderr = IOF_WRITER_INIT(stderr_writer, NULL, iof_stderr_buffer, BUFSIZ, 0); /* read from somewhere */ iof * iof_reader (iof *I, void *link, iof_handler reader, const void *m, size_t bytes) { I->space = 0; I->link = link; I->more = reader; I->flags = 0; I->refcount = 0; if (m != NULL) { I->rbuf = I->rpos = (const uint8_t *)m; I->rend = (const uint8_t *)m + bytes; return I; } return NULL; } iof * iof_string_reader (iof *I, const void *s, size_t bytes) { I->space = 0; I->link = NULL; I->more = NULL; I->flags = 0; // iof_string() sets IOF_DATA I->refcount = 0; if (s != NULL) return iof_string(I, s, bytes); return NULL; } /* write somewhere */ iof * iof_writer (iof *O, void *link, iof_handler writer, void *m, size_t bytes) { O->space = 0; O->link = link; O->more = writer; O->flags = 0; O->refcount = 0; if (m != 
NULL && bytes > 0) { O->buf = O->pos = (uint8_t *)m; O->end = (uint8_t *)m + bytes; return O; } // return iof_null(O); return NULL; } /* write to growing bytes buffer */ static size_t iof_mem_handler (iof *O, iof_mode mode) { switch(mode) { case IOFWRITE: return iof_resize_buffer(O); case IOFCLOSE: iof_free(O); return 0; default: return 0; } } iof * iof_setup_buffer (iof *O, void *buffer, size_t space) { iof_setup_writer(O, buffer, space); O->link = NULL; O->flags |= IOF_DATA; O->more = iof_mem_handler; return O; } iof * iof_setup_buffermin (iof *O, void *buffer, size_t space, size_t min) { iof_setup_buffer(O, buffer, space); if (space < min) // allocate min to avoid further rewriting { O->buf = O->pos = (uint8_t *)util_malloc(min); O->flags |= IOF_BUFFER_ALLOC; O->end = O->buf + min; } return O; } iof * iof_buffer_create (size_t space) { uint8_t *buffer; iof *O; O = (iof *)util_malloc(space); buffer = (uint8_t *)(O + 1); iof_setup_buffer(O, buffer, space); O->flags |= IOF_ALLOC; return O; } /* set/get */ int iof_getc (iof *I) { if (iof_readable(I)) return *I->pos++; return IOFEOF; } int iof_putc (iof *O, int u) { if (iof_writable(O)) { iof_set(O, u); return (uint8_t)u; } return IOFFULL; } size_t iof_skip (iof *I, size_t bytes) { while (bytes) { if (iof_readable(I)) ++I->pos; else break; --bytes; } return bytes; } /* from iof to iof */ iof_status iof_pass (iof *I, iof *O) { size_t leftin, leftout; if ((leftin = iof_left(I)) == 0) leftin = iof_input(I); while (leftin) { if ((leftout = iof_left(O)) == 0) if ((leftout = iof_output(O)) == 0) return IOFFULL; while (leftin > leftout) { memcpy(O->pos, I->pos, leftout); I->pos += leftout; O->pos = O->end; /* eq. += leftout */ leftin -= leftout; if ((leftout = iof_output(O)) == 0) return IOFFULL; } if (leftin) { memcpy(O->pos, I->pos, leftin); I->pos = I->end; /* eq. 
+= leftin */ O->pos += leftin; } leftin = iof_input(I); } return IOFEOF; } /* read n-bytes */ size_t iof_read (iof *I, void *to, size_t size) { size_t leftin, done = 0; char *s = (char *)to; if ((leftin = iof_left(I)) == 0) if ((leftin = iof_input(I)) == 0) return done; while (size > leftin) { memcpy(s, I->pos, leftin * sizeof(uint8_t)); size -= leftin; done += leftin; s += leftin; I->pos = I->end; if ((leftin = iof_input(I)) == 0) return done; } if (size) { memcpy(s, I->pos, size * sizeof(uint8_t)); I->pos += size; done += size; } return done; } /* rewrite FILE content (use fseek if needed) */ size_t iof_write_file_handle (iof *O, FILE *file) { size_t leftout, size, readout; if ((leftout = iof_left(O)) == 0) if ((leftout = iof_output(O)) == 0) return 0; size = 0; do { readout = fread(O->pos, 1, leftout, file); O->pos += readout; size += readout; } while(readout == leftout && (leftout = iof_output(O)) > 0); return size; } size_t iof_write_file (iof *O, const char *filename) { FILE *file; size_t size; if ((file = fopen(filename, "rb")) == NULL) return 0; size = iof_write_file_handle(O, file); fclose(file); return size; } size_t iof_write_iofile (iof *O, iof_file *iofile, int savepos) { long offset; size_t size; FILE *file; if (iofile->flags & IOF_DATA) return iof_write(O, iofile->pos, (size_t)(iofile->end - iofile->pos)); file = iof_file_get_fh(iofile); if (savepos) { offset = ftell(file); size = iof_write_file_handle(O, file); fseek(file, offset, SEEK_SET); return size; } return iof_write_file_handle(O, file); } /* write n-bytes */ size_t iof_write (iof *O, const void *data, size_t size) { size_t leftout, done = 0; const char *s = (const char *)data; if ((leftout = iof_left(O)) == 0) if ((leftout = iof_output(O)) == 0) return done; while (size > leftout) { memcpy(O->pos, s, leftout * sizeof(uint8_t)); size -= leftout; done += leftout; s += leftout; O->pos = O->end; if ((leftout = iof_output(O)) == 0) return done; } if (size) { memcpy(O->pos, s, size * 
sizeof(uint8_t)); O->pos += size; done += size; } return done; } /* write '\0'-terminated string */ iof_status iof_puts (iof *O, const void *data) { const char *s = (const char *)data; while (*s) { if (iof_writable(O)) iof_set(O, *s++); else return IOFFULL; } return IOFEOF; // ? } size_t iof_put_string (iof *O, const void *data) { const char *p, *s = (const char *)data; for (p = s; *p != '\0' && iof_writable(O); iof_set(O, *p++)); return p - s; } /* write byte n-times */ /* iof_status iof_repc (iof *O, char c, size_t bytes) { while (bytes) { if (iof_writable(O)) iof_set(O, c); else return IOFFULL; --bytes; } return IOFEOF; // ? } */ size_t iof_repc (iof *O, char c, size_t bytes) { size_t leftout, todo = bytes; if ((leftout = iof_left(O)) == 0) if ((leftout = iof_output(O)) == 0) return 0; while (bytes > leftout) { memset(O->pos, c, leftout); bytes -= leftout; O->pos = O->end; if ((leftout = iof_output(O)) == 0) return todo - bytes; } if (bytes) { memset(O->pos, c, bytes); O->pos += bytes; } return todo; } /* putfs */ #define IOF_FMT_SIZE 1024 size_t iof_putfs (iof *O, const char *format, ...) 
{ static char buffer[IOF_FMT_SIZE]; va_list args; va_start(args, format); if (vsnprintf(buffer, IOF_FMT_SIZE, format, args) > 0) { va_end(args); return iof_put_string(O, buffer); } else { va_end(args); return iof_write(O, buffer, IOF_FMT_SIZE); } } /* integer from iof; return 1 on success, 0 otherwise */ int iof_get_int32 (iof *I, int32_t *number) { int sign, c = iof_char(I); iof_scan_sign(I, c, sign); if (!base10_digit(c)) return 0; iof_read_integer(I, c, *number); if (sign) *number = -*number; return 1; } int iof_get_slong (iof *I, long *number) { int sign, c = iof_char(I); iof_scan_sign(I, c, sign); if (!base10_digit(c)) return 0; iof_read_integer(I, c, *number); if (sign) *number = -*number; return 1; } int iof_get_int64 (iof *I, int64_t *number) { int sign, c = iof_char(I); iof_scan_sign(I, c, sign); if (!base10_digit(c)) return 0; iof_read_integer(I, c, *number); if (sign) *number = -*number; return 1; } int iof_get_uint32 (iof *I, uint32_t *number) { int c = iof_char(I); if (!base10_digit(c)) return 0; iof_read_integer(I, c, *number); return 1; } int iof_get_ulong (iof *I, unsigned long *number) { int c = iof_char(I); if (!base10_digit(c)) return 0; iof_read_integer(I, c, *number); return 1; } int iof_get_usize (iof *I, size_t *number) { int c = iof_char(I); if (!base10_digit(c)) return 0; iof_read_integer(I, c, *number); return 1; } int iof_get_uint64 (iof *I, uint64_t *number) { int c = iof_char(I); if (!base10_digit(c)) return 0; iof_read_integer(I, c, *number); return 1; } int iof_get_int32_radix (iof *I, int32_t *number, int radix) { int sign, c = iof_char(I); iof_scan_sign(I, c, sign); if (!base10_digit(c)) return 0; iof_read_radix(I, c, *number, radix); if (sign) *number = -*number; return 1; } int iof_get_slong_radix (iof *I, long *number, int radix) { int sign, c = iof_char(I); iof_scan_sign(I, c, sign); if (!base10_digit(c)) return 0; iof_read_radix(I, c, *number, radix); if (sign) *number = -*number; return 1; } int iof_get_int64_radix (iof *I, 
int64_t *number, int radix) { int sign, c = iof_char(I); iof_scan_sign(I, c, sign); if (!base10_digit(c)) return 0; iof_read_radix(I, c, *number, radix); if (sign) *number = -*number; return 1; } int iof_get_uint32_radix (iof *I, uint32_t *number, int radix) { int c = iof_char(I); if (!base10_digit(c)) return 0; iof_read_radix(I, c, *number, radix); return 1; } int iof_get_ulong_radix (iof *I, unsigned long *number, int radix) { int c = iof_char(I); if (!base10_digit(c)) return 0; iof_read_radix(I, c, *number, radix); return 1; } int iof_get_usize_radix (iof *I, size_t *number, int radix) { int c = iof_char(I); if (!base10_digit(c)) return 0; iof_read_radix(I, c, *number, radix); return 1; } int iof_get_uint64_radix (iof *I, uint64_t *number, int radix) { int c = iof_char(I); if (!base10_digit(c)) return 0; iof_read_radix(I, c, *number, radix); return 1; } /* get roman to uint16_t, cf. roman_to_uint16() from utilnumber.c*/ /* todo: some trick in place of this macro horror? */ #define roman1000(c) (c == 'M' || c == 'm') #define roman500(c) (c == 'D' || c == 'd') #define roman100(c) (c == 'C' || c == 'c') #define roman50(c) (c == 'L' || c == 'l') #define roman10(c) (c == 'X' || c == 'x') #define roman5(c) (c == 'V' || c == 'v') #define roman1(c) (c == 'I' || c == 'i') #define roman100s(I, c) \ (roman100(c) ? (100 + ((c = iof_next(I), roman100(c)) ? (100 + ((c = iof_next(I), roman100(c)) ? (c = iof_next(I), 100) : 0)) : 0)) : 0) #define roman10s(I, c) \ (roman10(c) ? (10 + ((c = iof_next(I), roman10(c)) ? (10 + ((c = iof_next(I), roman10(c)) ? (c = iof_next(I), 10) : 0)) : 0)) : 0) #define roman1s(I, c) \ (roman1(c) ? (1 + ((c = iof_next(I), roman1(c)) ? (1 + ((c = iof_next(I), roman1(c)) ? 
(c = iof_next(I), 1) : 0)) : 0)) : 0) int iof_get_roman (iof *I, uint16_t *number) { int c; /* M */ for (*number = 0, c = iof_char(I); roman1000(c); *number += 1000, c = iof_next(I)); /* D C */ if (roman500(c)) { c = iof_next(I); *number += 500 + roman100s(I, c); } else if (roman100(c)) { c = iof_next(I); if (roman1000(c)) { c = iof_next(I); *number += 900; } else if (roman500(c)) { c = iof_next(I); *number += 400; } else *number += 100 + roman100s(I, c); } /* L X */ if (roman50(c)) { c = iof_next(I); *number += 50 + roman10s(I, c); } else if (roman10(c)) { c = iof_next(I); if (roman100(c)) { c = iof_next(I); *number += 90; } else if (roman50(c)) { c = iof_next(I); *number += 40; } else *number += 10 + roman10s(I, c); } /* V I */ if (roman5(c)) { c = iof_next(I); *number += 5 + roman1s(I, c); } else if (roman1(c)) { c = iof_next(I); if (roman10(c)) { c = iof_next(I); *number += 9; } else if (roman5(c)) { c = iof_next(I); *number += 4; } else *number += 1 + roman1s(I, c); } return 1; } /* double from iof; return 1 on success */ int iof_get_double (iof *I, double *number) // cf. string_to_double() { int sign, exponent10, c = iof_char(I); iof_scan_sign(I, c, sign); iof_scan_decimal(I, c, *number); if (c == '.') { c = iof_next(I); iof_scan_fraction(I, c, *number, exponent10); } else exponent10 = 0; if (c == 'e' || c == 'E') { c = iof_next(I); iof_scan_exponent10(I, c, exponent10); } double_exp10(*number, exponent10); if (sign) *number = -*number; return 1; } int iof_get_float (iof *I, float *number) // cf. string_to_float() { int sign, exponent10, c = iof_char(I); iof_scan_sign(I, c, sign); iof_scan_decimal(I, c, *number); if (c == '.') { c = iof_next(I); iof_scan_fraction(I, c, *number, exponent10); } else exponent10 = 0; if (c == 'e' || c == 'E') { c = iof_next(I); iof_scan_exponent10(I, c, exponent10); } float_exp10(*number, exponent10); if (sign) *number = -*number; return 1; } int iof_conv_double (iof *I, double *number) // cf. 
convert_to_double() { int sign, exponent10, c = iof_char(I); iof_scan_sign(I, c, sign); iof_scan_decimal(I, c, *number); if (c == '.' || c == ',') { c = iof_next(I); iof_scan_fraction(I, c, *number, exponent10); if (exponent10 < 0) double_negative_exp10(*number, exponent10); } if (sign) *number = -*number; return 1; } int iof_conv_float (iof *I, float *number) // cf. convert_to_float() { int sign, exponent10, c = iof_char(I); iof_scan_sign(I, c, sign); iof_scan_decimal(I, c, *number); if (c == '.' || c == ',') { c = iof_next(I); iof_scan_fraction(I, c, *number, exponent10); if (exponent10 < 0) float_negative_exp10(*number, exponent10); } if (sign) *number = -*number; return 1; } /* integer to iof; return a number of written bytes */ size_t iof_put_int32 (iof *O, int32_t number) { const char *s; size_t size; s = int32_to_string(number, &size); return iof_write(O, s, size); } size_t iof_put_slong (iof *O, long number) { const char *s; size_t size; s = slong_to_string(number, &size); return iof_write(O, s, size); } size_t iof_put_int64 (iof *O, int64_t number) { const char *s; size_t size; s = int64_to_string(number, &size); return iof_write(O, s, size); } size_t iof_put_uint32 (iof *O, uint32_t number) { const char *s; size_t size; s = uint32_to_string(number, &size); return iof_write(O, s, size); } size_t iof_put_ulong (iof *O, unsigned long number) { const char *s; size_t size; s = ulong_to_string(number, &size); return iof_write(O, s, size); } size_t iof_put_usize (iof *O, size_t number) { const char *s; size_t size; s = usize_to_string(number, &size); return iof_write(O, s, size); } size_t iof_put_uint64 (iof *O, uint64_t number) { const char *s; size_t size; s = uint64_to_string(number, &size); return iof_write(O, s, size); } size_t iof_put_int32_radix (iof *O, int32_t number, int radix, int uc) { const char *s; size_t size; s = int32_to_radix(number, radix, uc, &size); return iof_write(O, s, size); } size_t iof_put_slong_radix (iof *O, long number, int radix, 
int uc) { const char *s; size_t size; s = slong_to_radix(number, radix, uc, &size); return iof_write(O, s, size); } size_t iof_put_int64_radix (iof *O, int64_t number, int radix, int uc) { const char *s; size_t size; s = int64_to_radix(number, radix, uc, &size); return iof_write(O, s, size); } size_t iof_put_uint32_radix (iof *O, uint32_t number, int radix, int uc) { const char *s; size_t size; s = uint32_to_radix(number, radix, uc, &size); return iof_write(O, s, size); } size_t iof_put_ulong_radix (iof *O, unsigned long number, int radix, int uc) { const char *s; size_t size; s = ulong_to_radix(number, radix, uc, &size); return iof_write(O, s, size); } size_t iof_put_usize_radix (iof *O, size_t number, int radix, int uc) { const char *s; size_t size; s = usize_to_radix(number, radix, uc, &size); return iof_write(O, s, size); } size_t iof_put_uint64_radix (iof *O, uint64_t number, int radix, int uc) { const char *s; size_t size; s = uint64_to_radix(number, radix, uc, &size); return iof_write(O, s, size); } /* roman numerals */ size_t iof_put_roman (iof *O, uint16_t number, int uc) { const char *s; size_t size; s = uint16_to_roman(number, uc, &size); return iof_write(O, s, size); } /* double/float to iof; return the number of written bytes */ size_t iof_put_double (iof *O, double number, int digits) { const char *s; size_t size; s = double_to_string(number, digits, &size); return iof_write(O, s, size); } size_t iof_put_float (iof *O, float number, int digits) { const char *s; size_t size; s = float_to_string(number, digits, &size); return iof_write(O, s, size); } /* iof to binary integer; pretty common */ int iof_get_be_uint2 (iof *I, uint32_t *pnumber) { int c1, c2; if ((c1 = iof_get(I)) < 0 || (c2 = iof_get(I)) < 0) return 0; *pnumber = (c1<<8)|c2; return 1; } int iof_get_be_uint3 (iof *I, uint32_t *pnumber) { int c1, c2, c3; if ((c1 = iof_get(I)) < 0 || (c2 = iof_get(I)) < 0 || (c3 = iof_get(I)) < 0) return 0; *pnumber = (c1<<16)|(c2<<8)|c3; return 1; } int 
iof_get_be_uint4 (iof *I, uint32_t *pnumber) { int c1, c2, c3, c4; if ((c1 = iof_get(I)) < 0 || (c2 = iof_get(I)) < 0 || (c3 = iof_get(I)) < 0 || (c4 = iof_get(I)) < 0) return 0; *pnumber = (c1<<24)|(c2<<16)|(c3<<8)|c4; return 1; } int iof_get_le_uint2 (iof *I, uint32_t *pnumber) { int c1, c2; if ((c1 = iof_get(I)) < 0 || (c2 = iof_get(I)) < 0) return 0; *pnumber = (c2<<8)|c1; return 1; } int iof_get_le_uint3 (iof *I, uint32_t *pnumber) { int c1, c2, c3; if ((c1 = iof_get(I)) < 0 || (c2 = iof_get(I)) < 0 || (c3 = iof_get(I)) < 0) return 0; *pnumber = (c3<<16)|(c2<<8)|c1; return 1; } int iof_get_le_uint4 (iof *I, uint32_t *pnumber) { int c1, c2, c3, c4; if ((c1 = iof_get(I)) < 0 || (c2 = iof_get(I)) < 0 || (c3 = iof_get(I)) < 0 || (c4 = iof_get(I)) < 0) return 0; *pnumber = (c4<<24)|(c3<<16)|(c2<<8)|c1; return 1; } /* iof input data */ uint8_t * iof_file_input_data (iof_file *iofile, size_t *psize, int *isnew) { uint8_t *data; if (iofile->flags & IOF_DATA) { data = iofile->buf; *psize = iofile->end - iofile->buf; *isnew = 0; return data; } if (iof_file_reopen(iofile)) { data = iof_copy_file_handle_data(iof_file_get_fh(iofile), psize); *isnew = 1; iof_file_reclose(iofile); return data; } return NULL; } /* uint8_t * iof_file_reader_data (iof_file *iofile, size_t *size) { uint8_t *data; if (!(iofile->flags & IOF_DATA) || iofile->pos == NULL || (*size = (size_t)iof_left(iofile)) == 0) return NULL; if (iofile->flags & IOF_BUFFER_ALLOC) { data = iofile->buf; // iofile->pos; // returned must be freeable, makes sense when ->buf == ->pos iofile->flags &= ~IOF_BUFFER_ALLOC; iofile->buf = iofile->pos = iofile->end = NULL; return data; } data = (uint8_t *)util_malloc(*size); memcpy(data, iofile->buf, *size); return data; } uint8_t * iof_file_writer_data (iof_file *iofile, size_t *size) { uint8_t *data; if (!(iofile->flags & IOF_DATA) || iofile->buf == NULL || (*size = (size_t)iof_size(iofile)) == 0) return NULL; if (iofile->flags & IOF_BUFFER_ALLOC) { iofile->flags &= 
~IOF_BUFFER_ALLOC; data = iofile->buf; iofile->buf = iofile->pos = iofile->end = NULL; return data; } data = (uint8_t *)util_malloc(*size); memcpy(data, iofile->buf, *size); return data; } */ uint8_t * iof_reader_data (iof *I, size_t *psize) { uint8_t *data; *psize = (size_t)iof_left(I); if (I->flags & IOF_BUFFER_ALLOC) { data = I->buf; // actually I->pos, but we have to return something freeable I->flags &= ~IOF_BUFFER_ALLOC; I->buf = NULL; } else { data = util_malloc(*psize); memcpy(data, I->pos, *psize); } iof_close(I); return data; } uint8_t * iof_writer_data (iof *O, size_t *psize) { uint8_t *data; *psize = (size_t)iof_size(O); if (O->flags & IOF_BUFFER_ALLOC) { data = O->buf; O->flags &= ~IOF_BUFFER_ALLOC; O->buf = NULL; } else { data = util_malloc(*psize); memcpy(data, O->buf, *psize); } iof_close(O); return data; } size_t iof_reader_to_file_handle (iof *I, FILE *file) { size_t size; for (size = 0; iof_readable(I); I->pos = I->end) size += fwrite(I->buf, sizeof(uint8_t), iof_left(I), file); return size; } size_t iof_reader_to_file (iof *I, const char *filename) { FILE *file; size_t size; if ((file = fopen(filename, "wb")) == NULL) return 0; for (size = 0; iof_readable(I); I->pos = I->end) size += fwrite(I->buf, sizeof(uint8_t), iof_left(I), file); fclose(file); return size; } /* debug */ size_t iof_data_to_file (const void *data, size_t size, const char *filename) { FILE *fh; if ((fh = fopen(filename, "wb")) == NULL) return 0; // size = fwrite(data, size, sizeof(uint8_t), fh); // WRONG, this always returns 1, as fwrite returns the number of elements successfully written out size = fwrite(data, sizeof(uint8_t), size, fh); fclose(fh); return size; } size_t iof_result_to_file_handle (iof *F, FILE *file) { const void *data; size_t size; data = iof_result(F, size); return iof_data_to_file_handle(data, size, file); } size_t iof_result_to_file (iof *F, const char *filename) { const void *data; size_t size; data = iof_result(F, size); return iof_data_to_file(data, 
size, filename); } void iof_debug (iof *I, const char *filename) { FILE *file = fopen(filename, "wb"); if (file != NULL) { fprintf(file, ">>> buf %p <<<\n", I->buf); fwrite(I->buf, sizeof(uint8_t), iof_size(I), file); fprintf(file, "\n>>> pos %p (%ld) <<<\n", I->pos, (long)iof_size(I)); fwrite(I->pos, sizeof(uint8_t), iof_left(I), file); fprintf(file, "\n>>> end %p (%ld) <<<\n", I->end, (long)iof_left(I)); fwrite(I->end, sizeof(uint8_t), I->space - iof_space(I), file); fprintf(file, "\n>>> end of buffer %p (%ld) <<<\n", I->buf + I->space, (long)(I->buf + I->space - I->end)); fclose(file); } } /* common filters api */ /* sizes of filter states on x64 size of iof_filter: 640 (no longer used; sizeof(iof) + sizeof larger state) size of file_state: 16 size of stream_state: 16 size of flate_state: 104 size of lzw_state: 56 size of predictor_state: 104 size of basexx_state: 48 size of basexx_state: 48 size of basexx_state: 48 size of eexec_state: 40 size of runlength_state: 24 size of rc4_state: 24 size of aes_state: 72 size of img_state: 576 size of img: 496 */ typedef struct iof_heap iof_heap; typedef struct { iof_heap *heap; } iof_heap_ghost; struct iof_heap { union { uint8_t *data; iof_heap_ghost *gdata; }; // union instead of casts (ARM) union { uint8_t *pos; iof_heap_ghost *gpos; }; size_t size, space; iof_heap *next, *prev; int refcount; uint8_t dummy[4]; // pad to 8N bytes }; /* We use hidden heap pointer for every allocated buffer, so heap->data should be kept properly aligned. Dummy 4-bytes pad doesn't really matter (the pad is there anyway), but iof_heap_take() must pad the requested size. 
*/ static iof_heap * iof_buffers_heap = NULL; static iof_heap * iof_filters_heap = NULL; #define IOF_HEAP_FILTERS_COUNT 4 #define IOF_BUFFER_SIZE 262144 // (1<<18) #define IOF_FILTER_SIZE 1024 // sizeof(iof_filter) on x64 is now 640, img_state 576, img 496, others 16-104 #define IOF_BUFFER_HEAP_SIZE (IOF_HEAP_FILTERS_COUNT * (IOF_BUFFER_SIZE + sizeof(iof_heap_ghost))) #define IOF_FILTER_HEAP_SIZE (IOF_HEAP_FILTERS_COUNT * (IOF_FILTER_SIZE + sizeof(iof_heap_ghost))) static iof_heap * iof_heap_new (size_t space) { iof_heap *iofheap; iofheap = (iof_heap *)util_malloc(sizeof(iof_heap) + space); iofheap->gdata = iofheap->gpos = (iof_heap_ghost *)(iofheap + 1); iofheap->size = iofheap->space = space; iofheap->next = NULL; iofheap->prev = NULL; iofheap->refcount = 0; return iofheap; } #define iof_heap_free(iofheap) util_free(iofheap) void iof_filters_init (void) { if (iof_buffers_heap == NULL) iof_buffers_heap = iof_heap_new(IOF_BUFFER_HEAP_SIZE); if (iof_filters_heap == NULL) iof_filters_heap = iof_heap_new(IOF_FILTER_HEAP_SIZE); } void iof_filters_free (void) { iof_heap *heap, *next; for (heap = iof_buffers_heap; heap != NULL; heap = next) { next = heap->next; if (heap->refcount != 0) loggerf("not closed iof filters left (%d)", heap->refcount); if (next != NULL) loggerf("iof filters heap left"); iof_heap_free(heap); } iof_buffers_heap = NULL; for (heap = iof_filters_heap; heap != NULL; heap = next) { next = heap->next; if (heap->refcount != 0) loggerf("not closed iof buffers left (%d)", heap->refcount); if (next != NULL) loggerf("iof buffers heap left"); iof_heap_free(heap); } iof_filters_heap = NULL; } #define iof_heap_get(hp, ghost, data, siz) \ (ghost = (hp)->gpos, ghost->heap = (hp), data = (uint8_t *)(ghost + 1), (hp)->pos += siz, (hp)->size -= siz, ++(hp)->refcount) static void * iof_heap_take (iof_heap **pheap, size_t size) { uint8_t *data; iof_heap_ghost *ghost; iof_heap *heap, *newheap, *next; heap = *pheap; if (size & 7) size += 8 - (size & 7); // pad to 8N 
bytes so that (heap->pos + size) remains properly aligned size += sizeof(iof_heap_ghost); if (heap->size >= size) { /* take cheap mem from main heap */ iof_heap_get(heap, ghost, data, size); return data; } if (size <= (heap->space >> 1)) { /* make new cheap heap, make it front */ *pheap = newheap = iof_heap_new(heap->space); newheap->next = heap; heap->prev = newheap; iof_heap_get(newheap, ghost, data, size); return data; } /* size much larger than expected? should not happen. make a single-item heap, keep the front heap intact. */ newheap = iof_heap_new(size); if ((next = heap->next) != NULL) { newheap->next = next; next->prev = newheap; } heap->next = newheap; newheap->prev = heap; iof_heap_get(newheap, ghost, data, size); return data; } static void iof_heap_back (void *data) { iof_heap_ghost *ghost; iof_heap *heap, *next, *prev; ghost = ((iof_heap_ghost *)data) - 1; heap = ghost->heap; if (heap->refcount == 0) loggerf("invalid use of iof heap, refcount < 0"); if (--heap->refcount <= 0) { if ((prev = heap->prev) != NULL) { /* free the heap */ if ((next = heap->next) != NULL) prev->next = next, next->prev = prev; else prev->next = NULL; iof_heap_free(heap); } else { /* this is the front heap, just reset */ heap->pos = heap->data; heap->size = heap->space; } } } /**/ /* void * iof_filter_new (size_t size) { void *data; iof_filters_init(); data = iof_heap_take(&iof_filters_heap, size); return memset(data, 0, size); } */ iof * iof_filter_reader_new (iof_handler handler, size_t statesize, void **pstate) { iof *F; void *filter; uint8_t *buffer; size_t buffersize; iof_filters_init(); filter = iof_heap_take(&iof_filters_heap, sizeof(iof) + statesize); F = (iof *)memset(filter, 0, sizeof(iof) + statesize); buffer = iof_heap_take(&iof_buffers_heap, IOF_BUFFER_SIZE); buffersize = IOF_BUFFER_SIZE; iof_setup_reader(F, buffer, buffersize); F->flags |= IOF_HEAP|IOF_BUFFER_HEAP; F->more = handler; *pstate = (F + 1); return F; } iof * iof_filter_reader_with_buffer_new 
(iof_handler handler, size_t statesize, void **pstate, void *buffer, size_t buffersize) { // for filters that has own buffer (string, some image filters) iof *F; void *filter; iof_filters_init(); filter = iof_heap_take(&iof_filters_heap, sizeof(iof) + statesize); F = (iof *)memset(filter, 0, sizeof(iof) + statesize); iof_setup_reader(F, buffer, buffersize); F->flags |= IOF_HEAP; F->more = handler; *pstate = (F + 1); return F; } iof * iof_filter_writer_new (iof_handler handler, size_t statesize, void **pstate) { iof *F; void *filter; uint8_t *buffer; size_t buffersize; iof_filters_init(); filter = iof_heap_take(&iof_filters_heap, sizeof(iof) + statesize); F = (iof *)memset(filter, 0, sizeof(iof) + statesize); buffer = iof_heap_take(&iof_buffers_heap, IOF_BUFFER_SIZE); buffersize = IOF_BUFFER_SIZE; iof_setup_writer(F, buffer, buffersize); F->flags |= IOF_HEAP|IOF_BUFFER_HEAP; F->more = handler; *pstate = (F + 1); return F; } iof * iof_filter_writer_with_buffer_new (iof_handler handler, size_t statesize, void **pstate, void *buffer, size_t buffersize) { iof *F; void *filter; iof_filters_init(); filter = iof_heap_take(&iof_filters_heap, sizeof(iof) + statesize); F = (iof *)memset(filter, 0, sizeof(iof) + statesize); iof_setup_writer(F, buffer, buffersize); F->flags |= IOF_HEAP; F->more = handler; *pstate = (F + 1); return F; } /**/ #define iof_filter_free(F) iof_heap_back(F) #define iof_filter_buffer_free(data) iof_heap_back(data) /* close */ #define iof_close_next(F) ((void)(iof_decref((F)->next), (F)->next = NULL, 0)) /* when filter creation fails, we should take care to destroy the filter but leave ->next intact */ #define iof_clear_next(F) ((void)(iof_unref((F)->next), (F)->next = NULL, 0)) #define iof_close_buffer(F) ((void)\ ((F)->buf != NULL ? \ ((F->flags & IOF_BUFFER_ALLOC) ? (util_free((F)->buf), (F)->buf = NULL, 0) : \ ((F->flags & IOF_BUFFER_HEAP) ? 
(iof_filter_buffer_free((F)->buf), (F)->buf = NULL, 0) : ((F)->buf = NULL, 0))) : 0)) /* closing underlying file handle */ static void iof_close_file (iof *F) { FILE *file; //if (F->flags & IOF_FILE_HANDLE) //{ if ((file = F->file) != NULL) { if (F->flags & IOF_CLOSE_FILE) fclose(F->file); F->file = NULL; } //} } /* a very special variant for reader filters initiated with iof_file_reopen(). It also calls iof_file_reclose(), which takes an effect only if previously reopened, but better to keep all this thin ice separated. Used in filters: iofile_reader, iofile_stream_reader, image decoders. */ static void iof_close_iofile (iof *F) { iof_file *iofile; //if (F->flags & IOF_FILE) //{ if ((iofile = F->iofile) != NULL) { iof_file_unsync(iofile, NULL); iof_file_reclose(iofile); // takes an effect iff prevoiusly reopened iof_file_decref(iofile); F->iofile = NULL; } //} } void iof_free (iof *F) { if (F->flags & IOF_FILE_HANDLE) iof_close_file(F); else if (F->flags & IOF_FILE) iof_close_iofile(F); else if (F->flags & IOF_NEXT) iof_close_next(F); iof_close_buffer(F); if (F->flags & IOF_HEAP) iof_filter_free(F); else if (F->flags & IOF_ALLOC) util_free(F); } void iof_discard (iof *F) { // so far used only on failed filters creation; as iof_free() but don't dare to release ->next if (F->flags & IOF_FILE_HANDLE) iof_close_file(F); else if (F->flags & IOF_FILE) iof_close_iofile(F); //else if (F->flags & IOF_NEXT) // iof_close_next(F); iof_close_buffer(F); if (F->flags & IOF_HEAP) iof_filter_free(F); else if (F->flags & IOF_ALLOC) util_free(F); } /* resizing buffer */ size_t iof_resize_buffer_to (iof *O, size_t space) { uint8_t *buf; if (O->flags & IOF_BUFFER_ALLOC) { buf = (uint8_t *)util_realloc(O->buf, space); } else { buf = (uint8_t *)util_malloc(space); memcpy(buf, O->buf, iof_size(O)); if (O->flags & IOF_BUFFER_HEAP) { iof_filter_buffer_free(O->buf); O->flags &= ~IOF_BUFFER_HEAP; } O->flags |= IOF_BUFFER_ALLOC; } O->pos = buf + iof_size(O); O->end = buf + space; O->buf = 
buf; O->space = space; return iof_left(O); } /* */ size_t iof_decoder_retval (iof *I, const char *type, iof_status status) { switch (status) { case IOFERR: case IOFEMPTY: // should never happen as we set state.flush = 1 on decoders init loggerf("%s decoder error (%d, %s)", type, status, iof_status_kind(status)); I->flags |= IOF_STOPPED; return 0; case IOFEOF: // this is the last chunk, I->flags |= IOF_STOPPED; // so stop it and fall FALLTHRU // fall through case IOFFULL: // prepare pointers to read from I->buf I->end = I->pos; I->pos = I->buf; return I->end - I->buf; } loggerf("%s decoder bug, invalid retval %d", type, status); return 0; } size_t iof_encoder_retval (iof *O, const char *type, iof_status status) { switch (status) { case IOFERR: case IOFFULL: loggerf("%s encoder error (%d, %s)", type, status, iof_status_kind(status)); return 0; case IOFEMPTY: O->pos = O->buf; O->end = O->buf + O->space; return O->space; case IOFEOF: return 0; } loggerf("%s encoder bug, invalid retval %d", type, status); return 0; } /* file/stream state */ typedef struct { size_t length; size_t offset; } file_state; #define file_state_init(state, off, len) ((state)->offset = off, (state)->length = len) typedef struct { size_t length; size_t offset; } stream_state; #define stream_state_init(state, off, len) ((state)->offset = off, (state)->length = len) /* union type to avoid 'dereferencing type-punned .. 
' warnings on (void **) case */ typedef union { file_state *filestate; stream_state *streamstate; void *voidstate; } fs_state_pointer; /**/ static size_t file_read (iof *I) { size_t bytes, tail; if (I->flags & IOF_STOPPED) return 0; tail = iof_tail(I); if ((bytes = tail + fread(I->buf + tail, sizeof(uint8_t), I->space - tail, I->file)) < I->space) I->flags |= IOF_STOPPED; I->pos = I->buf; I->end = I->buf + bytes; return bytes; } static size_t iofile_read (iof *I, size_t *poffset) { size_t bytes, tail; if (I->flags & IOF_STOPPED) return 0; iof_file_sync(I->iofile, poffset); tail = iof_tail(I); if ((bytes = tail + iof_file_read(I->buf + tail, sizeof(uint8_t), I->space - tail, I->iofile)) < I->space) { I->flags |= IOF_STOPPED; iof_file_unsync(I->iofile, poffset); } I->pos = I->buf; I->end = I->buf + bytes; return bytes; } static size_t file_load (iof *I) { size_t bytes, left, tail; if (I->flags & IOF_STOPPED) return 0; tail = iof_tail(I); I->pos = I->buf + tail; I->end = I->buf + I->space; /* don't assume its done when initializing the filter */ left = I->space - tail; do { bytes = fread(I->pos, sizeof(uint8_t), left, I->file); I->pos += bytes; } while (bytes == left && (left = iof_resize_buffer(I)) > 0); I->flags |= IOF_STOPPED; return iof_loaded(I); } static size_t iofile_load (iof *I, size_t *poffset) { size_t bytes, left, tail; if (I->flags & IOF_STOPPED) return 0; tail = iof_tail(I); I->pos = I->buf + tail; I->end = I->buf + I->space; /* don't assume its done when initializing the filter */ left = I->space - tail; iof_file_sync(I->iofile, poffset); do { bytes = iof_file_read(I->pos, sizeof(uint8_t), left, I->iofile); I->pos += bytes; } while (bytes == left && (left = iof_resize_buffer(I)) > 0); I->flags |= IOF_STOPPED; iof_file_unsync(I->iofile, poffset); return iof_loaded(I); } static size_t filter_file_reader (iof *I, iof_mode mode) { switch (mode) { case IOFREAD: return file_read(I); case IOFLOAD: return file_load(I); case IOFCLOSE: iof_free(I); return 0; 
default: return 0; } } static size_t filter_iofile_reader (iof *I, iof_mode mode) { file_state *state; state = iof_filter_state(file_state *, I); switch (mode) { case IOFREAD: return iofile_read(I, &state->offset); case IOFLOAD: return iofile_load(I, &state->offset); case IOFCLOSE: iof_free(I); return 0; default: return 0; } } static size_t file_write (iof *O, int flush) { size_t bytes; if ((bytes = iof_size(O)) > 0) if (bytes != fwrite(O->buf, sizeof(uint8_t), bytes, O->file)) return 0; if (flush) fflush(O->file); O->end = O->buf + O->space; // remains intact actually O->pos = O->buf; return O->space; } static size_t iofile_write (iof *O, size_t *poffset, int flush) { size_t bytes; iof_file_sync(O->iofile, poffset); if ((bytes = iof_size(O)) > 0) { if (bytes != iof_file_write(O->buf, sizeof(uint8_t), bytes, O->iofile)) { iof_file_unsync(O->iofile, poffset); return 0; } } if (flush) iof_file_flush(O->iofile); O->end = O->buf + O->space; // remains intact actually O->pos = O->buf; return O->space; } static size_t filter_file_writer (iof *O, iof_mode mode) { switch (mode) { case IOFWRITE: return file_write(O, 0); case IOFFLUSH: return file_write(O, 1); case IOFCLOSE: file_write(O, 1); iof_free(O); return 0; default: return 0; } } static size_t filter_iofile_writer (iof *O, iof_mode mode) { file_state *state; state = iof_filter_state(file_state *, O); switch (mode) { case IOFWRITE: return iofile_write(O, &state->offset, 0); case IOFFLUSH: return iofile_write(O, &state->offset, 1); case IOFCLOSE: iofile_write(O, &state->offset, 1); iof_free(O); return 0; default: return 0; } } /* filter from FILE* */ iof * iof_filter_file_handle_reader (FILE *file) { iof *I; fs_state_pointer P; if (file == NULL) return NULL; I = iof_filter_reader(filter_file_reader, sizeof(file_state), &P.voidstate); iof_setup_file(I, file); file_state_init(P.filestate, 0, 0); return I; } iof * iof_filter_file_handle_writer (FILE *file) { iof *O; fs_state_pointer P; if (file == NULL) return NULL; O = 
iof_filter_writer(filter_file_writer, sizeof(file_state), &P.voidstate); iof_setup_file(O, file); file_state_init(P.filestate, 0, 0); return O; } /* filter from iof_file * */ iof * iof_filter_iofile_reader (iof_file *iofile, size_t offset) { iof *I; fs_state_pointer P; if (!iof_file_reopen(iofile)) return NULL; I = iof_filter_reader(filter_iofile_reader, sizeof(file_state), &P.voidstate); iof_setup_iofile(I, iofile); file_state_init(P.filestate, offset, 0); return I; } iof * iof_filter_iofile_writer (iof_file *iofile, size_t offset) { iof *O; fs_state_pointer P; O = iof_filter_writer(filter_iofile_writer, sizeof(file_state), &P.voidstate); iof_setup_iofile(O, iofile); file_state_init(P.filestate, offset, 0); return O; } /* filter from filename */ iof * iof_filter_file_reader (const char *filename) { iof *I; fs_state_pointer P; FILE *file; if ((file = fopen(filename, "rb")) == NULL) return NULL; I = iof_filter_reader(filter_file_reader, sizeof(file_state), &P.voidstate); iof_setup_file(I, file); file_state_init(P.filestate, 0, 0); I->flags |= IOF_CLOSE_FILE; return I; } iof * iof_filter_file_writer (const char *filename) { iof *O; fs_state_pointer P; FILE *file; if ((file = fopen(filename, "wb")) == NULL) return NULL; O = iof_filter_writer(filter_file_writer, sizeof(file_state), &P.voidstate); iof_setup_file(O, file); file_state_init(P.filestate, 0, 0); O->flags |= IOF_CLOSE_FILE; return O; } /* from string */ static size_t dummy_handler (iof *I, iof_mode mode) { switch (mode) { case IOFCLOSE: iof_free(I); return 0; default: return 0; } } iof * iof_filter_string_reader (const void *s, size_t length) { iof *I; void *dummy; I = iof_filter_reader_with_buffer(dummy_handler, 0, &dummy, NULL, 0); I->rbuf = I->rpos = (const uint8_t *)s; I->rend = (const uint8_t *)s + length; // I->space = length; return I; } iof * iof_filter_string_writer (const void *s, size_t length) { iof *O; void *dummy; O = iof_filter_reader_with_buffer(dummy_handler, 0, &dummy, NULL, 0); O->rbuf = 
O->rpos = (const uint8_t *)s; O->rend = (const uint8_t *)s + length; // O->space = length; return O; } iof * iof_filter_buffer_writer (size_t size) { // cmp iof_buffer_create() iof *O; fs_state_pointer dummy; uint8_t *buffer; if (size > IOF_BUFFER_SIZE) { buffer = (uint8_t *)util_malloc(size); O = iof_filter_writer_with_buffer(iof_mem_handler, 0, &dummy.voidstate, buffer, size); O->flags |= IOF_BUFFER_ALLOC; return O; } return iof_filter_writer(iof_mem_handler, 0, &dummy.voidstate); } /* stream */ static size_t file_stream_read (iof *I, size_t *plength) { size_t bytes, tail; if (I->flags & IOF_STOPPED || *plength == 0) return 0; tail = iof_tail(I); if (I->space - tail >= *plength) { bytes = tail + fread(I->buf + tail, sizeof(uint8_t), *plength, I->file); I->flags |= IOF_STOPPED; *plength = 0; } else { bytes = tail + fread(I->buf + tail, sizeof(uint8_t), I->space - tail, I->file); *plength -= bytes - tail; } I->pos = I->buf; I->end = I->buf + bytes; return bytes; } static size_t iofile_stream_read (iof *I, size_t *plength, size_t *poffset) { size_t bytes, tail; if (I->flags & IOF_STOPPED || *plength == 0) return 0; tail = iof_tail(I); iof_file_sync(I->iofile, poffset); if (I->space - tail >= *plength) { bytes = tail + iof_file_read(I->buf + tail, sizeof(uint8_t), *plength, I->iofile); iof_file_unsync(I->iofile, poffset); I->flags |= IOF_STOPPED; *plength = 0; } else { bytes = tail + iof_file_read(I->buf + tail, sizeof(uint8_t), I->space - tail, I->iofile); *plength -= bytes - tail; } I->pos = I->buf; I->end = I->buf + bytes; return bytes; } static size_t file_stream_load (iof *I, size_t *plength) { size_t bytes, tail; if (I->flags & IOF_STOPPED || *plength == 0) return 0; tail = iof_tail(I); if (I->space - tail < *plength) if (iof_resize_buffer_to(I, tail + *plength) == 0) return 0; bytes = tail + fread(I->buf + tail, sizeof(uint8_t), *plength, I->file); I->flags |= IOF_STOPPED; *plength = 0; I->pos = I->buf; I->end = I->buf + bytes; return bytes; } static size_t 
iofile_stream_load (iof *I, size_t *plength, size_t *poffset) { size_t bytes, tail; if (I->flags & IOF_STOPPED || *plength == 0) return 0; iof_file_sync(I->iofile, poffset); tail = iof_tail(I); if (I->space - tail < *plength) if (iof_resize_buffer_to(I, tail + *plength) == 0) return 0; bytes = tail + iof_file_read(I->buf + tail, sizeof(uint8_t), *plength, I->iofile); iof_file_unsync(I->iofile, poffset); I->flags |= IOF_STOPPED; *plength = 0; I->pos = I->buf; I->end = I->buf + bytes; return bytes; } static size_t filter_file_stream_reader (iof *I, iof_mode mode) { stream_state *state; state = iof_filter_state(stream_state *, I); switch(mode) { case IOFREAD: return file_stream_read(I, &state->length); case IOFLOAD: return file_stream_load(I, &state->length); case IOFCLOSE: iof_free(I); return 0; default: return 0; } } static size_t filter_iofile_stream_reader (iof *I, iof_mode mode) { stream_state *state; state = iof_filter_state(stream_state *, I); switch(mode) { case IOFREAD: return iofile_stream_read(I, &state->length, &state->offset); case IOFLOAD: return iofile_stream_load(I, &state->length, &state->offset); case IOFCLOSE: iof_free(I); return 0; default: return 0; } } iof * iof_filter_stream_reader (FILE *file, size_t offset, size_t length) { iof *I; fs_state_pointer P; I = iof_filter_reader(filter_file_stream_reader, sizeof(stream_state), &P.voidstate); iof_setup_file(I, file); stream_state_init(P.streamstate, offset, length); fseek(file, (long)offset, SEEK_SET); // or perhaps it should be call in file_stream_read(), like iof_file_sync()? 
return I; } iof * iof_filter_stream_coreader (iof_file *iofile, size_t offset, size_t length) { iof *I; fs_state_pointer P; if (!iof_file_reopen(iofile)) return NULL; I = iof_filter_reader(filter_iofile_stream_reader, sizeof(stream_state), &P.voidstate); iof_setup_iofile(I, iofile); stream_state_init(P.streamstate, offset, length); return I; } static size_t file_stream_write (iof *O, size_t *plength, int flush) { size_t bytes; if ((bytes = iof_size(O)) > 0) { if (bytes != fwrite(O->buf, sizeof(uint8_t), bytes, O->file)) { *plength += bytes; return 0; } } if (flush) fflush(O->file); *plength += bytes; O->end = O->buf + O->space; // remains intact O->pos = O->buf; return O->space; } static size_t iofile_stream_write (iof *O, size_t *plength, size_t *poffset, int flush) { size_t bytes; if ((bytes = iof_size(O)) > 0) { iof_file_sync(O->iofile, poffset); if (bytes != iof_file_write(O->buf, sizeof(uint8_t), bytes, O->iofile)) { *plength += bytes; iof_file_unsync(O->iofile, poffset); return 0; } } if (flush) iof_file_flush(O->iofile); *plength += bytes; O->end = O->buf + O->space; // remains intact O->pos = O->buf; return O->space; } static size_t filter_file_stream_writer (iof *O, iof_mode mode) { stream_state *state; state = iof_filter_state(stream_state *, O); switch (mode) { case IOFWRITE: return file_stream_write(O, &state->length, 0); case IOFFLUSH: return file_stream_write(O, &state->length, 1); case IOFCLOSE: file_stream_write(O, &state->length, 1); iof_free(O); return 0; default: return 0; } } static size_t filter_iofile_stream_writer (iof *O, iof_mode mode) { stream_state *state; state = iof_filter_state(stream_state *, O); switch (mode) { case IOFWRITE: return iofile_stream_write(O, &state->length, &state->offset, 0); case IOFFLUSH: return iofile_stream_write(O, &state->length, &state->offset, 1); case IOFCLOSE: iofile_stream_write(O, &state->length, &state->offset, 1); iof_free(O); return 0; default: return 0; } } iof * iof_filter_stream_writer (FILE *file) { 
iof *O; fs_state_pointer P; O = iof_filter_writer(filter_file_stream_writer, sizeof(stream_state), &P.voidstate); iof_setup_file(O, file); stream_state_init(P.streamstate, 0, 0); return O; } iof * iof_filter_stream_cowriter (iof_file *iofile, size_t offset) { iof *O; fs_state_pointer P; O = iof_filter_writer(filter_iofile_stream_writer, sizeof(stream_state), &P.voidstate); iof_setup_iofile(O, iofile); stream_state_init(P.streamstate, offset, 0); return O; } /* very specific for images; get input from already created strem filter, exchange the filter but keep the buffer */ FILE * iof_filter_file_reader_source (iof *I, size_t *poffset, size_t *plength) { fs_state_pointer P; if (I->more == filter_file_stream_reader) // I is the result of iof_filter_stream_reader() { P.streamstate = iof_filter_state(stream_state *, I); *poffset = P.streamstate->offset; *plength = P.streamstate->length; // might be 0 but it is ok for file readers return I->file; } if (I->more == filter_file_reader) { P.filestate = iof_filter_state(file_state *, I); *poffset = P.filestate->offset; *plength = P.filestate->length; // might be 0 but it is ok for file readers return I->file; } return NULL; } iof_file * iof_filter_file_coreader_source (iof *I, size_t *poffset, size_t *plength) { fs_state_pointer P; if (I->more == filter_iofile_stream_reader) // I is the result of iof_filter_stream_coreader() { P.streamstate = iof_filter_state(stream_state *, I); *poffset = P.streamstate->offset; *plength = P.streamstate->length; return I->iofile; } if (I->more == filter_iofile_reader) { P.filestate = iof_filter_state(file_state *, I); *poffset = P.filestate->offset; *plength = P.filestate->length; return I->iofile; } return NULL; } iof * iof_filter_reader_replacement (iof *P, iof_handler handler, size_t statesize, void **pstate) { // called after iof_filter_file_reader_source(), no need to check if F is filter from iof heap and if has buffer from iof heap iof *F; F = iof_filter_reader_with_buffer(handler, 
statesize, pstate, P->buf, P->space); F->flags |= IOF_BUFFER_HEAP; //iof_setup_reader(P, NULL, 0); //P->flags &= ~IOF_BUFFER_HEAP; iof_filter_free(P); return F; } luametatex-2.10.08/source/libraries/pplib/util/utiliof.h000066400000000000000000000664241442250314700232340ustar00rootroot00000000000000 #ifndef UTIL_IOF_H #define UTIL_IOF_H #include // for FILE * #include // for errno #include // for strerror() #include // for uintN_t #include "utildecl.h" #include "utilnumber.h" /* handler call modes */ typedef enum { IOFREAD = 0, /* read to buffer */ IOFLOAD = 1, /* read all to buffer */ IOFWRITE = 2, /* write buffer to the output */ IOFFLUSH = 3, /* flush buffer to the output */ IOFCLOSE = 4 /* (flush and) close */ } iof_mode; /* return statuses */ typedef enum { IOFEOF = -1, /* end of input */ IOFEMPTY = -2, /* end of input buffer*/ IOFFULL = -3, /* end of output buffer */ IOFERR = -4 /* error */ } iof_status; const char * iof_status_kind (iof_status status); /* iof_file */ typedef struct iof_file { union { FILE *iofh; // access via iof_file_get_fh / iof_file_set_fh (below) union { struct { uint8_t *buf, *pos, *end; }; struct { const uint8_t *rbuf, *rpos, *rend; }; // to trick compiler warnings about cast discarding const }; }; size_t *offset; char *name; size_t size; int refcount; int flags; } iof_file; /* iof handler function */ typedef struct iof iof; typedef size_t (*iof_handler) (iof *I, iof_mode mode); /* iof structure; keep 8N bytes */ #define IOF_MEMBERS \ union { \ struct { uint8_t *buf, *pos, *end; }; \ struct { uint16_t *hbuf, *hpos, *hend; }; \ struct { uint32_t *ibuf, *ipos, *iend; }; \ struct { const uint8_t *rbuf, *rpos, *rend; }; \ }; \ size_t space; \ iof_handler more; \ union { void *link; iof *next; FILE *file; iof_file *iofile; }; \ int flags; \ int refcount /* buf -- the beginning of buffer pos -- the current position end -- the end of buffer space -- private space size, not always eq. 
(end - buf) more -- handler function next/file/iofile/link -- reader source or writer target source -- source filter flags -- private filter info refcount -- refcount */ struct iof { IOF_MEMBERS; }; typedef void (*iof_dump_function) (const void *value, iof *O); /* flags */ #define IOF_ALLOC (1<<0) // iof is allocated #define IOF_HEAP (1<<1) // iof taken from iof heap #define IOF_BUFFER_ALLOC (1<<2) // buffer allocated #define IOF_BUFFER_HEAP (1<<3) // buffer taken from iof heap #define IOF_SHORT (1<<4) // buffer uses 16bit integers #define IOF_LONG (1<<5) // buffer uses 32bit integers #define IOF_TAIL (1<<6) // preserve reader tail #define IOF_READER (1<<7) // is reader #define IOF_WRITER (1<<8) // is writer #define IOF_DATA (1<<9) // binds some memory #define IOF_FILE_HANDLE (1<<10) // links FILE * #define IOF_FILE (1<<11) // links iof_file * #define IOF_NEXT (1<<12) // links next iof * #define IOF_CLOSE_FILE (1<<13) // close FILE * on free #define IOF_REOPEN_FILE (1<<14) // close/reopen mode for iof_file #define IOF_RECLOSE_FILE (1<<15) // ditto #define IOF_STOPPED (1<<16) // stopped // #define IOF_CUSTOM (1<<17) // first custom flag #define IOF_BUFSIZ (sizeof(iof) + BUFSIZ*sizeof(uint8_t)) /* reading buffer -- all of buf, pos, end pointers are initialized to the beginning of the private buffer, next call to a handler function moves the end pointer to bufer+space writer -- buf and pos pointers initialized to the beginning of the buffer, end initialized to bufer+space Every call to handler returns size_t number of bytes available (to write/read) or 0 if there is no more space. We usually align the data buffer just after the iof structure. This is convenient, especially when a memory for the structure and its buffer is to be allocated. In the case of growing output buffers we used to check if the memory of the buffer is allocated by the handler function using test (O->buf != (O+1)). We don't use it any longer not to rely on little secrets. 
Now there is an explicit IOF_BUFFER_ALLOC flag for that. IOF_ALLOC tells if the structure itself is taken from malloc (not used so far). Assuming the buffer size is way larger the sizeof(iof) */ /* initializers */ #define IOF_READER_INIT(handler, file, buffer, size, flags) \ { {{ (uint8_t *)(buffer), (uint8_t *)(buffer), (uint8_t *)(buffer) }}, size, handler, { file }, (flags)|IOF_READER, 0 } #define IOF_WRITER_INIT(handler, file, buffer, size, flags) \ { {{ (uint8_t *)(buffer), (uint8_t *)(buffer), (uint8_t *)(buffer) + size }}, size, handler, { file }, (flags)|IOF_WRITER, 0 } #define IOF_STRING_INIT(buffer, size) \ { {{ (uint8_t *)(buffer), (uint8_t *)(buffer), (uint8_t *)(buffer) + size }}, size, NULL, { NULL }, 0|IOF_READER|IOF_DATA, 0 } #define IOF_STRING() IOF_STRING_INIT(0, 0) /* refcount */ #define iof_incref(I) (++(I)->refcount) #define iof_decref(I) ((void)(--(I)->refcount <= 0 && iof_close(I))) #define iof_unref(I) (--(I)->refcount) /* binding buffer of a given size */ #define iof_setup_reader(I, buffer, size) \ ((I)->buf = (I)->pos = (I)->end = (uint8_t *)(buffer), \ (I)->space = size, (I)->flags = 0|IOF_READER, (I)->refcount = 0) #define iof_setup_writer(O, buffer, size) \ ((O)->buf = (O)->pos = (uint8_t *)(buffer), \ (O)->end = (uint8_t *)(buffer) + size, \ (O)->space = size, (O)->flags = 0|IOF_WRITER, (O)->refcount = 0) /* basics */ #define iof_space(I) ((I)->end - (I)->buf) #define iof_left(I) ((I)->end - (I)->pos) #define iof_size(I) ((I)->pos - (I)->buf) #define iof_input(I) ((I)->more ? (I)->more((I), IOFREAD) : 0lu) #define iof_load(I) ((I)->more ? (I)->more((I), IOFLOAD) : 0lu) #define iof_output(O) ((O)->more ? (O)->more((O), IOFWRITE) : 0lu) //#define iof_flush(O) ((O)->pos > (O)->buf && (O)->more ? (O)->more(O, IOFFLUSH) : 0lu) // flush should be unconditional, because encoders emits EOD markers only on flush #define iof_flush(O) ((O)->more ? (O)->more(O, IOFFLUSH) : 0lu) #define iof_close(O) ((O)->more ? 
(O)->more(O, IOFCLOSE) : 0lu) #define iof_stop(F) ((void)(F->pos = F->end = F->buf, F->flags |= IOF_STOPPED)) /* Rewriting reader tail to the beginning of new data portion; readers reacting on IOFREAD mode must be aware of some not yet read data, but treat it necessary only if IOF_TAIL flag is set. Parsers using iof input may protect not yet read data when there may be a need to put bytes back to the stream. This is trivial when I->pos > I->buf, as we can make a move by --I->pos. But when there is a need to put back more then one byte, we can protect the data tail, so that realoder will rewrite it to the beginning of new data chunk. iof_tail(I) - internal, used by iof handlers at IOFREAD mode iof_protect_tail(I) - used by parsers to ensure some bytes chunk in one piece */ size_t iof_save_tail (iof *I); #define iof_tail(I) (((I)->flags & IOF_TAIL) && (I)->pos < (I)->end ? iof_save_tail(I) : 0) size_t iof_input_save_tail (iof *I, size_t back); #define iof_protect_tail(I, back, length) ((iof_left(I) >= (length) - (back)) ? 1 : (iof_input_save_tail(I, back) >= length - back)) //uint8_t * iof_tail_data (iof *I, size_t *ptail); //#define iof_tail_free(data) util_free(data) /* panic */ // #define iof_panic(mess) return 0 #ifndef iof_panic #define iof_panic(mess) (fputs(mess, stderr), abort()) #endif //#define iof_memory_error() iof_panic(strerror(errno)) #define iof_fwrite_error() iof_panic(strerror(errno)) /* generic helpers */ UTILAPI uint8_t * iof_copy_file_data (const char *filename, size_t *psize); UTILAPI uint8_t * iof_copy_file_handle_data (FILE *file, size_t *psize); /* In the future we may need releasing file handle and restoring it from iofile->name, so access file handle via macros */ #define iof_file_get_fh(iofile) ((iofile)->iofh) #define iof_file_set_fh(iofile, fh) ((iofile)->iofh = fh) #define iof_file_get_file(iofile) (((iofile)->flags & IOF_DATA) ? 
NULL : iof_file_get_fh(iofile)) FILE * iof_get_file (iof *F); /* basic iof_file interface */ iof_file * iof_file_new (FILE *file); iof_file * iof_file_init (iof_file *iofile, FILE *file); iof_file * iof_file_rdata (const void *data, size_t size); iof_file * iof_file_wdata (void *data, size_t size); iof_file * iof_file_rdata_init (iof_file *iofile, const void *data, size_t size); iof_file * iof_file_wdata_init (iof_file *iofile, void *data, size_t size); iof_file * iof_file_reader_from_file_handle (iof_file *iofile, const char *filename, FILE *file, int preload, int closefile); iof_file * iof_file_reader_from_file (iof_file *iofile, const char *filename, int preload); iof_file * iof_file_reader_from_data (iof_file *iofile, const void *data, size_t size, int preload, int freedata); //iof_file * iof_file_writer_from_file (iof_file *iofile, const char *filename); void * iof_copy_data (const void *data, size_t size); #define iof_data_free(data) util_free(data) #define iof_file_wdata_copy(data, size) iof_file_wdata(iof_copy_data(data, size), size) #define iof_file_rdata_copy(data, size) iof_file_rdata(iof_copy_data(data, size), size) void iof_file_free (iof_file *iofile); #define iof_file_get_name(iofile) ((iofile)->name) void iof_file_set_name (iof_file *iofile, const char *name); #define iof_file_incref(iofile) (++(iofile)->refcount) #define iof_file_decref(iofile) ((void)(--(iofile)->refcount <= 0 && (iof_file_free(iofile), 0))) int iof_file_seek (iof_file *iofile, long offset, int whence); long iof_file_tell (iof_file *iofile); size_t iof_file_size (iof_file *iofile); int iof_file_eof (iof_file *iofile); size_t iof_file_read (void *ptr, size_t size, size_t items, iof_file *iofile); size_t iof_file_write (const void *ptr, size_t size, size_t items, iof_file *iofile); size_t iof_file_ensure (iof_file *iofile, size_t bytes); int iof_file_flush (iof_file *iofile); int iof_file_getc (iof_file *iofile); int iof_file_putc (iof_file *iofile, int c); int 
iof_file_reclose_input (iof_file *iofile); int iof_file_reopen_input (iof_file *iofile); #define iof_file_reopen(iofile) (((iofile)->flags & IOF_REOPEN_FILE) ? iof_file_reopen_input(iofile) : 1) #define iof_file_reclose(iofile) (void)(((iofile)->flags & IOF_RECLOSE_FILE) ? iof_file_reclose_input(iofile) : 0) void iof_file_close_input (iof_file *iofile); /* wrappers of basic operations for iof */ int iof_reader_seek (iof *I, long offset, int whence); int iof_reader_reseek (iof *I, long offset, int whence); int iof_writer_seek (iof *I, long offset, int whence); int iof_writer_reseek (iof *I, long offset, int whence); int iof_seek (iof *I, long offset, int whence); int iof_reseek (iof *I, long offset, int whence); long iof_reader_tell (iof *I); long iof_writer_tell (iof *I); long iof_tell (iof *I); size_t iof_fsize (iof *I); #define iof_setup_iofile(I, f) (iof_file_incref(f), (I)->iofile = f, (I)->flags |= IOF_FILE) #define iof_setup_file(I, fh) ((I)->file = fh, (I)->flags |= IOF_FILE_HANDLE) #define iof_setup_next(I, N) ((I)->next = N, iof_incref(N), (I)->flags |= IOF_NEXT) /* file handler reader and writer */ UTILAPI iof * iof_setup_file_handle_reader (iof *I, void *buffer, size_t space, FILE *f); UTILAPI iof * iof_setup_file_handle_writer (iof *O, void *buffer, size_t space, FILE *f); /* file reader and writer */ UTILAPI iof * iof_setup_file_reader (iof *I, void *buffer, size_t space, const char *filename); UTILAPI iof * iof_setup_file_writer (iof *O, void *buffer, size_t space, const char *filename); /* mem writer */ UTILAPI iof * iof_setup_buffer (iof *O, void *buffer, size_t space); UTILAPI iof * iof_setup_buffermin (iof *O, void *buffer, size_t space, size_t min); UTILAPI iof * iof_buffer_create (size_t space); #define iof_buffer_new() iof_buffer_create(BUFSIZ) /* custom handler */ UTILAPI iof * iof_reader (iof *I, void *link, iof_handler reader, const void *s, size_t bytes); UTILAPI iof * iof_writer (iof *O, void *link, iof_handler writer, void *s, size_t 
bytes); /* stdout wrapper */ extern UTILAPI iof iof_stdout; extern UTILAPI iof iof_stderr; /* simple string reader */ UTILAPI iof * iof_string_reader (iof *I, const void *s, size_t bytes); #define iof_string(I, s, bytes) \ (((I)->rbuf = (I)->rpos = (const uint8_t *)s), ((I)->rend = (I)->rbuf + (bytes)), ((I)->flags |= IOF_DATA), (I)) /* dummies */ UTILAPI iof * iof_dummy (void *buffer, size_t space); UTILAPI iof * iof_null (void *buffer, size_t space); /* checking available space */ #define iof_loadable(I) ((I)->pos < (I)->end || iof_load(I)) #define iof_readable(I) ((I)->pos < (I)->end || iof_input(I)) #define iof_writable(O) ((O)->pos < (O)->end || iof_output(O)) #define iof_hloadable iof_loadable #define iof_iloadable iof_loadable #define iof_hreadable iof_readable #define iof_ireadable iof_readable #define iof_hwritable iof_writable #define iof_iwritable iof_writable /* ensure space to write several bytes (several means less then I->space) */ #define iof_ensure(O, n) ((O)->pos+(n)-1 < (O)->end || iof_output(O)) // iof_ensure(O, 1) eq iof_writable(O) #define iof_hensure(O, n) ((O)->hpos+(n)-1 < (O)->hend || iof_output(O)) #define iof_iensure(O, n) ((O)->ipos+(n)-1 < (O)->iend || iof_output(O)) /* reading */ UTILAPI int iof_getc (iof *I); UTILAPI int iof_hgetc (iof *I); UTILAPI int iof_igetc (iof *I); // UTILAPI int iof_cmp (iof *I, const char *s); // UTILAPI int iof_cmpn (iof *I, const char *s, size_t bytes); UTILAPI iof_status iof_pass (iof *I, iof *O); #define iof_hpass iof_pass #define iof_ipass iof_pass /* readers helpers */ UTILAPI size_t iof_read (iof *I, void *s, size_t bytes); UTILAPI size_t iof_hread (iof *I, void *s, size_t bytes); UTILAPI size_t iof_iread (iof *I, void *s, size_t bytes); UTILAPI size_t iof_skip (iof *I, size_t bytes); UTILAPI size_t iof_hskip (iof *I, size_t bytes); UTILAPI size_t iof_iskip (iof *I, size_t bytes); /* get */ #define iof_pos(I) (*(I)->pos++) #define iof_hpos(I) (*(I)->hpos++) #define iof_ipos(I) (*(I)->ipos++) #define 
iof_get(I) (iof_readable(I) ? (int)(*(I)->pos++) : IOFEOF) #define iof_hget(I) (iof_hreadable(I) ? (int)(*(I)->hpos++) : IOFEOF) #define iof_iget(I) (iof_ireadable(I) ? (int)(*(I)->ipos++) : IOFEOF) #define iof_char(I) (iof_readable(I) ? (int)(*(I)->pos) : IOFEOF) #define iof_hcurr(I) (iof_hreadable(I) ? (int)(*(I)->hpos) : IOFEOF) #define iof_icurr(I) (iof_ireadable(I) ? (int)(*(I)->ipos) : IOFEOF) #define iof_next(I) (++(I)->pos, iof_char(I)) #define iof_hnext(I) (++(I)->hpos, iof_hcurr(I)) #define iof_inext(I) (++(I)->ipos, iof_icurr(I)) /* unget */ /* If possible, we just move the position backward. If it is not possible to move backward, we call iof_backup(I, c) that sets all pointers to the end of a private backup space, then moves buf AND pos pointers backward and set c at pos (==buf). We can backup characters as long as there is a private space. If several calls to iof_backup() are followed by iof_get(), pos pointer increases in normal way and so the use of another iof_unget() works just fine by moving the position. Once we swallow all backup characters (when pos==end), backup handler restores the previous pointers. Obviously we assume that the character provided to iof_unget() is always the character just obtained from iof_get(). We CAN'T just overwrite the character at a given position as the space we read may not be writable. When backup is in use, we can only get bytes until automatically restored. */ /* backup */ /* #define iof_uses_backup(I) ((I)->more == iof_unget_handler) #define iof_save(I, B) \ ((B)->buf = (I)->buf, (B)->pos = (I)->pos, (B)->end = (I)->end, (B)->space = (I)->space, \ (B)->link = I->link, (B)->more = (I)->more, (B)->flags = (I)->flags) #define iof_restore(B, I) iof_save(I, B) #define iof_unget(I, c) \ ((void)(c == (uint8_t)c ? ((I)->pos > (I)->buf ? 
--(I)->pos : iof_backup(I, c)) : 0) int iof_backup (iof *I, int c); */ /* writing */ UTILAPI size_t iof_write_file_handle (iof *O, FILE *file); UTILAPI size_t iof_write_file (iof *O, const char *filename); UTILAPI size_t iof_write_iofile (iof *O, iof_file *iofile, int savepos); UTILAPI int iof_putc (iof *O, int u); UTILAPI int iof_hputc (iof *O, int u); UTILAPI int iof_iputc (iof *O, int u); UTILAPI size_t iof_write (iof *O, const void *data, size_t size); UTILAPI size_t iof_hwrite (iof *O, const void *data, size_t size); UTILAPI size_t iof_iwrite (iof *O, const void *data, size_t size); UTILAPI iof_status iof_puts (iof *O, const void *data); UTILAPI size_t iof_put_string (iof *O, const void *data); UTILAPI size_t iof_putfs (iof *O, const char *format, ...); UTILAPI size_t iof_repc (iof *O, char c, size_t bytes); #define iof_putl(O, s) iof_write(O, "" s, sizeof(s)-1) //#define iof_putl iof_puts #define iof_set(O, c) (*(O)->pos++ = (uint8_t)(c)) #define iof_set2(O, c1, c2) (iof_set(O, c1), iof_set(O, c2)) #define iof_set3(O, c1, c2, c3) (iof_set(O, c1), iof_set(O, c2), iof_set(O, c3)) #define iof_set4(O, c1, c2, c3, c4) (iof_set(O, c1), iof_set(O, c2), iof_set(O, c3), iof_set(O, c4)) #define iof_set5(O, c1, c2, c3, c4, c5) (iof_set(O, c1), iof_set(O, c2), iof_set(O, c3), iof_set(O, c4), iof_set(O, c5)) #define iof_hset(O, c) (*(O)->hpos++ = (uint16_t)(c)) #define iof_iset(O, c) (*(O)->ipos++ = (uint32_t)(c)) #define iof_put(O, c) ((void)iof_ensure(O, 1), iof_set(O, c)) #define iof_put2(O, c1, c2) ((void)iof_ensure(O, 2), iof_set2(O, c1, c2)) #define iof_put3(O, c1, c2, c3) ((void)iof_ensure(O, 3), iof_set3(O, c1, c2, c3)) #define iof_put4(O, c1, c2, c3, c4) ((void)iof_ensure(O, 4), iof_set4(O, c1, c2, c3, c4)) #define iof_put5(O, c1, c2, c3, c4, c5) ((void)iof_ensure(O, 5), iof_set5(O, c1, c2, c3, c4, c5)) #define iof_hput(O, c) ((void)iof_hensure(O, 1), iof_hset(O, c)) #define iof_iput(O, c) ((void)iof_iensure(O, 1), iof_iset(O, c)) #define iof_put_uc_hex(O, c) 
iof_put2(O, base16_uc_digit1(c), base16_uc_digit2(c)) #define iof_put_lc_hex(O, c) iof_put2(O, base16_lc_digit1(c), base16_lc_digit2(c)) #define iof_set_uc_hex(O, c) iof_set2(O, base16_uc_digit1(c), base16_uc_digit2(c)) #define iof_set_lc_hex(O, c) iof_set2(O, base16_lc_digit1(c), base16_lc_digit2(c)) #define iof_put_hex iof_put_uc_hex #define iof_set_hex iof_set_uc_hex /* number from iof; return 1 on success, 0 otherwise */ #define iof_scan_sign(I, c, sign) _scan_sign(c, sign, iof_next(I)) #define iof_scan_integer(I, c, number) _scan_integer(c, number, iof_next(I)) #define iof_scan_radix(I, c, number, radix) _scan_radix(c, number, radix, iof_next(I)) #define iof_read_integer(I, c, number) _read_integer(c, number, iof_next(I)) #define iof_read_radix(I, c, number, radix) _read_radix(c, number, radix, iof_next(I)) #define iof_scan_decimal(I, c, number) _scan_decimal(c, number, iof_next(I)) #define iof_scan_fraction(I, c, number, exponent10) _scan_fraction(c, number, exponent10, iof_next(I)) #define iof_scan_exponent10(I, c, exponent10) _scan_exponent10(c, exponent10, iof_next(I)) UTILAPI int iof_get_int32 (iof *I, int32_t *number); UTILAPI int iof_get_slong (iof *I, long *number); UTILAPI int iof_get_int64 (iof *I, int64_t *number); UTILAPI int iof_get_uint32 (iof *I, uint32_t *number); UTILAPI int iof_get_ulong (iof *I, unsigned long *number); UTILAPI int iof_get_usize (iof *I, size_t *number); UTILAPI int iof_get_uint64 (iof *I, uint64_t *number); UTILAPI int iof_get_int32_radix (iof *I, int32_t *number, int radix); UTILAPI int iof_get_slong_radix (iof *I, long *number, int radix); UTILAPI int iof_get_int64_radix (iof *I, int64_t *number, int radix); UTILAPI int iof_get_uint32_radix (iof *I, uint32_t *number, int radix); UTILAPI int iof_get_ulong_radix (iof *I, unsigned long *number, int radix); UTILAPI int iof_get_usize_radix (iof *I, size_t *number, int radix); UTILAPI int iof_get_uint64_radix (iof *I, uint64_t *number, int radix); #if defined(INTLW_IS_INT64) # 
define iof_get_intlw(I, number) iof_get_int64(I, number) # define iof_get_uintlw(I, number) iof_get_uint64(I, number) # define iof_get_intlw_radix(I, number, radix) iof_get_int64_radix(I, number, radix) # define iof_get_uintlw_radix(I, number, radix) iof_get_uint64_radix(I, number, radix) #elif defined(INTLW_IS_LONG) # define iof_get_intlw(I, number) iof_get_slong(I, number) # define iof_get_uintlw(I, number) iof_get_ulong(I, number) # define iof_get_intlw_radix(I, number, radix) iof_get_slong_radix(I, number, radix) # define iof_get_uintlw_radix(I, number, radix) iof_get_ulong_radix(I, number, radix) #endif UTILAPI int iof_get_roman (iof *I, uint16_t *number); UTILAPI int iof_get_double (iof *I, double *number); UTILAPI int iof_get_float (iof *I, float *number); UTILAPI int iof_conv_double (iof *I, double *number); UTILAPI int iof_conv_float (iof *I, float *number); /* number to iof; return a number of written bytes */ UTILAPI size_t iof_put_int32 (iof *O, int32_t number); UTILAPI size_t iof_put_slong (iof *O, long number); UTILAPI size_t iof_put_int64 (iof *O, int64_t number); UTILAPI size_t iof_put_uint32 (iof *O, uint32_t number); UTILAPI size_t iof_put_ulong (iof *O, unsigned long number); UTILAPI size_t iof_put_usize (iof *O, size_t number); UTILAPI size_t iof_put_uint64 (iof *O, uint64_t number); UTILAPI size_t iof_put_int32_radix (iof *O, int32_t number, int radix, int uc); UTILAPI size_t iof_put_slong_radix (iof *O, long number, int radix, int uc); UTILAPI size_t iof_put_int64_radix (iof *O, int64_t number, int radix, int uc); UTILAPI size_t iof_put_uint32_radix (iof *O, uint32_t number, int radix, int uc); UTILAPI size_t iof_put_ulong_radix (iof *O, unsigned long number, int radix, int uc); UTILAPI size_t iof_put_usize_radix (iof *O, size_t number, int radix, int uc); UTILAPI size_t iof_put_uint64_radix (iof *O, uint64_t number, int radix, int uc); #if defined(INTLW_IS_INT64) # define iof_put_intlw(O, number) iof_put_int64(O, number) # define 
iof_put_uintlw(O, number) iof_put_uint64(O, number) # define iof_put_intlw_radix(O, number, radix, uc) iof_put_int64_radix(O, number, radix, uc) # define iof_put_uintlw_radix(O, number, radix, uc) iof_put_uint64_radix(O, number, radix, uc) #elif defined(INTLW_IS_LONG) # define iof_put_intlw(O, number) iof_put_slong(O, number) # define iof_put_uintlw(O, number) iof_put_ulong(O, number) # define iof_put_intlw_radix(O, number, radix, uc) iof_put_slong_radix(O, number, radix, uc) # define iof_put_uintlw_radix(O, number, radix, uc) iof_put_ulong_radix(O, number, radix, uc) #endif UTILAPI size_t iof_put_roman (iof *O, uint16_t number, int uc); UTILAPI size_t iof_put_double(iof *O, double number, int digits); UTILAPI size_t iof_put_float(iof *O, float number, int digits); /* common helpers for binary parsers */ UTILAPI int iof_get_be_uint2 (iof *I, uint32_t *pnumber); UTILAPI int iof_get_be_uint3 (iof *I, uint32_t *pnumber); UTILAPI int iof_get_be_uint4 (iof *I, uint32_t *pnumber); UTILAPI int iof_get_le_uint2 (iof *I, uint32_t *pnumber); UTILAPI int iof_get_le_uint3 (iof *I, uint32_t *pnumber); UTILAPI int iof_get_le_uint4 (iof *I, uint32_t *pnumber); // iof_set() and iof_put() suite casts arguments to uint8_t, so we don't need &0xff mask #define iof_set_be_uint1(O, u) iof_set(O, u) #define iof_set_be_uint2(O, u) iof_set2(O, (u)>>8, u) #define iof_set_be_uint3(O, u) iof_set3(O, (u)>>16, (u)>>8, u) #define iof_set_be_uint4(O, u) iof_set4(O, (u)>>24, (u)>>16, (u)>>8, u) #define iof_set_le_uint1(O, u) iof_set(O, u) #define iof_set_le_uint2(O, u) iof_set2(O, u, (u)>>8) #define iof_set_le_uint3(O, u) iof_set3(O, u, (u)>>8, (u)>>16) #define iof_set_le_uint4(O, u) iof_set4(O, u, (u)>>8, (u)>>16, (u)>>24) #define iof_put_be_uint1(O, u) iof_put(O, u) #define iof_put_be_uint2(O, u) iof_put2(O, (u)>>8, u) #define iof_put_be_uint3(O, u) iof_put3(O, (u)>>16, (u)>>8, u) #define iof_put_be_uint4(O, u) iof_put4(O, (u)>>24, (u)>>16, (u)>>8, u) #define iof_put_le_uint1(O, u) iof_put(O, u) 
#define iof_put_le_uint2(O, u) iof_put2(O, u, (u)>>8) #define iof_put_le_uint3(O, u) iof_put3(O, u, (u)>>8, (u)>>16) #define iof_put_le_uint4(O, u) iof_put4(O, u, (u)>>8, (u)>>16, (u)>>24) /* buffer results */ #define iof_reader_result(I, size) ((size = (size_t)iof_left(I)), (I)->pos) #define iof_writer_result(I, size) ((size = (size_t)iof_size(I)), (I)->buf) #define iof_result(I, size) (((I)->flags & IOF_READER) ? iof_reader_result(I, size) : iof_writer_result(I, size)) uint8_t * iof_file_input_data (iof_file *iofile, size_t *psize, int *isnew); //uint8_t * iof_file_reader_data (iof_file *iofile, size_t *size); //uint8_t * iof_file_writer_data (iof_file *iofile, size_t *size); uint8_t * iof_reader_data (iof *I, size_t *psize); uint8_t * iof_writer_data (iof *O, size_t *psize); size_t iof_reader_to_file_handle (iof *I, FILE *file); size_t iof_reader_to_file (iof *I, const char *filename); #define iof_loaded(I) ((I)->end = (I)->pos, (I)->pos = (I)->buf, iof_left(I)) #define iof_data_to_file_handle(data, size, file) fwrite(data, sizeof(uint8_t), size, file) UTILAPI size_t iof_data_to_file (const void *data, size_t size, const char *filename); UTILAPI size_t iof_result_to_file_handle (iof *F, FILE *file); UTILAPI size_t iof_result_to_file (iof *F, const char *filename); UTILAPI void iof_debug (iof *I, const char *filename); /* common filters allocator */ void iof_filters_init (void); void iof_filters_free (void); iof * iof_filter_reader_new (iof_handler handler, size_t statesize, void **pstate); #define iof_filter_reader(handler, statesize, pstate) iof_filter_reader_new(handler, statesize, (void **)(pstate)) iof * iof_filter_reader_with_buffer_new (iof_handler handler, size_t statesize, void **pstate, void *buffer, size_t buffersize); #define iof_filter_reader_with_buffer(handler, statesize, pstate, buffer, buffersize) iof_filter_reader_with_buffer_new(handler, statesize, (void **)(pstate), buffer, buffersize) iof * iof_filter_writer_new (iof_handler handler, size_t 
statesize, void **pstate); #define iof_filter_writer(handler, statesize, pstate) iof_filter_writer_new(handler, statesize, (void **)(pstate)) iof * iof_filter_writer_with_buffer_new (iof_handler handler, size_t statesize, void **pstate, void *buffer, size_t buffersize); #define iof_filter_writer_with_buffer(handler, statesize, pstate, buffer, buffersize) iof_filter_writer_with_buffer_new(handler, statesize, (void **)(pstate), buffer, buffersize) #define iof_filter_state(statetype, F) (statetype)((void *)((F) + 1)) void iof_free (iof *F); void iof_discard (iof *F); size_t iof_resize_buffer_to (iof *O, size_t space); #define iof_resize_buffer(O) iof_resize_buffer_to(O, (O)->space << 1) size_t iof_decoder_retval (iof *I, const char *type, iof_status status); size_t iof_encoder_retval (iof *O, const char *type, iof_status status); /* filters */ iof * iof_filter_file_handle_reader (FILE *file); iof * iof_filter_file_handle_writer (FILE *file); iof * iof_filter_iofile_reader (iof_file *iofile, size_t offset); iof * iof_filter_iofile_writer (iof_file *iofile, size_t offset); iof * iof_filter_file_reader (const char *filename); iof * iof_filter_file_writer (const char *filename); iof * iof_filter_string_reader (const void *s, size_t length); iof * iof_filter_string_writer (const void *s, size_t length); iof * iof_filter_buffer_writer (size_t size); iof * iof_filter_stream_reader (FILE *file, size_t offset, size_t length); iof * iof_filter_stream_coreader (iof_file *iofile, size_t offset, size_t length); iof * iof_filter_stream_writer (FILE *file); iof * iof_filter_stream_cowriter (iof_file *iofile, size_t offset); FILE * iof_filter_file_reader_source (iof *I, size_t *poffset, size_t *plength); iof_file * iof_filter_file_coreader_source (iof *I, size_t *poffset, size_t *plength); iof * iof_filter_reader_replacement (iof *P, iof_handler handler, size_t statesize, void **pstate); #define iof_filter_reader_replace(P, handler, statesize, pstate) iof_filter_reader_replacement(P, 
handler, statesize, (void **)(pstate)) #endifluametatex-2.10.08/source/libraries/pplib/util/utillog.c000066400000000000000000000023621442250314700232220ustar00rootroot00000000000000 #include #include // strlen #include #include "utillog.h" #define LOGGER_BUFFER_SIZE 256 #define LOGGER_PREFIX_SIZE 32 typedef struct { logger_function callback; void *context; size_t pfxlen; } logger_struct; static logger_struct logger = { 0, NULL, 0 }; static char logger_buffer[LOGGER_BUFFER_SIZE+LOGGER_PREFIX_SIZE]; void loggerf (const char *format, ...) { va_list args; int length; va_start(args, format); length = vsnprintf(logger_buffer + logger.pfxlen, LOGGER_BUFFER_SIZE, format, args); if (length > 0) { if (length > LOGGER_BUFFER_SIZE) length = LOGGER_BUFFER_SIZE; } else { loggerf("logger encoding error '%s'", format); length = (int)strlen(logger_buffer); } length += (int)logger.pfxlen; if (logger.callback) logger.callback(logger_buffer, logger.context); else printf("\n%s\n", logger_buffer); va_end(args); } void logger_callback (logger_function callback, void *context) { logger.callback = callback; logger.context = context; } int logger_prefix (const char *prefix) { size_t pfxlen; pfxlen = strlen(prefix); if (pfxlen > LOGGER_PREFIX_SIZE) return 0; memcpy(logger_buffer, prefix, pfxlen); logger.pfxlen = pfxlen; return 1; } luametatex-2.10.08/source/libraries/pplib/util/utillog.h000066400000000000000000000004031442250314700232210ustar00rootroot00000000000000 #ifndef UTIL_LOG_H #define UTIL_LOG_H typedef void (*logger_function) (const char *message, void *alien); void loggerf (const char *format, ...); void logger_callback (logger_function callback, void *context); int logger_prefix (const char *prefix); #endifluametatex-2.10.08/source/libraries/pplib/util/utillzw.c000066400000000000000000000646531442250314700232700ustar00rootroot00000000000000/* lzw implementation for postscript/pdf filters # Notes on LZW # Encoder Initially the table contains 256 entires for single bytes. 
Encoder consumes input bytes trying to find the longest sequence stored so far in the table. Once it finds a sequence that is not present in the table, it outputs the table index of the longest sequence found (accumulated bytes except the last consumed) and pushes the new sequence (accumulated bytes including the last one) on the top of the table. The last taken byte is not yet written to the output, it becomes the beginning of the new sequence to accumulate. Initially, encoder outputs 9-bit codes. While the table grows, the number of bits for each code increases up to 12. In example, after adding a table entry of index 511 it is high time to switch to 10-bit bytes. /EarlyChange=true parameter in stream dictionary (both postscript and pdf) informs to increase the number of bits one code earlier then necessary. Looks pretty much like an early days bug that became a specification :) I have never found a PDF having /EarlyChange key specified anyway. Once the table becomes full (or when encoder decides it is worthy), a clear-table marker (code 256) purges the table and restores codes length to 9. End-of-data marker (code 257) ends the stream. Conventionally, the beginning of the stream starts with clear-table marker. Postscript allows to provide a /UnitLength which determines the bit length of codes. The above description assumes UnitLength=8 (default). Allowed values are from 3 to 8. Different UnitLength also affects markers; clear-table is then 2^UnitLength and end-of-data marker is 2^UnitLenth+1. Encoder outputs 9-12bit codes that are packed into bytes using high-bits-first scheme (default) or low-bits-scheme. PDF spec p. 73 (PS spec p. 
135 gives an mistaken output sequence and so mistaken output bytes) Input character sequence (decimal) 45 45 45 45 45 65 45 45 45 66 Output 9bit codes (decimal) 256 45 258 258 65 259 66 257 Output 9bit codes (binary) 100000000 000101101 100000010 100000010 001000001 100000011 001000010 100000001 Output bytes (LowBitsFirst=false); eight high-order bits of code becomes the first byte, remaining low-order bit of code becomes the high-order bit of the next byte; 10000000 00001011 01100000 01010000 00100010 00001100 00001100 10000101 00000001 -> 80 0B 60 50 22 0C 0C 85 01 Output bytes (binary, LowBitsFirst=true); eight low-order bits of code becomes the first byte, remaining high-order bit of code becomes low-order bit of the next byte; 00000000 01011011 00001000 00010100 00011000 01100100 10100000 10000000 10010000 -> 00 5B 08 14 18 64 A0 80 90 # Decoder Decoder consumes input bytes transforming them to 9 to 12 bit codes. Initially it starts with 9bit codes and the table of 258 fixed codes (same as encoder). Basically, it interprets incoming codes as table indices (except 256 and 257 markers) and it outputs byte sequences stored at given indices. It also upbuilds the table and changes the number of bits of codes when necessary. The key point on lzw is that both encoder and decoder builds the table synchronously. However, decoder needs some "knowledge" about how encoder works to be able to interpret a table index that it doesn't have so far. Look that the output from encoder in the example above. The first output code is conventional clear-table (256). Then comes a code 45. So far so good, decoder interprets code 45 as a (fixed) entry of the table, emitting byte 45. The next code is 258, which is should be interpreted as an index in the table. Oops, encoder doesn't have one yet. If that occurs, it means that encoder was able to output the new entry code just after adding it to a table. 
It means that sequence_before + next_byte == next_byte + sequence_after This may happen not only for sequences like 45 45 45, but also symmetric series such as abcbabcba; abcb + a == a + bcba. Decoder must be aware of that and if it gets a code one larger than the top table index, it should create one on-fly by appending last entry sequence by the first by of the last entry. # UnitLength Postscript specification mentions about UnitLength parameter that can be used in LZW decoder (not allowed in encoder), with possible values from 3 to 8. This parameter determines the number of bits per code; form UnitLength + 1 to 12. It also determines which codes are used for clear-table marker (2^UnitLength) and end-of-data marker ((2^UnitLength)+1). Postscript specification says (page 134): "Initially, the code length is (UnitLength + 1) bits and the table contains only entries for the (2^UnitLength + 2) fixed codes. As encoding proceeds, entries are appended to the table, associating new codes with longer and longer input character sequences. The encoding and decoding filters maintain identical copies of this table." Later on page 136 Postscript specification says: "Data that has been LZW-encoded with a UnitLength less than 8 consists only of codes in the range 0 to 2^UnitLength - 1; consequently, the LZWDecode filter produces only codes in that range when read. UnitLength also affects the encoded representation, as described above." UnitLength (Postscript only) and LowBitsFirst are used only by decoder. EarlyChange should obviously be respected by both encoder and decoder. When table index reaches current bit length boundary (511, 1023, ...) it must react by increasing the number of bits of input code. But if the index reaches it maximum value (when the table is full), decoder is NOT supposed to clear the table. When the table is full, encoder must emit clear-table marker and it emits this code using 12 bits and reinitialize code bits after that. 
It means that, when the table is full, decoder should get one more 12-bit code (which should be clear-table marker) and actually clear the table and reinitialize code bits after that. # Clear-table vs last entry track (after tries and checks) It is also not quite clear what should actually happen when encoder gets a full table and it is supposed to emit clear-table marker. When it gets full, it means that it has just appended another entry to the table. And that happens only the input sequence collected so far plus the last byte is not present in the table. Encoder is supposed to output the table index of the present sequence and set the recent byte as a starting index of the new sequence to be collected. Even if it is time to clear the table, encoder is still supposed to keep the track of the last table entry. Decoder, however, must drop the track of the last code on clear-table. # Decoder table vs encoder table While decoding we need query lzw table by (subsequent) numeric codes and output character sequences stored in the table. While encoding we need to query the table on every input byte and fetch indices pointing to character sequences. Note that we never need to query the entire table for the longest sequence found so far. The encoder table do not need to access the longest character sequence at one piece. It is enough to keep the track of the current table index and the very next byte. We organize an encoder table into a search tree, where every node contains its table index (value) and last byte (key). Except initial tree content, every node is created on the base of the previous node and it conceptually point the sequence represented by that nodo consists of the previous node sequence plus the next byte. Every new node is a descendant of the node it has been derived from. Every node has a map (a search subtree) indexed by suffix byte value, pointing to descendants nodes. 
Every node also has binary tentackles (left/right fields) necessary to search the map (except initials, every node lives in a map of some ancestor node). The key point is that on every input byte we don't search the entire tree, but only the map of the current node children. The map tree is a simple binary tree with no balancing mechanism (not worthy to optimize an ephemeric structure that may be upbuilt more often then queried). In our implementation, decoder table requires 4069 entries (topmost index 4095). Encoder table, however, needs 4097 entries to handle the case when EarlyIndex parameter is 0 (I have never a chance to test that in practise). The node of index 4096 might be added to a search tree, but its code is never emitted; the lookup is purged just after adding that node. todo: - support for LowBitsFirst encoding */ #include "utilmem.h" #include "utillzw.h" /* filter state struct */ typedef struct lzw_entry { union { const char *rdata; // to be able to init with string literal char *data; }; int size; } lzw_entry; #define lzw_index short typedef struct lzw_node lzw_node; struct lzw_node { lzw_index index; unsigned char suffix; lzw_node *left; lzw_node *right; lzw_node *map; }; struct lzw_state { union { lzw_node *lookup; /* encoder table */ lzw_entry *table; /* decoder table */ }; lzw_index index; /* table index */ union { lzw_node *lastnode; /* previous encoder table node */ struct { lzw_entry *lastentry; /* previous decoder table entry */ int tailbytes; /* num of bytes of lastentry not yet written out */ }; }; int basebits; /* /UnitLength parameter (8) */ int codebits; /* current code bits */ int lastbyte; /* previosly read byte */ int tailbits; /* lastbyte bits not yet consumed */ int flush; /* encoder */ int flags; /* options */ }; typedef union { lzw_state *lzwstate; void *voidstate; } lzw_state_pointer; // to avoid 'dereferencing type-puned ...' 
warnings #define LZW_INIT_STATE { { 0 }, 0, { 0 }, 0, 0, 0, 0, 0, 0 } /* macros */ #define LZW_MIN_BITS 3 #define LZW_MAX_BITS 12 #define LZW_TABLE_SIZE (1 << LZW_MAX_BITS) #define LZW_LOOKUP_SIZE (LZW_TABLE_SIZE + 1) #define lzw_bit_range(bits) (bits >= LZW_MIN_BITS && bits <= LZW_BASE_BITS) #define lzw_base_bits(flags) (flags & ((1 << 4) - 1)) // 4 low bits of flags is basebits (UnitLength) #define lzw_initial_codes(state) (1 << state->basebits) #define lzw_clear_code(state) lzw_initial_codes(state) #define lzw_eod_code(state) (lzw_initial_codes(state) + 1) #define lzw_initial_index(state) (lzw_initial_codes(state) + 2) #define lzw_max_index(state) ((1 << state->codebits) - ((state->flags & LZW_EARLY_INDEX) ? 1 : 0)) #define lzw_check_bits(state) ((void)(state->index == lzw_max_index(state) && state->codebits < LZW_MAX_BITS && ++state->codebits)) #define lzw_malloc util_malloc #define lzw_free util_free /* decoder */ static struct lzw_entry lzw_initial_table[] = { {{"\x00"}, 1}, {{"\x01"}, 1}, {{"\x02"}, 1}, {{"\x03"}, 1}, {{"\x04"}, 1}, {{"\x05"}, 1}, {{"\x06"}, 1}, {{"\x07"}, 1}, {{"\x08"}, 1}, {{"\x09"}, 1}, {{"\x0A"}, 1}, {{"\x0B"}, 1}, {{"\x0C"}, 1}, {{"\x0D"}, 1}, {{"\x0E"}, 1}, {{"\x0F"}, 1}, {{"\x10"}, 1}, {{"\x11"}, 1}, {{"\x12"}, 1}, {{"\x13"}, 1}, {{"\x14"}, 1}, {{"\x15"}, 1}, {{"\x16"}, 1}, {{"\x17"}, 1}, {{"\x18"}, 1}, {{"\x19"}, 1}, {{"\x1A"}, 1}, {{"\x1B"}, 1}, {{"\x1C"}, 1}, {{"\x1D"}, 1}, {{"\x1E"}, 1}, {{"\x1F"}, 1}, {{"\x20"}, 1}, {{"\x21"}, 1}, {{"\x22"}, 1}, {{"\x23"}, 1}, {{"\x24"}, 1}, {{"\x25"}, 1}, {{"\x26"}, 1}, {{"\x27"}, 1}, {{"\x28"}, 1}, {{"\x29"}, 1}, {{"\x2A"}, 1}, {{"\x2B"}, 1}, {{"\x2C"}, 1}, {{"\x2D"}, 1}, {{"\x2E"}, 1}, {{"\x2F"}, 1}, {{"\x30"}, 1}, {{"\x31"}, 1}, {{"\x32"}, 1}, {{"\x33"}, 1}, {{"\x34"}, 1}, {{"\x35"}, 1}, {{"\x36"}, 1}, {{"\x37"}, 1}, {{"\x38"}, 1}, {{"\x39"}, 1}, {{"\x3A"}, 1}, {{"\x3B"}, 1}, {{"\x3C"}, 1}, {{"\x3D"}, 1}, {{"\x3E"}, 1}, {{"\x3F"}, 1}, {{"\x40"}, 1}, {{"\x41"}, 1}, {{"\x42"}, 1}, {{"\x43"}, 
1}, {{"\x44"}, 1}, {{"\x45"}, 1}, {{"\x46"}, 1}, {{"\x47"}, 1}, {{"\x48"}, 1}, {{"\x49"}, 1}, {{"\x4A"}, 1}, {{"\x4B"}, 1}, {{"\x4C"}, 1}, {{"\x4D"}, 1}, {{"\x4E"}, 1}, {{"\x4F"}, 1}, {{"\x50"}, 1}, {{"\x51"}, 1}, {{"\x52"}, 1}, {{"\x53"}, 1}, {{"\x54"}, 1}, {{"\x55"}, 1}, {{"\x56"}, 1}, {{"\x57"}, 1}, {{"\x58"}, 1}, {{"\x59"}, 1}, {{"\x5A"}, 1}, {{"\x5B"}, 1}, {{"\x5C"}, 1}, {{"\x5D"}, 1}, {{"\x5E"}, 1}, {{"\x5F"}, 1}, {{"\x60"}, 1}, {{"\x61"}, 1}, {{"\x62"}, 1}, {{"\x63"}, 1}, {{"\x64"}, 1}, {{"\x65"}, 1}, {{"\x66"}, 1}, {{"\x67"}, 1}, {{"\x68"}, 1}, {{"\x69"}, 1}, {{"\x6A"}, 1}, {{"\x6B"}, 1}, {{"\x6C"}, 1}, {{"\x6D"}, 1}, {{"\x6E"}, 1}, {{"\x6F"}, 1}, {{"\x70"}, 1}, {{"\x71"}, 1}, {{"\x72"}, 1}, {{"\x73"}, 1}, {{"\x74"}, 1}, {{"\x75"}, 1}, {{"\x76"}, 1}, {{"\x77"}, 1}, {{"\x78"}, 1}, {{"\x79"}, 1}, {{"\x7A"}, 1}, {{"\x7B"}, 1}, {{"\x7C"}, 1}, {{"\x7D"}, 1}, {{"\x7E"}, 1}, {{"\x7F"}, 1}, {{"\x80"}, 1}, {{"\x81"}, 1}, {{"\x82"}, 1}, {{"\x83"}, 1}, {{"\x84"}, 1}, {{"\x85"}, 1}, {{"\x86"}, 1}, {{"\x87"}, 1}, {{"\x88"}, 1}, {{"\x89"}, 1}, {{"\x8A"}, 1}, {{"\x8B"}, 1}, {{"\x8C"}, 1}, {{"\x8D"}, 1}, {{"\x8E"}, 1}, {{"\x8F"}, 1}, {{"\x90"}, 1}, {{"\x91"}, 1}, {{"\x92"}, 1}, {{"\x93"}, 1}, {{"\x94"}, 1}, {{"\x95"}, 1}, {{"\x96"}, 1}, {{"\x97"}, 1}, {{"\x98"}, 1}, {{"\x99"}, 1}, {{"\x9A"}, 1}, {{"\x9B"}, 1}, {{"\x9C"}, 1}, {{"\x9D"}, 1}, {{"\x9E"}, 1}, {{"\x9F"}, 1}, {{"\xA0"}, 1}, {{"\xA1"}, 1}, {{"\xA2"}, 1}, {{"\xA3"}, 1}, {{"\xA4"}, 1}, {{"\xA5"}, 1}, {{"\xA6"}, 1}, {{"\xA7"}, 1}, {{"\xA8"}, 1}, {{"\xA9"}, 1}, {{"\xAA"}, 1}, {{"\xAB"}, 1}, {{"\xAC"}, 1}, {{"\xAD"}, 1}, {{"\xAE"}, 1}, {{"\xAF"}, 1}, {{"\xB0"}, 1}, {{"\xB1"}, 1}, {{"\xB2"}, 1}, {{"\xB3"}, 1}, {{"\xB4"}, 1}, {{"\xB5"}, 1}, {{"\xB6"}, 1}, {{"\xB7"}, 1}, {{"\xB8"}, 1}, {{"\xB9"}, 1}, {{"\xBA"}, 1}, {{"\xBB"}, 1}, {{"\xBC"}, 1}, {{"\xBD"}, 1}, {{"\xBE"}, 1}, {{"\xBF"}, 1}, {{"\xC0"}, 1}, {{"\xC1"}, 1}, {{"\xC2"}, 1}, {{"\xC3"}, 1}, {{"\xC4"}, 1}, {{"\xC5"}, 1}, {{"\xC6"}, 1}, {{"\xC7"}, 1}, {{"\xC8"}, 1}, 
{{"\xC9"}, 1}, {{"\xCA"}, 1}, {{"\xCB"}, 1}, {{"\xCC"}, 1}, {{"\xCD"}, 1}, {{"\xCE"}, 1}, {{"\xCF"}, 1}, {{"\xD0"}, 1}, {{"\xD1"}, 1}, {{"\xD2"}, 1}, {{"\xD3"}, 1}, {{"\xD4"}, 1}, {{"\xD5"}, 1}, {{"\xD6"}, 1}, {{"\xD7"}, 1}, {{"\xD8"}, 1}, {{"\xD9"}, 1}, {{"\xDA"}, 1}, {{"\xDB"}, 1}, {{"\xDC"}, 1}, {{"\xDD"}, 1}, {{"\xDE"}, 1}, {{"\xDF"}, 1}, {{"\xE0"}, 1}, {{"\xE1"}, 1}, {{"\xE2"}, 1}, {{"\xE3"}, 1}, {{"\xE4"}, 1}, {{"\xE5"}, 1}, {{"\xE6"}, 1}, {{"\xE7"}, 1}, {{"\xE8"}, 1}, {{"\xE9"}, 1}, {{"\xEA"}, 1}, {{"\xEB"}, 1}, {{"\xEC"}, 1}, {{"\xED"}, 1}, {{"\xEE"}, 1}, {{"\xEF"}, 1}, {{"\xF0"}, 1}, {{"\xF1"}, 1}, {{"\xF2"}, 1}, {{"\xF3"}, 1}, {{"\xF4"}, 1}, {{"\xF5"}, 1}, {{"\xF6"}, 1}, {{"\xF7"}, 1}, {{"\xF8"}, 1}, {{"\xF9"}, 1}, {{"\xFA"}, 1}, {{"\xFB"}, 1}, {{"\xFC"}, 1}, {{"\xFD"}, 1}, {{"\xFE"}, 1}, {{"\xFF"}, 1} }; #define lzw_entry_at(state, index) (&state->table[index]) static lzw_state * lzw_decoder_init_table (lzw_state *state, lzw_entry *table, int flags) { state->basebits = lzw_base_bits(flags); // first four bits or flags if (!lzw_bit_range(state->basebits)) return NULL; state->flags = flags; if ((state->table = table) == NULL) { state->table = (lzw_entry *)lzw_malloc(LZW_TABLE_SIZE * sizeof(lzw_entry)); state->flags |= LZW_TABLE_ALLOC; } memcpy(state->table, lzw_initial_table, (size_t)lzw_initial_codes(state)*sizeof(lzw_entry)); // memset(&state->table[lzw_initial_codes(state)], 0, 2*sizeof(lzw_entry)); // eod and clear entries never accessed state->codebits = state->basebits + 1; state->index = lzw_initial_index(state); state->lastentry = NULL; state->tailbytes = 0; state->lastbyte = 0; state->tailbits = 0; return state; } lzw_state * lzw_decoder_init (lzw_state *state, int flags) { return lzw_decoder_init_table(state, NULL, flags); } static void lzw_decoder_clear (lzw_state *state) { lzw_entry *entry; lzw_index initindex = lzw_initial_index(state); while (state->index > initindex) { entry = lzw_entry_at(state, --state->index); lzw_free(entry->data); // 
entry->data = NULL; // entry->size = 0; } state->lastentry = NULL; state->tailbytes = 0; state->codebits = state->basebits + 1; } void lzw_decoder_close (lzw_state *state) { lzw_decoder_clear(state); if (state->flags & LZW_TABLE_ALLOC) lzw_free(state->table); } static int lzw_next_entry (lzw_state *state, lzw_entry *nextentry) { lzw_entry *lastentry, *newentry; if ((lastentry = state->lastentry) == NULL) return 1; /* its ok */ if (state->index == LZW_TABLE_SIZE) return 0; /* invalid input; eod marker expected earlier */ /* put the new entry on the top of the table */ newentry = lzw_entry_at(state, state->index++); /* its size is the last entrtyy size plus 1 */ newentry->size = lastentry->size + 1; /* its content is the content of the last entry, */ newentry->data = (char *)lzw_malloc((size_t)newentry->size); memcpy(newentry->data, lastentry->data, lastentry->size); /* plus the first byte of the new entry (usually fixed code entry) */ newentry->data[newentry->size - 1] = nextentry->data[0]; return 1; } #define lzw_write_bytes(O, state) ((state->tailbytes -= (int)iof_write(O, state->lastentry->data, (size_t)state->tailbytes)) == 0) iof_status lzw_decode_state (iof *I, iof *O, lzw_state *state) { const lzw_index clear = lzw_clear_code(state), eod = lzw_eod_code(state); lzw_index code; lzw_entry *entry; if (state->lastentry != NULL) { /* write out the tail from the last call */ if (state->tailbytes > 0 && !lzw_write_bytes(O, state)) return IOFFULL; /* do what we normally do at the end of the loop body below */ lzw_check_bits(state); } // if (state->flags & LZW_LOW_BITS_FIRST) // return IOFERR; while (1) { /* get input code of length state->codebits */ code = (state->lastbyte & ((1 << state->tailbits) - 1)) << (state->codebits - state->tailbits); for (state->tailbits -= state->codebits; state->tailbits < 0; ) { get_code: if ((state->lastbyte = iof_get(I)) < 0) return state->flush ? 
IOFEOF : state->lastbyte; state->tailbits += 8; if (state->tailbits < 0) { code |= (state->lastbyte << (-state->tailbits)); goto get_code; } else { code |= (state->lastbyte >> state->tailbits); break; } } /* interpret the code */ if (code < state->index) { /* single byte code or special marker */ if (code == clear) { lzw_decoder_clear(state); continue; } if (code == eod) return IOFEOF; entry = lzw_entry_at(state, code); if (!lzw_next_entry(state, entry)) return IOFERR; } else if (code == state->index) { /* apparently encoder has emitted the code of the key just created (see notes) */ if (!lzw_next_entry(state, state->lastentry)) return IOFERR; entry = lzw_entry_at(state, state->index - 1); } else { /* invalid input code */ return IOFERR; } /* record the entry found */ state->lastentry = entry; /* emit the sequence pointed by that entry */ state->tailbytes = entry->size; if (!lzw_write_bytes(O, state)) return IOFFULL; /* check and update code bits */ lzw_check_bits(state); } return state->lastbyte; // never reached } /* encoder */ #define lzw_node_at(state, index) (&state->lookup[index]) #define lzw_node_init(node, i, c) (node->index = i, node->suffix = c, node->left = NULL, node->right = NULL, node->map = NULL) static lzw_state * lzw_encoder_init_table (lzw_state *state, lzw_node *lookup, int flags) { lzw_index index; lzw_node *node; state->basebits = lzw_base_bits(flags); // first four bits of flags is base bits of code (default 8) if (!lzw_bit_range(state->basebits)) return NULL; state->flags = flags; if ((state->lookup = lookup) == NULL) { state->lookup = lzw_malloc(LZW_LOOKUP_SIZE*sizeof(lzw_node)); state->flags |= LZW_TABLE_ALLOC; } state->index = lzw_initial_index(state); for (index = 0; index < lzw_initial_codes(state); ++index) { node = lzw_node_at(state, index); lzw_node_init(node, index, (unsigned char)index); } state->codebits = state->basebits + 1; state->lastnode = NULL; state->lastbyte = 0; state->tailbits = 0; return state; } lzw_state * 
lzw_encoder_init (lzw_state *state, int flags) { return lzw_encoder_init_table(state, NULL, flags); } void lzw_encoder_close (lzw_state *state) { if (state->flags & LZW_TABLE_ALLOC) lzw_free(state->lookup); } static void lzw_encoder_clear (lzw_state *state) { lzw_node *node; lzw_index index; /* clear fixed nodes */ for (index = 0; index < lzw_initial_codes(state); ++index) { node = lzw_node_at(state, index); lzw_node_init(node, index, (unsigned char)index); } /* reset table index */ state->index = lzw_initial_index(state); /* reset code bits */ state->codebits = state->basebits + 1; } static void lzw_put_code (iof *O, lzw_state *state, lzw_index code, int todobits) { int leftbits, rightbits; do { leftbits = 8 - state->tailbits; rightbits = todobits - leftbits; if (rightbits >= 0) { state->lastbyte |= (code >> rightbits); iof_put(O, state->lastbyte); code = code & ((1 << rightbits) - 1); todobits -= leftbits; state->lastbyte = 0; state->tailbits = 0; } else { state->lastbyte |= (code << (-rightbits)); state->tailbits += todobits; return; } } while (1); } static iof_status lzw_encode_last (iof *O, lzw_state *state) { if (state->flush) { /* put the last code if any */ if (state->lastnode != NULL) lzw_put_code(O, state, state->lastnode->index, state->codebits); /* put eod marker, */ lzw_put_code(O, state, lzw_eod_code(state), state->codebits); /* with tail bits set to 0 */ if (state->tailbits > 0) lzw_put_code(O, state, 0, 8 - state->tailbits); return IOFEOF; } return IOFEMPTY; } static lzw_node * lzw_node_push (lzw_state *state, unsigned char suffix) { lzw_node *node; node = lzw_node_at(state, state->index); lzw_node_init(node, state->index, suffix); ++state->index; return node; } static int lzw_next_node (lzw_state *state, unsigned char suffix) { lzw_node *node; if ((node = state->lastnode->map) == NULL) { state->lastnode->map = lzw_node_push(state, suffix); return 0; } while (1) { if (suffix < node->suffix) { if (node->left == NULL) { node->left = 
lzw_node_push(state, suffix); return 0; } node = node->left; } else if (suffix > node->suffix) { if (node->right == NULL) { node->right = lzw_node_push(state, suffix); return 0; } node = node->right; } else { state->lastnode = node; return 1; } } return 0; // never reached } iof_status lzw_encode_state (iof *I, iof *O, lzw_state *state) { int byte; if (state->lastnode == NULL) { /* first call only; following convention, put clear-table marker */ if (!iof_ensure(O, 2)) return IOFFULL; lzw_put_code(O, state, lzw_clear_code(state), state->codebits); /* get the first input byte and initialize the current table entry */ if ((byte = iof_get(I)) < 0) return lzw_encode_last(O, state); state->lastnode = lzw_node_at(state, byte); } while (iof_ensure(O, 2)) { /* we need to write at most 2 bytes on each iteration */ if ((byte = iof_get(I)) < 0) return lzw_encode_last(O, state); if (lzw_next_node(state, (unsigned char)byte) == 0) { /* means that the key hasn't been found and the new entry has just been created */ /* output the code pointing the longest sequence so far */ lzw_put_code(O, state, state->lastnode->index, state->codebits); /* update code bits */ if (state->index == lzw_max_index(state) + 1) { if (state->codebits < LZW_MAX_BITS) ++state->codebits; else { /* put clear-table marker */ lzw_put_code(O, state, lzw_clear_code(state), state->codebits); /* reset the table */ lzw_encoder_clear(state); } } /* in any case, recent byte becomes the current table code */ state->lastnode = lzw_node_at(state, byte); } /* otherwise no new entry is appended and state->lastnode points the longer sequence just found */ } return IOFFULL; } /* single call codecs */ iof_status lzw_decode (iof *I, iof *O, int flags) { lzw_state state = LZW_INIT_STATE; lzw_entry table[LZW_TABLE_SIZE]; int ret; lzw_decoder_init_table(&state, table, flags); state.flush = 1; ret = lzw_decode_state(I, O, &state); // iof_flush(O); // ? 
lzw_decoder_close(&state); return ret; } iof_status lzw_encode (iof *I, iof *O, int flags) { lzw_state state = LZW_INIT_STATE; lzw_node lookup[LZW_LOOKUP_SIZE]; int ret; lzw_encoder_init_table(&state, lookup, flags); state.flush = 1; ret = lzw_encode_state(I, O, &state); // iof_flush(O); // ? lzw_encoder_close(&state); return ret; } /* filters */ // lzw decoder function static size_t lzw_decoder (iof *F, iof_mode mode) { lzw_state *state; iof_status status; size_t tail; state = iof_filter_state(lzw_state *, F); switch(mode) { case IOFLOAD: case IOFREAD: if (F->flags & IOF_STOPPED) return 0; tail = iof_tail(F); F->pos = F->buf + tail; F->end = F->buf + F->space; do { status = lzw_decode_state(F->next, F, state); } while (mode == IOFLOAD && status == IOFFULL && iof_resize_buffer(F)); return iof_decoder_retval(F, "lzw", status); case IOFCLOSE: lzw_decoder_close(state); iof_free(F); return 0; default: break; } return 0; } // lzw encoder function static size_t lzw_encoder (iof *F, iof_mode mode) { lzw_state *state; iof_status status; state = iof_filter_state(lzw_state *, F); switch (mode) { case IOFFLUSH: state->flush = 1; FALLTHRU // fall through case IOFWRITE: F->end = F->pos; F->pos = F->buf; status = lzw_encode_state(F, F->next, state); return iof_encoder_retval(F, "lzw", status); case IOFCLOSE: if (!state->flush) lzw_encoder(F, IOFFLUSH); lzw_encoder_close(state); iof_free(F); return 0; default: break; } return 0; } iof * iof_filter_lzw_decoder (iof *N, int flags) { iof *I; lzw_state_pointer P; I = iof_filter_reader(lzw_decoder, sizeof(lzw_state), &P.voidstate); iof_setup_next(I, N); if (lzw_decoder_init(P.lzwstate, flags) == NULL) { iof_discard(I); return NULL; } P.lzwstate->flush = 1; return I; } iof * iof_filter_lzw_encoder (iof *N, int flags) { iof *O; lzw_state_pointer P; O = iof_filter_writer(lzw_encoder, sizeof(lzw_state), &P.voidstate); iof_setup_next(O, N); if (lzw_encoder_init(P.lzwstate, flags) == NULL) { iof_discard(O); return NULL; } return O; } 
luametatex-2.10.08/source/libraries/pplib/util/utillzw.h000066400000000000000000000015761442250314700232700ustar00rootroot00000000000000#ifndef UTIL_LZW_H #define UTIL_LZW_H #include "utiliof.h" typedef struct lzw_state lzw_state; #define LZW_BASE_BITS 8 #define LZW_TABLE_ALLOC (1<<4) #define LZW_EARLY_INDEX (1<<5) //#define LZW_LOW_BITS_FIRST (1<<6) #define LZW_DECODER_DEFAULTS (LZW_BASE_BITS|LZW_EARLY_INDEX|0) #define LZW_ENCODER_DEFAULTS (LZW_BASE_BITS|LZW_EARLY_INDEX|0) lzw_state * lzw_decoder_init (lzw_state *state, int flags); lzw_state * lzw_encoder_init (lzw_state *state, int flags); void lzw_decoder_close (lzw_state *state); void lzw_encoder_close (lzw_state *state); iof_status lzw_encode_state (iof *I, iof *O, lzw_state *state); iof_status lzw_decode_state (iof *I, iof *O, lzw_state *state); iof_status lzw_encode (iof *I, iof *O, int flags); iof_status lzw_decode (iof *I, iof *O, int flags); iof * iof_filter_lzw_decoder (iof *N, int flags); iof * iof_filter_lzw_encoder (iof *N, int flags); #endifluametatex-2.10.08/source/libraries/pplib/util/utilmd5.c000066400000000000000000000334341442250314700231320ustar00rootroot00000000000000 /* md5 implementation excerpted from code by Peter Deutsch */ /* begin of md5.c */ /* Copyright (C) 1999, 2000, 2002 Aladdin Enterprises. All rights reserved. This software is provided 'as-is', without any express or implied warranty. In no event will the authors be held liable for any damages arising from the use of this software. Permission is granted to anyone to use this software for any purpose, including commercial applications, and to alter it and redistribute it freely, subject to the following restrictions: 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. 2. 
Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. 3. This notice may not be removed or altered from any source distribution. L. Peter Deutsch ghost@aladdin.com */ /* $Id: md5.c,v 1.6 2002/04/13 19:20:28 lpd Exp $ */ /* Independent implementation of MD5 (RFC 1321). This code implements the MD5 Algorithm defined in RFC 1321, whose text is available at http://www.ietf.org/rfc/rfc1321.txt The code is derived from the text of the RFC, including the test suite (section A.5) but excluding the rest of Appendix A. It does not include any code or documentation that is identified in the RFC as being copyrighted. The original and principal author of md5.c is L. Peter Deutsch . Other authors are noted in the change history that follows (in reverse chronological order): 2002-04-13 lpd Clarified derivation from RFC 1321; now handles byte order either statically or dynamically; added missing #include in library. 2002-03-11 lpd Corrected argument list for main(), and added int return type, in test program and T value program. 2002-02-21 lpd Added missing #include in test program. 2000-07-03 lpd Patched to eliminate warnings about "constant is unsigned in ANSI C, signed in traditional"; made test program self-checking. 1999-11-04 lpd Edited comments slightly for automatic TOC extraction. 1999-10-18 lpd Fixed typo in header comment (ansi2knr rather than md5). 1999-05-03 lpd Original version. */ #include // memcpy #include // FILE #include "utilmd5.h" #undef BYTE_ORDER /* 1 = big-endian, -1 = little-endian, 0 = unknown */ #ifdef ARCH_IS_BIG_ENDIAN # define BYTE_ORDER (ARCH_IS_BIG_ENDIAN ? 
1 : -1) #else # define BYTE_ORDER 0 #endif #define T_MASK ((uint32_t)~0) #define T1 /* 0xd76aa478 */ (T_MASK ^ 0x28955b87) #define T2 /* 0xe8c7b756 */ (T_MASK ^ 0x173848a9) #define T3 0x242070db #define T4 /* 0xc1bdceee */ (T_MASK ^ 0x3e423111) #define T5 /* 0xf57c0faf */ (T_MASK ^ 0x0a83f050) #define T6 0x4787c62a #define T7 /* 0xa8304613 */ (T_MASK ^ 0x57cfb9ec) #define T8 /* 0xfd469501 */ (T_MASK ^ 0x02b96afe) #define T9 0x698098d8 #define T10 /* 0x8b44f7af */ (T_MASK ^ 0x74bb0850) #define T11 /* 0xffff5bb1 */ (T_MASK ^ 0x0000a44e) #define T12 /* 0x895cd7be */ (T_MASK ^ 0x76a32841) #define T13 0x6b901122 #define T14 /* 0xfd987193 */ (T_MASK ^ 0x02678e6c) #define T15 /* 0xa679438e */ (T_MASK ^ 0x5986bc71) #define T16 0x49b40821 #define T17 /* 0xf61e2562 */ (T_MASK ^ 0x09e1da9d) #define T18 /* 0xc040b340 */ (T_MASK ^ 0x3fbf4cbf) #define T19 0x265e5a51 #define T20 /* 0xe9b6c7aa */ (T_MASK ^ 0x16493855) #define T21 /* 0xd62f105d */ (T_MASK ^ 0x29d0efa2) #define T22 0x02441453 #define T23 /* 0xd8a1e681 */ (T_MASK ^ 0x275e197e) #define T24 /* 0xe7d3fbc8 */ (T_MASK ^ 0x182c0437) #define T25 0x21e1cde6 #define T26 /* 0xc33707d6 */ (T_MASK ^ 0x3cc8f829) #define T27 /* 0xf4d50d87 */ (T_MASK ^ 0x0b2af278) #define T28 0x455a14ed #define T29 /* 0xa9e3e905 */ (T_MASK ^ 0x561c16fa) #define T30 /* 0xfcefa3f8 */ (T_MASK ^ 0x03105c07) #define T31 0x676f02d9 #define T32 /* 0x8d2a4c8a */ (T_MASK ^ 0x72d5b375) #define T33 /* 0xfffa3942 */ (T_MASK ^ 0x0005c6bd) #define T34 /* 0x8771f681 */ (T_MASK ^ 0x788e097e) #define T35 0x6d9d6122 #define T36 /* 0xfde5380c */ (T_MASK ^ 0x021ac7f3) #define T37 /* 0xa4beea44 */ (T_MASK ^ 0x5b4115bb) #define T38 0x4bdecfa9 #define T39 /* 0xf6bb4b60 */ (T_MASK ^ 0x0944b49f) #define T40 /* 0xbebfbc70 */ (T_MASK ^ 0x4140438f) #define T41 0x289b7ec6 #define T42 /* 0xeaa127fa */ (T_MASK ^ 0x155ed805) #define T43 /* 0xd4ef3085 */ (T_MASK ^ 0x2b10cf7a) #define T44 0x04881d05 #define T45 /* 0xd9d4d039 */ (T_MASK ^ 0x262b2fc6) #define T46 /* 0xe6db99e5 */ 
(T_MASK ^ 0x1924661a) #define T47 0x1fa27cf8 #define T48 /* 0xc4ac5665 */ (T_MASK ^ 0x3b53a99a) #define T49 /* 0xf4292244 */ (T_MASK ^ 0x0bd6ddbb) #define T50 0x432aff97 #define T51 /* 0xab9423a7 */ (T_MASK ^ 0x546bdc58) #define T52 /* 0xfc93a039 */ (T_MASK ^ 0x036c5fc6) #define T53 0x655b59c3 #define T54 /* 0x8f0ccc92 */ (T_MASK ^ 0x70f3336d) #define T55 /* 0xffeff47d */ (T_MASK ^ 0x00100b82) #define T56 /* 0x85845dd1 */ (T_MASK ^ 0x7a7ba22e) #define T57 0x6fa87e4f #define T58 /* 0xfe2ce6e0 */ (T_MASK ^ 0x01d3191f) #define T59 /* 0xa3014314 */ (T_MASK ^ 0x5cfebceb) #define T60 0x4e0811a1 #define T61 /* 0xf7537e82 */ (T_MASK ^ 0x08ac817d) #define T62 /* 0xbd3af235 */ (T_MASK ^ 0x42c50dca) #define T63 0x2ad7d2bb #define T64 /* 0xeb86d391 */ (T_MASK ^ 0x14792c6e) static void md5_process (md5_state *state, const uint8_t *data /*[64]*/) { uint32_t a = state->words[0], b = state->words[1], c = state->words[2], d = state->words[3]; uint32_t t; #if BYTE_ORDER > 0 /* Define storage only for big-endian CPUs. */ uint32_t X[16]; #else /* Define storage for little-endian or both types of CPUs. */ uint32_t xbuf[16]; const uint32_t *X; #endif { #if BYTE_ORDER == 0 /* * Determine dynamically whether this is a big-endian or * little-endian machine, since we can use a more efficient * algorithm on the latter. */ static const int w = 1; if (*((const uint8_t *)&w)) /* dynamic little-endian */ #endif #if BYTE_ORDER <= 0 /* little-endian */ { /* * On little-endian machines, we can process properly aligned * data without copying it. */ if (!((data - (const uint8_t *)0) & 3)) { /* data are properly aligned */ X = (const uint32_t *)((const void *)data); // avoid compiler warning } else { /* not aligned */ memcpy(xbuf, data, 64); X = xbuf; } } #endif #if BYTE_ORDER == 0 else /* dynamic big-endian */ #endif #if BYTE_ORDER >= 0 /* big-endian */ { /* * On big-endian machines, we must arrange the bytes in the * right order. 
*/ const uint8_t *xp = data; int i; # if BYTE_ORDER == 0 X = xbuf; /* (dynamic only) */ # else # define xbuf X /* (static only) */ # endif for (i = 0; i < 16; ++i, xp += 4) xbuf[i] = xp[0] + (xp[1] << 8) + (xp[2] << 16) + (xp[3] << 24); } #endif } #define ROTATE_LEFT(x, n) (((x) << (n)) | ((x) >> (32 - (n)))) /* Round 1. */ /* Let [abcd k s i] denote the operation a = b + ((a + F(b,c,d) + X[k] + T[i]) <<< s). */ #define F(x, y, z) (((x) & (y)) | (~(x) & (z))) #define SET(a, b, c, d, k, s, Ti)\ t = a + F(b,c,d) + X[k] + Ti;\ a = ROTATE_LEFT(t, s) + b /* Do the following 16 operations. */ SET(a, b, c, d, 0, 7, T1); SET(d, a, b, c, 1, 12, T2); SET(c, d, a, b, 2, 17, T3); SET(b, c, d, a, 3, 22, T4); SET(a, b, c, d, 4, 7, T5); SET(d, a, b, c, 5, 12, T6); SET(c, d, a, b, 6, 17, T7); SET(b, c, d, a, 7, 22, T8); SET(a, b, c, d, 8, 7, T9); SET(d, a, b, c, 9, 12, T10); SET(c, d, a, b, 10, 17, T11); SET(b, c, d, a, 11, 22, T12); SET(a, b, c, d, 12, 7, T13); SET(d, a, b, c, 13, 12, T14); SET(c, d, a, b, 14, 17, T15); SET(b, c, d, a, 15, 22, T16); #undef SET /* Round 2. */ /* Let [abcd k s i] denote the operation a = b + ((a + G(b,c,d) + X[k] + T[i]) <<< s). */ #define G(x, y, z) (((x) & (z)) | ((y) & ~(z))) #define SET(a, b, c, d, k, s, Ti)\ t = a + G(b,c,d) + X[k] + Ti;\ a = ROTATE_LEFT(t, s) + b /* Do the following 16 operations. */ SET(a, b, c, d, 1, 5, T17); SET(d, a, b, c, 6, 9, T18); SET(c, d, a, b, 11, 14, T19); SET(b, c, d, a, 0, 20, T20); SET(a, b, c, d, 5, 5, T21); SET(d, a, b, c, 10, 9, T22); SET(c, d, a, b, 15, 14, T23); SET(b, c, d, a, 4, 20, T24); SET(a, b, c, d, 9, 5, T25); SET(d, a, b, c, 14, 9, T26); SET(c, d, a, b, 3, 14, T27); SET(b, c, d, a, 8, 20, T28); SET(a, b, c, d, 13, 5, T29); SET(d, a, b, c, 2, 9, T30); SET(c, d, a, b, 7, 14, T31); SET(b, c, d, a, 12, 20, T32); #undef SET /* Round 3. */ /* Let [abcd k s t] denote the operation a = b + ((a + H(b,c,d) + X[k] + T[i]) <<< s). 
*/ #define H(x, y, z) ((x) ^ (y) ^ (z)) #define SET(a, b, c, d, k, s, Ti)\ t = a + H(b,c,d) + X[k] + Ti;\ a = ROTATE_LEFT(t, s) + b /* Do the following 16 operations. */ SET(a, b, c, d, 5, 4, T33); SET(d, a, b, c, 8, 11, T34); SET(c, d, a, b, 11, 16, T35); SET(b, c, d, a, 14, 23, T36); SET(a, b, c, d, 1, 4, T37); SET(d, a, b, c, 4, 11, T38); SET(c, d, a, b, 7, 16, T39); SET(b, c, d, a, 10, 23, T40); SET(a, b, c, d, 13, 4, T41); SET(d, a, b, c, 0, 11, T42); SET(c, d, a, b, 3, 16, T43); SET(b, c, d, a, 6, 23, T44); SET(a, b, c, d, 9, 4, T45); SET(d, a, b, c, 12, 11, T46); SET(c, d, a, b, 15, 16, T47); SET(b, c, d, a, 2, 23, T48); #undef SET /* Round 4. */ /* Let [abcd k s t] denote the operation a = b + ((a + I(b,c,d) + X[k] + T[i]) <<< s). */ #define I(x, y, z) ((y) ^ ((x) | ~(z))) #define SET(a, b, c, d, k, s, Ti)\ t = a + I(b,c,d) + X[k] + Ti;\ a = ROTATE_LEFT(t, s) + b /* Do the following 16 operations. */ SET(a, b, c, d, 0, 6, T49); SET(d, a, b, c, 7, 10, T50); SET(c, d, a, b, 14, 15, T51); SET(b, c, d, a, 5, 21, T52); SET(a, b, c, d, 12, 6, T53); SET(d, a, b, c, 3, 10, T54); SET(c, d, a, b, 10, 15, T55); SET(b, c, d, a, 1, 21, T56); SET(a, b, c, d, 8, 6, T57); SET(d, a, b, c, 15, 10, T58); SET(c, d, a, b, 6, 15, T59); SET(b, c, d, a, 13, 21, T60); SET(a, b, c, d, 4, 6, T61); SET(d, a, b, c, 11, 10, T62); SET(c, d, a, b, 2, 15, T63); SET(b, c, d, a, 9, 21, T64); #undef SET /* Then perform the following additions. (That is increment each of the four registers by the value it had before this block was started.) 
*/ state->words[0] += a; state->words[1] += b; state->words[2] += c; state->words[3] += d; } /* api */ md5_state * md5_digest_init (md5_state *state) { state->bitcount[0] = state->bitcount[1] = 0; state->words[0] = 0x67452301; state->words[1] = /*0xefcdab89*/ T_MASK ^ 0x10325476; state->words[2] = /*0x98badcfe*/ T_MASK ^ 0x67452301; state->words[3] = 0x10325476; return state; } void md5_digest_add (md5_state *state, const void *input, size_t size) { const uint8_t *p = (const uint8_t *)input; int nbytes = (int)size; // PJ int left = nbytes; int offset = (state->bitcount[0] >> 3) & 63; uint32_t nbits = (uint32_t)(nbytes << 3); if (nbytes <= 0) return; /* Update the message length. */ state->bitcount[1] += nbytes >> 29; state->bitcount[0] += nbits; if (state->bitcount[0] < nbits) state->bitcount[1]++; /* Process an initial partial block. */ if (offset) { int copy = (offset + nbytes > 64 ? 64 - offset : nbytes); memcpy(state->buffer + offset, p, copy); if (offset + copy < 64) return; p += copy; left -= copy; md5_process(state, state->buffer); } /* Process full blocks. */ for (; left >= 64; p += 64, left -= 64) md5_process(state, p); /* Process a final partial block. */ if (left) memcpy(state->buffer, p, left); } #define md5_digest_byte(state, i) (uint8_t)(state->words[i >> 2] >> ((i & 3) << 3)) void md5_digest_get (md5_state *state, uint8_t digest[], int flags) { static const uint8_t pad[64] = { 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; uint8_t data[8]; int i; /* Save the length before padding. */ for (i = 0; i < 8; ++i) data[i] = (uint8_t)(state->bitcount[i >> 2] >> ((i & 3) << 3)); /* Pad to 56 bytes mod 64. */ md5_digest_add(state, pad, ((55 - (state->bitcount[0] >> 3)) & 63) + 1); /* Append the length. 
*/ md5_digest_add(state, data, 8); /* Output */ if (flags & MD5_HEX) { // expected digest buffer size MD5_STRING_LENGTH uint8_t byte; const char *alphabet; alphabet = (flags & MD5_LCHEX) ? "0123456789abcdef" : "0123456789ABCDEF"; for (i = 0; i < MD5_DIGEST_LENGTH; ++i) { byte = md5_digest_byte(state, i); *digest++ = (uint8_t)alphabet[byte >> 4]; *digest++ = (uint8_t)alphabet[byte & 15]; } *digest = 0; } else { // expected digest buffer size MD5_DIGEST_LENGTH for (i = 0; i < MD5_DIGEST_LENGTH; ++i) *digest++ = md5_digest_byte(state, i); } } void md5_digest (const void *input, size_t length, uint8_t digest[], int flags) { md5_state md5; md5_digest_init(&md5); md5_digest_add(&md5, input, length); md5_digest_get(&md5, digest, flags); } /* file checksum */ #define DIGEST_BUFFER_SIZE 4096 int md5_digest_add_file (md5_state *state, const char *filename) { FILE *fh; uint8_t buffer[DIGEST_BUFFER_SIZE]; size_t read; if ((fh = fopen(filename, "rb")) == NULL) return 0; do { read = fread(buffer, 1, DIGEST_BUFFER_SIZE, fh); md5_digest_add(state, buffer, read); } while (read == DIGEST_BUFFER_SIZE); fclose(fh); return 1; } int md5_digest_file (const char *filename, uint8_t digest[], int flags) { md5_state state; md5_digest_init(&state); if (md5_digest_add_file(&state, filename)) { md5_digest_get(&state, digest, flags); return 1; } return 0; }luametatex-2.10.08/source/libraries/pplib/util/utilmd5.h000066400000000000000000000021101442250314700231220ustar00rootroot00000000000000 /* sha2 implementation excerpted from code by Aaron D. 
Gifford */ #ifndef UTIL_MD5_H #define UTIL_MD5_H #include #include // for size_t #include "utildecl.h" //#define md5_state md5_state_t typedef struct { uint32_t bitcount[2]; uint32_t words[4]; uint8_t buffer[64]; } md5_state; #define MD5_DIGEST_LENGTH 16 #define MD5_STRING_LENGTH (MD5_DIGEST_LENGTH * 2 + 1) enum { MD5_BYTES = 0, MD5_UCHEX = (1<<0), MD5_LCHEX = (1<<1) }; #define MD5_DEFAULT MD5_BYTES #define MD5_HEX (MD5_UCHEX|MD5_LCHEX) #ifdef __cplusplus extern "C" { #endif UTILAPI md5_state * md5_digest_init (md5_state *state); UTILAPI void md5_digest_add (md5_state *state, const void *input, size_t size); UTILAPI void md5_digest_get (md5_state *state, uint8_t digest[], int flags); UTILAPI void md5_digest (const void *input, size_t length, uint8_t digest[], int flags); UTILAPI int md5_digest_add_file (md5_state *state, const char *filename); UTILAPI int md5_digest_file (const char *filename, uint8_t digest[], int flags); #ifdef __cplusplus } /* end extern "C" */ #endif #endifluametatex-2.10.08/source/libraries/pplib/util/utilmem.c000066400000000000000000000027271442250314700232240ustar00rootroot00000000000000 #include // for memcpy #include "utilmem.h" #include "utillog.h" #ifndef util_memerr # if defined(_WIN64) || defined(__MINGW32__) # define util_memerr(size) { loggerf("ooops, not enough memory (%I64u)", ((unsigned long long)(size))); abort(); } # else # define util_memerr(size) { loggerf("ooops, not enough memory (%llu)", ((unsigned long long)(size))); abort(); } # endif #endif void * util_malloc (size_t size) { void *m; if ((m = malloc(size)) == NULL) util_memerr(size); return m; } void * util_calloc (size_t num, size_t size) { void *m; if ((m = calloc(num, size)) == NULL) util_memerr(size); return m; } void * util_realloc (void *m, size_t size) { if ((m = realloc(m, size)) == NULL) util_memerr(size); return m; } /* common array resizer data -- the beginning of array unit -- sizeof array element size -- current array size extra -- requested extra size space 
-- pointer to available space allocated -- flag indicating if *data has been allocated (with malloc) */ void util_resize (void **data, size_t unit, size_t size, size_t extra, size_t *space, int allocated) { if (*space == 0) *space = 4; // better keep *space non-zero to avoid it do { *space <<= 1; } while (size + extra > *space); if (allocated) { *data = util_realloc(*data, *space * unit); } else { void *newdata = util_malloc(*space * unit); if (*data != NULL) memcpy(newdata, *data, size * unit); *data = newdata; } } luametatex-2.10.08/source/libraries/pplib/util/utilmem.h000066400000000000000000000006731442250314700232270ustar00rootroot00000000000000 #ifndef UTIL_MEM_H #define UTIL_MEM_H #include // for size_t and alloc functions #include "utildecl.h" UTILAPI void * util_malloc (size_t size); UTILAPI void * util_calloc (size_t num, size_t size); UTILAPI void * util_realloc (void *m, size_t size); void util_resize (void **data, size_t unit, size_t size, size_t extra, size_t *space, int allocated); #define util_free free // not a call, might be used as identifier #endif luametatex-2.10.08/source/libraries/pplib/util/utilmemallc.h000066400000000000000000000736541442250314700240740ustar00rootroot00000000000000/* Allocators ========== Using allocators defined here makes sense if there is a need to take a space for rather large amount of rather small objects. The basic idea is to take memory in reasonably large blocks and to distribute small chunks from those blocks. Once chunks are no longer needed, one can free them all at once, or free taken chunks individually. We define 3 types of allocators: 1. HEAP - the simplest one, provides variable length memory chunks from larger blocks and frees them all on explicit request. There is no way to free individual objects, only the entire heap. The heap only grows, until freed. 2. STOCK - provides variable length memory chunks from larger blocks, but allows to free individual objects as well as the entire stock. 
The stock grows and shrinks, by leaps and bounds, depending on parameters given during initialization. 3. POOL - provides fixed size memory chunks from larger blocks. It allows to free individual chunks as well as the entire pool. In opposite to a stock, a pool also reuses a space reclaimed by freeing individual objects; before allocating a new block it firsts recycles freed chunks, if any. In general, memory chunks provided by allocators are never reallocated. Allocators do nothing with provided chunks until freed. Allocators are represented as small structures (several pointers and integers). We assume that such structures are either static variables, or members of larger structures. We don't bother to allocate a memory for them. Usage scheme is pretty similar for all: init() - just inititalize, don't allocate anything yet take() - take chunks take() take() ... free() - free the all at once For stocks and pools there is a possibility to give back individual chunks: init() - like above take() - take chunks take() take() back() - give chunks back when no longer needed take() back() ... free() - needed only if not all chunks have been given back All calls take a shell structure pointer as an argument. take() returns a void pointer, aligned according to used variant (8, 16, 32, 64). back() takes a void pointer as the second argument. It must be the pointer previously returned by take(). back() can be called in any order and can obviously be plotted with take(). By default, after back()-ing all taken chunks, the stock returns to its initial state (zero memory used). A special KEEP flag can be used during initialization to prevent freeing the last (sole) allocated block. If KEEP option is used, the allocator always keeps a single segment for further allocations. This is necessary only when there is a risk that just several take() calls will be immediatelly followed by the same number of back() calls. 
KEEP flag prevents allocating and freeing rather large blocks just to serve several chunks. And this is actually important only if there are no other blocks taken, that is, if there is only one, nearly empty block in use. In other cases KEEP flag doesn't matter, but allocators takes care to always have a block for fast allocs. There is also clear() operation that frees all but the recent block. One can use it to free all chunks taken so far, but to make the allocator ready for further allocs. If either KEEP flag is used or clear() is called, soner or later the user have to call free() explicitly, to remove all the remaining memory kept by the allocator. There is no KEEP flag for heaps, as heaps don't allow to free individual chunks. And so, the heap never needs to make a decision if the last sole block should be removed or not. The user makes the decision by calling clear() vs free() respectively. Pop === A very last chunk taken can be quickly given back with pop(heap, taken, size) // for heap or stock pop(pool, taken) // for pool taken must be the chunk returned by the very last take(), size must be the size requested. If the chunk has been taken from the head block (more about blocks below), the block pointer returns to its previous position, as it was before the last take(). If the chunk has been taken from the sole block beneatch the head, the entire sole block (containing just that single chunk) is freed. The pop() operation is different than back(); the popped chunk doesn't cause freeing the head block when its refcount gets zero. So pop() operation breaks the concept of stock that frees all the memory once all taken chunks are given back. on the other hand, if for some reason the very last taken chunk is to be ignored, pop() is better, as it doesn't cause blocks scattering. The popped chunk pointer will probably be returned by the very next call to take(). In case of heap, pop() is the only way to discard the chunk, as there is no back() operation. 
Buffer interface ================ When heap or stock is used by parsers, the caller oftenly doesn't know how many space will be needed for a data (this doesn't apply to pools, which returns constant size memory chunks). Here is an interface for the allocator-as-bufer case (same for heap and stock): some(heap, atleast, &space); ... atleast <<= 1; more(heap, taken, written, atleast, &space); ... done(heap, taken, written); some() operation provides a data pointer to at least a given bytes. The actual space provided for writing is set to the third argument. The caller may write space-bytes. If more space is needed, more() operation takes care to provide a chunk for a given amount of bytes and rewrites already written amount of bytes from a previous chunk to a new location. Same as with() some, the requests for atleast bytes, and the actual provided chunk size is given as space (space >= atleast). The function takes the pointer to the chunk previously taken; the one returned by some() or more(). This argument must not be NULL. If you don't want to copy a data, set written argument to zero. No matter if more() operation was used zero, one or multiple times, all the cycle must end with done(). Calls triple - some(), more() and done() - must not be interrupted by any other api calls. In particular, using take() or back() smells like a segfault. However, if there is a need discard the buffer being written (eg. input data error), instead of done() one may use giveup(heap, taken) If done() has already been called, pop() is the only option to discard the chunk pop(heap, taken, written) some() operation usually doesn't change the state of the heap, unless the heap head block is NULL, or atleast parameter is too large to fit the remaining block. more() usually changes the state, either by allocating a new head block, or by allocating a sole block just beneath the head (blocks and blocks tiling mechanism are described below). 
If a sole block has been taken for some large chunk subsequent calls to more() reallocate this sole block in place. It is assumed, that the size you request in subsequent calls generally grows. It is ok to request space-bytes, then call done() with written value less then requested. But the drawback is that if the chunk has already been allocated from a sole chunk, the space requested but not used is a waste. iof interface ============= iof is an independent interface for buffers written/read byte-by-byte. When used together with allocators, it provides a convenient way to write byte data to the heap or stock, without a need for intermediate buffers. The buffer is setup with iof output, *O O = buffer_init(heap, &output); // doesn't allocate anything or output = BUFFER_INIT(heap); // doesn't allocate anything O = &output; iof keeps pointers to the beginning of the buffer, end of buffer, and current position. Once the position reaches the end, the iof internal handler updates the buffer providing more space to write. When used in conjunction with heap or stock, the space to write is the space provided by the heap or stock. To start the buffer session: O = buffer_some(heap, O, atleast) // ensure iof *O to have atleast bytes to be written Once you are done with writing some chunk buffer_done(heap, O) instead of buffer_done(), one may also use iof_flush(O) // calls buffer_done() and buffer_some() again which updates the underlying heap or stock, and makes the iof ready for a new chunk. iof itself does not allocate a memory, so it doesn't need finalizer. iof_close(output) does nothing. To drop the buffer use: buffer_giveup(heap, O) // restore the from before buffer_some() More often then not, we need to specify a minimal space for buffer each time, eg. for memcpy() or so. The actual space left can be checked with iof_left(O). The entire space of recent chunk is O->space (eq. O->end - O->buf). Identical interface for heap and stock. 
Blocks ====== Each allocator structure keeps a pointer to a head block, initially NULL. Most new chunks are taken from the head. Once the space left in the head block is too small to provide a chunk of the requested size, a new head is created and the previous one is linked to the head (blocks form a linked list). A stock block is named a ream, a heap block is named a pyre, a pool block is named a pile (we need to distinguish structure names in code but in the description below they are all called blocks). Every block knows the number of chunks taken from that block (refcount). A stock also keeps the number of freed chunks [actually only for statistics; in most cases it doesn't need an extra space in struct ream, as this structure member lies in the place of padding bytes.] We change the head block only if a new block is allocated, but we never change the head backward. Once some block becomes ->prev, it will never become a head again. This ensures that the allocator has a head block that usually has a lot of space for new allocs. This needs special care when removing a block that is not a head block. We check if the next block to the one being removed is the head. If it is, and if its refcount is zero (and no KEEP flag is used) the head is removed as well. The basis of pools is similar to stocks and heaps, but there are some significant differences. A pool serves memory chunks of equal size, specified during initialization. This also means that the pool knows the boundaries of individual chunks (stock and heap don't). A pool provides iterators over chunks in use (taken but not given back yet). A pool shell structure keeps a pointer to a head block and a tail block (both may point to a different block, the same block or NULL). This is necessary only for iterators to let the user follow the chunks from the first or from the last taken. The extra cost of maintaining both ->head and ->tail is negligible. 
Refcounting =========== Heap refcounting: whenever a new chunk is taken, the block refcount is incremented. It is never decremented, but plays an important role in the block tiling algorithm (below). No KEEP flag is used here. All the blocks are removed on free(), all but the recent one are removed on clear(). Stock refcounting: whenever a new chunk is taken from the block, the block refcount is incremented. Whenever the chunk is given back, the refcount is decremented. When the refcount drops to zero, the block is removed and freed. To remove the block from the list (any block, not necessarily a head block), a stock needs a 2-directional list; every block has ->next and ->prev links. The head block of the stock is freed only if this is the last (sole) block and no KEEP flag was used during initialization. Otherwise the block is just reset, becoming ready for further allocations - the refcount drops to zero, the data space is reset to an initial state. Pool refcounting: pretty much like with stocks, except that any chunk given back can be recycled on a further take(). Ghosts ====== Every allocated block starts with a private structure for next/prev links, data pointer, refcount. We call it a block ghost. Except for heaps, individual chunks also need a ghost (chunk ghost) so that we are able to know which block the chunk comes from once the chunk is given back by the user (heaps don't have a back() operation so data chunks have no ghosts). We keep ghosts as small as possible. Chunk ghosts are of the size natural for the alignment variant (1, 2, 4 or 8 bytes). Block ghosts are somewhat larger. 
Statistics show clearly that it is worthwhile to keep them as small as possible: - chunk ghosts keep an offset to the block ghost, not a pointer to it (we use the pointer only if it makes no difference to the chunk size; 64-bit aligned variant on a 64-bit machine, 32 and 64 variants on a 32-bit machine) - block ghosts use a data pointer (not an offset) so that we are able to record any requested chunk size (size_t) and to avoid long array indexing on every chunk request At some point we considered storing a shell structure pointer in the block ghost, then the back() operation wouldn't need an extra argument. But stats showed that the size of the block ghost is the most significant factor in memory usage efficiency, so eliminating this extra pointer pays off. Besides, this would make it impossible to relocate the shell structure. We don't allocate memory for the shell, so we shouldn't make assumptions about the shell structure address. Tiling algorithm ================ Tiling the block size refers to stocks and heaps that serve memory chunks of variable size. Both stock and heap perform best when the average size of requested chunks is way smaller than the configured block size. But both also put no limitations on chunk sizes, so they need to cope with the situation where the requested size is quite large, eg. half of the block size or even more than the block size. Here is the algorithm used for blocks tiling: 1. When the requested chunk size fits in the recent block, just serve it from that block. This is the best and hopefully the most common case. 2. When the requested chunk size is larger than the space left in the recent block, a new block must be allocated. But there are two ways: a) either replace the head block with the new block so that the list of blocks is ... <- prev <- head so far <- new head b) or insert the block just "below the head", keeping the head intact, ... <- prev <- new single head <- head The first is the regular case. 
It is used when the space left in the head so far is small (can be neglected), and the requested size is relatively small (will fit the new block). If the space left in the head block is worth bothering about, or the requested chunk size is rather large, the new chunk is served from a single block, allocated just for that chunk. The block is of the size needed for that chunk. The block never becomes the head, no other chunks will be served from it (its refcount is permanently 1, until freed). Much depends on what is considered 'small, negligible block space' and 'rather large chunk size'. The latter is easier to imagine. When the requested size is larger than the block size used for a given allocator, then the size is definitely considered large. When it is smaller than the block size, but still large enough to occupy most of the block size (grabbing quite some space for tiny chunks), it is also considered large. Like the block size, what is considered 'large' can be specified during initialization. A setup that works fine for me is (large = block_size / 2). Making a decision about what left block space we can neglect is not quite obvious. At the first approach we used a constant value, requested from the user during allocator initialization. But it is hard to select a good default. Now we compute this value from block params, by dividing the complete space occupied so far in the block by the number of chunks served from that block (the average size of chunks allocated from this block). We assume that the average chunk size (or smaller) is the space we can neglect. The logic behind it is the following: if the space left in the block is larger than the average, it makes sense not to waste this space and keep it for further allocs. If the space left in the block is less than the average, there is only a little chance we will get a request for a suitable size, so we sacrifice that space and we start allocating from a new block. 
Statistics showed a caveat in the average == threshold approach. Suppose we have a block that has an average chunk size of 16, there are 18 bytes left in the block (not negligible), and the user request is 20 bytes. Allocating a single block for 20 bytes is bad, because the block ghost is 24 bytes (more internal than allocated memory). Allocating many of such blocks gives bad results; many more allocs than necessary, large waste. To avoid that, we help to neglect the remaining block space by checking if the space left is smaller than the block ghost size, which is an inevitable cost anyway. Stats below show clearly that we should rather focus on "how to avoid producing sole-chunk blocks" instead of "how to fill the remaining space". Recycling ========= Recycling applies only to pools. When a chunk is given back, it is inserted into a list of items for recycling. Every pool block keeps a head of that list. Once a chunk is given back, it is inserted as the recycling head and the previous head is attached to the new head. Since every chunk is associated with a ghost, we use ghosts to store a link (pointer or offset) to another item for recycling. Note that the ghost always keeps either a link to the block it belongs to, or a link to another recyclable ghost of the same block. This is used by iterators to distinguish the chunk currently in use from the chunk that has already been given back; if the link points to the block, the chunk is in use. A pool block that has at least one recyclable chunk is called a squot. A pool shell structure keeps a 2-directional list of squots. Once a pool block becomes a squot, it is inserted into that list. Once all its recyclable items have been used, it is removed from the squots list. At every moment, the pool has access to a list of all squots, and therefore, to a list of all recyclable items. Whenever there is a request for a new chunk, at first it is served from the head block, as this is the easiest and the cheapest way. 
Once the recent block has no more place for new items, the recycling list is used, starting from the head recyclable chunk of the head squot. In practice this is always the most recently reclaimed chunk ghost. During further allocs, a pool will first utilize all recyclables from all squots before allocating a new block. Stats ===== Some numbers. The test was made on a stock8, block size 255 bytes, 10000 allocations, random chunk sizes from 1 to 32 bytes (average 16). These are rather tight constraints because of the 255 buffer limit. First approach: blocks: 903 - this is the actual number of malloc() calls singles: 214, 23.70% of all blocks waste: 20.16% - total memory that was allocated but not requested by the user block ghosts 10.04%, plus single block ghosts 3.12% chunk ghosts 4.55% neglected block tails 2.45% After adding a test for left space that helps in the 'neglect remaining space or make a sole chunk block' decision: blocks: 723 - way better singles 0 waste: 19.04% - slightly better block ghosts 10.67% chunk ghosts 4.61% neglected block tails 3.76% The actual numbers vary depending on the buffer size, the average element size and, of course, the taken alignment variant. After some parameter tuning, on various tests we get 5-19% total waste for stocks, 3-14% total waste for heaps. But the basic scheme of statistics remains similar: we take relatively lots of space for block ghosts (5-10% of total memory taken), some inevitable space for chunk ghosts (varies, 4-13% on various tests), and a little waste of neglected block tails (2-4%). Quite surprisingly, block ghosts are, in sum, often more significant than individual chunk ghosts (for the test above over half of all the waste!). The number of block ghosts (equals the number of blocks) mostly depends on the block size vs chunk size relation. But seemingly it is worth bothering about the size of the block ghost and the number of blocks taken - the fewer the better. 
The waste of ghosts of individual objects (stock and pool) is inevitable, and depends only on the number/size of objects taken. We can't use smaller ghosts, we can't do better. Anyway, the least significant is the waste of neglected block tails. Pools stats are pretty similar, but more predictable because of the known chunk size. A pool block ghost is a somewhat larger structure because it keeps ->nextsquot / ->prevsquot pointers among ->next / ->prev. On the other hand, it doesn't need an ->unused counter, as for fixed-length chunks it can always be computed from the refcount and used data. Also the somewhat larger block ghost structure is compensated by the fact that there is no tail block waste and there is no 'neglect or not' problem. Alignment ========= Each allocator has 4 variants for 1, 2, 4, 8 bytes alignment respectively. Eg. stock32_take() always returns a pointer aligned to 4 bytes, heap64_take() returns a pointer aligned to 8 bytes. You can ask for any data length, but in practice you'll always obtain 1N, 2N, 4N or 8N. Alignment implies data padding unless the user requests "aligned" sizes. In statistics the padding is not considered a waste. Zeroing ======= All heap, stock and pool may return zeroed memory chunks, depending on initial flags: HEAP_ZERO STOCK_ZERO POOL_ZERO There are also take0() variants that simply return memset(take(), 0, size), regardless of the flag. */ #ifndef UTIL_MEM_ALLC_C #define UTIL_MEM_ALLC_C /* Common internals for allocators suite. 
A selection or all of the following defines (from api headers) should already be there: UTIL_MEM_HEAP_H // utilmemheap.h UTIL_MEM_STOCK_H // utilmemstock.h UTIL_MEM_POOL_H // utilmempool.h */ #include // memset() #include // printf() #include "utilmem.h" //#if defined(DEBUG) && debug != 0 #if 1 # define ASSERT8(cond) ((void)((cond) || (printf("8bit allocator assertion, %s:%d: %s\n", __FILE__, __LINE__, #cond), 0))) # define ASSERT16(cond) ((void)((cond) || (printf("16bit allocator assertion, %s:%d: %s\n", __FILE__, __LINE__, #cond), 0))) # define ASSERT32(cond) ((void)((cond) || (printf("32bit allocator assertion, %s:%d: %s\n", __FILE__, __LINE__, #cond), 0))) # define ASSERT64(cond) ((void)((cond) || (printf("64bit allocator assertion, %s:%d: %s\n", __FILE__, __LINE__, #cond), 0))) #else # define ASSERT8(cond) (void)0 # define ASSERT16(cond) (void)0 # define ASSERT32(cond) (void)0 # define ASSERT64(cond) (void)0 #endif #if defined(UTIL_MEM_STOCK_H) || defined(UTIL_MEM_POOL_H) struct ghost8{ uint8_t offset; }; struct ghost16 { uint16_t offset; }; #ifdef BIT32 struct ghost32 { union { #ifdef UTIL_MEM_STOCK_H ream32 *ream; #endif #ifdef UTIL_MEM_POOL_H pile32 *pile; ghost32 *nextfree; #endif void *block; }; }; #else struct ghost32 { uint32_t offset; }; #endif struct ghost64 { union { #ifdef UTIL_MEM_STOCK_H ream64 *ream; #endif #ifdef UTIL_MEM_POOL_H pile64 *pile; ghost64 *nextfree; #endif void *block; }; #ifdef BIT32 uint8_t dummy[4]; // force 8 #endif }; #endif /* All offsets related macro horror is here. Block is 4/8-bytes aligned (32/64 pointer size), ream->data is adjusted to 1/2/4/8-bytes accordingly. Therefore all offsets we store and pointers we cast, should be properly aligned. In all cases, sizes and offsets refers to bytes. We need data ghosts only to access the block. For 8 and 16 we use 8/16 bit offsets to keep the ghost smaller. For 32 and 64 we either use offset, or a pointer to the ream. 
malloc() is obviously expected to return a pointer properly allowed for all standard c-types. For 64-bit we can safely expect at least 8-bytes aligned. (at least, because long double may need 16 bytes on gcc64, or 8 bytes on msvc64, or weird on some exotics). On 32 bit machines pointers are 4 bytes aligned, even long long is 4-bytes aligned. But double on 32bit machine is 8-bytes aligned on windows, 4 bytes aligned in linux (compiler option -malign-double makes it 8-bytes aligned). Anyways, we cannot expect that on 32bit machine the result of malloc is always 8-bytes aligned. This requires a very special treatment of 64-variant on 32bit machine: the first data ghost may need to be 4-bytes off. Should we ensure 4 bytes more from malloc just in case? Hmm padding will be there anyway, as we adjust ream->data size to bytes boundaries. In both 32/64bit environments, the ghost keeps a pointer to the block. On 32bit machine, the first chunk ghost address may need to be +4, as this is not ensured by malloc(). See struct ream64 {}. We have an extra test; the final ghost pointer will be properly aligned iff ((block & 7 == 0) && (sizeof(block64) & 7 == 0)) || ((block & 7 == 4) && (sizeof(block64) & 7 == 4) or in short ((block + 1) & 7) == 0 otherwise it needs 4 bytes offset. */ #define pointer_tointeger(p) ((size_t)(p)) // & not allowed on pointer #define pointer_aligned32(p) ((pointer_tointeger(p) & 3) == 0) #define pointer_aligned64(p) ((pointer_tointeger(p) & 7) == 0) #define void_data(data) ((void *)(data)) #define byte_data(data) ((uint8_t *)(data)) /* top of the block ghost */ #define block_top(block) (byte_data(block + 1)) /* where the data begins */ #define block_edge8(block) block_top(block) #define block_edge16(block) block_top(block) #define block_edge32(block) block_top(block) #ifdef BIT32 # define ALIGN64ON32(block) (pointer_aligned64(block + 1) ? 
0 : 4) # define block_edge64(block) (block_top(block) + ALIGN64ON32(block)) #else # define block_edge64(block) block_top(block) #endif #define block_left8(block, size) (size) #define block_left16(block, size) (size) #define block_left32(block, size) (size) #ifdef BIT32 # define block_left64(block, size) (size - ALIGN64ON32(block)) #else # define block_left64(block, size) (size) #endif /* consumed block space; it is important to use edge() macros that involves ALIGN64ON32() */ #define block_used8(block) (block->data - block_edge8(block)) #define block_used16(block) (block->data - block_edge16(block)) #define block_used32(block) (block->data - block_edge32(block)) #define block_used64(block) (block->data - block_edge64(block)) /* align requested size to keep ream->data / pyre->data always aligned. size is always size_t, no insane overflow checks */ #define align_size8(size) ((void)size) #define align_size16(size) (size = aligned_size16(size)) #define align_size32(size) (size = aligned_size32(size)) #define align_size64(size) (size = aligned_size64(size)) /* done() and pop() operations decrements block->left space by an aligned size; block->left -= alignedwritten. Lets have 8-bytes aligned variant block. If we tell the user there is 15 bytes left (block->left == 15) and the user taked 12. Aligned is 16, we cannot substract. We could eventually set block->left to 0, but then pop() operation would no be allowed. Hance, block->left must be aligned. The procedure is different than for size (size_t), we cannot cross 0xff/0xffff,... bondaries. 
*/ #define align_space8(space) ((void)space) #define align_space16(space) (space = aligned_space16(space)) #define align_space32(space) (space = aligned_space32(space)) #define align_space64(space) (space = aligned_space64(space)) /* handling ghost structure (stock and pool) */ #if defined(UTIL_MEM_STOCK_H) || defined(UTIL_MEM_POOL_H) /* ghost offset from block top; not from bottom because we must not exceed offset limit */ #define ghost_offset(block, ghost) (byte_data(ghost) - block_top(block)) /* ghost <-> data */ #define ghost_data(ghost) ((void *)(ghost + 1)) /* cast from data to ghost structure goes via (void *) to shut up warnigns, alignment ok */ #define data_ghost8(data) (((ghost8 *)void_data(data)) - 1) #define data_ghost16(data) (((ghost16 *)void_data(data)) - 1) #define data_ghost32(data) (((ghost32 *)void_data(data)) - 1) #define data_ghost64(data) (((ghost64 *)void_data(data)) - 1) /* ghost <-> block */ #define ghost_block8(ghost, block8) ((block8 *)void_data(byte_data(ghost) - ghost->offset - sizeof(block8))) #define ghost_block16(ghost, block16) ((block16 *)void_data(byte_data(ghost) - ghost->offset - sizeof(block16))) #ifdef BIT32 # define ghost_block32(ghost, block32) (ghost->block) #else # define ghost_block32(ghost, block32) ((block32 *)void_data(byte_data(ghost) - ghost->offset - sizeof(block32))) #endif #define ghost_block64(ghost, block64) (ghost->block) /* ghost init */ #define ghost_next8(block, ghost) ((ghost = block->dataghost), (ghost->offset = (uint8_t)ghost_offset(block, ghost))) #define ghost_next16(block, ghost) ((ghost = block->dataghost), (ghost->offset = (uint16_t)ghost_offset(block, ghost))) #ifdef BIT32 # define ghost_next32(bl0ck, ghost) ((ghost = bl0ck->dataghost), (ghost->block = bl0ck)) #else # define ghost_next32(block, ghost) ((ghost = block->dataghost), (ghost->offset = (uint32_t)ghost_offset(block, ghost))) #endif #define ghost_next64(bl0ck, ghost) ((ghost = bl0ck->dataghost), (ghost->block = bl0ck)) #endif /* average 
block chunk size */ #define average_block_chunk8(ream) (block_used8(ream) / ream->chunks) #define average_block_chunk16(ream) (block_used16(ream) / ream->chunks) #define average_block_chunk32(ream) (block_used32(ream) / ream->chunks) #define average_block_chunk64(ream) (block_used64(ream) / ream->chunks) /* neglect remaining block tail and start a new block or create a single block; a test for (block->chunks > 0) is a sanity; if block->chunks is zero (block has a full space left), we shouldn't get there, except when alloc->large is larger then alloc->space */ #define take_new_block8(alloc, ghoststruct, block, size) \ ((size < alloc->large) && (block->left <= sizeof(ghoststruct) || (block->chunks > 0 && block->left <= average_block_chunk8(block)))) #define take_new_block16(alloc, ghoststruct, block, size) \ ((size < alloc->large) && (block->left <= sizeof(ghoststruct) || (block->chunks > 0 && block->left <= average_block_chunk16(block)))) #define take_new_block32(alloc, ghoststruct, block, size) \ ((size < alloc->large) && (block->left <= sizeof(ghoststruct) || (block->chunks > 0 && block->left <= average_block_chunk32(block)))) #define take_new_block64(alloc, ghoststruct, block, size) \ ((size < alloc->large) && (block->left <= sizeof(ghoststruct) || (block->chunks > 0 && block->left <= average_block_chunk64(block)))) /* empty */ #define head_block_empty(alloc, block) (((block = alloc->head) == NULL) || (block->chunks == 0 && block->prev == NULL)) #endifluametatex-2.10.08/source/libraries/pplib/util/utilmemallh.h000066400000000000000000000021301442250314700240560ustar00rootroot00000000000000 #ifndef UTIL_MEM_ALLH_H #define UTIL_MEM_ALLH_H #include // size_t #include #include "utildecl.h" typedef struct ghost8 ghost8; typedef struct ghost16 ghost16; typedef struct ghost32 ghost32; typedef struct ghost64 ghost64; #define aligned_size8(size) (size) #define aligned_size16(size) ((((size) + 1) >> 1) << 1) #define aligned_size32(size) ((((size) + 3) >> 2) << 2) #define 
aligned_size64(size) ((((size) + 7) >> 3) << 3) #define aligned_space8(size) (size) #define aligned_space16(size) (((size) & 1) ? ((size) < 0xFFFF ? ((size) + 1) : ((size) - 1)) : (size)) #define aligned_space32(size) (((size) & 3) ? ((size) < 0xFFFFFFFD ? ((size) - ((size) & 3) + 4) : (size) - ((size) & 3)) : (size)) #define aligned_space64(size) (((size) & 7) ? ((size) < 0xFFFFFFFFFFFFFFF8ULL ? ((size) - ((size) & 7) + 8) : (size) - ((size) & 7)) : (size)) /* info stub */ typedef struct { size_t blocks, singles; size_t chunks, unused; size_t used, singleused, left; size_t ghosts, blockghosts, singleghosts; } mem_info; #define MEM_INFO_INIT() = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } #endifluametatex-2.10.08/source/libraries/pplib/util/utilmemheap.c000066400000000000000000000556541442250314700240710ustar00rootroot00000000000000 #include "utilmemheap.h" #include "utilmemallc.h" #define pyre_alloc8(heap, space) ((pyre8 *)((heap->flags & HEAP_ZERO) ? util_calloc(1, sizeof(pyre8) + space * sizeof(uint8_t)) : util_malloc(sizeof(pyre8) + space * sizeof(uint8_t)))) #define pyre_alloc16(heap, space) ((pyre16 *)((heap->flags & HEAP_ZERO) ? util_calloc(1, sizeof(pyre16) + space * sizeof(uint8_t)) : util_malloc(sizeof(pyre16) + space * sizeof(uint8_t)))) #define pyre_alloc32(heap, space) ((pyre32 *)((heap->flags & HEAP_ZERO) ? util_calloc(1, sizeof(pyre32) + space * sizeof(uint8_t)) : util_malloc(sizeof(pyre32) + space * sizeof(uint8_t)))) #define pyre_alloc64(heap, space) ((pyre64 *)((heap->flags & HEAP_ZERO) ? util_calloc(1, sizeof(pyre64) + space * sizeof(uint8_t)) : util_malloc(sizeof(pyre64) + space * sizeof(uint8_t)))) #define pyre_free(pyre) util_free(pyre) /* block reset */ #define reset_heap_head8(heap, pyre, used) \ ((used = block_used8(pyre)), (pyre->data -= used), ((heap->flags & HEAP_ZERO) ? 
(memset(pyre->data, 0, used), 0) : 0), (pyre->left += (uint8_t)used)) #define reset_heap_head16(heap, pyre, used) \ ((used = block_used16(pyre)), (pyre->data -= used), ((heap->flags & HEAP_ZERO) ? (memset(pyre->data, 0, used), 0) : 0), (pyre->left += (uint16_t)used)) #define reset_heap_head32(heap, pyre, used) \ ((used = block_used32(pyre)), (pyre->data -= used), ((heap->flags & HEAP_ZERO) ? (memset(pyre->data, 0, used), 0) : 0), (pyre->left += (uint32_t)used)) #define reset_heap_head64(heap, pyre, used) \ ((used = block_used64(pyre)), (pyre->data -= used), ((heap->flags & HEAP_ZERO) ? (memset(pyre->data, 0, used), 0) : 0), (pyre->left += (uint64_t)used)) /* init heap */ heap8 * heap8_init (heap8 *heap, uint8_t space, uint8_t large, uint8_t flags) { align_space8(space); if (large > space) large = space; heap->head = NULL; heap->space = space; heap->large = large; heap->flags = flags; return heap; } heap16 * heap16_init (heap16 *heap, uint16_t space, uint16_t large, uint8_t flags) { align_space16(space); if (large > space) large = space; heap->head = NULL; heap->space = space; heap->large = large; heap->flags = flags; return heap; } heap32 * heap32_init (heap32 *heap, uint32_t space, uint32_t large, uint8_t flags) { align_space32(space); if (large > space) large = space; heap->head = NULL; heap->space = space; heap->large = large; heap->flags = flags; return heap; } heap64 * heap64_init (heap64 *heap, uint64_t space, uint64_t large, uint8_t flags) { align_space64(space); if (large > space) large = space; heap->head = NULL; heap->space = space; heap->large = large; heap->flags = flags; return heap; } /* free heap */ void heap8_free (heap8 *heap) { pyre8 *pyre, *prev; pyre = heap->head; heap->head = NULL; while (pyre != NULL) { prev = pyre->prev; pyre_free(pyre); pyre = prev; } } void heap16_free (heap16 *heap) { pyre16 *pyre, *prev; pyre = heap->head; heap->head = NULL; while (pyre != NULL) { prev = pyre->prev; pyre_free(pyre); pyre = prev; } } void heap32_free 
(heap32 *heap) { pyre32 *pyre, *prev; pyre = heap->head; heap->head = NULL; while (pyre != NULL) { prev = pyre->prev; pyre_free(pyre); pyre = prev; } } void heap64_free (heap64 *heap) { pyre64 *pyre, *prev; pyre = heap->head; heap->head = NULL; while (pyre != NULL) { prev = pyre->prev; pyre_free(pyre); pyre = prev; } } /* clear heap */ void heap8_clear (heap8 *heap) { pyre8 *pyre, *prev; size_t used; if ((pyre = heap->head) == NULL) return; prev = pyre->prev; pyre->prev = NULL; reset_heap_head8(heap, pyre, used); for (; prev != NULL; prev = pyre) { pyre = prev->prev; pyre_free(prev); } } void heap16_clear (heap16 *heap) { pyre16 *pyre, *prev; size_t used; if ((pyre = heap->head) == NULL) return; prev = pyre->prev; pyre->prev = NULL; reset_heap_head16(heap, pyre, used); for (; prev != NULL; prev = pyre) { pyre = prev->prev; pyre_free(prev); } } void heap32_clear (heap32 *heap) { pyre32 *pyre, *prev; size_t used; if ((pyre = heap->head) == NULL) return; prev = pyre->prev; pyre->prev = NULL; reset_heap_head32(heap, pyre, used); for (; prev != NULL; prev = pyre) { pyre = prev->prev; pyre_free(prev); } } void heap64_clear (heap64 *heap) { pyre64 *pyre, *prev; size_t used; if ((pyre = heap->head) == NULL) return; prev = pyre->prev; pyre->prev = NULL; reset_heap_head64(heap, pyre, used); for (; prev != NULL; prev = pyre) { pyre = prev->prev; pyre_free(prev); } } /* heap head */ void heap8_head (heap8 *heap) { pyre8 *pyre; heap->head = pyre = pyre_alloc8(heap, heap->space); pyre->prev = NULL; pyre->data = block_edge8(pyre); pyre->left = block_left8(pyre, heap->space); pyre->chunks = 0; } void heap16_head (heap16 *heap) { pyre16 *pyre; heap->head = pyre = pyre_alloc16(heap, heap->space); pyre->prev = NULL; pyre->data = block_edge16(pyre); pyre->left = block_left16(pyre, heap->space); pyre->chunks = 0; } void heap32_head (heap32 *heap) { pyre32 *pyre; heap->head = pyre = pyre_alloc32(heap, heap->space); pyre->prev = NULL; pyre->data = block_edge32(pyre); pyre->left = 
block_left32(pyre, heap->space); pyre->chunks = 0; } void heap64_head (heap64 *heap) { pyre64 *pyre; heap->head = pyre = pyre_alloc64(heap, heap->space); pyre->prev = NULL; pyre->data = block_edge64(pyre); pyre->left = block_left64(pyre, heap->space); pyre->chunks = 0; } /* next heap head */ static pyre8 * heap8_new (heap8 *heap) { pyre8 *pyre; pyre = pyre_alloc8(heap, heap->space); pyre->prev = heap->head; heap->head = pyre; pyre->data = block_edge8(pyre); pyre->left = block_left8(pyre, heap->space); pyre->chunks = 0; return pyre; } static pyre16 * heap16_new (heap16 *heap) { pyre16 *pyre; pyre = pyre_alloc16(heap, heap->space); pyre->prev = heap->head; heap->head = pyre; pyre->data = block_edge16(pyre); pyre->left = block_left16(pyre, heap->space); pyre->chunks = 0; return pyre; } static pyre32 * heap32_new (heap32 *heap) { pyre32 *pyre; pyre = pyre_alloc32(heap, heap->space); pyre->prev = heap->head; heap->head = pyre; pyre->data = block_edge32(pyre); pyre->left = block_left32(pyre, heap->space); pyre->chunks = 0; return pyre; } static pyre64 * heap64_new (heap64 *heap) { pyre64 *pyre; pyre = pyre_alloc64(heap, heap->space); pyre->prev = heap->head; heap->head = pyre; pyre->data = block_edge64(pyre); pyre->left = block_left64(pyre, heap->space); pyre->chunks = 0; return pyre; } /* next heap sole */ static pyre8 * heap8_sole (heap8 *heap, size_t size) { pyre8 *pyre, *head, *prev; pyre = pyre_alloc8(heap, size); head = heap->head; prev = head->prev; pyre->prev = prev; head->prev = pyre; pyre->data = block_edge8(pyre); pyre->left = 0; // (uint8_t)size makes no sense, even with buffer api it will finally become 0 return pyre; } static pyre16 * heap16_sole (heap16 *heap, size_t size) { pyre16 *pyre, *head, *prev; pyre = pyre_alloc16(heap, size); head = heap->head; prev = head->prev; pyre->prev = prev; head->prev = pyre; pyre->data = block_edge16(pyre); pyre->left = 0; return pyre; } static pyre32 * heap32_sole (heap32 *heap, size_t size) { pyre32 *pyre, *head, *prev; 
pyre = pyre_alloc32(heap, size); head = heap->head; prev = head->prev; pyre->prev = prev; head->prev = pyre; pyre->data = block_edge32(pyre); pyre->left = 0; return pyre; } static pyre64 * heap64_sole (heap64 *heap, size_t size) { pyre64 *pyre, *head, *prev; pyre = pyre_alloc64(heap, size); head = heap->head; prev = head->prev; pyre->prev = prev; head->prev = pyre; pyre->data = block_edge64(pyre); pyre->left = 0; return pyre; } /* take from heap */ #define pyre_next8(d, pyre, size) (d = pyre->data, pyre->data += size, pyre->left -= (uint8_t)size, ++pyre->chunks) #define pyre_next16(d, pyre, size) (d = pyre->data, pyre->data += size, pyre->left -= (uint16_t)size, ++pyre->chunks) #define pyre_next32(d, pyre, size) (d = pyre->data, pyre->data += size, pyre->left -= (uint32_t)size, ++pyre->chunks) #define pyre_next64(d, pyre, size) (d = pyre->data, pyre->data += size, pyre->left -= (uint64_t)size, ++pyre->chunks) // for sole blocks, block->left is permanently 0, we can't store size_t there #define pyre_last8(d, pyre, size) (d = pyre->data, pyre->data += size, pyre->chunks = 1) #define pyre_last16(d, pyre, size) (d = pyre->data, pyre->data += size, pyre->chunks = 1) #define pyre_last32(d, pyre, size) (d = pyre->data, pyre->data += size, pyre->chunks = 1) #define pyre_last64(d, pyre, size) (d = pyre->data, pyre->data += size, pyre->chunks = 1) void * _heap8_take (heap8 *heap, size_t size) { pyre8 *pyre; void *data; pyre = heap->head; align_size8(size); if (size <= pyre->left) { pyre_next8(data, pyre, size); } else if (take_new_block8(heap, pyre8, pyre, size)) { pyre = heap8_new(heap); pyre_next8(data, pyre, size); } else { pyre = heap8_sole(heap, size); pyre_last8(data, pyre, size); } return data; } void * _heap16_take (heap16 *heap, size_t size) { pyre16 *pyre; void *data; pyre = heap->head; align_size16(size); if (size <= pyre->left) { pyre_next16(data, pyre, size); } else if (take_new_block16(heap, pyre16, pyre, size)) { pyre = heap16_new(heap); pyre_next16(data, 
pyre, size); } else { pyre = heap16_sole(heap, size); pyre_last16(data, pyre, size); } return data; } void * _heap32_take (heap32 *heap, size_t size) { pyre32 *pyre; void *data; pyre = heap->head; align_size32(size); if (size <= pyre->left) { pyre_next32(data, pyre, size); } else if (take_new_block32(heap, pyre32, pyre, size)) { pyre = heap32_new(heap); pyre_next32(data, pyre, size); } else { pyre = heap32_sole(heap, size); pyre_last32(data, pyre, size); } return data; } void * _heap64_take (heap64 *heap, size_t size) { pyre64 *pyre; void *data; pyre = heap->head; align_size64(size); if (size <= pyre->left) { pyre_next64(data, pyre, size); } else if (take_new_block64(heap, pyre64, pyre, size)) { pyre = heap64_new(heap); pyre_next64(data, pyre, size); } else { pyre = heap64_sole(heap, size); pyre_last64(data, pyre, size); } return data; } void * _heap8_take0 (heap8 *heap, size_t size) { return memset(_heap8_take(heap, size), 0, size); } void * _heap16_take0 (heap16 *heap, size_t size) { return memset(_heap16_take(heap, size), 0, size); } void * _heap32_take0 (heap32 *heap, size_t size) { return memset(_heap32_take(heap, size), 0, size); } void * _heap64_take0 (heap64 *heap, size_t size) { return memset(_heap64_take(heap, size), 0, size); } /* pop last heap chunk */ #define taken_from_head(taken, head) (byte_data(taken) == head->data) #define taken_from_sole(taken, head, sole) ((sole = head->prev) != NULL && byte_data(taken) == sole->data) #define taken_prev_head(taken, head, size) (byte_data(taken) == head->data - size) #define taken_prev_sole(taken, head, sole, size) ((sole = head->prev) != NULL && byte_data(taken) == sole->data - size) void heap8_pop (heap8 *heap, void *taken, size_t size) { pyre8 *pyre, *head; head = heap->head; align_size8(size); if (taken_prev_head(taken, head, size)) { head->data -= size; head->left += (uint8_t)size; --head->chunks; } else if (taken_prev_sole(taken, head, pyre, size)) { head->prev = pyre->prev; pyre_free(pyre); } else { 
ASSERT8(0); } } void heap16_pop (heap16 *heap, void *taken, size_t size) { pyre16 *pyre, *head; head = heap->head; align_size16(size); if (taken_prev_head(taken, head, size)) { head->data -= size; head->left += (uint16_t)size; --head->chunks; } else if (taken_prev_sole(taken, head, pyre, size)) { head->prev = pyre->prev; pyre_free(pyre); } else { ASSERT16(0); } } void heap32_pop (heap32 *heap, void *taken, size_t size) { pyre32 *pyre, *head; head = heap->head; align_size32(size); if (taken_prev_head(taken, head, size)) { head->data -= size; head->left += (uint32_t)size; --head->chunks; } else if (taken_prev_sole(taken, head, pyre, size)) { head->prev = pyre->prev; pyre_free(pyre); } else { ASSERT32(0); } } void heap64_pop (heap64 *heap, void *taken, size_t size) { pyre64 *pyre, *head; head = heap->head; align_size64(size); if (taken_prev_head(taken, head, size)) { head->data -= size; head->left += (uint64_t)size; --head->chunks; } else if (taken_prev_sole(taken, head, pyre, size)) { head->prev = pyre->prev; pyre_free(pyre); } else { ASSERT64(0); } } /* heap buffer */ void * _heap8_some (heap8 *heap, size_t size, size_t *pspace) { pyre8 *pyre; pyre = heap->head; align_size8(size); if (size <= pyre->left) { *pspace = pyre->left; } else if (take_new_block8(heap, pyre8, pyre, size)) { pyre = heap8_new(heap); *pspace = pyre->left; } else { pyre = heap8_sole(heap, size); *pspace = size; } return void_data(pyre->data); } void * _heap16_some (heap16 *heap, size_t size, size_t *pspace) { pyre16 *pyre; pyre = heap->head; align_size16(size); if (size <= pyre->left) { *pspace = pyre->left; } else if (take_new_block16(heap, pyre16, pyre, size)) { pyre = heap16_new(heap); *pspace = pyre->left; } else { pyre = heap16_sole(heap, size); *pspace = size; } return void_data(pyre->data); } void * _heap32_some (heap32 *heap, size_t size, size_t *pspace) { pyre32 *pyre; pyre = heap->head; align_size32(size); if (size <= pyre->left) { *pspace = pyre->left; } else if 
(take_new_block32(heap, pyre32, pyre, size)) { pyre = heap32_new(heap); *pspace = pyre->left; } else { pyre = heap32_sole(heap, size); *pspace = size; } return void_data(pyre->data); } void * _heap64_some (heap64 *heap, size_t size, size_t *pspace) { pyre64 *pyre; pyre = heap->head; align_size64(size); if (size <= pyre->left) { *pspace = pyre->left; } else if (take_new_block64(heap, pyre64, pyre, size)) { pyre = heap64_new(heap); *pspace = pyre->left; } else { pyre = heap64_sole(heap, size); *pspace = size; } return void_data(pyre->data); } void * heap8_more (heap8 *heap, void *taken, size_t written, size_t size, size_t *pspace) { pyre8 *pyre, *prev; pyre = heap->head; align_size8(size); if (taken_from_head(taken, pyre)) { if (size <= pyre->left) { *pspace = pyre->left; } else if (take_new_block8(heap, pyre8, pyre, size)) { pyre = heap8_new(heap); memcpy(pyre->data, taken, written); *pspace = pyre->left; } else { pyre = heap8_sole(heap, size); memcpy(pyre->data, taken, written); *pspace = size; } } else if (taken_from_sole(taken, pyre, prev)) { pyre = heap8_sole(heap, size); memcpy(pyre->data, taken, written); *pspace = size; pyre->prev = prev->prev; pyre_free(prev); } else { ASSERT8(0); *pspace = 0; return NULL; } return void_data(pyre->data); } void * heap16_more (heap16 *heap, void *taken, size_t written, size_t size, size_t *pspace) { pyre16 *pyre, *prev; pyre = heap->head; align_size16(size); if (taken_from_head(taken, pyre)) { if (size <= pyre->left) { *pspace = pyre->left; } else if (take_new_block16(heap, pyre16, pyre, size)) { pyre = heap16_new(heap); memcpy(pyre->data, taken, written); *pspace = pyre->left; } else { pyre = heap16_sole(heap, size); memcpy(pyre->data, taken, written); *pspace = size; } } else if (taken_from_sole(taken, pyre, prev)) { pyre = heap16_sole(heap, size); memcpy(pyre->data, taken, written); *pspace = size; pyre->prev = prev->prev; pyre_free(prev); } else { ASSERT16(0); *pspace = 0; return NULL; } return void_data(pyre->data); } 
void * heap32_more (heap32 *heap, void *taken, size_t written, size_t size, size_t *pspace) { pyre32 *pyre, *prev; pyre = heap->head; align_size32(size); if (taken_from_head(taken, pyre)) { if (size <= pyre->left) { *pspace = pyre->left; } else if (take_new_block32(heap, pyre32, pyre, size)) { pyre = heap32_new(heap); memcpy(pyre->data, taken, written); *pspace = pyre->left; } else { pyre = heap32_sole(heap, size); memcpy(pyre->data, taken, written); *pspace = size; } } else if (taken_from_sole(taken, pyre, prev)) { pyre = heap32_sole(heap, size); memcpy(pyre->data, taken, written); *pspace = size; pyre->prev = prev->prev; pyre_free(prev); } else { ASSERT32(0); *pspace = 0; return NULL; } return void_data(pyre->data); } void * heap64_more (heap64 *heap, void *taken, size_t written, size_t size, size_t *pspace) { pyre64 *pyre, *prev; pyre = heap->head; align_size64(size); if (taken_from_head(taken, pyre)) { if (size <= pyre->left) { *pspace = pyre->left; } else if (take_new_block64(heap, pyre64, pyre, size)) { pyre = heap64_new(heap); memcpy(pyre->data, taken, written); *pspace = pyre->left; } else { pyre = heap64_sole(heap, size); memcpy(pyre->data, taken, written); *pspace = size; } } else if (taken_from_sole(taken, pyre, prev)) { pyre = heap64_sole(heap, size); memcpy(pyre->data, taken, written); *pspace = size; pyre->prev = prev->prev; pyre_free(prev); } else { ASSERT64(0); *pspace = 0; return NULL; } return void_data(pyre->data); } void heap8_done (heap8 *heap, void *taken, size_t written) { pyre8 *pyre; pyre = heap->head; align_size8(written); if (taken_from_head(taken, pyre)) { pyre->data += written; pyre->left -= (uint8_t)written; ++pyre->chunks; } else if (taken_from_sole(taken, pyre, pyre)) { pyre->data += written; pyre->chunks = 1; } else { ASSERT8(0); } } void heap16_done (heap16 *heap, void *taken, size_t written) { pyre16 *pyre; pyre = heap->head; align_size16(written); if (taken_from_head(taken, pyre)) { pyre->data += written; pyre->left -= 
(uint16_t)written; ++pyre->chunks; } else if (taken_from_sole(taken, pyre, pyre)) { pyre->data += written; pyre->chunks = 1; } else { ASSERT16(0); } } void heap32_done (heap32 *heap, void *taken, size_t written) { pyre32 *pyre; pyre = heap->head; align_size32(written); if (taken_from_head(taken, pyre)) { pyre->data += written; pyre->left -= (uint32_t)written; ++pyre->chunks; } else if (taken_from_sole(taken, pyre, pyre)) { pyre->data += written; pyre->chunks = 1; } else { ASSERT32(0); } } void heap64_done (heap64 *heap, void *taken, size_t written) { pyre64 *pyre; pyre = heap->head; align_size64(written); if (taken_from_head(taken, pyre)) { pyre->data += written; pyre->left -= (uint64_t)written; ++pyre->chunks; } else if (taken_from_sole(taken, pyre, pyre)) { pyre->data += written; pyre->chunks = 1; } else { ASSERT64(0); } } /* giveup */ void heap8_giveup (heap8 *heap, void *taken) { pyre8 *head, *pyre; head = heap->head; if (taken_from_sole(taken, head, pyre)) { head->prev = pyre->prev; pyre_free(pyre); } } void heap16_giveup (heap16 *heap, void *taken) { pyre16 *head, *pyre; head = heap->head; if (taken_from_sole(taken, head, pyre)) { head->prev = pyre->prev; pyre_free(pyre); } } void heap32_giveup (heap32 *heap, void *taken) { pyre32 *head, *pyre; head = heap->head; if (taken_from_sole(taken, head, pyre)) { head->prev = pyre->prev; pyre_free(pyre); } } void heap64_giveup (heap64 *heap, void *taken) { pyre64 *head, *pyre; head = heap->head; if (taken_from_sole(taken, head, pyre)) { head->prev = pyre->prev; pyre_free(pyre); } } /* heap empty */ int heap8_empty (heap8 *heap) { pyre8 *pyre; return head_block_empty(heap, pyre); } int heap16_empty (heap16 *heap) { pyre16 *pyre; return head_block_empty(heap, pyre); } int heap32_empty (heap32 *heap) { pyre32 *pyre; return head_block_empty(heap, pyre); } int heap64_empty (heap64 *heap) { pyre64 *pyre; return head_block_empty(heap, pyre); } /* heap stats */ void heap8_stats (heap8 *heap, mem_info *info, int append) { 
pyre8 *pyre; size_t used, chunks = 0, blocks = 0, singles = 0; if (!append) memset(info, 0, sizeof(mem_info)); for (pyre = heap->head; pyre != NULL; pyre = pyre->prev) { ++blocks; chunks += pyre->chunks; used = block_used8(pyre); info->used += used; info->left += pyre->left; if (pyre->chunks == 1 && pyre->left == 0) { ++singles; info->singleused += used; } } info->chunks += chunks; info->blocks += blocks; info->blockghosts += blocks * sizeof(pyre8); info->singles += singles; info->singleghosts += singles * sizeof(pyre8); } void heap16_stats (heap16 *heap, mem_info *info, int append) { pyre16 *pyre; size_t used, chunks = 0, blocks = 0, singles = 0; if (!append) memset(info, 0, sizeof(mem_info)); for (pyre = heap->head; pyre != NULL; pyre = pyre->prev) { ++blocks; chunks += pyre->chunks; used = block_used16(pyre); info->used += used; info->left += pyre->left; if (pyre->chunks == 1 && pyre->left == 0) { ++singles; info->singleused += used; } } info->chunks += chunks; info->blocks += blocks; info->blockghosts += blocks * sizeof(pyre16); info->singles += singles; info->singleghosts += singles * sizeof(pyre16); } void heap32_stats (heap32 *heap, mem_info *info, int append) { pyre32 *pyre; size_t used, chunks = 0, blocks = 0, singles = 0; if (!append) memset(info, 0, sizeof(mem_info)); for (pyre = heap->head; pyre != NULL; pyre = pyre->prev) { ++blocks; chunks += pyre->chunks; used = block_used32(pyre); info->used += used; info->left += pyre->left; if (pyre->chunks == 1 && pyre->left == 0) { ++singles; info->singleused += used; } } info->chunks += chunks; info->blocks += blocks; info->blockghosts += blocks * sizeof(pyre32); info->singles += singles; info->singleghosts += singles * sizeof(pyre32); } void heap64_stats (heap64 *heap, mem_info *info, int append) { pyre64 *pyre; size_t used, chunks = 0, blocks = 0, singles = 0; if (!append) memset(info, 0, sizeof(mem_info)); for (pyre = heap->head; pyre != NULL; pyre = pyre->prev) { ++blocks; chunks += pyre->chunks; used = 
block_used64(pyre); info->used += used; info->left += pyre->left; if (pyre->chunks == 1 && pyre->left == 0) { ++singles; info->singleused += used; } } info->chunks += chunks; info->blocks += blocks; info->blockghosts += blocks * sizeof(pyre64); info->singles += singles; info->singleghosts += singles * sizeof(pyre64); } luametatex-2.10.08/source/libraries/pplib/util/utilmemheap.h000066400000000000000000000144671442250314700240730ustar00rootroot00000000000000 #ifndef UTIL_MEM_HEAP_H #define UTIL_MEM_HEAP_H #include "utilmemallh.h" typedef struct pyre8 pyre8; typedef struct pyre16 pyre16; typedef struct pyre32 pyre32; typedef struct pyre64 pyre64; struct pyre8 { pyre8 *prev; uint8_t *data; uint8_t left; uint8_t chunks; #ifdef BIT32 uint8_t dummy[2]; // 10->12 #else uint8_t dummy[6]; // 18->24 #endif }; struct pyre16 { pyre16 *prev; uint8_t *data; uint16_t left; uint16_t chunks; #ifdef BIT32 //uint8_t dummy[0]; // 12->12 #else uint8_t dummy[4]; // 20->24 #endif }; struct pyre32 { pyre32 *prev; uint8_t *data; uint32_t left; uint32_t chunks; #ifdef BIT32 //uint8_t dummy[0]; // 16->16 #else //uint8_t dummy[0]; // 24->24 #endif }; struct pyre64 { pyre64 *prev; uint8_t *data; uint64_t left; uint64_t chunks; #ifdef BIT32 //uint8_t dummy[0]; // 24->24 #else //uint8_t dummy[0]; // 32->32 #endif }; /* heaps */ typedef struct heap8 heap8; typedef struct heap16 heap16; typedef struct heap32 heap32; typedef struct heap64 heap64; struct heap8 { pyre8 *head; uint8_t space; uint8_t large; uint8_t flags; }; struct heap16 { pyre16 *head; uint16_t space; uint16_t large; uint8_t flags; }; struct heap32 { pyre32 *head; uint32_t space; uint32_t large; uint8_t flags; }; struct heap64 { pyre64 *head; uint64_t space; uint64_t large; uint8_t flags; }; #define HEAP_ZERO (1 << 0) #define HEAP_DEFAULTS 0 #define HEAP8_INIT(space, large, flags) { NULL, aligned_space8(space), large, flags } #define HEAP16_INIT(space, large, flags) { NULL, aligned_space16(space), large, flags } #define 
HEAP32_INIT(space, large, flags) { NULL, aligned_space32(space), large, flags } #define HEAP64_INIT(space, large, flags) { NULL, aligned_space64(space), large, flags } UTILAPI heap8 * heap8_init (heap8 *heap, uint8_t space, uint8_t large, uint8_t flags); UTILAPI heap16 * heap16_init (heap16 *heap, uint16_t space, uint16_t large, uint8_t flags); UTILAPI heap32 * heap32_init (heap32 *heap, uint32_t space, uint32_t large, uint8_t flags); UTILAPI heap64 * heap64_init (heap64 *heap, uint64_t space, uint64_t large, uint8_t flags); UTILAPI void heap8_head (heap8 *heap); UTILAPI void heap16_head (heap16 *heap); UTILAPI void heap32_head (heap32 *heap); UTILAPI void heap64_head (heap64 *heap); #define heap8_ensure_head(heap) ((void)((heap)->head != NULL || (heap8_head(heap), 0))) #define heap16_ensure_head(heap) ((void)((heap)->head != NULL || (heap16_head(heap), 0))) #define heap32_ensure_head(heap) ((void)((heap)->head != NULL || (heap32_head(heap), 0))) #define heap64_ensure_head(heap) ((void)((heap)->head != NULL || (heap64_head(heap), 0))) UTILAPI void heap8_free (heap8 *heap); UTILAPI void heap16_free (heap16 *heap); UTILAPI void heap32_free (heap32 *heap); UTILAPI void heap64_free (heap64 *heap); UTILAPI void heap8_clear (heap8 *heap); UTILAPI void heap16_clear (heap16 *heap); UTILAPI void heap32_clear (heap32 *heap); UTILAPI void heap64_clear (heap64 *heap); UTILAPI void * _heap8_take (heap8 *heap, size_t size); UTILAPI void * _heap16_take (heap16 *heap, size_t size); UTILAPI void * _heap32_take (heap32 *heap, size_t size); UTILAPI void * _heap64_take (heap64 *heap, size_t size); UTILAPI void * _heap8_take0 (heap8 *heap, size_t size); UTILAPI void * _heap16_take0 (heap16 *heap, size_t size); UTILAPI void * _heap32_take0 (heap32 *heap, size_t size); UTILAPI void * _heap64_take0 (heap64 *heap, size_t size); #define heap8_take(heap, size) (heap8_ensure_head(heap), _heap8_take(heap, size)) #define heap16_take(heap, size) (heap16_ensure_head(heap), _heap16_take(heap, 
size)) #define heap32_take(heap, size) (heap32_ensure_head(heap), _heap32_take(heap, size)) #define heap64_take(heap, size) (heap64_ensure_head(heap), _heap64_take(heap, size)) #define heap8_take0(heap, size) (heap8_ensure_head(heap), _heap8_take0(heap, size)) #define heap16_take0(heap, size) (heap16_ensure_head(heap), _heap16_take0(heap, size)) #define heap32_take0(heap, size) (heap32_ensure_head(heap), _heap32_take0(heap, size)) #define heap64_take0(heap, size) (heap64_ensure_head(heap), _heap64_take0(heap, size)) UTILAPI void heap8_pop (heap8 *heap, void *taken, size_t size); UTILAPI void heap16_pop (heap16 *heap, void *taken, size_t size); UTILAPI void heap32_pop (heap32 *heap, void *taken, size_t size); UTILAPI void heap64_pop (heap64 *heap, void *taken, size_t size); UTILAPI void * _heap8_some (heap8 *heap, size_t size, size_t *pspace); UTILAPI void * _heap16_some (heap16 *heap, size_t size, size_t *pspace); UTILAPI void * _heap32_some (heap32 *heap, size_t size, size_t *pspace); UTILAPI void * _heap64_some (heap64 *heap, size_t size, size_t *pspace); #define heap8_some(heap, size, pspace) (heap8_ensure_head(heap), _heap8_some(heap, size, pspace)) #define heap16_some(heap, size, pspace) (heap16_ensure_head(heap), _heap16_some(heap, size, pspace)) #define heap32_some(heap, size, pspace) (heap32_ensure_head(heap), _heap32_some(heap, size, pspace)) #define heap64_some(heap, size, pspace) (heap64_ensure_head(heap), _heap64_some(heap, size, pspace)) UTILAPI void * heap8_more (heap8 *heap, void *taken, size_t written, size_t size, size_t *pspace); UTILAPI void * heap16_more (heap16 *heap, void *taken, size_t written, size_t size, size_t *pspace); UTILAPI void * heap32_more (heap32 *heap, void *taken, size_t written, size_t size, size_t *pspace); UTILAPI void * heap64_more (heap64 *heap, void *taken, size_t written, size_t size, size_t *pspace); UTILAPI void heap8_done (heap8 *heap, void *taken, size_t written); UTILAPI void heap16_done (heap16 *heap, void *taken, 
size_t written); UTILAPI void heap32_done (heap32 *heap, void *taken, size_t written); UTILAPI void heap64_done (heap64 *heap, void *taken, size_t written); UTILAPI void heap8_giveup (heap8 *heap, void *taken); UTILAPI void heap16_giveup (heap16 *heap, void *taken); UTILAPI void heap32_giveup (heap32 *heap, void *taken); UTILAPI void heap64_giveup (heap64 *heap, void *taken); UTILAPI int heap8_empty (heap8 *heap); UTILAPI int heap16_empty (heap16 *heap); UTILAPI int heap32_empty (heap32 *heap); UTILAPI int heap64_empty (heap64 *heap); UTILAPI void heap8_stats (heap8 *heap, mem_info *info, int append); UTILAPI void heap16_stats (heap16 *heap, mem_info *info, int append); UTILAPI void heap32_stats (heap32 *heap, mem_info *info, int append); UTILAPI void heap64_stats (heap64 *heap, mem_info *info, int append); #endifluametatex-2.10.08/source/libraries/pplib/util/utilmemheapiof.c000066400000000000000000000062221442250314700245520ustar00rootroot00000000000000 #include "utilmemheapiof.h" // this is identical to stock iof suite, keep in sync size_t heap8_writer (iof *O, iof_mode mode) { heap8 *heap; size_t written; heap = (heap8 *)O->link; switch (mode) { case IOFFLUSH: heap8_buffer_done(heap, O); O->buf = _heap8_some(heap, 0, &O->space); O->pos = O->buf; O->end = O->buf + O->space; break; case IOFWRITE: written = (size_t)iof_size(O); O->buf = heap8_more(heap, O->buf, written, written << 1, &O->space); O->pos = O->buf + written; O->end = O->buf + O->space; return O->space - written; case IOFCLOSE: default: break; } return 0; } size_t heap16_writer (iof *O, iof_mode mode) { heap16 *heap; size_t written; heap = (heap16 *)O->link; switch (mode) { case IOFFLUSH: heap16_buffer_done(heap, O); O->buf = _heap16_some(heap, 0, &O->space); O->pos = O->buf; O->end = O->buf + O->space; break; case IOFWRITE: written = (size_t)iof_size(O); O->buf = heap16_more(heap, O->buf, written, written << 1, &O->space); O->pos = O->buf + written; O->end = O->buf + O->space; return O->space - 
written; case IOFCLOSE: default: break; } return 0; } size_t heap32_writer (iof *O, iof_mode mode) { heap32 *heap; size_t written; heap = (heap32 *)O->link; switch (mode) { case IOFFLUSH: heap32_buffer_done(heap, O); O->buf = _heap32_some(heap, 0, &O->space); O->pos = O->buf; O->end = O->buf + O->space; break; case IOFWRITE: written = (size_t)iof_size(O); O->buf = heap32_more(heap, O->buf, written, written << 1, &O->space); O->pos = O->buf + written; O->end = O->buf + O->space; return O->space - written; case IOFCLOSE: default: break; } return 0; } size_t heap64_writer (iof *O, iof_mode mode) { heap64 *heap; size_t written; heap = (heap64 *)O->link; switch (mode) { case IOFFLUSH: heap64_buffer_done(heap, O); O->buf = _heap64_some(heap, 0, &O->space); O->pos = O->buf; O->end = O->buf + O->space; break; case IOFWRITE: written = (size_t)iof_size(O); O->buf = heap64_more(heap, O->buf, written, written << 1, &O->space); O->pos = O->buf + written; O->end = O->buf + O->space; return O->space - written; case IOFCLOSE: default: break; } return 0; } /* buffer for some */ iof * _heap8_buffer_some (heap8 *heap, iof *O, size_t atleast) { O->buf = _heap8_some(heap, atleast, &O->space); O->pos = O->buf; O->end = O->buf + O->space; return O; } iof * _heap16_buffer_some (heap16 *heap, iof *O, size_t atleast) { O->buf = _heap16_some(heap, atleast, &O->space); O->pos = O->buf; O->end = O->buf + O->space; return O; } iof * _heap32_buffer_some (heap32 *heap, iof *O, size_t atleast) { O->buf = _heap32_some(heap, atleast, &O->space); O->pos = O->buf; O->end = O->buf + O->space; return O; } iof * _heap64_buffer_some (heap64 *heap, iof *O, size_t atleast) { O->buf = _heap64_some(heap, atleast, &O->space); O->pos = O->buf; O->end = O->buf + O->space; return O; } luametatex-2.10.08/source/libraries/pplib/util/utilmemheapiof.h000066400000000000000000000045401442250314700245600ustar00rootroot00000000000000 #ifndef UTIL_MEM_HEAP_IOF_H #define UTIL_MEM_HEAP_IOF_H #include "utilmemheap.h" 
#include "utiliof.h" UTILAPI size_t heap8_writer (iof *O, iof_mode mode); UTILAPI size_t heap16_writer (iof *O, iof_mode mode); UTILAPI size_t heap32_writer (iof *O, iof_mode mode); UTILAPI size_t heap64_writer (iof *O, iof_mode mode); #define HEAP8_BUFFER_INIT(heap) IOF_WRITER_INIT(heap8_writer, (void *)(heap), NULL, 0, 0) #define HEAP16_BUFFER_INIT(heap) IOF_WRITER_INIT(heap16_writer, (void *)(heap), NULL, 0, 0) #define HEAP32_BUFFER_INIT(heap) IOF_WRITER_INIT(heap32_writer, (void *)(heap), NULL, 0, 0) #define HEAP64_BUFFER_INIT(heap) IOF_WRITER_INIT(heap64_writer, (void *)(heap), NULL, 0, 0) #define heap8_buffer_init(heap, O) iof_writer(O, (void *)(heap), heap8_writer, NULL, 0) #define heap16_buffer_init(heap, O) iof_writer(O, (void *)(heap), heap16_writer, NULL, 0) #define heap32_buffer_init(heap, O) iof_writer(O, (void *)(heap), heap32_writer, NULL, 0) #define heap64_buffer_init(heap, O) iof_writer(O, (void *)(heap), heap64_writer, NULL, 0) UTILAPI iof * _heap8_buffer_some (heap8 *heap, iof *O, size_t atleast); UTILAPI iof * _heap16_buffer_some (heap16 *heap, iof *O, size_t atleast); UTILAPI iof * _heap32_buffer_some (heap32 *heap, iof *O, size_t atleast); UTILAPI iof * _heap64_buffer_some (heap64 *heap, iof *O, size_t atleast); #define heap8_buffer_some(heap, O, atleast) (heap8_ensure_head(heap), _heap8_buffer_some(heap, O, atleast)) #define heap16_buffer_some(heap, O, atleast) (heap16_ensure_head(heap), _heap16_buffer_some(heap, O, atleast)) #define heap32_buffer_some(heap, O, atleast) (heap32_ensure_head(heap), _heap32_buffer_some(heap, O, atleast)) #define heap64_buffer_some(heap, O, atleast) (heap64_ensure_head(heap), _heap64_buffer_some(heap, O, atleast)) #define heap8_buffer_done(heap, O) heap8_done(heap, (O)->buf, (size_t)iof_size(O)) #define heap16_buffer_done(heap, O) heap16_done(heap, (O)->buf, (size_t)iof_size(O)) #define heap32_buffer_done(heap, O) heap32_done(heap, (O)->buf, (size_t)iof_size(O)) #define heap64_buffer_done(heap, O) 
heap64_done(heap, (O)->buf, (size_t)iof_size(O)) #define heap8_buffer_giveup(heap, O) heap8_giveup(heap, (O)->buf) #define heap16_buffer_giveup(heap, O) heap16_giveup(heap, (O)->buf) #define heap32_buffer_giveup(heap, O) heap32_giveup(heap, (O)->buf) #define heap64_buffer_giveup(heap, O) heap64_giveup(heap, (O)->buf) #endifluametatex-2.10.08/source/libraries/pplib/util/utilmeminfo.c000066400000000000000000000034651442250314700241000ustar00rootroot00000000000000/* print stats; common for heap, stock and pool */ #include #include "utilmeminfo.h" #define UINT(i) ((unsigned long)(i)) void show_mem_info (mem_info *info) { size_t totalwaste, totalmem, averagechunk, singlemem; double ftotalwaste, fblockwaste, fghostwaste, ftailwaste, fsinglewaste; double funused, fsingles, fsinglemem, fsingleeff; totalwaste = info->ghosts + info->blockghosts + info->left; totalmem = info->used + totalwaste; ftotalwaste = totalmem > 0 ? totalwaste * 100.0 / totalmem : 0; fblockwaste = totalmem > 0 ? (info->blockghosts - info->singleghosts) * 100.0 / totalmem : 0; fsinglewaste = totalmem > 0 ? info->singleghosts * 100.0 / totalmem : 0; fghostwaste = totalmem > 0 ? info->ghosts * 100.0 / totalmem : 0; ftailwaste = totalmem > 0 ? info->left * 100.0 / totalmem : 0; averagechunk = info->chunks > 0 ? info->used / info->chunks : 0; funused = info->chunks > 0 ? info->unused * 100.0 / info->chunks : 0.0; fsingles = info->blocks > 0 ? info->singles * 100.0 / info->blocks : 0; fsinglemem = info->used > 0 ? info->singleused * 100.0 / info->used : 0; singlemem = info->singleused + info->singleghosts; fsingleeff = singlemem > 0 ? 
info->singleused * 100.0 / singlemem : 0; printf("total: %lu + %lu = %lu\n", UINT(info->used), UINT(totalwaste), UINT(totalmem)); printf("chunks: %lu of average size %lu, unused %lu[%.2f%%]\n", UINT(info->chunks), UINT(averagechunk), UINT(info->unused), funused); printf("blocks: %lu, singles %lu[%.2f%%], %.2f%% of allocs, efficiency %.2f%%\n", UINT(info->blocks), UINT(info->singles), fsingles, fsinglemem, fsingleeff); printf("waste: %lu[%0.2f%%], block ghosts %0.2f%%, single ghosts %.2f%%, chunk ghosts %0.2f%%, tails %0.2f%%\n\n", UINT(totalwaste), ftotalwaste, fblockwaste, fsinglewaste, fghostwaste, ftailwaste); } luametatex-2.10.08/source/libraries/pplib/util/utilmeminfo.h000066400000000000000000000002001442250314700240650ustar00rootroot00000000000000 #ifndef UTIL_MEM_INFO_H #define UTIL_MEM_INFO_H #include "utilmemallh.h" UTILAPI void show_mem_info (mem_info *info); #endifluametatex-2.10.08/source/libraries/pplib/util/utilnumber.c000066400000000000000000001162101442250314700237270ustar00rootroot00000000000000 #include /* for log10() and floor() */ #include /* for printf() */ #include "utilnumber.h" // todo: lookups can be chars // change lookup arrays to some __name to discourage accessing them directly; they always should be accessed via macros; base16_value() base16_digit() // const int base10_lookup[] = { -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 }; const int base16_lookup[] = { -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1, -1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 }; const int base26_lookup[] = { -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15, 16,17,18,19,20,21,22,23,24,25,26,-1,-1,-1,-1,-1, -1, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15, 16,17,18,19,20,21,22,23,24,25,26,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 }; const int base36_lookup[] = { -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 
9,-1,-1,-1,-1,-1,-1, -1,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24, 25,26,27,28,29,30,31,32,33,34,35,-1,-1,-1,-1,-1, -1,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24, 25,26,27,28,29,30,31,32,33,34,35,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 }; /* common buffer for quick conversions (unsafe) */ char util_number_buffer[NUMBER_BUFFER_SIZE] = { 0 }; /* integer from string; return a pointer to character next to the last digit */ #define string_scan_sign(s, c, sign) _scan_sign(c, sign, *++s) #define string_scan_integer(s, c, number) _scan_integer(c, number, *++s) #define string_scan_radix(s, c, number, radix) _scan_radix(c, number, radix, *++s) #define string_read_integer(s, c, number) _read_integer(c, number, *++s) #define string_read_radix(s, c, number, radix) _read_radix(c, number, radix, *++s) const char * string_to_int32 (const char *s, int32_t *number) { int sign, c = *s; string_scan_sign(s, c, sign); string_scan_integer(s, c, *number); if (sign) *number = -*number; return s; } const char * string_to_slong (const char *s, long *number) { int sign, c = *s; string_scan_sign(s, c, sign); string_scan_integer(s, c, *number); if (sign) *number = -*number; return s; } const char * string_to_int64 (const char *s, int64_t *number) { int sign, c = *s; string_scan_sign(s, c, sign); string_scan_integer(s, c, *number); if (sign) *number = -*number; return s; } const char * string_to_uint32 (const char *s, uint32_t *number) { int c = *s; string_scan_integer(s, c, *number); return s; } const char * string_to_ulong (const char *s, unsigned long *number) { int c = *s; string_scan_integer(s, c, *number); return s; } const 
char * string_to_usize (const char *s, size_t *number) { int c = *s; string_scan_integer(s, c, *number); return s; } const char * string_to_uint64 (const char *s, uint64_t *number) { int c = *s; string_scan_integer(s, c, *number); return s; } const char * radix_to_int32 (const char *s, int32_t *number, int radix) { int sign, c = *s; string_scan_sign(s, c, sign); string_scan_radix(s, c, *number, radix); if (sign) *number = -*number; return s; } const char * radix_to_slong (const char *s, long *number, int radix) { int sign, c = *s; string_scan_sign(s, c, sign); string_scan_radix(s, c, *number, radix); if (sign) *number = -*number; return s; } const char * radix_to_int64 (const char *s, int64_t *number, int radix) { int sign, c = *s; string_scan_sign(s, c, sign); string_scan_radix(s, c, *number, radix); if (sign) *number = -*number; return s; } const char * radix_to_uint32 (const char *s, uint32_t *number, int radix) { int c = *s; string_scan_radix(s, c, *number, radix); return s; } const char * radix_to_ulong (const char *s, unsigned long *number, int radix) { int c = *s; string_scan_radix(s, c, *number, radix); return s; } const char * radix_to_usize (const char *s, size_t *number, int radix) { int c = *s; string_scan_radix(s, c, *number, radix); return s; } const char * radix_to_uint64 (const char *s, uint64_t *number, int radix) { int c = *s; string_scan_radix(s, c, *number, radix); return s; } /* roman to uint16_t */ #define roman1000(c) (c == 'M' || c == 'm') #define roman500(c) (c == 'D' || c == 'd') #define roman100(c) (c == 'C' || c == 'c') #define roman50(c) (c == 'L' || c == 'l') #define roman10(c) (c == 'X' || c == 'x') #define roman5(c) (c == 'V' || c == 'v') #define roman1(c) (c == 'I' || c == 'i') #define roman100s(p) (roman100(*p) ? (100 + ((++p, roman100(*p)) ? (100 + ((++p, roman100(*p)) ? (++p, 100) : 0)) : 0)) : 0) #define roman10s(p) (roman10(*p) ? (10 + ((++p, roman10(*p)) ? (10 + ((++p, roman10(*p)) ? 
(++p, 10) : 0)) : 0)) : 0) #define roman1s(p) (roman1(*p) ? (1 + ((++p, roman1(*p)) ? (1 + ((++p, roman1(*p)) ? (++p, 1) : 0)) : 0)) : 0) const char * roman_to_uint16 (const char *s, uint16_t *number) { const char *p; /* M */ for (*number = 0, p = s; roman1000(*p); *number += 1000, ++p); /* D C */ if (roman500(*p)) { ++p; *number += 500 + roman100s(p); } else if (roman100(*p)) { ++p; if (roman1000(*p)) { ++p; *number += 900; } else if (roman500(*p)) { ++p; *number += 400; } else *number += 100 + roman100s(p); } /* L X */ if (roman50(*p)) { ++p; *number += 50 + roman10s(p); } else if (roman10(*p)) { ++p; if (roman100(*p)) { ++p; *number += 90; } else if (roman50(*p)) { ++p; *number += 40; } else *number += 10 + roman10s(p); } /* V I */ if (roman5(*p)) { ++p; *number += 5 + roman1s(p); } else if (roman1(*p)) { ++p; if (roman10(*p)) { ++p; *number += 9; } else if (roman5(*p)) { ++p; *number += 4; } else *number += 1 + roman1s(p); } return p; } /* integer to string; return a pointer to null-terminated static const string */ #define end_of_integer_buffer(integer_buffer) (integer_buffer + MAX_INTEGER_DIGITS - 1) #define number_printrev_signed(p, number, quotient) \ do { \ quotient = number; number /= 10; \ *--p = base10_palindrome[9 + (quotient - number*10)]; \ } while (number); \ if (quotient < 0) *--p = '-' #define number_printrev_unsigned(p, number, quotient) \ do { \ quotient = number; number /= 10; \ *--p = (char)(quotient - integer_multiplied10(number)) + '0'; \ } while (number) #define SINTTYPE_AS_STRING(inttype, number, ibuf, psize) \ char *p, *e; \ inttype quotient; \ e = p = end_of_integer_buffer(ibuf); *p = '\0'; \ number_printrev_signed(p, number, quotient); \ *psize = (size_t)(e - p) #define UINTTYPE_AS_STRING(inttype, number, ibuf, psize) \ char *p, *e; \ inttype quotient; \ e = p = end_of_integer_buffer(ibuf); *p = '\0'; \ number_printrev_unsigned(p, number, quotient); \ *psize = (size_t)(e - p) char * int32_as_string (int32_t number, char 
ibuf[MAX_INTEGER_DIGITS], size_t *psize) { SINTTYPE_AS_STRING(int32_t, number, ibuf, psize); return p; } char * slong_as_string (long number, char ibuf[MAX_INTEGER_DIGITS], size_t *psize) { SINTTYPE_AS_STRING(long, number, ibuf, psize); return p; } char * int64_as_string (int64_t number, char ibuf[MAX_INTEGER_DIGITS], size_t *psize) { SINTTYPE_AS_STRING(int64_t, number, ibuf, psize); return p; } char * uint32_as_string (uint32_t number, char ibuf[MAX_INTEGER_DIGITS], size_t *psize) { UINTTYPE_AS_STRING(uint32_t, number, ibuf, psize); return p; } char * ulong_as_string (unsigned long number, char ibuf[MAX_INTEGER_DIGITS], size_t *psize) { UINTTYPE_AS_STRING(unsigned long, number, ibuf, psize); return p; } char * usize_as_string (size_t number, char ibuf[MAX_INTEGER_DIGITS], size_t *psize) { UINTTYPE_AS_STRING(size_t, number, ibuf, psize); return p; } char * uint64_as_string (uint64_t number, char ibuf[MAX_INTEGER_DIGITS], size_t *psize) { UINTTYPE_AS_STRING(uint64_t, number, ibuf, psize); return p; } /* radix variant */ #define number_printrev_signed_radix_uc(p, number, radix, quotient) \ do { \ quotient = number; number /= radix; \ *--p = base36_uc_palindrome[MAX_RADIX - 1 + (quotient - number*radix)]; \ } while (number) #define number_printrev_signed_radix_lc(p, number, radix, quotient) \ do { \ quotient = number; number /= radix; \ *--p = base36_lc_palindrome[MAX_RADIX - 1 + (quotient - number*radix)]; \ } while (number) #define number_printrev_signed_radix(p, number, radix, quotient, uc) \ do { \ if (uc) { number_printrev_signed_radix_uc(p, number, radix, quotient); } \ else { number_printrev_signed_radix_lc(p, number, radix, quotient); } \ if (quotient < 0) *--p = '-'; \ } while (0) #define number_printrev_unsigned_radix_uc(p, number, radix, quotient) \ do { \ quotient = number; number /= radix; \ *--p = base36_uc_alphabet[quotient % radix]; \ } while (number) #define number_printrev_unsigned_radix_lc(p, number, radix, quotient) \ do { \ quotient = number; 
number /= radix; \ *--p = base36_lc_alphabet[quotient % radix]; \ } while (number) #define number_printrev_unsigned_radix(p, number, radix, quotient, uc) \ do { \ if (uc) { number_printrev_unsigned_radix_uc(p, number, radix, quotient); } \ else { number_printrev_unsigned_radix_lc(p, number, radix, quotient); } \ } while (0) #define SINTTYPE_AS_RADIX(inttype, number, radix, uc, ibuf, psize) \ char *p, *e; \ inttype quotient; \ e = p = end_of_integer_buffer(ibuf); *p = '\0'; \ number_printrev_signed_radix(p, number, radix, quotient, uc); \ *psize = (size_t)(e - p) #define UINTTYPE_AS_RADIX(inttype, number, radix, uc, ibuf, psize) \ char *p, *e; \ inttype quotient; \ e = p = end_of_integer_buffer(ibuf); *p = '\0'; \ number_printrev_unsigned_radix(p, number, radix, quotient, uc); \ *psize = (size_t)(e - p) char * int32_as_radix (int32_t number, int radix, int uc, char ibuf[MAX_INTEGER_DIGITS], size_t *psize) { SINTTYPE_AS_RADIX(int32_t, number, radix, uc, ibuf, psize); return p; } char * slong_as_radix (long number, int radix, int uc, char ibuf[MAX_INTEGER_DIGITS], size_t *psize) { SINTTYPE_AS_RADIX(long, number, radix, uc, ibuf, psize); return p; } /* char * ssize_as_radix (ssize_t number, int radix, int uc, char ibuf[MAX_INTEGER_DIGITS], size_t *psize) { SINTTYPE_AS_RADIX(ssize_t, number, radix, uc, ibuf, psize); return p; } */ char * int64_as_radix (int64_t number, int radix, int uc, char ibuf[MAX_INTEGER_DIGITS], size_t *psize) { SINTTYPE_AS_RADIX(int64_t, number, radix, uc, ibuf, psize); return p; } char * uint32_as_radix (uint32_t number, int radix, int uc, char ibuf[MAX_INTEGER_DIGITS], size_t *psize) { UINTTYPE_AS_RADIX(uint32_t, number, radix, uc, ibuf, psize); return p; } char * ulong_as_radix (unsigned long number, int radix, int uc, char ibuf[MAX_INTEGER_DIGITS], size_t *psize) { UINTTYPE_AS_RADIX(unsigned long, number, radix, uc, ibuf, psize); return p; } char * usize_as_radix (size_t number, int radix, int uc, char ibuf[MAX_INTEGER_DIGITS], size_t *psize) 
{ UINTTYPE_AS_RADIX(size_t, number, radix, uc, ibuf, psize); return p; } char * uint64_as_radix (uint64_t number, int radix, int uc, char ibuf[MAX_INTEGER_DIGITS], size_t *psize) { UINTTYPE_AS_RADIX(uint64_t, number, radix, uc, ibuf, psize); return p; } /* aaa, aab, aac, ...; unsigned only. 0 gives empty string */ #define string_scan_alpha(s, c, number, radix) \ for (number = 0, c = *s; (c = base26_value(c)) > 0; number = number * radix + c, c = *++s) const char * alpha_to_uint32 (const char *s, uint32_t *number) { int c; string_scan_alpha(s, c, *number, 26); return s; } const char * alpha_to_ulong (const char *s, unsigned long *number) { int c; string_scan_alpha(s, c, *number, 26); return s; } const char * alpha_to_usize (const char *s, size_t *number) { int c; string_scan_alpha(s, c, *number, 26); return s; } const char * alpha_to_uint64 (const char *s, uint64_t *number) { int c; string_scan_alpha(s, c, *number, 26); return s; } #define number_printrev_unsigned_alpha_uc(p, number, radix, quotient) \ while (number > 0) { \ quotient = --number; number /= radix; \ *--p = base26_uc_alphabet[quotient % radix]; \ } #define number_printrev_unsigned_alpha_lc(p, number, radix, quotient) \ while (number > 0) { \ quotient = --number; number /= radix; \ *--p = base26_lc_alphabet[quotient % radix]; \ } #define UINTTYPE_AS_ALPHA(inttype, number, uc, ibuf, psize) \ char *p, *e; \ inttype quotient; \ e = p = end_of_integer_buffer(ibuf); *p = '\0'; \ if (uc) { number_printrev_unsigned_alpha_uc(p, number, 26, quotient); } \ else { number_printrev_unsigned_alpha_lc(p, number, 26, quotient); } \ *psize = (size_t)(e - p) char * uint32_as_alpha (uint32_t number, int uc, char ibuf[MAX_INTEGER_DIGITS], size_t *psize) { UINTTYPE_AS_ALPHA(uint32_t, number, uc, ibuf, psize); return p; } char * ulong_as_alpha (unsigned long number, int uc, char ibuf[MAX_INTEGER_DIGITS], size_t *psize) { UINTTYPE_AS_ALPHA(unsigned long, number, uc, ibuf, psize); return p; } char * usize_as_alpha (size_t 
number, int uc, char ibuf[MAX_INTEGER_DIGITS], size_t *psize) { UINTTYPE_AS_ALPHA(size_t, number, uc, ibuf, psize); return p; } char * uint64_as_alpha (uint64_t number, int uc, char ibuf[MAX_INTEGER_DIGITS], size_t *psize) { UINTTYPE_AS_ALPHA(uint64_t, number, uc, ibuf, psize); return p; } /* a variant of alphabetic, a, b, c, ..., z, aa, bb, cc, ..., zz (eg. pdf page labelling) watch out: unsafe for large numbers; for buffer size N we can handle max. N * 26. */ #define string_scan_alphan(s, c, number, radix) \ do { \ number = 0; \ if ((c = (uint16_t)base26_value(*s)) > 0) { \ number = c; \ while (c == (uint16_t)base26_value(*++s)) number += radix; \ } \ } while (0) const char * alphan_to_uint16 (const char *s, uint16_t *number) { uint16_t c; string_scan_alphan(s, c, *number, 26); return s; } #define number_print_alphan_uc(p, e, c, number, radix) \ for (c = (--number) % radix, number -= c; ; number -= radix) { \ *p++ = base26_uc_alphabet[c]; \ if (number == 0 || p >= e) break; \ } #define number_print_alphan_lc(p, e, c, number, radix) \ for (c = (--number) % radix, number -= c; ; number -= radix) { \ *p++ = base26_lc_alphabet[c]; \ if (number == 0 || p >= e) break; \ } #define UINTTYPE_AS_ALPHAN(inttype, number, uc, ibuf, size, psize) \ char *p, *e; \ uint8_t c; \ p = ibuf; \ e = p + size; \ if (number > 0) { \ if (uc) { number_print_alphan_uc(p, e, c, number, 26); } \ else { number_print_alphan_lc(p, e, c, number, 26); } \ } \ *p = '\0'; \ *psize = (size_t)(p - ibuf) char * uint16_as_alphan (uint16_t number, int uc, char ibuf[], size_t size, size_t *psize) { UINTTYPE_AS_ALPHAN(uint16_t, number, uc, ibuf, size, psize); return ibuf; } /* roman numeral */ /* large roman numerals? 
http://mathforum.org/library/drmath/view/57569.html */ #define base_roman_uc_alphabet "MDCLXVI" #define base_roman_lc_alphabet "mdclxvi" char * uint16_as_roman (uint16_t number, int uc, char ibuf[MAX_ROMAN_DIGITS], size_t *psize) { static const uint32_t base_roman_values[] = { 1000, 500, 100, 50, 10, 5, 1 }; const char *alphabet; char *p; uint32_t k, j, v, u, n; n = (uint32_t)number; // uint16_t used to limit leding 'M' alphabet = uc ? base_roman_uc_alphabet : base_roman_lc_alphabet; for (p = ibuf, j = 0, v = base_roman_values[0]; n > 0; ) { if (n >= v) { *p++ = alphabet[j]; n -= v; continue; } if (j & 1) k = j + 1; else k = j + 2; u = base_roman_values[k]; if (n + u >= v) { *p++ = alphabet[k]; n += u; } else v = base_roman_values[++j]; } *p = '\0'; *psize = (size_t)(p - ibuf); return ibuf; } /* IEEE-754 */ #define BINARY_MODF 1 #define NOT_A_NUMBER_STRING "NaN" #define INFINITY_STRING "INF" #define SIGNED_INFINITY 1 #define SIGNED_ZERO 0 #define SIGNED_NOT_A_NUMBER 0 #define RADIX_CHAR '.' 
/* double/float to decimal */ typedef struct ieee_double { union { double number; uint64_t bits; }; uint64_t fraction; int exponent, sign; } ieee_double; typedef struct ieee_float { union { float number; uint32_t bits; }; uint32_t fraction; int exponent, sign; } ieee_float; #define IEEE_DOUBLE_BIAS 1023 #define IEEE_DOUBLE_MIN_EXPONENT -1023 #define IEEE_DOUBLE_MAX_EXPONENT (0x7ff - IEEE_DOUBLE_BIAS) #define IEEE_FLOAT_BIAS 127 #define IEEE_FLOAT_MIN_EXPONENT -127 #define IEEE_FLOAT_MAX_EXPONENT (0xff - IEEE_FLOAT_BIAS) #define ieee_double_fraction(i) (i & 0x000fffffffffffffull) #define ieee_double_exponent(i) ((0x7ff & (i >> 52)) - IEEE_DOUBLE_BIAS) #define ieee_double_init(ieee_number, number) \ ieee_number.number = number, \ ieee_number.fraction = ieee_double_fraction(ieee_number.bits), \ ieee_number.exponent = ieee_double_exponent(ieee_number.bits) #define ieee_float_fraction(i) (i & 0x007fffff) #define ieee_float_exponent(i) ((0xff & (i >> 23)) - IEEE_FLOAT_BIAS) #define ieee_float_init(ieee_number, number) \ ieee_number.number = number, \ ieee_number.fraction = ieee_float_fraction(ieee_number.bits), \ ieee_number.exponent = ieee_float_exponent(ieee_number.bits) /* special cases */ #define ieee_double_is_zero(ieee_number) (ieee_number.number == 0) // || ieee_double_too_small(ieee_number) ? #define ieee_double_too_small(ieee_number) (ieee_number.exponent == 0 && ieee_number.fraction != 0) // denormalized, implicit fracion bit not set #define ieee_float_is_zero(ieee_number) (ieee_number.number == 0) // || ieee_float_too_small(ieee_number) ? #define ieee_float_too_small(ieee_number) (ieee_number.exponent == 0 && ieee_number.fraction != 0) #define ieee_double_zero_string(ieee_number) (SIGNED_ZERO && ieee_number.sign ? "-0" : "0") #define ieee_double_infinity_string(ieee_number) (SIGNED_INFINITY && ieee_number.sign ? 
"-" INFINITY_STRING : INFINITY_STRING) #define ieee_float_zero_string ieee_double_zero_string #define ieee_float_infinity_string ieee_double_infinity_string #define ieee_double_special_case(ieee_number) (ieee_number.exponent == IEEE_DOUBLE_MAX_EXPONENT) #define ieee_double_special_string(ieee_number) (ieee_number.fraction ? NOT_A_NUMBER_STRING : ieee_double_infinity_string(ieee_number)) #define ieee_float_special_case(ieee_number) (ieee_number.exponent == IEEE_FLOAT_MAX_EXPONENT) #define ieee_float_special_string(ieee_number) (ieee_number.fraction ? NOT_A_NUMBER_STRING : ieee_float_infinity_string(ieee_number)) #if 0 const double double_binary_power10[] = { 1.0e1, 1.0e2, 1.0e4, 1.0e8, 1.0e16, 1.0e32, 1.0e64, 1.0e128, 1.0e256 }; const float float_binary_power10[] = { 1.0e1, 1.0e2, 1.0e4, 1.0e8, 1.0e16, 1.0e32 }; const double double_binary_negpower10[] = { 1.0e-1, 1.0e-2, 1.0e-4, 1.0e-8, 1.0e-16, 1.0e-32 }; const float float_binary_negpower10[] = { 1.0e-1, 1.0e-2, 1.0e-4, 1.0e-8, 1.0e-16, 1.0e-32 }; #else const double double_decimal_power10[] = { 1.0e0, 1.0e1, 1.0e2, 1.0e3, 1.0e4, 1.0e5, 1.0e6, 1.0e7, 1.0e8, 1.0e9, 1.0e10, 1.0e11, 1.0e12, 1.0e13, 1.0e14, 1.0e15, 1.0e16, 1.0e17, 1.0e18, 1.0e19, 1.0e20, 1.0e21, 1.0e22, 1.0e23, 1.0e24, 1.0e25, 1.0e26, 1.0e27, 1.0e28, 1.0e29, 1.0e30, 1.0e31, 1.0e32, 1.0e33, 1.0e34, 1.0e35, 1.0e36, 1.0e37, 1.0e38, 1.0e39, 1.0e40, 1.0e41, 1.0e42, 1.0e43, 1.0e44, 1.0e45, 1.0e46, 1.0e47, 1.0e48, 1.0e49, 1.0e50, 1.0e51, 1.0e52, 1.0e53, 1.0e54, 1.0e55, 1.0e56, 1.0e57, 1.0e58, 1.0e59, 1.0e60, 1.0e61, 1.0e62, 1.0e63, 1.0e64, 1.0e65, 1.0e66, 1.0e67, 1.0e68, 1.0e69, 1.0e70, 1.0e71, 1.0e72, 1.0e73, 1.0e74, 1.0e75, 1.0e76, 1.0e77, 1.0e78, 1.0e79, 1.0e80, 1.0e81, 1.0e82, 1.0e83, 1.0e84, 1.0e85, 1.0e86, 1.0e87, 1.0e88, 1.0e89, 1.0e90, 1.0e91, 1.0e92, 1.0e93, 1.0e94, 1.0e95, 1.0e96, 1.0e97, 1.0e98, 1.0e99, 1.0e100, 1.0e101, 1.0e102, 1.0e103, 1.0e104, 1.0e105, 1.0e106, 1.0e107, 1.0e108, 1.0e109, 1.0e110, 1.0e111, 1.0e112, 1.0e113, 1.0e114, 1.0e115, 
1.0e116, 1.0e117, 1.0e118, 1.0e119, 1.0e120, 1.0e121, 1.0e122, 1.0e123, 1.0e124, 1.0e125, 1.0e126, 1.0e127, 1.0e128, 1.0e129, 1.0e130, 1.0e131, 1.0e132, 1.0e133, 1.0e134, 1.0e135, 1.0e136, 1.0e137, 1.0e138, 1.0e139, 1.0e140, 1.0e141, 1.0e142, 1.0e143, 1.0e144, 1.0e145, 1.0e146, 1.0e147, 1.0e148, 1.0e149, 1.0e150, 1.0e151, 1.0e152, 1.0e153, 1.0e154, 1.0e155, 1.0e156, 1.0e157, 1.0e158, 1.0e159, 1.0e160, 1.0e161, 1.0e162, 1.0e163, 1.0e164, 1.0e165, 1.0e166, 1.0e167, 1.0e168, 1.0e169, 1.0e170, 1.0e171, 1.0e172, 1.0e173, 1.0e174, 1.0e175, 1.0e176, 1.0e177, 1.0e178, 1.0e179, 1.0e180, 1.0e181, 1.0e182, 1.0e183, 1.0e184, 1.0e185, 1.0e186, 1.0e187, 1.0e188, 1.0e189, 1.0e190, 1.0e191, 1.0e192, 1.0e193, 1.0e194, 1.0e195, 1.0e196, 1.0e197, 1.0e198, 1.0e199, 1.0e200, 1.0e201, 1.0e202, 1.0e203, 1.0e204, 1.0e205, 1.0e206, 1.0e207, 1.0e208, 1.0e209, 1.0e210, 1.0e211, 1.0e212, 1.0e213, 1.0e214, 1.0e215, 1.0e216, 1.0e217, 1.0e218, 1.0e219, 1.0e220, 1.0e221, 1.0e222, 1.0e223, 1.0e224, 1.0e225, 1.0e226, 1.0e227, 1.0e228, 1.0e229, 1.0e230, 1.0e231, 1.0e232, 1.0e233, 1.0e234, 1.0e235, 1.0e236, 1.0e237, 1.0e238, 1.0e239, 1.0e240, 1.0e241, 1.0e242, 1.0e243, 1.0e244, 1.0e245, 1.0e246, 1.0e247, 1.0e248, 1.0e249, 1.0e250, 1.0e251, 1.0e252, 1.0e253, 1.0e254, 1.0e255, 1.0e256, 1.0e257, 1.0e258, 1.0e259, 1.0e260, 1.0e261, 1.0e262, 1.0e263, 1.0e264, 1.0e265, 1.0e266, 1.0e267, 1.0e268, 1.0e269, 1.0e270, 1.0e271, 1.0e272, 1.0e273, 1.0e274, 1.0e275, 1.0e276, 1.0e277, 1.0e278, 1.0e279, 1.0e280, 1.0e281, 1.0e282, 1.0e283, 1.0e284, 1.0e285, 1.0e286, 1.0e287, 1.0e288, 1.0e289, 1.0e290, 1.0e291, 1.0e292, 1.0e293, 1.0e294, 1.0e295, 1.0e296, 1.0e297, 1.0e298, 1.0e299, 1.0e300, 1.0e301, 1.0e302, 1.0e303, 1.0e304, 1.0e305, 1.0e306, 1.0e307, 1.0e308 }; const float float_decimal_power10[] = { 1.0e0f, 1.0e1f, 1.0e2f, 1.0e3f, 1.0e4f, 1.0e5f, 1.0e6f, 1.0e7f, 1.0e8f, 1.0e9f, 1.0e10f, 1.0e11f, 1.0e12f, 1.0e13f, 1.0e14f, 1.0e15f, 1.0e16f, 1.0e17f, 1.0e18f, 1.0e19f, 1.0e20f, 1.0e21f, 1.0e22f, 1.0e23f, 1.0e24f, 
1.0e25f, 1.0e26f, 1.0e27f, 1.0e28f, 1.0e29f, 1.0e30f, 1.0e31f, 1.0e32f, 1.0e33f, 1.0e34f, 1.0e35f, 1.0e36f, 1.0e37f, 1.0e38f }; const double double_decimal_negpower10[] = { 1.0e0, 1.0e-1, 1.0e-2, 1.0e-3, 1.0e-4, 1.0e-5, 1.0e-6, 1.0e-7, 1.0e-8, 1.0e-9, 1.0e-10, 1.0e-11, 1.0e-12, 1.0e-13, 1.0e-14, 1.0e-15, 1.0e-16, 1.0e-17, 1.0e-18, 1.0e-19, 1.0e-20, 1.0e-21, 1.0e-22, 1.0e-23, 1.0e-24, 1.0e-25, 1.0e-26, 1.0e-27, 1.0e-28, 1.0e-29, 1.0e-30, 1.0e-31, 1.0e-32, 1.0e-33, 1.0e-34, 1.0e-35, 1.0e-36, 1.0e-37, 1.0e-38, 1.0e-39, 1.0e-40, 1.0e-41, 1.0e-42, 1.0e-43, 1.0e-44, 1.0e-45, 1.0e-46, 1.0e-47, 1.0e-48, 1.0e-49, 1.0e-50, 1.0e-51, 1.0e-52, 1.0e-53, 1.0e-54, 1.0e-55, 1.0e-56, 1.0e-57, 1.0e-58, 1.0e-59, 1.0e-60, 1.0e-61, 1.0e-62, 1.0e-63, 1.0e-64, 1.0e-65, 1.0e-66, 1.0e-67, 1.0e-68, 1.0e-69, 1.0e-70, 1.0e-71, 1.0e-72, 1.0e-73, 1.0e-74, 1.0e-75, 1.0e-76, 1.0e-77, 1.0e-78, 1.0e-79, 1.0e-80, 1.0e-81, 1.0e-82, 1.0e-83, 1.0e-84, 1.0e-85, 1.0e-86, 1.0e-87, 1.0e-88, 1.0e-89, 1.0e-90, 1.0e-91, 1.0e-92, 1.0e-93, 1.0e-94, 1.0e-95, 1.0e-96, 1.0e-97, 1.0e-98, 1.0e-99, 1.0e-100, 1.0e-101, 1.0e-102, 1.0e-103, 1.0e-104, 1.0e-105, 1.0e-106, 1.0e-107, 1.0e-108, 1.0e-109, 1.0e-110, 1.0e-111, 1.0e-112, 1.0e-113, 1.0e-114, 1.0e-115, 1.0e-116, 1.0e-117, 1.0e-118, 1.0e-119, 1.0e-120, 1.0e-121, 1.0e-122, 1.0e-123, 1.0e-124, 1.0e-125, 1.0e-126, 1.0e-127, 1.0e-128, 1.0e-129, 1.0e-130, 1.0e-131, 1.0e-132, 1.0e-133, 1.0e-134, 1.0e-135, 1.0e-136, 1.0e-137, 1.0e-138, 1.0e-139, 1.0e-140, 1.0e-141, 1.0e-142, 1.0e-143, 1.0e-144, 1.0e-145, 1.0e-146, 1.0e-147, 1.0e-148, 1.0e-149, 1.0e-150, 1.0e-151, 1.0e-152, 1.0e-153, 1.0e-154, 1.0e-155, 1.0e-156, 1.0e-157, 1.0e-158, 1.0e-159, 1.0e-160, 1.0e-161, 1.0e-162, 1.0e-163, 1.0e-164, 1.0e-165, 1.0e-166, 1.0e-167, 1.0e-168, 1.0e-169, 1.0e-170, 1.0e-171, 1.0e-172, 1.0e-173, 1.0e-174, 1.0e-175, 1.0e-176, 1.0e-177, 1.0e-178, 1.0e-179, 1.0e-180, 1.0e-181, 1.0e-182, 1.0e-183, 1.0e-184, 1.0e-185, 1.0e-186, 1.0e-187, 1.0e-188, 1.0e-189, 1.0e-190, 1.0e-191, 1.0e-192, 
1.0e-193, 1.0e-194, 1.0e-195, 1.0e-196, 1.0e-197, 1.0e-198, 1.0e-199, 1.0e-200, 1.0e-201, 1.0e-202, 1.0e-203, 1.0e-204, 1.0e-205, 1.0e-206, 1.0e-207, 1.0e-208, 1.0e-209, 1.0e-210, 1.0e-211, 1.0e-212, 1.0e-213, 1.0e-214, 1.0e-215, 1.0e-216, 1.0e-217, 1.0e-218, 1.0e-219, 1.0e-220, 1.0e-221, 1.0e-222, 1.0e-223, 1.0e-224, 1.0e-225, 1.0e-226, 1.0e-227, 1.0e-228, 1.0e-229, 1.0e-230, 1.0e-231, 1.0e-232, 1.0e-233, 1.0e-234, 1.0e-235, 1.0e-236, 1.0e-237, 1.0e-238, 1.0e-239, 1.0e-240, 1.0e-241, 1.0e-242, 1.0e-243, 1.0e-244, 1.0e-245, 1.0e-246, 1.0e-247, 1.0e-248, 1.0e-249, 1.0e-250, 1.0e-251, 1.0e-252, 1.0e-253, 1.0e-254, 1.0e-255, 1.0e-256, 1.0e-257, 1.0e-258, 1.0e-259, 1.0e-260, 1.0e-261, 1.0e-262, 1.0e-263, 1.0e-264, 1.0e-265, 1.0e-266, 1.0e-267, 1.0e-268, 1.0e-269, 1.0e-270, 1.0e-271, 1.0e-272, 1.0e-273, 1.0e-274, 1.0e-275, 1.0e-276, 1.0e-277, 1.0e-278, 1.0e-279, 1.0e-280, 1.0e-281, 1.0e-282, 1.0e-283, 1.0e-284, 1.0e-285, 1.0e-286, 1.0e-287, 1.0e-288, 1.0e-289, 1.0e-290, 1.0e-291, 1.0e-292, 1.0e-293, 1.0e-294, 1.0e-295, 1.0e-296, 1.0e-297, 1.0e-298, 1.0e-299, 1.0e-300, 1.0e-301, 1.0e-302, 1.0e-303, 1.0e-304, 1.0e-305, 1.0e-306, 1.0e-307, 1.0e-308 }; const float float_decimal_negpower10[] = { 1.0e0f, 1.0e-1f, 1.0e-2f, 1.0e-3f, 1.0e-4f, 1.0e-5f, 1.0e-6f, 1.0e-7f, 1.0e-8f, 1.0e-9f, 1.0e-10f, 1.0e-11f, 1.0e-12f, 1.0e-13f, 1.0e-14f, 1.0e-15f, 1.0e-16f, 1.0e-17f, 1.0e-18f, 1.0e-19f, 1.0e-20f, 1.0e-21f, 1.0e-22f, 1.0e-23f, 1.0e-24f, 1.0e-25f, 1.0e-26f, 1.0e-27f, 1.0e-28f, 1.0e-29f, 1.0e-30f, 1.0e-31f, 1.0e-32f, 1.0e-33f, 1.0e-34f, 1.0e-35f, 1.0e-36f, 1.0e-37f, 1.0e-38f }; #endif /* scale number by floor(log10(number)) + 1 so that the result is in range [0.1, 1) */ #define ieee_double_exponent10(ieee_number) ((int)floor(log10(ieee_number.number)) + 1) #define ieee_float_exponent10(ieee_number) ((int)floorf(log10f(ieee_number.number)) + 1) // floorf, log10f ? 
#define ieee_double_exp10(ieee_number, exponent10) \ exponent10 = ieee_double_exponent10(ieee_number); \ if (exponent10 > 0) { \ double_negative_exp10(ieee_number.number, -exponent10); \ ieee_number.fraction = ieee_double_fraction(ieee_number.bits); \ ieee_number.exponent = ieee_double_exponent(ieee_number.bits); \ } else if (exponent10 < 0) { \ double_positive_exp10(ieee_number.number, -exponent10); \ ieee_number.fraction = ieee_double_fraction(ieee_number.bits); \ ieee_number.exponent = ieee_double_exponent(ieee_number.bits); \ } #define ieee_float_exp10(ieee_number, exponent10) \ exponent10 = ieee_float_exponent10(ieee_number); \ if (exponent10 > 0) { \ float_negative_exp10(ieee_number.number, -exponent10); \ ieee_number.fraction = ieee_float_fraction(ieee_number.bits); \ ieee_number.exponent = ieee_float_exponent(ieee_number.bits); \ } else if (exponent10 < 0) { \ float_positive_exp10(ieee_number.number, -exponent10); \ ieee_number.fraction = ieee_float_fraction(ieee_number.bits); \ ieee_number.exponent = ieee_float_exponent(ieee_number.bits); \ } #if BINARY_MODF /* unhide implicit bit 53, produce 56-bit denormalised fraction (binary exponent already in range [-4, -1]) */ #define ieee_double_denormalize(ieee_number) \ (ieee_number.exponent == IEEE_DOUBLE_MIN_EXPONENT ? (++ieee_number.exponent, 0) : (ieee_number.fraction |= (1ull<<52))), \ ieee_number.fraction <<= (ieee_number.exponent + 4) /* unhide implicit bit 24, produce 27-bit denormalized fraction (binary exponent already in range [-4, -1]) */ #define ieee_float_denormalize(ieee_number) \ (ieee_number.exponent == IEEE_FLOAT_MIN_EXPONENT ? 
(++ieee_number.exponent, 0) : (ieee_number.fraction |= (1<<23))), \ ieee_number.fraction <<= (ieee_number.exponent + 4) /* turn off significant bits over 56 (integer part), multiply by 10, return new integer part (subsequent decimal digit) */ #define ieee_double_binary_fraction(ieee_number) \ (ieee_number.fraction &= ((1ull<<56) - 1), \ ieee_number.fraction = (ieee_number.fraction << 1) + (ieee_number.fraction << 3), \ ieee_number.fraction >> 56) /* turn off significant bits over 27 (integer part), multiply by 10, return the integer part (subsequent decimal digit) */ #define ieee_float_binary_fraction(ieee_number) \ (ieee_number.fraction &= ((1<<27) - 1), \ ieee_number.fraction = (ieee_number.fraction << 1) + (ieee_number.fraction << 3), \ ieee_number.fraction >> 27) #define ieee_double_decimal(ieee_number, exponent10, digits, p) \ ieee_number_decimal(ieee_double_binary_fraction, ieee_number, exponent10, digits, p) #define ieee_float_decimal(ieee_number, exponent10, digits, p) \ ieee_number_decimal(ieee_float_binary_fraction, ieee_number, exponent10, digits, p) #else /* generic method */ #define ieee_double_decimal_fraction(ieee_number, i) (ieee_number.number = modf(10*ieee_number.number, &i), i) #define ieee_float_decimal_fraction(ieee_number, i) (ieee_number.number = (float)modf(10*ieee_number.number, &i), i) // ??? 
#define ieee_double_decimal(ieee_number, exponent10, digits, p) \ ieee_number_decimal(ieee_double_decimal_fraction, ieee_number, exponent10, digits, p) #define ieee_float_decimal(ieee_number, exponent10, digits, p) \ ieee_number_decimal(ieee_float_decimal_fraction, ieee_number, exponent10, digits, p) #endif #define ieee_number_decimal(method, ieee_number, exponent10, digits, p) \ ieee_double_denormalize(ieee_number); \ if (ieee_number.sign) *p++ = '-'; \ if (exponent10 <= 0) \ for (*p++ = '0', *p++ = RADIX_CHAR; exponent10 && digits; *p++ = '0', ++exponent10, --digits); \ else \ { \ do { *p++ = '0' + (char)method(ieee_number); } while (--exponent10); \ *p++ = RADIX_CHAR; \ } \ for ( ; digits && ieee_number.fraction; --digits) \ *p++ = '0' + (char)method(ieee_number) /* rounding to nearest integer */ #if BINARY_MODF /* check if the mantissa has the most significant bit set, means >= 0.5 */ # define ieee_double_half(ieee_number) (ieee_number.fraction & (1ull<<55)) # define ieee_float_half(ieee_number) (ieee_number.fraction & (1<<26)) #else # define ieee_double_half(ieee_number) (ieee_number.number >= 0.5) # define ieee_float_half(ieee_number) (ieee_number.number >= 0.5) #endif /* rounding to nearest integer */ #define buffer_ceil(s, p, sign) \ { \ while (*--p == '9'); \ if (*p != RADIX_CHAR) ++*p++; \ else { \ char *q; \ for (q = p - 1; ; --q) { \ if (*q < '9') { ++*q; break; } \ *q = '0'; \ if (q == s) \ *--s = '1'; \ else if (sign && q - 1 == s) \ *s = '1', *--s = '-'; \ } \ } \ } #define buffer_remove_trailing_zeros(s, p, sign) \ { \ while (*--p == '0'); \ if (*p != RADIX_CHAR) \ ++p; \ else if (!SIGNED_ZERO && sign && p - 2 == s && *(p - 1) == '0') \ p -= 2, *p++ = '0'; \ } // if digits parameter was initially less then exponent10, then exponent10 > 0 and ieee_double_half(ieee_number) is irrelevant #define ieee_double_round(ieee_number, exponent10, s, p) \ if (exponent10 == 0 && ieee_double_half(ieee_number)) \ { buffer_ceil(s, p, ieee_number.sign); } \ else \ { 
buffer_remove_trailing_zeros(s, p, ieee_number.sign); } #define ieee_float_round(ieee_number, exponent10, s, p) \ if (exponent10 == 0 && ieee_float_half(ieee_number)) \ { buffer_ceil(s, p, ieee_number.sign); } \ else \ { buffer_remove_trailing_zeros(s, p, ieee_number.sign); } /* double to decimal */ #define ieee_copy_special_string(nbuf, special, p, _p) \ for (p = nbuf, _p = special; ; ++p, ++_p) { \ if ((*p = *_p) == '\0') break; \ } #define ieee_copy_special_string_re(nbuf, special, p, _p, r, e) \ for (p = nbuf, _p = special; ; ++p, ++_p) { \ if ((*p = *_p) == '\0') { \ if (r != NULL) *r = NULL; \ if (e != NULL) *e = p; \ break; \ } \ } char * double_as_string (double number, int digits, char nbuf[MAX_NUMBER_DIGITS], size_t *psize) { ieee_double ieee_number; int exponent10; char *s, *p; const char *_p; s = p = nbuf + 1; // for sign/rounding ieee_double_init(ieee_number, number); if ((ieee_number.sign = ieee_number.bits >> 63) != 0) ieee_number.number = -ieee_number.number; if (ieee_double_is_zero(ieee_number)) // to avoid crash on log10(number) { ieee_copy_special_string(nbuf, ieee_double_zero_string(ieee_number), p, _p); *psize = (size_t)(p - nbuf); return nbuf; } if (ieee_double_special_case(ieee_number)) { ieee_copy_special_string(nbuf, ieee_double_special_string(ieee_number), p, _p); *psize = (size_t)(p - nbuf); return nbuf; } ieee_double_exp10(ieee_number, exponent10); ieee_double_decimal(ieee_number, exponent10, digits, p); ieee_double_round(ieee_number, exponent10, s, p); *p = '\0'; *psize = (size_t)(p - s); return s; } /* float to decimal */ char * float_as_string (float number, int digits, char nbuf[MAX_NUMBER_DIGITS], size_t *psize) { ieee_float ieee_number; int exponent10; char *s, *p; const char *_p; s = p = nbuf + 1; // for sign/rounding ieee_float_init(ieee_number, number); if ((ieee_number.sign = ieee_number.bits >> 31) != 0) ieee_number.number = -ieee_number.number; if (ieee_float_is_zero(ieee_number)) { ieee_copy_special_string(nbuf, 
ieee_float_zero_string(ieee_number), p, _p); *psize = (size_t)(p - nbuf); return nbuf; } if (ieee_float_special_case(ieee_number)) { ieee_copy_special_string(nbuf, ieee_float_special_string(ieee_number), p, _p); *psize = (size_t)(p - nbuf); return nbuf; } ieee_float_exp10(ieee_number, exponent10); ieee_float_decimal(ieee_number, exponent10, digits, p); ieee_float_round(ieee_number, exponent10, s, p); *p = '\0'; *psize = (size_t)(p - s); return s; } /* decimal string to double/float */ #define string_scan_decimal(s, c, number) _scan_decimal(c, number, *++s) #define string_scan_fraction(s, c, number, exponent10) _scan_fraction(c, number, exponent10, *++s) #define string_scan_exponent10(s, c, exponent10) _scan_exponent10(c, exponent10, *++s) const char * string_to_double (const char *s, double *number) { int sign, exponent10, c = *s; string_scan_sign(s, c, sign); string_scan_decimal(s, c, *number); if (c == '.') { c = *++s; string_scan_fraction(s, c, *number, exponent10); } else exponent10 = 0; if (c == 'e' || c == 'E') { c = *++s; string_scan_exponent10(s, c, exponent10); } double_exp10(*number, exponent10); if (sign) *number = -*number; return s; } const char * string_to_float (const char *s, float *number) { int sign, exponent10, c = *s; string_scan_sign(s, c, sign); string_scan_decimal(s, c, *number); if (c == '.') { c = *++s; string_scan_fraction(s, c, *number, exponent10); } else exponent10 = 0; if (c == 'e' || c == 'E') { c = *++s; string_scan_exponent10(s, c, exponent10); } float_exp10(*number, exponent10); if (sign) *number = -*number; return s; } /* conventional form */ const char * convert_to_double (const char *s, double *number) { int sign, c = *s; string_scan_sign(s, c, sign); string_scan_decimal(s, c, *number); if (c == '.' 
|| c == ',') { int exponent10; c = *++s; string_scan_fraction(s, c, *number, exponent10); if (exponent10 < 0) double_negative_exp10(*number, exponent10); } if (sign) *number = -*number; return s; } const char * convert_to_float (const char *s, float *number) { int sign, c = *s; string_scan_sign(s, c, sign); string_scan_decimal(s, c, *number); if (c == '.' || c == ',') { int exponent10; c = *++s; string_scan_fraction(s, c, *number, exponent10); if (exponent10 < 0) float_negative_exp10(*number, exponent10); } if (sign) *number = -*number; return s; } /* pretty common stuff */ size_t bytes_to_hex_lc (const void *input, size_t size, unsigned char *output) { size_t i; const unsigned char *p; for (i = 0, p = (const unsigned char *)input; i < size; ++i, ++p) { *output++ = base16_lc_digit1(*p); *output++ = base16_lc_digit2(*p); } *output = '\0'; return 2*size + 1; } size_t bytes_to_hex_uc (const void *input, size_t size, unsigned char *output) { size_t i; const unsigned char *p; for (i = 0, p = (const unsigned char *)input; i < size; ++i, ++p) { *output++ = base16_uc_digit1(*p); *output++ = base16_uc_digit2(*p); } *output = '\0'; return 2*size + 1; } size_t hex_to_bytes (const void *input, size_t size, unsigned char *output) { size_t i; int c1, c2; const unsigned char *p; for (i = 1, p = (const unsigned char *)input; i < size; i += 2) { c1 = base16_value(*p); ++p; c2 = base16_value(*p); ++p; if (c1 >= 0 && c2 >= 0) *output++ = (unsigned char)((c1<<4)|c2); else break; } return i >> 1; } void print_as_hex (const void *input, size_t bytes) { const unsigned char *p; for (p = (const unsigned char *)input; bytes > 0; --bytes, ++p) printf("%02x", *p); } luametatex-2.10.08/source/libraries/pplib/util/utilnumber.h000066400000000000000000000515261442250314700237440ustar00rootroot00000000000000#ifndef UTIL_NUMBER_H #define UTIL_NUMBER_H #include // for size_t #include "utilplat.h" #include "utildecl.h" #if defined(__cplusplus) && defined(_MSC_VER) // int*_t types are in standard in 
msvc++ #else # include #endif /* 'long' isn't long for msvc64/mingw64, we need a type for machine word */ #if defined(_WIN64) || defined(__MINGW32__) # define INT64F "%I64d" # define UINT64F "%I64u" #else # define INT64F "%lld" # define UINT64F "%llu" #endif #if defined(MSVC64) # define INTLW_IS_INT64 # define intlw_t int64_t # define uintlw_t uint64_t # define INTLW(N) N##I64 # define UINTLW(N) N##UI64 # define INTLWF INT64F # define UINTLWF UINT64F #elif defined(__MINGW64__) # define INTLW_IS_INT64 # define intlw_t int64_t # define uintlw_t uint64_t # define INTLW(N) N##LL # define UINTLW(N) N##ULL # define INTLWF INT64F # define UINTLWF UINT64F #else // 32bit or sane 64bit (LP64) # define INTLW_IS_LONG # define intlw_t long # define uintlw_t unsigned long # define INTLW(N) N##L # define UINTLW(N) N##UL # define INTLWF "%ld" # define UINTLWF "%lu" #endif // ssize_t is missing in MSVC, but defining it is risky; some environments (eg. python) typedefs ssize_t on its own way.. // #if defined(MSVC64) // # define ssize_t int32_t // #else // # if defined(MSVC32) // # define ssize_t int64_t // # endif // #endif /* basic constants */ #define MAX_RADIX 36 #define MAX_INTEGER_DIGITS 65 /* 64-bit number in binary form plus '\0' */ #define MAX_ROMAN_DIGITS 128 /* to handle romannumeral of short int (up to 65 leading 'M') */ #define MAX_NUMBER_DIGITS 512 #define NUMBER_BUFFER_SIZE MAX_NUMBER_DIGITS #define base36_uc_alphabet "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ" #define base36_lc_alphabet "0123456789abcdefghijklmnopqrstuvwxyz" #define base26_uc_alphabet "ABCDEFGHIJKLMNOPQRSTUVWXYZ" #define base26_lc_alphabet "abcdefghijklmnopqrstuvwxyz" extern const int base26_lookup[]; #define base36_lc_palindrome "zyxwvutsrqponmlkjihgfedcba9876543210123456789abcdefghijklmnopqrstuvwxyz" #define base36_uc_palindrome "ZYXWVUTSRQPONMLKJIHGFEDCBA9876543210123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ" extern const int base36_lookup[]; #define base10_palindrome "9876543210123456789" #define 
base10_alphabet "0123456789" extern const int base10_lookup[]; #define base16_uc_alphabet "0123456789ABCDEF" #define base16_lc_alphabet "0123456789abcdef" extern const int base16_lookup[]; #define base16_uc_digit1(c) base16_uc_alphabet[(c)>>4] #define base16_uc_digit2(c) base16_uc_alphabet[(c)&15] #define base16_lc_digit1(c) base16_lc_alphabet[(c)>>4] #define base16_lc_digit2(c) base16_lc_alphabet[(c)&15] #define base8_digit(c) ((unsigned)(c - '0') <= (unsigned)('7' - '0')) #define base8_value(c) (base8_digit(c) ? (c) - '0' : -1) #define base10_digit(c) ((unsigned)(c - '0') <= (unsigned)('9' - '0')) #define base10_value(c) (base10_lookup[(uint8_t)(c)]) #define base16_digit(c) (base16_lookup[(uint8_t)(c)] >= 0) #define base16_value(c) (base16_lookup[(uint8_t)(c)]) #define base26_digit(c) (base26_lookup[(uint8_t)(c)] >= 0) #define base26_value(c) (base26_lookup[(uint8_t)(c)]) #define base36_digit(c) (base36_lookup[(uint8_t)(c)] >= 0) #define base36_value(c) (base36_lookup[(uint8_t)(c)]) //#define base_digit(c, radix) ((unsigned)(base36_lookup[c]) < (unsigned)(radix)) //#define base_value(c, radix) (base_digit(c, radix) ? 
base36_lookup[c] : -1) UTILDEF extern char util_number_buffer[NUMBER_BUFFER_SIZE]; /* integer from string; return a pointer to character next to the last digit */ UTILAPI const char * string_to_int32 (const char *s, int32_t *number); UTILAPI const char * string_to_slong (const char *s, long *number); UTILAPI const char * string_to_int64 (const char *s, int64_t *number); UTILAPI const char * string_to_uint32 (const char *s, uint32_t *number); UTILAPI const char * string_to_ulong (const char *s, unsigned long *number); UTILAPI const char * string_to_usize (const char *s, size_t *number); UTILAPI const char * string_to_uint64 (const char *s, uint64_t *number); UTILAPI const char * radix_to_int32 (const char *s, int32_t *number, int radix); UTILAPI const char * radix_to_slong (const char *s, long *number, int radix); UTILAPI const char * radix_to_int64 (const char *s, int64_t *number, int radix); UTILAPI const char * radix_to_uint32 (const char *s, uint32_t *number, int radix); UTILAPI const char * radix_to_ulong (const char *s, unsigned long *number, int radix); UTILAPI const char * radix_to_usize (const char *s, size_t *number, int radix); UTILAPI const char * radix_to_uint64 (const char *s, uint64_t *number, int radix); UTILAPI const char * alpha_to_uint32 (const char *s, uint32_t *number); UTILAPI const char * alpha_to_ulong (const char *s, unsigned long *number); UTILAPI const char * alpha_to_usize (const char *s, size_t *number); UTILAPI const char * alpha_to_uint64 (const char *s, uint64_t *number); /* integer to string */ UTILAPI char * int32_as_string (int32_t number, char ibuf[MAX_INTEGER_DIGITS], size_t *psize); UTILAPI char * slong_as_string (long number, char ibuf[MAX_INTEGER_DIGITS], size_t *psize); UTILAPI char * int64_as_string (int64_t number, char ibuf[MAX_INTEGER_DIGITS], size_t *psize); #define int32_to_string(number, psize) int32_as_string(number, util_number_buffer, psize) #define slong_to_string(number, psize) slong_as_string(number, 
util_number_buffer, psize) #define int64_to_string(number, psize) int64_as_string(number, util_number_buffer, psize) UTILAPI char * uint32_as_string (uint32_t number, char ibuf[MAX_INTEGER_DIGITS], size_t *psize); UTILAPI char * ulong_as_string (unsigned long number, char ibuf[MAX_INTEGER_DIGITS], size_t *psize); UTILAPI char * usize_as_string (size_t number, char ibuf[MAX_INTEGER_DIGITS], size_t *psize); UTILAPI char * uint64_as_string (uint64_t number, char ibuf[MAX_INTEGER_DIGITS], size_t *psize); #define uint32_to_string(number, psize) uint32_as_string(number, util_number_buffer, psize) #define ulong_to_string(number, psize) ulong_as_string(number, util_number_buffer, psize) #define usize_to_string(number, psize) usize_as_string(number, util_number_buffer, psize) #define uint64_to_string(number, psize) uint64_as_string(number, util_number_buffer, psize) UTILAPI char * int32_as_radix (int32_t number, int radix, int uc, char ibuf[MAX_INTEGER_DIGITS], size_t *psize); UTILAPI char * slong_as_radix (long number, int radix, int uc, char ibuf[MAX_INTEGER_DIGITS], size_t *psize); UTILAPI char * int64_as_radix (int64_t number, int radix, int uc, char ibuf[MAX_INTEGER_DIGITS], size_t *psize); #define int32_to_radix(number, radix, uc, psize) int32_as_radix(number, radix, uc, util_number_buffer, psize) #define slong_to_radix(number, radix, uc, psize) slong_as_radix(number, radix, uc, util_number_buffer, psize) #define int64_to_radix(number, radix, uc, psize) int64_as_radix(number, radix, uc, util_number_buffer, psize) UTILAPI char * uint32_as_radix (uint32_t number, int radix, int uc, char ibuf[MAX_INTEGER_DIGITS], size_t *psize); UTILAPI char * ulong_as_radix (unsigned long number, int radix, int uc, char ibuf[MAX_INTEGER_DIGITS], size_t *psize); UTILAPI char * usize_as_radix (size_t number, int radix, int uc, char ibuf[MAX_INTEGER_DIGITS], size_t *psize); UTILAPI char * uint64_as_radix (uint64_t number, int radix, int uc, char ibuf[MAX_INTEGER_DIGITS], size_t *psize); 
#define uint32_to_radix(number, radix, uc, psize) uint32_as_radix(number, radix, uc, util_number_buffer, psize) #define ulong_to_radix(number, radix, uc, psize) ulong_as_radix(number, radix, uc, util_number_buffer, psize) #define usize_to_radix(number, radix, uc, psize) usize_as_radix(number, radix, uc, util_number_buffer, psize) #define uint64_to_radix(number, radix, uc, psize) uint64_as_radix(number, radix, uc, util_number_buffer, psize) UTILAPI char * uint32_as_alpha (uint32_t number, int uc, char ibuf[MAX_INTEGER_DIGITS], size_t *psize); UTILAPI char * ulong_as_alpha (unsigned long number, int uc, char ibuf[MAX_INTEGER_DIGITS], size_t *psize); UTILAPI char * usize_as_alpha (size_t number, int uc, char ibuf[MAX_INTEGER_DIGITS], size_t *psize); UTILAPI char * uint64_as_alpha (uint64_t number, int uc, char ibuf[MAX_INTEGER_DIGITS], size_t *psize); #define uint32_to_alpha(number, uc, psize) uint32_as_alpha(number, uc, util_number_buffer, psize) #define ulong_to_alpha(number, uc, psize) ulong_as_alpha(number, uc, util_number_buffer, psize) #define usize_to_alpha(number, uc, psize) usize_as_alpha(number, uc, util_number_buffer, psize) #define uint64_to_alpha(number, uc, psize) uint64_as_alpha(number, uc, util_number_buffer, psize) #if defined(INTLW_IS_INT64) # define string_to_intlw(s, number) string_to_int64(s, number) # define string_to_uintlw(s, number) string_to_uint64(s, number) # define radix_to_intlw(s, number, radix) radix_to_int64(s, number, radix) # define radix_to_uintlw(s, number, radix) radix_to_uint64(s, number, radix) # define alpha_to_uintlw(s, number) alpha_to_uint64(s, number) # define intlw_as_string(number, ibuf, psize) int64_as_string(number, ibuf, psize) # define uintlw_as_string(number, ibuf, psize) uint64_as_string(number, ibuf, psize) # define intlw_to_string(number, psize) int64_to_string(number, psize) # define uintlw_to_string(number, psize) uint64_to_string(number, psize) # define intlw_as_radix(number, radix, uc, ibuf, psize) 
int64_as_radix(number, radix, uc, ibuf, psize) # define uintlw_as_radix(number, radix, uc, ibuf, psize) uint64_as_radix(number, radix, uc, ibuf, psize) # define intlw_to_radix(number, radix, uc, psize) int64_to_radix(number, radix, uc, psize) # define uintlw_to_radix(number, radix, uc, psize) uint64_to_radix(number, radix, uc, psize) # define uintlw_as_alpha(number, uc, ibuf, psize) uint64_as_alpha(number, uc, ibuf, psize) # define uintlw_to_alpha(number, uc, psize) uint64_to_alpha(number, uc, ibuf, psize) #elif defined(INTLW_IS_LONG) # define string_to_intlw(s, number) string_to_slong(s, number) # define string_to_uintlw(s, number) string_to_ulong(s, number) # define radix_to_intlw(s, number, radix) radix_to_slong(s, number, radix) # define radix_to_uintlw(s, number, radix) radix_to_ulong(s, number, radix) # define alpha_to_uintlw(s, number) alpha_to_ulong(s, number) # define intlw_as_string(number, ibuf, psize) slong_as_string(number, ibuf, psize) # define uintlw_as_string(number, ibuf, psize) ulong_as_string(number, ibuf, psize) # define intlw_to_string(number, psize) slong_to_string(number, psize) # define uintlw_to_string(number, psize) ulong_to_string(number, psize) # define intlw_as_radix(number, radix, uc, ibuf, psize) slong_as_radix(number, radix, uc, ibuf, psize) # define uintlw_as_radix(number, radix, uc, ibuf, psize) ulong_as_radix(number, radix, uc, ibuf, psize) # define intlw_to_radix(number, radix, uc, psize) slong_to_radix(number, radix, uc, psize) # define uintlw_to_radix(number, radix, uc, psize) ulong_to_radix(number, radix, uc, psize) # define uintlw_as_alpha(number, uc, ibuf, psize) ulong_as_alpha(number, uc, ibuf, psize) # define uintlw_to_alpha(number, uc, psize) ulong_to_alpha(number, uc, ibuf, psize) #endif /* a..z, aa..zz, aaa..zzz (limited to uint16_t, valid for N <= buffer_size * 26) */ UTILAPI const char * alphan_to_uint16 (const char *s, uint16_t *number); UTILAPI char * uint16_as_alphan (uint16_t number, int uc, char ibuf[], size_t 
size, size_t *psize); #define uint16_to_alphan(number, uc, psize) uint16_as_alphan(number, uc, util_number_buffer, NUMBER_BUFFER_SIZE, psize) /* roman numeral (limited to uint16_t) */ UTILAPI const char * roman_to_uint16 (const char *s, uint16_t *number); UTILAPI char * uint16_as_roman (uint16_t number, int uc, char ibuf[MAX_ROMAN_DIGITS], size_t *psize); #define uint16_to_roman(number, uc, psize) uint16_as_roman(number, uc, util_number_buffer, psize) /* double/float to string */ UTILAPI char * double_as_string (double number, int digits, char nbuf[MAX_NUMBER_DIGITS], size_t *psize); #define double_to_string(number, digits, psize) double_as_string(number, digits, util_number_buffer, psize) UTILAPI char * float_as_string (float number, int digits, char nbuf[MAX_NUMBER_DIGITS], size_t *psize); #define float_to_string(number, digits, psize) float_as_string(number, digits, util_number_buffer, psize) /* string to double/float */ UTILAPI const char * string_to_double (const char *s, double *number); UTILAPI const char * string_to_float (const char *s, float *number); /* convenience form accepting comma among a dot, with not exp notation (eg. 
pdf) */ UTILAPI const char * convert_to_double (const char *s, double *number); UTILAPI const char * convert_to_float (const char *s, float *number); /* binary data parsers helpers */ #if 0 // masking gives more overactive warnings #define get_number_byte1(n) ((n) & 0x000000ffu) #define get_number_byte2(n) (((n) & 0x0000ff00u) >> 8) #define get_number_byte3(n) (((n) & 0x00ff0000u) >> 16) #define get_number_byte4(n) (((n) & 0xff000000u) >> 24) #define get_number_byte5(n) (((n) & 0x000000ff00000000ull) >> 32) #define get_number_byte6(n) (((n) & 0x0000ff0000000000ull) >> 40) #define get_number_byte7(n) (((n) & 0x00ff000000000000ull) >> 48) #define get_number_byte8(n) (((n) & 0xff00000000000000ull) >> 56) #else #define get_number_byte1(n) ((n) & 0xff) #define get_number_byte2(n) (((n) >> 8) & 0xff) #define get_number_byte3(n) (((n) >> 16) & 0xff) #define get_number_byte4(n) (((n) >> 24) & 0xff) #define get_number_byte5(n) (((n) >> 32) & 0xff) #define get_number_byte6(n) (((n) >> 40) & 0xff) #define get_number_byte7(n) (((n) >> 48) & 0xff) #define get_number_byte8(n) (((n) >> 56) & 0xff) #endif #define get_number_bytes_be1(n, b) (b[0] = (uint8_t)get_number_byte1(n)) #define get_number_bytes_be2(n, b) (b[0] = (uint8_t)get_number_byte2(n), b[1] = (uint8_t)get_number_byte1(n)) #define get_number_bytes_be3(n, b) (b[0] = (uint8_t)get_number_byte3(n), b[1] = (uint8_t)get_number_byte2(n), b[2] = (uint8_t)get_number_byte1(n)) #define get_number_bytes_be4(n, b) (b[0] = (uint8_t)get_number_byte4(n), b[1] = (uint8_t)get_number_byte3(n), b[2] = (uint8_t)get_number_byte2(n), b[3] = (uint8_t)get_number_byte1(n)) #define get_number_bytes_be5(n, b) (b[0] = (uint8_t)get_number_byte5(n), b[1] = (uint8_t)get_number_byte4(n), b[2] = (uint8_t)get_number_byte3(n), b[3] = (uint8_t)get_number_byte2(n), \ b[4] = (uint8_t)get_number_byte1(n)) #define get_number_bytes_be6(n, b) (b[0] = (uint8_t)get_number_byte6(n), b[1] = (uint8_t)get_number_byte5(n), b[2] = (uint8_t)get_number_byte4(n), b[3] = 
(uint8_t)get_number_byte3(n), \ b[4] = (uint8_t)get_number_byte2(n), b[5] = (uint8_t)get_number_byte1(n)) #define get_number_bytes_be7(n, b) (b[0] = (uint8_t)get_number_byte7(n), b[1] = (uint8_t)get_number_byte6(n), b[2] = (uint8_t)get_number_byte5(n), b[3] = (uint8_t)get_number_byte4(n), \ b[4] = (uint8_t)get_number_byte3(n), b[5] = (uint8_t)get_number_byte2(n), b[6] = (uint8_t)get_number_byte1(n)) #define get_number_bytes_be8(n, b) (b[0] = (uint8_t)get_number_byte8(n), b[1] = (uint8_t)get_number_byte7(n), b[2] = (uint8_t)get_number_byte6(n), b[3] = (uint8_t)get_number_byte5(n), \ b[4] = (uint8_t)get_number_byte4(n), b[5] = (uint8_t)get_number_byte3(n), b[6] = (uint8_t)get_number_byte2(n), b[7] = (uint8_t)get_number_byte1(n)) #define read_uint16be_as(s, int_type) ((int_type)((s[0]<<8)|s[1])) #define read_uint32be_as(s, int_type) ((int_type)((s[0]<<24)|(s[1]<<16)|(s[2]<<8)|s[3])) #define read_uint16le_as(s, int_type) ((int_type)((s[1]<<8)|s[0])) #define read_uint32le_as(s, int_type) ((int_type)((s[3]<<24)|(s[2]<<16)|(s[1]<<8)|s[0])) #define read_uint16_native(s) (*((uint16_t *)(s))) #define read_uint32_native(s) (*((uint32_t *)(s))) #define read_int16_native(s) (*((int16_t *)(s))) #define read_int32_native(s) (*((int32_t *)(s))) #define scan_uint16be_as(s, int_type) (s += 2, (int_type)((s[-2]<<8)|s[-1])) #define scan_uint32be_as(s, int_type) (s += 4, (int_type)((s[-4]<<24)|(s[-3]<<16)|(s[-2]<<8)|s[-1])) #define scan_uint16le_as(s, int_type) (s += 2, (int_type)((s[-1]<<8)|s[-2])) #define scan_uint32le_as(s, int_type) (s += 4, (int_type)((s[-1]<<24)|(s[-2]<<16)|(s[-3]<<8)|s[-4])) #define scan_uint16_native(s) (s += 2, read_uint16_native(s-2)) #define scan_uint32_native(s) (s += 4, read_uint32_native(s-4)) #define scan_int16_native(s) (s += 2, read_int16_native(s-2)) #define scan_int32_native(s) (s += 4, read_int32_native(s-4)) #define read_fixed16_16_as(s, float_type) (((float_type)read_uint32be_as(s, signed int))/(1<<16)) #define read_fixed2_14_as(s, float_type) 
(((float_type)read_uint16be_as(s, signed short))/(1<<14)) #define scan_fixed16_16_as(s, float_type) (((float_type)scan_uint32be_as(s, signed int))/(1<<16)) #define scan_fixed2_14_as(s, float_type) (((float_type)scan_uint16be_as(s, signed short))/(1<<14)) /* internal procedures */ #define _scan_sign(c, sign, next) \ do { if (c == '-') { sign = 1; c = next; } else if (c == '+') { sign = 0; c = next; } else sign = 0; } while (0) #define integer_multiplied10(number) (((number) << 1) + ((number) << 3)) #define _scan_integer(c, number, next) \ for (number = 0; base10_digit(c); number = integer_multiplied10(number) + (c - '0'), c = next) #define _scan_radix(c, number, radix, next) \ for (number = 0; (c = base36_value(c)) >= 0 && c < radix; number = number * radix + c, c = next) #define _read_integer(c, number, next) \ for (number = c - '0', c = next; base10_digit(c); number = integer_multiplied10(number) + (c - '0'), c = next) #define _read_radix(c, number, radix, next) \ for (number = c - '0', c = next; (c = base36_value(c)) >= 0 && c < radix; number = number * radix + c, c = next) /* rationals */ #define _scan_decimal(c, number, next) \ for (number = 0; base10_digit(c); number = number*10 + (c - '0'), c = next) #define _scan_fraction(c, number, exponent10, next) \ for (exponent10 = 0; base10_digit(c); --exponent10, number = number*10 + (c - '0'), c = next) #define _scan_exponent10(c, exponent10, next) \ do { \ int eexponent10, eexpsign; \ _scan_sign(c, eexpsign, next); \ _scan_integer(c, eexponent10, next); \ if (eexpsign) \ exponent10 -= eexponent10; \ else \ exponent10 += eexponent10; \ } while(0) #if 0 // kept just for sentiment ;) extern const double double_binary_power10[]; extern const float float_binary_power10[]; extern const double double_binary_negpower10[]; extern const float float_binary_negpower10[]; #define double_negative_exp10(number, exponent) \ { const double *bp10; int e = ((exponent) < 511 ? 
511 : -(exponent)); \ for (bp10 = double_binary_negpower10; e > 0; e >>= 1, ++bp10) \ if (e & 1) number *= *bp10; } #define float_negative_exp10(number, exponent) \ { const float *bp10; int e = ((exponent) < 64 ? 64 : -(exponent)); \ for (bp10 = float_binary_negpower10; e > 0; e >>= 1, ++bp10) \ if (e & 1) number *= *bp10; } #define double_positive_exp10(number, exponent) \ { const double *bp10; int e = ((exponent) > 511 ? 511 : (exponent)); \ for (bp10 = double_binary_power10; e > 0; e >>= 1, ++bp10) \ if (e & 1) number *= *bp10; } #define float_positive_exp10(number, exponent) \ { const float *bp10; int e = ((exponent) > 64 ? 64 : (exponent)); \ for (bp10 = double_binary_power10; e > 0; e >>= 1, ++bp10) \ if (e & 1) number *= *bp10; } #define double_exp10(number, exponent) \ if ((exponent) < 0) double_negative_exp10(number, exponent) else if ((exponent) > 0) double_positive_exp10(number, exponent) #define float_exp10(number, exponent) \ if ((exponent) < 0) float_negative_exp10(number, exponent) else if ((exponent) > 0) float_positive_exp10(number, exponent) #else extern const double double_decimal_power10[]; extern const float float_decimal_power10[]; extern const double double_decimal_negpower10[]; extern const float float_decimal_negpower10[]; #define double_negative_exp10(number, exponent) ((number) *= double_decimal_negpower10[(exponent) < -308 ? 308 : -(exponent)]) #define double_positive_exp10(number, exponent) ((number) *= double_decimal_power10[(exponent) > 308 ? 308 : (exponent)]) #define float_negative_exp10(number, exponent) ((number) *= float_decimal_negpower10[(exponent) < -38 ? 38 : -(exponent)]) #define float_positive_exp10(number, exponent) ((number) *= float_decimal_power10[(exponent) > 38 ? 
38 : (exponent)]) #define double_exp10(number, exponent) ((void)(((exponent) < 0 && double_negative_exp10(number, exponent)) || (((exponent) > 0 && double_positive_exp10(number, exponent))))) #define float_exp10(number, exponent) ((void)(((exponent) < 0 && float_negative_exp10(number, exponent)) || (((exponent) > 0 && float_positive_exp10(number, exponent))))) #endif /* pretty common stuff */ #define bytes_to_hex(input, size, output) bytes_to_hex_lc(input, size, output) UTILAPI size_t bytes_to_hex_lc (const void *input, size_t size, uint8_t *output); UTILAPI size_t bytes_to_hex_uc (const void *input, size_t size, uint8_t *output); UTILAPI size_t hex_to_bytes (const void *input, size_t size, uint8_t *output); UTILAPI void print_as_hex (const void *input, size_t bytes); #endifluametatex-2.10.08/source/libraries/pplib/util/utilplat.h000066400000000000000000000011071442250314700234020ustar00rootroot00000000000000 #ifndef UTIL_PLAT_H #define UTIL_PLAT_H #if defined(_WIN32) || defined(WIN32) # ifdef _MSC_VER # if defined(_M_64) || defined(_WIN64) # define MSVC64 # else # define MSVC32 # endif # else # if defined(__MINGW64__) # define MINGW64 # else # if defined(__MINGW32__) # define MINGW32 # endif # endif # endif #endif #ifdef __GNUC__ //# define FALLTHRU [[fallthrough]] // c++17 //# define FALLTHRU [[gnu:fallthrough]] // c++14 # define FALLTHRU __attribute__((fallthrough)); // C and C++03 #else # define FALLTHRU #endif #endifluametatex-2.10.08/source/libraries/pplib/util/utilsha.c000066400000000000000000000775411442250314700232270ustar00rootroot00000000000000/* sha2 implementation excerpted from code by Aaron D. Gifford */ /* * AUTHOR: Aaron D. Gifford - http://www.aarongifford.com/ * * Copyright (c) 2000-2001, Aaron D. Gifford * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the copyright holder nor the names of contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTOR(S) ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTOR(S) BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $Id: sha2.c,v 1.1 2001/11/08 00:01:51 adg Exp adg $ */ #include /* FILE */ #include /* memcpy()/memset() or bcopy()/bzero() */ //#include /* assert() */ #include "utilsha.h" /* * UNROLLED TRANSFORM LOOP NOTE: * You can define SHA2_UNROLL_TRANSFORM to use the unrolled transform * loop version for the hash transform rounds (defined using macros * later in this file). Either define on the command line, for example: * * cc -DSHA2_UNROLL_TRANSFORM -o sha2 sha2.c sha2prog.c * * or define below: * * #define SHA2_UNROLL_TRANSFORM * */ /*** SHA-256/384/512 Machine Architecture Definitions *****************/ /* * BYTE_ORDER NOTE: * * Please make sure that your system defines BYTE_ORDER. 
If your * architecture is little-endian, make sure it also defines * LITTLE_ENDIAN and that the two (BYTE_ORDER and LITTLE_ENDIAN) are * equivilent. * * If your system does not define the above, then you can do so by * hand like this: * * #define LITTLE_ENDIAN 1234 * #define BIG_ENDIAN 4321 * * And for little-endian machines, add: * * #define BYTE_ORDER LITTLE_ENDIAN * * Or for big-endian machines: * * #define BYTE_ORDER BIG_ENDIAN * * The FreeBSD machine this was written on defines BYTE_ORDER * appropriately by including (which in turn includes * where the appropriate definitions are actually * made). */ #ifndef BYTE_ORDER #define BYTE_ORDER LITTLE_ENDIAN #endif //#if !defined(BYTE_ORDER) || (BYTE_ORDER != LITTLE_ENDIAN && BYTE_ORDER != BIG_ENDIAN) //#error Define BYTE_ORDER to be equal to either LITTLE_ENDIAN or BIG_ENDIAN //#endif /* * Define the following sha2_* types to types of the correct length on * the native archtecture. Most BSD systems and Linux define u_intXX_t * types. Machines with very recent ANSI C headers, can use the * uintXX_t definintions from inttypes.h by defining SHA2_USE_INTTYPES_H * during compile or in the sha.h header file. * * Machines that support neither u_intXX_t nor inttypes.h's uintXX_t * will need to define these three typedefs below (and the appropriate * ones in sha.h too) by hand according to their system architecture. * * Thank you, Jun-ichiro itojun Hagino, for suggesting using u_intXX_t * types and pointing out recent ANSI C support for uintXX_t in inttypes.h. 
* * PJ: replace by uintX_t */ //typedef uint8_t sha2_byte; /* Exactly 1 byte */ //typedef uint32_t sha2_word32; /* Exactly 4 bytes */ //typedef uint64_t sha2_word64; /* Exactly 8 bytes */ /*** SHA-256/384/512 Various Length Definitions ***********************/ /* NOTE: Most of these are in header */ #define SHA256_SHORT_BLOCK_LENGTH (SHA256_BLOCK_LENGTH - 8) #define SHA384_SHORT_BLOCK_LENGTH (SHA384_BLOCK_LENGTH - 16) #define SHA512_SHORT_BLOCK_LENGTH (SHA512_BLOCK_LENGTH - 16) /*** ENDIAN REVERSAL MACROS *******************************************/ #if BYTE_ORDER == LITTLE_ENDIAN #define REVERSE32(w, x) { \ uint32_t tmp = (w); \ tmp = (tmp >> 16) | (tmp << 16); \ (x) = ((tmp & 0xff00ff00UL) >> 8) | ((tmp & 0x00ff00ffUL) << 8); \ } #define REVERSE64(w, x) { \ uint64_t tmp = (w); \ tmp = (tmp >> 32) | (tmp << 32); \ tmp = ((tmp & 0xff00ff00ff00ff00ULL) >> 8) | \ ((tmp & 0x00ff00ff00ff00ffULL) << 8); \ (x) = ((tmp & 0xffff0000ffff0000ULL) >> 16) | \ ((tmp & 0x0000ffff0000ffffULL) << 16); \ } #endif /* BYTE_ORDER == LITTLE_ENDIAN */ /* * Macro for incrementally adding the unsigned 64-bit integer n to the * unsigned 128-bit integer (represented using a two-element array of * 64-bit words): */ #define ADDINC128(w,n) { \ (w)[0] += (uint64_t)(n); \ if ((w)[0] < (n)) { \ (w)[1]++; \ } \ } #define MEMSET_BZERO(p,l) memset((p), 0, (l)) #define MEMCPY_BCOPY(d,s,l) memcpy((d), (s), (l)) /*** THE SIX LOGICAL FUNCTIONS ****************************************/ /* * Bit shifting and rotation (used by the six SHA-XYZ logical functions: * * NOTE: The naming of R and S appears backwards here (R is a SHIFT and * S is a ROTATION) because the SHA-256/384/512 description document * (see http://csrc.nist.gov/cryptval/shs/sha256-384-512.pdf) uses this * same "backwards" definition. 
*/ /* Shift-right (used in SHA-256, SHA-384, and SHA-512): */ #define R(b,x) ((x) >> (b)) /* 32-bit Rotate-right (used in SHA-256): */ #define S32(b,x) (((x) >> (b)) | ((x) << (32 - (b)))) /* 64-bit Rotate-right (used in SHA-384 and SHA-512): */ #define S64(b,x) (((x) >> (b)) | ((x) << (64 - (b)))) /* Two of six logical functions used in SHA-256, SHA-384, and SHA-512: */ #define Ch(x,y,z) (((x) & (y)) ^ ((~(x)) & (z))) #define Maj(x,y,z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z))) /* Four of six logical functions used in SHA-256: */ #define Sigma0_256(x) (S32(2, (x)) ^ S32(13, (x)) ^ S32(22, (x))) #define Sigma1_256(x) (S32(6, (x)) ^ S32(11, (x)) ^ S32(25, (x))) #define sigma0_256(x) (S32(7, (x)) ^ S32(18, (x)) ^ R(3 , (x))) #define sigma1_256(x) (S32(17, (x)) ^ S32(19, (x)) ^ R(10, (x))) /* Four of six logical functions used in SHA-384 and SHA-512: */ #define Sigma0_512(x) (S64(28, (x)) ^ S64(34, (x)) ^ S64(39, (x))) #define Sigma1_512(x) (S64(14, (x)) ^ S64(18, (x)) ^ S64(41, (x))) #define sigma0_512(x) (S64( 1, (x)) ^ S64( 8, (x)) ^ R( 7, (x))) #define sigma1_512(x) (S64(19, (x)) ^ S64(61, (x)) ^ R( 6, (x))) static void sha512_last (sha512_state *state); static void sha256_transform (sha256_state *state, const uint32_t idata[16]); static void sha512_transform (sha512_state *state, const uint64_t idata[16]); /*** SHA-XYZ INITIAL HASH VALUES AND CONSTANTS ************************/ /* Hash constant words K for SHA-256: */ static const uint32_t K256[64] = { 0x428a2f98UL, 0x71374491UL, 0xb5c0fbcfUL, 0xe9b5dba5UL, 0x3956c25bUL, 0x59f111f1UL, 0x923f82a4UL, 0xab1c5ed5UL, 0xd807aa98UL, 0x12835b01UL, 0x243185beUL, 0x550c7dc3UL, 0x72be5d74UL, 0x80deb1feUL, 0x9bdc06a7UL, 0xc19bf174UL, 0xe49b69c1UL, 0xefbe4786UL, 0x0fc19dc6UL, 0x240ca1ccUL, 0x2de92c6fUL, 0x4a7484aaUL, 0x5cb0a9dcUL, 0x76f988daUL, 0x983e5152UL, 0xa831c66dUL, 0xb00327c8UL, 0xbf597fc7UL, 0xc6e00bf3UL, 0xd5a79147UL, 0x06ca6351UL, 0x14292967UL, 0x27b70a85UL, 0x2e1b2138UL, 0x4d2c6dfcUL, 0x53380d13UL, 0x650a7354UL, 
0x766a0abbUL, 0x81c2c92eUL, 0x92722c85UL, 0xa2bfe8a1UL, 0xa81a664bUL, 0xc24b8b70UL, 0xc76c51a3UL, 0xd192e819UL, 0xd6990624UL, 0xf40e3585UL, 0x106aa070UL, 0x19a4c116UL, 0x1e376c08UL, 0x2748774cUL, 0x34b0bcb5UL, 0x391c0cb3UL, 0x4ed8aa4aUL, 0x5b9cca4fUL, 0x682e6ff3UL, 0x748f82eeUL, 0x78a5636fUL, 0x84c87814UL, 0x8cc70208UL, 0x90befffaUL, 0xa4506cebUL, 0xbef9a3f7UL, 0xc67178f2UL }; /* Initial hash value H for SHA-256: */ static const uint32_t sha256_initial_hash_value[8] = { 0x6a09e667UL, 0xbb67ae85UL, 0x3c6ef372UL, 0xa54ff53aUL, 0x510e527fUL, 0x9b05688cUL, 0x1f83d9abUL, 0x5be0cd19UL }; /* Hash constant words K for SHA-384 and SHA-512: */ static const uint64_t K512[80] = { 0x428a2f98d728ae22ULL, 0x7137449123ef65cdULL, 0xb5c0fbcfec4d3b2fULL, 0xe9b5dba58189dbbcULL, 0x3956c25bf348b538ULL, 0x59f111f1b605d019ULL, 0x923f82a4af194f9bULL, 0xab1c5ed5da6d8118ULL, 0xd807aa98a3030242ULL, 0x12835b0145706fbeULL, 0x243185be4ee4b28cULL, 0x550c7dc3d5ffb4e2ULL, 0x72be5d74f27b896fULL, 0x80deb1fe3b1696b1ULL, 0x9bdc06a725c71235ULL, 0xc19bf174cf692694ULL, 0xe49b69c19ef14ad2ULL, 0xefbe4786384f25e3ULL, 0x0fc19dc68b8cd5b5ULL, 0x240ca1cc77ac9c65ULL, 0x2de92c6f592b0275ULL, 0x4a7484aa6ea6e483ULL, 0x5cb0a9dcbd41fbd4ULL, 0x76f988da831153b5ULL, 0x983e5152ee66dfabULL, 0xa831c66d2db43210ULL, 0xb00327c898fb213fULL, 0xbf597fc7beef0ee4ULL, 0xc6e00bf33da88fc2ULL, 0xd5a79147930aa725ULL, 0x06ca6351e003826fULL, 0x142929670a0e6e70ULL, 0x27b70a8546d22ffcULL, 0x2e1b21385c26c926ULL, 0x4d2c6dfc5ac42aedULL, 0x53380d139d95b3dfULL, 0x650a73548baf63deULL, 0x766a0abb3c77b2a8ULL, 0x81c2c92e47edaee6ULL, 0x92722c851482353bULL, 0xa2bfe8a14cf10364ULL, 0xa81a664bbc423001ULL, 0xc24b8b70d0f89791ULL, 0xc76c51a30654be30ULL, 0xd192e819d6ef5218ULL, 0xd69906245565a910ULL, 0xf40e35855771202aULL, 0x106aa07032bbd1b8ULL, 0x19a4c116b8d2d0c8ULL, 0x1e376c085141ab53ULL, 0x2748774cdf8eeb99ULL, 0x34b0bcb5e19b48a8ULL, 0x391c0cb3c5c95a63ULL, 0x4ed8aa4ae3418acbULL, 0x5b9cca4f7763e373ULL, 0x682e6ff3d6b2b8a3ULL, 0x748f82ee5defb2fcULL, 
0x78a5636f43172f60ULL, 0x84c87814a1f0ab72ULL, 0x8cc702081a6439ecULL, 0x90befffa23631e28ULL, 0xa4506cebde82bde9ULL, 0xbef9a3f7b2c67915ULL, 0xc67178f2e372532bULL, 0xca273eceea26619cULL, 0xd186b8c721c0c207ULL, 0xeada7dd6cde0eb1eULL, 0xf57d4f7fee6ed178ULL, 0x06f067aa72176fbaULL, 0x0a637dc5a2c898a6ULL, 0x113f9804bef90daeULL, 0x1b710b35131c471bULL, 0x28db77f523047d84ULL, 0x32caab7b40c72493ULL, 0x3c9ebe0a15c9bebcULL, 0x431d67c49c100d4cULL, 0x4cc5d4becb3e42b6ULL, 0x597f299cfc657e2aULL, 0x5fcb6fab3ad6faecULL, 0x6c44198c4a475817ULL }; /* Initial hash value H for SHA-384 */ static const uint64_t sha384_initial_hash_value[8] = { 0xcbbb9d5dc1059ed8ULL, 0x629a292a367cd507ULL, 0x9159015a3070dd17ULL, 0x152fecd8f70e5939ULL, 0x67332667ffc00b31ULL, 0x8eb44a8768581511ULL, 0xdb0c2e0d64f98fa7ULL, 0x47b5481dbefa4fa4ULL }; /* Initial hash value H for SHA-512 */ static const uint64_t sha512_initial_hash_value[8] = { 0x6a09e667f3bcc908ULL, 0xbb67ae8584caa73bULL, 0x3c6ef372fe94f82bULL, 0xa54ff53a5f1d36f1ULL, 0x510e527fade682d1ULL, 0x9b05688c2b3e6c1fULL, 0x1f83d9abfb41bd6bULL, 0x5be0cd19137e2179ULL }; /*** SHA-256: *********************************************************/ sha256_state * sha256_digest_init (sha256_state *state) { MEMCPY_BCOPY(state->words, sha256_initial_hash_value, SHA256_DIGEST_LENGTH); MEMSET_BZERO(state->buffer, SHA256_BLOCK_LENGTH); state->bitcount = 0; return state; } #ifdef SHA2_UNROLL_TRANSFORM /* Unrolled SHA-256 round macros: */ #if BYTE_ORDER == LITTLE_ENDIAN #define ROUND256_0_TO_15(v, a, b, c, d, e, f, g, h) \ REVERSE32(v, W256[j]); \ T1 = (h) + Sigma1_256(e) + Ch((e), (f), (g)) + K256[j] + W256[j]; \ (d) += T1; \ (h) = T1 + Sigma0_256(a) + Maj((a), (b), (c)) #else /* BYTE_ORDER == LITTLE_ENDIAN */ #define ROUND256_0_TO_15(v, a, b, c, d, e, f, g, h) \ T1 = (h) + Sigma1_256(e) + Ch((e), (f), (g)) + K256[j] + (W256[j] = v); \ (d) += T1; \ (h) = T1 + Sigma0_256(a) + Maj((a), (b), (c)) #endif /* BYTE_ORDER == LITTLE_ENDIAN */ #define ROUND256(a, b, c, d, e, f, g, h) 
\ s0 = W256[(j+1)&0x0f]; \ s0 = sigma0_256(s0); \ s1 = W256[(j+14)&0x0f]; \ s1 = sigma1_256(s1); \ T1 = (h) + Sigma1_256(e) + Ch((e), (f), (g)) + K256[j] + (W256[j&0x0f] += s1 + W256[(j+9)&0x0f] + s0); \ (d) += T1; \ (h) = T1 + Sigma0_256(a) + Maj((a), (b), (c)) static void sha256_transform (sha256_state *state, const uint32_t idata[16]) { uint32_t a, b, c, d, e, f, g, h, s0, s1; uint32_t T1, *W256, v; int j; W256 = state->buffer32; /* Initialize registers with the prev. intermediate value */ a = state->words[0]; b = state->words[1]; c = state->words[2]; d = state->words[3]; e = state->words[4]; f = state->words[5]; g = state->words[6]; h = state->words[7]; j = 0; do { /* Rounds 0 to 15 (unrolled): */ v = idata[j]; ROUND256_0_TO_15(v, a, b, c, d, e, f, g, h); ++j; v = idata[j]; ROUND256_0_TO_15(v, h, a, b, c, d, e, f, g); ++j; v = idata[j]; ROUND256_0_TO_15(v, g, h, a, b, c, d, e, f); ++j; v = idata[j]; ROUND256_0_TO_15(v, f, g, h, a, b, c, d, e); ++j; v = idata[j]; ROUND256_0_TO_15(v, e, f, g, h, a, b, c, d); ++j; v = idata[j]; ROUND256_0_TO_15(v, d, e, f, g, h, a, b, c); ++j; v = idata[j]; ROUND256_0_TO_15(v, c, d, e, f, g, h, a, b); ++j; v = idata[j]; ROUND256_0_TO_15(v, b, c, d, e, f, g, h, a); ++j; } while (j < 16); /* Now for the remaining rounds to 64: */ do { ROUND256(a, b, c, d, e, f, g, h); ++j; ROUND256(h, a, b, c, d, e, f, g); ++j; ROUND256(g, h, a, b, c, d, e, f); ++j; ROUND256(f, g, h, a, b, c, d, e); ++j; ROUND256(e, f, g, h, a, b, c, d); ++j; ROUND256(d, e, f, g, h, a, b, c); ++j; ROUND256(c, d, e, f, g, h, a, b); ++j; ROUND256(b, c, d, e, f, g, h, a); ++j; } while (j < 64); /* Compute the current intermediate hash value */ state->words[0] += a; state->words[1] += b; state->words[2] += c; state->words[3] += d; state->words[4] += e; state->words[5] += f; state->words[6] += g; state->words[7] += h; } #else /* SHA2_UNROLL_TRANSFORM */ static void sha256_transform (sha256_state *state, const uint32_t idata[16]) { uint32_t a, b, c, d, e, f, g, h, s0, s1; 
uint32_t T1, T2, *W256, v; int j; W256 = state->buffer32; /* Initialize registers with the prev. intermediate value */ a = state->words[0]; b = state->words[1]; c = state->words[2]; d = state->words[3]; e = state->words[4]; f = state->words[5]; g = state->words[6]; h = state->words[7]; j = 0; do { v = idata[j]; #if BYTE_ORDER == LITTLE_ENDIAN /* Copy data while converting to host byte order */ REVERSE32(v, W256[j]); /* Apply the SHA-256 compression function to update a..h */ T1 = h + Sigma1_256(e) + Ch(e, f, g) + K256[j] + W256[j]; #else /* BYTE_ORDER == LITTLE_ENDIAN */ /* Apply the SHA-256 compression function to update a..h with copy */ T1 = h + Sigma1_256(e) + Ch(e, f, g) + K256[j] + (W256[j] = v); #endif /* BYTE_ORDER == LITTLE_ENDIAN */ T2 = Sigma0_256(a) + Maj(a, b, c); h = g; g = f; f = e; e = d + T1; d = c; c = b; b = a; a = T1 + T2; j++; } while (j < 16); do { /* Part of the message block expansion: */ s0 = W256[(j+1)&0x0f]; s0 = sigma0_256(s0); s1 = W256[(j+14)&0x0f]; s1 = sigma1_256(s1); /* Apply the SHA-256 compression function to update a..h */ T1 = h + Sigma1_256(e) + Ch(e, f, g) + K256[j] + (W256[j&0x0f] += s1 + W256[(j+9)&0x0f] + s0); T2 = Sigma0_256(a) + Maj(a, b, c); h = g; g = f; f = e; e = d + T1; d = c; c = b; b = a; a = T1 + T2; j++; } while (j < 64); /* Compute the current intermediate hash value */ state->words[0] += a; state->words[1] += b; state->words[2] += c; state->words[3] += d; state->words[4] += e; state->words[5] += f; state->words[6] += g; state->words[7] += h; } #endif /* SHA2_UNROLL_TRANSFORM */ /* PJ: alignment-safe version */ #define data_aligned4(data) (((data - (const uint8_t *)(0UL)) & 3) == 0) #define data_aligned8(data) (((data - (const uint8_t *)(0ULL)) & 7) == 0) static void sha256_transform_aligned (sha256_state *state, const uint8_t *data) { if (data_aligned4(data)) { sha256_transform(state, (const uint32_t *)((const void *)data)); // alignment ok } else { uint32_t idata[16]; memcpy(&idata[0], data, 16 * 
sizeof(uint32_t)); sha256_transform(state, idata); } } void sha256_digest_add (sha256_state *state, const void *vdata, size_t len) { unsigned int freespace, usedspace; const uint8_t *data; if (len == 0) /* Calling with no data is valid - we do nothing */ return; data = (const uint8_t *)vdata; usedspace = (state->bitcount >> 3) % SHA256_BLOCK_LENGTH; if (usedspace > 0) { /* Calculate how much free space is available in the buffer */ freespace = SHA256_BLOCK_LENGTH - usedspace; if (len >= freespace) { /* Fill the buffer completely and process it */ MEMCPY_BCOPY(&state->buffer[usedspace], data, freespace); state->bitcount += freespace << 3; len -= freespace; data += freespace; sha256_transform(state, state->buffer32); } else { /* The buffer is not yet full */ MEMCPY_BCOPY(&state->buffer[usedspace], data, len); state->bitcount += len << 3; return; } } while (len >= SHA256_BLOCK_LENGTH) { /* Process as many complete blocks as we can */ sha256_transform_aligned(state, data); state->bitcount += SHA256_BLOCK_LENGTH << 3; len -= SHA256_BLOCK_LENGTH; data += SHA256_BLOCK_LENGTH; } if (len > 0) { /* There's left-overs, so save 'em */ MEMCPY_BCOPY(state->buffer, data, len); state->bitcount += len << 3; } } static void digest_hex (uint8_t digest[], const void *data, size_t size, int flags); void sha256_digest_get (sha256_state *state, uint8_t digest[], int flags) { unsigned int usedspace; usedspace = (state->bitcount >> 3) % SHA256_BLOCK_LENGTH; #if BYTE_ORDER == LITTLE_ENDIAN /* Convert FROM host byte order */ REVERSE64(state->bitcount,state->bitcount); #endif if (usedspace > 0) { /* Begin padding with a 1 bit: */ state->buffer[usedspace++] = 0x80; if (usedspace <= SHA256_SHORT_BLOCK_LENGTH) { /* Set-up for the last transform: */ MEMSET_BZERO(&state->buffer[usedspace], SHA256_SHORT_BLOCK_LENGTH - usedspace); } else { if (usedspace < SHA256_BLOCK_LENGTH) { MEMSET_BZERO(&state->buffer[usedspace], SHA256_BLOCK_LENGTH - usedspace); } /* Do second-to-last transform: */ 
sha256_transform(state, state->buffer32); /* And set-up for the last transform: */ MEMSET_BZERO(state->buffer, SHA256_SHORT_BLOCK_LENGTH); } } else { /* Set-up for the last transform: */ MEMSET_BZERO(state->buffer, SHA256_SHORT_BLOCK_LENGTH); /* Begin padding with a 1 bit: */ *state->buffer = 0x80; } /* Set the bit count: */ //*(uint64_t*)&state->buffer[SHA256_SHORT_BLOCK_LENGTH] = state->bitcount; // aliasing violation warning state->buffer64[SHA256_SHORT_BLOCK_LENGTH / sizeof(uint64_t)] = state->bitcount; /* Final transform: */ sha256_transform(state, state->buffer32); #if BYTE_ORDER == LITTLE_ENDIAN { /* Convert TO host byte order */ int j; for (j = 0; j < 8; j++) { REVERSE32(state->words[j], state->words[j]); } } #endif if (flags & SHA_HEX) digest_hex(digest, state->words, SHA256_DIGEST_LENGTH, flags); else memcpy(digest, state->words, SHA256_DIGEST_LENGTH); } /*** SHA-512: *********************************************************/ sha512_state * sha512_digest_init (sha512_state *state) { MEMCPY_BCOPY(state->words, sha512_initial_hash_value, SHA512_DIGEST_LENGTH); MEMSET_BZERO(state->buffer, SHA512_BLOCK_LENGTH); state->bitcount[0] = 0; state->bitcount[1] = 0; return state; } #ifdef SHA2_UNROLL_TRANSFORM /* PJ: ++ operations moved out of macros! 
*/ /* Unrolled SHA-512 round macros: */ #if BYTE_ORDER == LITTLE_ENDIAN #define ROUND512_0_TO_15(v, a, b, c, d, e, f, g, h) \ REVERSE64(v, W512[j]); \ T1 = (h) + Sigma1_512(e) + Ch((e), (f), (g)) + K512[j] + W512[j]; \ (d) += T1; \ (h) = T1 + Sigma0_512(a) + Maj((a), (b), (c)) #else /* BYTE_ORDER == LITTLE_ENDIAN */ #define ROUND512_0_TO_15(v, a, b, c, d, e, f, g, h) \ T1 = (h) + Sigma1_512(e) + Ch((e), (f), (g)) + K512[j] + (W512[j] = v); \ (d) += T1; \ (h) = T1 + Sigma0_512(a) + Maj((a), (b), (c)) #endif /* BYTE_ORDER == LITTLE_ENDIAN */ #define ROUND512(a, b, c, d, e, f, g, h) \ s0 = W512[(j+1)&0x0f]; \ s0 = sigma0_512(s0); \ s1 = W512[(j+14)&0x0f]; \ s1 = sigma1_512(s1); \ T1 = (h) + Sigma1_512(e) + Ch((e), (f), (g)) + K512[j] + (W512[j&0x0f] += s1 + W512[(j+9)&0x0f] + s0); \ (d) += T1; \ (h) = T1 + Sigma0_512(a) + Maj((a), (b), (c)) static void sha512_transform (sha512_state *state, const uint64_t idata[16]) { uint64_t a, b, c, d, e, f, g, h, s0, s1; uint64_t T1, *W512, v; int j; W512 = state->buffer64; /* Initialize registers with the prev. 
intermediate value */ a = state->words[0]; b = state->words[1]; c = state->words[2]; d = state->words[3]; e = state->words[4]; f = state->words[5]; g = state->words[6]; h = state->words[7]; j = 0; do { v = idata[j]; ROUND512_0_TO_15(v, a, b, c, d, e, f, g, h); ++j; v = idata[j]; ROUND512_0_TO_15(v, h, a, b, c, d, e, f, g); ++j; v = idata[j]; ROUND512_0_TO_15(v, g, h, a, b, c, d, e, f); ++j; v = idata[j]; ROUND512_0_TO_15(v, f, g, h, a, b, c, d, e); ++j; v = idata[j]; ROUND512_0_TO_15(v, e, f, g, h, a, b, c, d); ++j; v = idata[j]; ROUND512_0_TO_15(v, d, e, f, g, h, a, b, c); ++j; v = idata[j]; ROUND512_0_TO_15(v, c, d, e, f, g, h, a, b); ++j; v = idata[j]; ROUND512_0_TO_15(v, b, c, d, e, f, g, h, a); ++j; } while (j < 16); /* Now for the remaining rounds up to 79: */ do { ROUND512(a, b, c, d, e, f, g, h); ++j; ROUND512(h, a, b, c, d, e, f, g); ++j; ROUND512(g, h, a, b, c, d, e, f); ++j; ROUND512(f, g, h, a, b, c, d, e); ++j; ROUND512(e, f, g, h, a, b, c, d); ++j; ROUND512(d, e, f, g, h, a, b, c); ++j; ROUND512(c, d, e, f, g, h, a, b); ++j; ROUND512(b, c, d, e, f, g, h, a); ++j; } while (j < 80); /* Compute the current intermediate hash value */ state->words[0] += a; state->words[1] += b; state->words[2] += c; state->words[3] += d; state->words[4] += e; state->words[5] += f; state->words[6] += g; state->words[7] += h; } #else /* SHA2_UNROLL_TRANSFORM */ static void sha512_transform (sha512_state *state, const uint64_t idata[16]) { uint64_t a, b, c, d, e, f, g, h, s0, s1; uint64_t T1, T2, *W512, v; int j; W512 = state->buffer64; /* Initialize registers with the prev. 
intermediate value */ a = state->words[0]; b = state->words[1]; c = state->words[2]; d = state->words[3]; e = state->words[4]; f = state->words[5]; g = state->words[6]; h = state->words[7]; j = 0; do { v = idata[j]; #if BYTE_ORDER == LITTLE_ENDIAN /* Convert TO host byte order */ REVERSE64(v, W512[j]); /* Apply the SHA-512 compression function to update a..h */ T1 = h + Sigma1_512(e) + Ch(e, f, g) + K512[j] + W512[j]; #else /* BYTE_ORDER == LITTLE_ENDIAN */ /* Apply the SHA-512 compression function to update a..h with copy */ T1 = h + Sigma1_512(e) + Ch(e, f, g) + K512[j] + (W512[j] = v); #endif /* BYTE_ORDER == LITTLE_ENDIAN */ T2 = Sigma0_512(a) + Maj(a, b, c); h = g; g = f; f = e; e = d + T1; d = c; c = b; b = a; a = T1 + T2; j++; } while (j < 16); do { /* Part of the message block expansion: */ s0 = W512[(j+1)&0x0f]; s0 = sigma0_512(s0); s1 = W512[(j+14)&0x0f]; s1 = sigma1_512(s1); /* Apply the SHA-512 compression function to update a..h */ T1 = h + Sigma1_512(e) + Ch(e, f, g) + K512[j] + (W512[j&0x0f] += s1 + W512[(j+9)&0x0f] + s0); T2 = Sigma0_512(a) + Maj(a, b, c); h = g; g = f; f = e; e = d + T1; d = c; c = b; b = a; a = T1 + T2; j++; } while (j < 80); /* Compute the current intermediate hash value */ state->words[0] += a; state->words[1] += b; state->words[2] += c; state->words[3] += d; state->words[4] += e; state->words[5] += f; state->words[6] += g; state->words[7] += h; } #endif /* SHA2_UNROLL_TRANSFORM */ static void sha512_transform_aligned (sha512_state *state, const uint8_t *data) { if (data_aligned8(data)) { sha512_transform(state, (const uint64_t *)((const void *)data)); // alignment ok } else { uint64_t idata[16]; memcpy(&idata[0], data, 16 * sizeof(uint64_t)); sha512_transform(state, idata); } } void sha512_digest_add (sha512_state *state, const void *vdata, size_t len) { unsigned int freespace, usedspace; const uint8_t *data; if (len == 0) /* Calling with no data is valid - we do nothing */ return; /* Sanity check: */ data = (const uint8_t 
*)vdata; usedspace = (state->bitcount[0] >> 3) % SHA512_BLOCK_LENGTH; if (usedspace > 0) { /* Calculate how much free space is available in the buffer */ freespace = SHA512_BLOCK_LENGTH - usedspace; if (len >= freespace) { /* Fill the buffer completely and process it */ MEMCPY_BCOPY(&state->buffer[usedspace], data, freespace); ADDINC128(state->bitcount, freespace << 3); len -= freespace; data += freespace; sha512_transform(state, state->buffer64); } else { /* The buffer is not yet full */ MEMCPY_BCOPY(&state->buffer[usedspace], data, len); ADDINC128(state->bitcount, len << 3); return; } } while (len >= SHA512_BLOCK_LENGTH) { /* Process as many complete blocks as we can */ sha512_transform_aligned(state, data); ADDINC128(state->bitcount, SHA512_BLOCK_LENGTH << 3); len -= SHA512_BLOCK_LENGTH; data += SHA512_BLOCK_LENGTH; } if (len > 0) { /* There's left-overs, so save 'em */ MEMCPY_BCOPY(state->buffer, data, len); ADDINC128(state->bitcount, len << 3); } } static void sha512_last (sha512_state *state) { unsigned int usedspace; usedspace = (state->bitcount[0] >> 3) % SHA512_BLOCK_LENGTH; #if BYTE_ORDER == LITTLE_ENDIAN /* Convert FROM host byte order */ REVERSE64(state->bitcount[0],state->bitcount[0]); REVERSE64(state->bitcount[1],state->bitcount[1]); #endif if (usedspace > 0) { /* Begin padding with a 1 bit: */ state->buffer[usedspace++] = 0x80; if (usedspace <= SHA512_SHORT_BLOCK_LENGTH) { /* Set-up for the last transform: */ MEMSET_BZERO(&state->buffer[usedspace], SHA512_SHORT_BLOCK_LENGTH - usedspace); } else { if (usedspace < SHA512_BLOCK_LENGTH) { MEMSET_BZERO(&state->buffer[usedspace], SHA512_BLOCK_LENGTH - usedspace); } /* Do second-to-last transform: */ sha512_transform(state, state->buffer64); /* And set-up for the last transform: */ //MEMSET_BZERO(state->buffer, SHA512_BLOCK_LENGTH - 2); // seems a typo, we overwrite last 16 bytes below MEMSET_BZERO(state->buffer, SHA512_SHORT_BLOCK_LENGTH); } } else { /* Prepare for final transform: */ 
MEMSET_BZERO(state->buffer, SHA512_SHORT_BLOCK_LENGTH); /* Begin padding with a 1 bit: */ *state->buffer = 0x80; } /* Store the length of input data (in bits): */ //*(uint64_t*)&state->buffer[SHA512_SHORT_BLOCK_LENGTH] = state->bitcount[1]; // aliasing violation warning //*(uint64_t*)&state->buffer[SHA512_SHORT_BLOCK_LENGTH+8] = state->bitcount[0]; state->buffer64[SHA512_SHORT_BLOCK_LENGTH / sizeof(uint64_t)] = state->bitcount[1]; state->buffer64[SHA512_SHORT_BLOCK_LENGTH / sizeof(uint64_t) + 1] = state->bitcount[0]; /* Final transform: */ sha512_transform(state, state->buffer64); } void sha512_digest_get (sha512_state *state, uint8_t digest[], int flags) { /* If no digest buffer is passed, we don't bother doing this: */ sha512_last(state); /* Save the hash data for output: */ #if BYTE_ORDER == LITTLE_ENDIAN { /* Convert TO host byte order */ int j; for (j = 0; j < 8; j++) { REVERSE64(state->words[j], state->words[j]); } } #endif if (flags & SHA_HEX) digest_hex(digest, state->words, SHA512_DIGEST_LENGTH, flags); else memcpy(digest, state->words, SHA512_DIGEST_LENGTH); } /*** SHA-384: *********************************************************/ sha384_state * sha384_digest_init (sha384_state *state) { MEMCPY_BCOPY(state->words, sha384_initial_hash_value, SHA512_DIGEST_LENGTH); MEMSET_BZERO(state->buffer, SHA384_BLOCK_LENGTH); state->bitcount[0] = state->bitcount[1] = 0; return state; } void sha384_digest_add (sha384_state *state, const void *data, size_t len) { sha512_digest_add((sha512_state *)state, data, len); } void sha384_digest_get (sha384_state *state, uint8_t digest[], int flags) { sha512_last((sha512_state *)state); /* Save the hash data for output: */ #if BYTE_ORDER == LITTLE_ENDIAN { /* Convert TO host byte order */ int j; for (j = 0; j < 6; j++) { REVERSE64(state->words[j], state->words[j]); } } #endif if (flags & SHA_HEX) digest_hex(digest, state->words, SHA384_DIGEST_LENGTH, flags); else memcpy(digest, state->words, SHA384_DIGEST_LENGTH); } /* hex output 
*/ static void digest_hex (uint8_t digest[], const void *data, size_t size, int flags) { const char *alphabet; const uint8_t *bytes; size_t i; bytes = (const uint8_t *)data; alphabet = (flags & SHA_LCHEX) ? "0123456789abcdef" : "0123456789ABCDEF"; for (i = 0; i < size; ++i, ++bytes) { *digest++ = (uint8_t)alphabet[(*bytes) >> 4]; *digest++ = (uint8_t)alphabet[(*bytes) & 15]; } *digest = 0; } /* string checksum */ void sha256_digest (const void *data, size_t size, uint8_t digest[], int flags) { sha256_state state; sha256_digest_init(&state); sha256_digest_add(&state, data, size); sha256_digest_get(&state, digest, flags); } void sha384_digest (const void *data, size_t size, uint8_t digest[], int flags) { sha384_state state; sha384_digest_init(&state); sha384_digest_add(&state, data, size); sha384_digest_get(&state, digest, flags); } void sha512_digest (const void *data, size_t size, uint8_t digest[], int flags) { sha512_state state; sha512_digest_init(&state); sha512_digest_add(&state, data, size); sha512_digest_get(&state, digest, flags); } /* file checksum */ #define DIGEST_BUFFER_SIZE 4096 int sha256_digest_add_file (sha256_state *state, const char *filename) { FILE *fh; uint8_t buffer[DIGEST_BUFFER_SIZE]; size_t read; if ((fh = fopen(filename, "rb")) == NULL) return 0; do { read = fread(buffer, 1, DIGEST_BUFFER_SIZE, fh); sha256_digest_add(state, buffer, read); } while (read == DIGEST_BUFFER_SIZE); fclose(fh); return 1; } int sha256_digest_file (const char *filename, uint8_t digest[], int flags) { sha256_state state; sha256_digest_init(&state); if (sha256_digest_add_file(&state, filename)) { sha256_digest_get(&state, digest, flags); return 1; } return 0; } int sha384_digest_add_file (sha384_state *state, const char *filename) { FILE *fh; uint8_t buffer[DIGEST_BUFFER_SIZE]; size_t read; if ((fh = fopen(filename, "rb")) == NULL) return 0; do { read = fread(buffer, 1, DIGEST_BUFFER_SIZE, fh); sha384_digest_add(state, buffer, read); } while (read == 
DIGEST_BUFFER_SIZE); fclose(fh); return 1; } int sha384_digest_file (const char *filename, uint8_t digest[], int flags) { sha384_state state; sha384_digest_init(&state); if (sha384_digest_add_file(&state, filename)) { sha384_digest_get(&state, digest, flags); return 1; } return 0; } int sha512_digest_add_file (sha512_state *state, const char *filename) { FILE *fh; uint8_t buffer[DIGEST_BUFFER_SIZE]; size_t read; if ((fh = fopen(filename, "rb")) == NULL) return 0; do { read = fread(buffer, 1, DIGEST_BUFFER_SIZE, fh); sha512_digest_add(state, buffer, read); } while (read == DIGEST_BUFFER_SIZE); fclose(fh); return 1; } int sha512_digest_file (const char *filename, uint8_t digest[], int flags) { sha512_state state; sha512_digest_init(&state); if (sha512_digest_add_file(&state, filename)) { sha512_digest_get(&state, digest, flags); return 1; } return 0; } luametatex-2.10.08/source/libraries/pplib/util/utilsha.h000066400000000000000000000053141442250314700232210ustar00rootroot00000000000000/* sha2 implementation excerpted from code by Aaron D. 
Gifford */ #ifndef UTIL_SHA_H #define UTIL_SHA_H #include #include #include "utildecl.h" #define SHA256_BLOCK_LENGTH 64 #define SHA256_DIGEST_LENGTH 32 #define SHA256_STRING_LENGTH (SHA256_DIGEST_LENGTH * 2 + 1) #define SHA384_BLOCK_LENGTH 128 #define SHA384_DIGEST_LENGTH 48 #define SHA384_STRING_LENGTH (SHA384_DIGEST_LENGTH * 2 + 1) #define SHA512_BLOCK_LENGTH 128 #define SHA512_DIGEST_LENGTH 64 #define SHA512_STRING_LENGTH (SHA512_DIGEST_LENGTH * 2 + 1) //#define sha256_state sha256_state_t //#define sha384_state sha384_state_t //#define sha512_state sha512_state_t typedef struct { uint32_t words[8]; uint64_t bitcount; union { uint8_t buffer[SHA256_BLOCK_LENGTH]; uint32_t buffer32[SHA256_BLOCK_LENGTH / sizeof(uint32_t)]; uint64_t buffer64[SHA256_BLOCK_LENGTH / sizeof(uint64_t)]; }; } sha256_state; typedef struct { uint64_t words[8]; uint64_t bitcount[2]; union { uint8_t buffer[SHA512_BLOCK_LENGTH]; uint64_t buffer64[SHA512_BLOCK_LENGTH / sizeof(uint64_t)]; }; } sha512_state; typedef sha512_state sha384_state; enum { SHA_BYTES = 0, SHA_UCHEX = (1<<0), SHA_LCHEX = (1<<1) }; #define SHA_DEFAULT SHA_BYTES #define SHA_HEX (SHA_UCHEX|SHA_LCHEX) UTILAPI sha256_state * sha256_digest_init (sha256_state *state); UTILAPI sha384_state * sha384_digest_init (sha384_state *state); UTILAPI sha512_state * sha512_digest_init (sha512_state *state); UTILAPI void sha256_digest_add (sha256_state *state, const void *data, size_t size); UTILAPI void sha384_digest_add (sha384_state *state, const void *data, size_t size); UTILAPI void sha512_digest_add (sha512_state *state, const void *data, size_t size); UTILAPI void sha256_digest_get (sha256_state *state, uint8_t digest[], int flags); UTILAPI void sha384_digest_get (sha384_state *state, uint8_t digest[], int flags); UTILAPI void sha512_digest_get (sha512_state *state, uint8_t digest[], int flags); UTILAPI void sha256_digest (const void *data, size_t size, uint8_t digest[], int flags); UTILAPI void sha384_digest (const void *data, size_t 
size, uint8_t digest[], int flags); UTILAPI void sha512_digest (const void *data, size_t size, uint8_t digest[], int flags); UTILAPI int sha256_digest_add_file (sha256_state *state, const char *filename); UTILAPI int sha256_digest_file (const char *filename, uint8_t digest[], int flags); UTILAPI int sha384_digest_add_file (sha384_state *state, const char *filename); UTILAPI int sha384_digest_file (const char *filename, uint8_t digest[], int flags); UTILAPI int sha512_digest_add_file (sha512_state *state, const char *filename); UTILAPI int sha512_digest_file (const char *filename, uint8_t digest[], int flags); #endif luametatex-2.10.08/source/libraries/readme.txt000066400000000000000000000034641442250314700213160ustar00rootroot00000000000000Nota bene, The currently embedded libcerf library might become an optional one as soon as we decide to provide it as such. It doesn't put a dent in filesize but as it's used rarely (and mostly as complement to the complex math support) that makes sense. The library was added because some users wanted it as companion the other math libraries and because TeX is often about math it sort of feels okay. But it looks like there will never be support for the MSVC compiler. Mojca and I (Hans) adapted the sources included here to compile out of the box, but that didn't make it back into the original. The pplib library has a few patches with respect to memory allocation and zip compression so that we can hook in the minizip and mimalloc alternatives. The avl and hnj libraries are adapted to Lua(Meta)TeX and might get some more adaptations depending on our needs. The decnumber library that is also used in mplib is unchanged. 
In mimalloc we need to patch init.c: #if defined(_M_X64) || defined(_M_ARM64) to get rid of a link error as well as in options.c some snprint issue with the mingw64 cross compiler: /* HH */ snprintf(tprefix, sizeof(tprefix), "%sthread 0x%x: ", prefix, (unsigned) _mi_thread_id()); /* HH: %z is unknown */ In decNumber.c this got added: # include "../../utilities/auxmemory.h" # define malloc lmt_memory_malloc # define free lmt_memory_free In softposit/source/include/softposit_types.h we have to comment the initializations in the unions bcause the compiler complains about it (we're not using c++). So: uint32_t ui; // =0; // patched by HH because the compilers don't like this uint64_t ui[2]; // ={0,0}; // idem uint64_t ui[8]; // ={0,0,0,0, 0,0,0,0}; // idme uint64_t ui[8]; // ={0,0,0,0, 0,0,0,0}; // idem uint64_t ui[8]; // ={0,0,0,0, 0,0,0,0}; // idem Hansluametatex-2.10.08/source/libraries/softposit/000077500000000000000000000000001442250314700213435ustar00rootroot00000000000000luametatex-2.10.08/source/libraries/softposit/LICENSE000066400000000000000000000032721442250314700223540ustar00rootroot00000000000000BSD 3-Clause License This is part of the SoftPosit Posit Arithmetic Package by S. H. Leong (Cerlane). Copyright 2017 2018 A*STAR. All rights reserved. Many of the C source files design were based on SoftFloat IEEE Floating-Point Arithmetic Package, Release 3d, by John R. Hauser. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
* Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. luametatex-2.10.08/source/libraries/softposit/README.md000066400000000000000000000275251442250314700226350ustar00rootroot00000000000000# SoftPosit This version (0.4.1) supports: 32-bit with two exponent bit (posit32_t). -> Not exhaustively tested 16-bit with one exponent bit (posit16_t). 8-bit with zero exponent bit (posit8_t). 2-bit to 32-bit with two exponent bits (posit_2_t) -> Not fast : Using 32-bits in the background to store all sizes. 
Exhaustively tested for X=(2:32) : pX2_rint, pX2_to_pX2, pX2_to_i32/64, pX2_to_ui32/64, pX2_sqrt, ui/i32_to_pX2 Exhaustively tested for X=(2:13) : ui64_to_pX2, i64_to_pX2 Exhaustively tested for X=(2:20) : pX2_add, pX2_sub, pX2_mul, pX2_div Exhaustively tested for X=(2:21) : pX2_mul Exhaustively tested for X=(2:15) : pX2_mulAdd Exhaustively tested for X=(2:14) : quireX2_fdp_add, quireX2_fdp_sub (using quire32 as the underlying code) This code is tested on * GNU gcc (SUSE Linux) 4.8.5 * Apple LLVM version 9.1.0 (clang-902.0.39.2) * Windows 10 (Mingw-w64) Please note that the same Makefile in build/Linux-x86_64-GCC is used for all 3 operating systems. All posit8_t and posit16_t operations are exhaustively tested with exception of p16_mulAdd and q16_fdp_add/sub operations. **posit32_t operations are still being tested exhaustively for correctness. It will take weeks to months before these tests complete.** Versions are offered * [Fast C version](#cversion) : The main source code where all other versions are based on. * [User friendly C++ version](#cppversion) : Documentation can be found below. * [User friendly Python version](https://gitlab.com/cerlane/SoftPosit-Python/) : https://gitlab.com/cerlane/SoftPosit-Python/ * [Julia](#jversion) : Currently only simple .so support. Documentation can be found below. 
* [Others](#known) ## Fast C version ### Examples #### A 8-bit example on how to use the code to add: ``` #include "softposit.h" int main (int argc, char *argv[]){ posit8_t pA, pB, pZ; pA = castP8(0xF2); pB = castP8(0x23); pZ = p8_add(pA, pB); //To check answer by converting it to double double dZ = convertP8ToDouble(pZ); printf("dZ: %.15f\n", dZ); //To print result in binary uint8_t uiZ = castUI(pZ); printBinary((uint64_t*)&uiZ, 8); return 0; } ``` #### A 16-bit example on how to use the code to multiply: ``` #include "softposit.h" int main (int argc, char *argv[]){ posit16_t pA, pB, pZ; pA = castP16(0x0FF2); pB = castP16(0x2123); pZ = p16_mul(pA, pB); //To check answer by converting it to double double dZ = convertP16ToDouble(pZ); printf("dZ: %.15f\n", dZ); //To print result in binary uint16_t uiZ = castUI(pZ); printBinary((uint64_t*)&uiZ, 16); return 0; } ``` #### A 24-bit (es=2) example on how to use the code: ``` #include "softposit.h" int main (int argc, char *argv[]){ posit_2_t pA, pB, pZ; pA.v = 0xF2; //this is to set the bits (method 1) pB = castPX2(0x23); //this is to set the bits (method 2) pZ = pX2_add(pA, pB, 24); //To check answer by converting it to double double dZ = convertPX2ToDouble(pZ); printf("dZ: %.40f\n", dZ); //To print result in binary printBinaryPX((uint32_t*)&pZ.v, 24); //To print result as double printf("result: %.40f\n", convertPX2ToDouble(pZ)); return 0; } ``` #### For deep learning, please use quire. 
``` //Convert double to posit posit16_t pA = convertDoubleToP16(1.02783203125 ); posit16_t pB = convertDoubleToP16(0.987060546875); posit16_t pC = convertDoubleToP16(0.4998779296875); posit16_t pD = convertDoubleToP16(0.8797607421875); quire16_t qZ; //Set quire to 0 qZ = q16_clr(qZ); //accumulate products without roundings qZ = q16_fdp_add(qZ, pA, pB); qZ = q16_fdp_add(qZ, pC, pD); //Convert back to posit posit16_t pZ = q16_to_p16(qZ); //To check answer double dZ = convertP16ToDouble(pZ); ``` ### Build and link #### Build - softposit.a Please note that only 64-bit systems are supported. For Mac OSX and Linux, the same Makefile is used. Note that architecture specific optimisation is removed. To get maximum speed, please update OPTIMISATION flag in build/Linux-x86_64-GCC/Makefile. ``` cd SoftPosit/build/Linux-x86_64-GCC make -j6 all ``` #### Link - softposit.a If your source code is for example "main.c" and you want to create an executable "main". Assume that SoftPosit is installed and installed in the same directory (installing in the same directory is NOT recommended). 
``` gcc -lm -o main \ main.c SoftPosit/build/Linux-x86_64-GCC/softposit.a -ISoftPosit/source/include -O2 ``` ### Features #### Main Posit Functionalities: Add : posit16_t p16_add(posit16_t, posit16_t) posit8_t p8_add(posit8_t, posit8_t) Subtract : posit16_t p16_sub(posit16_t, posit16_t) posit8_t p8_sub(posit8_t, posit8_t) Divide : posit16_t p16_div(posit16_t, posit16_t) posit8_t p8_div(posit8_t, posit8_t) Multiply : posit16_t p16_mul(posit16_t, posit16_t) posit8_t p8_mul(posit8_t, posit8_t) Fused Multiply Add : posit16_t p16_mulAdd(posit16_t, posit16_t, posit16_t) posit8_t p8_mulAdd(posit8_t, posit8_t, posit8_t) Note: p16_mulAdd(a, b, c) <=> a*b + c #### Main Quire Functionalities Fused dot product-add : quire16_t q16_fdp_add(quire16_t, posit16_t, posit16_t) quire8_t q16_fdp_add(quire8_t, posit8_t, posit8_t) Note: q8_fdp_add (a, b, c) <=> a + b*c Fused dot product-subtract : quire16_t q16_fdp_sub(quire16_t, posit16_t, posit16_t) quire8_t q8_fdp_sub(quire8_t, posit8_t, posit8_t) Set quire variable to zero : quire16_t q16_clr(quire16_t) quire8_t q8_clr(quire8_t) Convert quire to posit : posit16_t q16_to_p16(quire16_t) posit8_t q8_to_p8(quire8_t) #### Functionalites in Posit Standard Square root : posit16_t p16_sqrt(posit16_t) posit8_t p8_sqrt(posit8_t) Round to nearest integer : posit16_t p16_roundToInt(posit16_t) posit8_t p8_roundToInt(posit8_t) Check equal : bool p16_eq( posit16_t, posit16_t ) bool p8_eq( posit8_t, posit8_t ) Check less than equal : bool p16_le( posit16_t, posit16_t ) bool p8_le( posit8_t, posit8_t ) Check less than : bool p16_lt( posit16_t, posit16_t ) bool p8_lt( posit8_t, posit8_t ) Convert posit to integer (32 bits) : int_fast32_t p16_to_i32( posit16_t ) int_fast32_t p8_to_i32( posit8_t ) Convert posit to long long integer (64 bits) : int_fast64_t p16_to_i64( posit16_t) int_fast64_t p8_to_i64( posit8_t) Convert unsigned integer (32 bits) to posit: posit16_t ui32_to_p16( uint32_t a ) posit8_t ui32_to_p8( uint32_t a ) Convert unsigned long long 
int (64 bits) to posit: posit16_t ui64_to_p16( uint64_t a ) posit8_t ui64_to_p8( uint64_t a ) Convert integer (32 bits) to posit: posit16_t i32_to_p16( int32_t a ) posit8_t i32_to_p8( uint32_t a ) Convert long integer (64 bits) to posit: posit16_t i64_to_p16( int64_t a ) posit8_t i64_to_p8( uint64_t a ) Convert posit to unsigned integer (32 bits) : uint_fast32_t p16_to_ui32( posit16_t ) uint_fast32_t p8_to_ui32( posit8_t ) Convert posit to unsigned long long integer (64 bits) : uint_fast64_t p16_to_ui64( posit16_t) uint_fast64_t p8_to_ui64( posit8_t) Convert posit to integer (32 bits) : uint_fast32_t p16_to_i32( posit16_t ) uint_fast32_t p8_to_i32( posit8_t ) Convert posit to long long integer (64 bits) : uint_fast64_t p16_to_i64( posit16_t) uint_fast64_t p8_to_i64( posit8_t) Convert posit to posit of another size : posit8_t p16_to_p8( posit16_t ) posit32_t p16_to_p32( posit16_t ) posit16_t p8_to_p16( posit8_t ) posit32_t p8_to_p32( posit8_t ) #### Helper Functionalites (NOT in Posit Standard) Convert posit to double (64 bits) : double convertP16ToDouble(posit16_t) double convertP8ToDouble(posit8_t) Convert double (64 bits) to posit : posit16_t convertDoubleToP16(double) posit8_t convertDoubleToP8(double) Cast binary expressed in unsigned integer to posit : posit16_t castP16(uint16_t) posit8_t castP8(uint8_t) Cast posit into binary expressed in unsigned integer uint16_t castUI(posit16_t) uint8_t castUI(posit8_t) ## Easy to use C++ version ### Build and Link **Build and link your C++ program to SoftPosit.a (C)** Please compile your executable with g++ and not gcc. 
``` g++ -std=gnu++11 -o main \ ../source/testmain.cpp \ ../../SoftPosit/source/../build/Linux-x86_64-GCC/softposit.a \ -I../../SoftPosit/source/../build/Linux-x86_64-GCC -O2 ``` ### Example #### Example of testmain.cpp ``` #include "softposit_cpp.h" int main(int argc, char *argv[]){ posit16 x = 1; posit16 y = 1.5; posit8 x8 = 1; quire16 q; quire8 q8; x += p16(1.5)*5.1; printf("%.13f sizeof: %d\n", x.toDouble(), sizeof(posit16)); x = q.qma(4, 1.2).toPosit(); printf("%.13f sizeof: %d\n", x.toDouble(), sizeof(quire16)); x8 = q8.qma(4, 1.2).toPosit(); printf("%.13f sizeof: %d\n", x8.toDouble(), sizeof(quire8)); std::cout << x; return 0; } ``` ### Functionalities #### Main functionalities * Posit types: posit16, posit8 * Fused-multiply-add: * posit16 fma(posit16, posit16, posit16) * posit18 fma(posit18, posit18, posit8) * Square root: * posit16 sqrt(posit16) * posit8 sqrt(posit8) * roundToInt: * posit16 rint(posit16) * posit8 rint(posit8) * Supported operators * \+ * += * \- * \-= * * * *= * / * /= * << * <<= * >> * >>= * & * &= * | * |= * ^ * ^= * && * || * ++ * -- * == * ~ * ! 
* != * * * < * *= * <= * Posit to Double: * double (instance of posit).toDouble() * Double to Posit: * posit16 p16(double) * posit8 p8(double) * Posit to NaR: * posit16 (instance of posit16).toNaR() * posit8 (instance of posit8).toNaR() #### Quire functionalities (particularly for deep learning) * Quire types: quire16, quire8 (when declared, quire is initiated to zero) * Clear quire to zero: * (instance of quire16).clr() * Quire multiply add (fused) * (instance of quire16).fma(quire16) * (instance of quire8).fma(quire8) * Quire multiply subtract (fused) * (instance of quire16).fms(quire16) * (instance of quire8).fms(quire8) * Convert quire to Posit * posit16 (instance of quire16).toPosit() * posit8 (instance of quire8).toPosit() * Check if quire is NaR * bool (instance of quire).isNaR() ## Julia * [Julia implementation] (https://github.com/milankl/SoftPosit.jl) on top of SoftPosit ### Install via Julia package manager ``` > add https://github.com/milankl/SoftPosit.jl ``` Credits to Milan Klöwer. 
### Behind the scene #### Build shared library ``` cd SoftPosit/build/Linux_x86_64_GCC/ make -j6 julia ``` #### Simple Tests ``` julia> t = ccall((:convertDoubleToP16, "/path/to/SoftPosit/build/Linux-x86_64-GCC/softposit.so"), UInt16, (Float64,),1.0) 0x4000 julia> t = ccall((:convertDoubleToP16, "/path/to/SoftPosit/build/Linux-x86_64-GCC/softposit.so"), UInt16, (Float64,),-1.0) 0xc000 ``` ## Known implementations on top of SoftPosit * [Andrey Zgarbul's Rust implementation](https://crates.io/crates/softposit) * [Milan Klöwer's Julia implementation](https://github.com/milankl/SoftPosit.jl) * [SpeedGo Computing's TensorFlow](https://github.com/xman/tensorflow/tree/posit) * [SpeedGo Computing's Numpy](https://github.com/xman/numpy-posit) * [Cerlane Leong's SoftPosit-Python](https://gitlab.com/cerlane/SoftPosit-Python) * [David Thien's SoftPosit bindings Racket](https://github.com/DavidThien/softposit-rkt) * [Bill Zorn's SoftPosit and SoftFloat Python](https://pypi.org/project/sfpy/) luametatex-2.10.08/source/libraries/softposit/build/000077500000000000000000000000001442250314700224425ustar00rootroot00000000000000luametatex-2.10.08/source/libraries/softposit/build/Linux-x86_64-GCC/000077500000000000000000000000001442250314700250675ustar00rootroot00000000000000luametatex-2.10.08/source/libraries/softposit/build/Linux-x86_64-GCC/Makefile000066400000000000000000000145021442250314700265310ustar00rootroot00000000000000#============================================================================ # #This C source file is part of the SoftPosit Posit Arithmetic Package #by S. H. Leong (Cerlane). # #Copyright 2017, 2018 A*STAR. All rights reserved. # #This C source file was based on SoftFloat IEEE Floating-Point Arithmetic #Package, Release 3d, by John R. Hauser. # # # Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the # University of California. All rights reserved. 
# # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # # 1. Redistributions of source code must retain the above copyright notice, # this list of conditions, and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions, and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # 3. Neither the name of the University nor the names of its contributors # may be used to endorse or promote products derived from this software # without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE # DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY # DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF # THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # #============================================================================= SOURCE_DIR ?= ../../source PYTHON_DIR ?= ../../python SPECIALIZE_TYPE ?= 8086-SSE COMPILER ?= gcc SOFTPOSIT_OPTS ?= \ -DINLINE_LEVEL=5 #\ -DSOFTPOSIT_QUAD -lquadmath COMPILE_PYTHON = \ $(COMPILER) -fPIC -c $(PYTHON_DIR)/softposit_python_wrap.c \ -I/usr/include/python \ -I$(SOURCE_DIR)/include -I. COMPILE_PYTHON3 = \ $(COMPILER) -fPIC -c $(PYTHON_DIR)/softposit_python_wrap.c \ -I/usr/include/python3 \ -I$(SOURCE_DIR)/include -I. 
LINK_PYTHON = \ ld -shared *.o -o $(PYTHON_DIR)/_softposit.so ifeq ($(OS),Windows_NT) DELETE = del /Q /F else DELETE = rm -f endif C_INCLUDES = -I. -I$(SOURCE_DIR)/$(SPECIALIZE_TYPE) -I$(SOURCE_DIR)/include OPTIMISATION = -O2 #-march=core-avx2 COMPILE_C = \ $(COMPILER) -c -Werror-implicit-function-declaration -DSOFTPOSIT_FAST_INT64 \ $(SOFTPOSIT_OPTS) $(C_INCLUDES) $(OPTIMISATION) \ -o $@ MAKELIB = ar crs $@ MAKESLIB = $(COMPILER) -shared $^ OBJ = .o LIB = .a SLIB = .so .PHONY: all all: softposit$(LIB) quad: SOFTPOSIT_OPTS+= -DSOFTPOSIT_QUAD -lquadmath quad: all python2: SOFTPOSIT_OPTS+= -fPIC python2: all $(COMPILE_PYTHON) $(LINK_PYTHON) python3: SOFTPOSIT_OPTS+= -fPIC python3: all $(COMPILE_PYTHON3) $(LINK_PYTHON) julia: SOFTPOSIT_OPTS+= -fPIC julia: softposit$(SLIB) OBJS_PRIMITIVES = OBJS_SPECIALIZE = OBJS_OTHERS = \ s_addMagsP8$(OBJ) \ s_subMagsP8$(OBJ) \ s_mulAddP8$(OBJ) \ p8_add$(OBJ) \ p8_sub$(OBJ) \ p8_mul$(OBJ) \ p8_div$(OBJ) \ p8_sqrt$(OBJ) \ p8_to_p16$(OBJ) \ p8_to_p32$(OBJ) \ p8_to_pX2$(OBJ) \ p8_to_i32$(OBJ) \ p8_to_i64$(OBJ) \ p8_to_ui32$(OBJ) \ p8_to_ui64$(OBJ) \ p8_roundToInt$(OBJ) \ p8_mulAdd$(OBJ) \ p8_eq$(OBJ) \ p8_le$(OBJ) \ p8_lt$(OBJ) \ quire8_fdp_add$(OBJ) \ quire8_fdp_sub$(OBJ) \ ui32_to_p8$(OBJ) \ ui64_to_p8$(OBJ) \ i32_to_p8$(OBJ) \ i64_to_p8$(OBJ) \ s_addMagsP16$(OBJ) \ s_subMagsP16$(OBJ) \ s_mulAddP16$(OBJ) \ p16_to_ui32$(OBJ) \ p16_to_ui64$(OBJ) \ p16_to_i32$(OBJ) \ p16_to_i64$(OBJ) \ p16_to_p8$(OBJ) \ p16_to_p32$(OBJ) \ p16_to_pX2$(OBJ) \ p16_roundToInt$(OBJ) \ p16_add$(OBJ) \ p16_sub$(OBJ) \ p16_mul$(OBJ) \ p16_mulAdd$(OBJ) \ p16_div$(OBJ) \ p16_eq$(OBJ) \ p16_le$(OBJ) \ p16_lt$(OBJ) \ p16_sqrt$(OBJ) \ quire16_fdp_add$(OBJ) \ quire16_fdp_sub$(OBJ) \ quire_helper$(OBJ) \ ui32_to_p16$(OBJ) \ ui64_to_p16$(OBJ) \ i32_to_p16$(OBJ) \ i64_to_p16$(OBJ) \ s_addMagsP32$(OBJ) \ s_subMagsP32$(OBJ) \ s_mulAddP32$(OBJ) \ p32_to_ui32$(OBJ) \ p32_to_ui64$(OBJ) \ p32_to_i32$(OBJ) \ p32_to_i64$(OBJ) \ p32_to_p8$(OBJ) \ p32_to_p16$(OBJ) \ 
p32_to_pX2$(OBJ) \ p32_roundToInt$(OBJ) \ p32_add$(OBJ) \ p32_sub$(OBJ) \ p32_mul$(OBJ) \ p32_mulAdd$(OBJ) \ p32_div$(OBJ) \ p32_eq$(OBJ) \ p32_le$(OBJ) \ p32_lt$(OBJ) \ p32_sqrt$(OBJ) \ quire32_fdp_add$(OBJ) \ quire32_fdp_sub$(OBJ) \ ui32_to_p32$(OBJ) \ ui64_to_p32$(OBJ) \ i32_to_p32$(OBJ) \ i64_to_p32$(OBJ) \ s_approxRecipSqrt_1Ks$(OBJ) \ c_convertDecToPosit8$(OBJ) \ c_convertPosit8ToDec$(OBJ) \ c_convertDecToPosit16$(OBJ) \ c_convertPosit16ToDec$(OBJ) \ c_convertQuire8ToPosit8$(OBJ) \ c_convertQuire16ToPosit16$(OBJ) \ c_convertQuire32ToPosit32$(OBJ) \ c_convertDecToPosit32$(OBJ) \ c_convertPosit32ToDec$(OBJ) \ c_int$(OBJ) \ s_addMagsPX2$(OBJ) \ s_subMagsPX2$(OBJ) \ s_mulAddPX2$(OBJ) \ pX2_add$(OBJ) \ pX2_sub$(OBJ) \ pX2_mul$(OBJ) \ pX2_div$(OBJ) \ pX2_mulAdd$(OBJ) \ pX2_roundToInt$(OBJ) \ pX2_sqrt$(OBJ) \ pX2_eq$(OBJ) \ pX2_le$(OBJ) \ pX2_lt$(OBJ) \ ui32_to_pX2$(OBJ) \ ui64_to_pX2$(OBJ) \ i32_to_pX2$(OBJ) \ i64_to_pX2$(OBJ) \ c_convertQuireX2ToPositX2$(OBJ) OBJS_ALL := $(OBJS_PRIMITIVES) $(OBJS_SPECIALIZE) $(OBJS_OTHERS) $(OBJS_ALL): \ platform.h \ $(SOURCE_DIR)/include/primitives.h $(OBJS_SPECIALIZE) $(OBJS_OTHERS): \ $(SOURCE_DIR)/include/softposit_types.h $(SOURCE_DIR)/include/internals.h \ $(SOURCE_DIR)/$(SPECIALIZE_TYPE)/specialize.h \ $(SOURCE_DIR)/include/softposit.h $(OBJS_PRIMITIVES) $(OBJS_OTHERS): %$(OBJ): $(SOURCE_DIR)/%.c $(COMPILE_C) $(SOURCE_DIR)/$*.c $(OBJS_SPECIALIZE): %$(OBJ): $(SOURCE_DIR)/$(SPECIALIZE_TYPE)/%.c $(COMPILE_C) $(SOURCE_DIR)/$(SPECIALIZE_TYPE)/$*.c softposit$(LIB): $(OBJS_ALL) $(MAKELIB) $^ softposit$(SLIB): $(OBJS_ALL) $(MAKESLIB) -o $@ .PHONY: clean clean: $(DELETE) $(OBJS_ALL) softposit_python_wrap.o softposit$(LIB) softposit$(SLIB) luametatex-2.10.08/source/libraries/softposit/build/Linux-x86_64-GCC/platform.h000066400000000000000000000043221442250314700270650ustar00rootroot00000000000000 /*============================================================================ This C header file is part of the SoftFloat IEEE 
Floating-Point Arithmetic Package, Release 3d, by John R. Hauser. Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of California. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
=============================================================================*/ /*---------------------------------------------------------------------------- *----------------------------------------------------------------------------*/ #define LITTLEENDIAN 1 /*---------------------------------------------------------------------------- *----------------------------------------------------------------------------*/ #ifdef __GNUC_STDC_INLINE__ #define INLINE inline #else #define INLINE extern inline #endif luametatex-2.10.08/source/libraries/softposit/source/000077500000000000000000000000001442250314700226435ustar00rootroot00000000000000luametatex-2.10.08/source/libraries/softposit/source/8086-SSE/000077500000000000000000000000001442250314700237005ustar00rootroot00000000000000luametatex-2.10.08/source/libraries/softposit/source/8086-SSE/specialize.h000066400000000000000000000040541442250314700262040ustar00rootroot00000000000000 /*============================================================================ This C header file is part of the SoftPosit Posit Arithmetic Package by S. H. Leong (Cerlane). Copyright 2017 2018 A*STAR. All rights reserved. This C header file was based on SoftFloat IEEE Floating-Point Arithmetic Package, Release 3d, by John R. Hauser. Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of California. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. 
Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. =============================================================================*/ #ifndef specialize_h #define specialize_h 1 #include #include #include "softposit_types.h" #endif luametatex-2.10.08/source/libraries/softposit/source/c_convertDecToPosit16.c000066400000000000000000000245231442250314700271040ustar00rootroot00000000000000/*============================================================================ This C source file is part of the SoftPosit Posit Arithmetic Package by S. H. Leong (Cerlane). Copyright 2017, 2018 A*STAR. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. 
Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. =============================================================================*/ #include #include "platform.h" #include "internals.h" void checkExtraTwoBitsP16(double f16, double temp, bool * bitsNPlusOne, bool * bitsMore ){ temp /= 2; if (temp<=f16){ *bitsNPlusOne = 1; f16-=temp; } if (f16>0) *bitsMore = 1; } uint_fast16_t convertFractionP16(double f16, uint_fast8_t fracLength, bool * bitsNPlusOne, bool * bitsMore ){ uint_fast16_t frac=0; if(f16==0) return 0; else if(f16==INFINITY) return 0x8000; f16 -= 1; //remove hidden bit if (fracLength==0) checkExtraTwoBitsP16(f16, 1.0, bitsNPlusOne, bitsMore); else{ double temp = 1; while (true){ temp /= 2; if (temp<=f16){ f16-=temp; fracLength--; frac = (frac<<1) + 1; //shift in one if (f16==0){ //put in the rest of the bits frac <<= (uint_fast8_t)fracLength; break; } if (fracLength == 0){ checkExtraTwoBitsP16(f16, temp, bitsNPlusOne, bitsMore); break; } } else{ frac <<= 1; //shift in a zero fracLength--; if (fracLength == 0){ checkExtraTwoBitsP16(f16, temp, bitsNPlusOne, bitsMore); break; } } } } return frac; } 
posit16_t convertFloatToP16(float a){ return convertDoubleToP16((double) a); } posit16_t convertDoubleToP16(double f16){ union ui16_p16 uZ; bool sign, regS; uint_fast16_t reg, frac=0; int_fast8_t exp=0; bool bitNPlusOne=0, bitsMore=0; (f16>=0) ? (sign=0) : (sign=1); if (f16 == 0 ){ uZ.ui = 0; return uZ.p; } else if(f16 == INFINITY || f16 == -INFINITY || f16 == NAN){ uZ.ui = 0x8000; return uZ.p; } else if (f16 == 1) { uZ.ui = 16384; return uZ.p; } else if (f16 == -1){ uZ.ui = 49152; return uZ.p; } else if (f16 >= 268435456){ //maxpos uZ.ui = 32767; return uZ.p; } else if (f16 <= -268435456){ // -maxpos uZ.ui = 32769; return uZ.p; } else if(f16 <= 3.725290298461914e-9 && !sign){ //minpos uZ.ui = 1; return uZ.p; } else if(f16 >= -3.725290298461914e-9 && sign){ //-minpos uZ.ui = 65535; return uZ.p; } else if (f16>1 || f16<-1){ if (sign){ //Make negative numbers positive for easier computation f16 = -f16; } regS = 1; reg = 1; //because k = m-1; so need to add back 1 // minpos if (f16 <= 3.725290298461914e-9){ uZ.ui = 1; } else{ //regime while (f16>=4){ f16 *=0.25; reg++; } if (f16>=2){ f16*=0.5; exp++; } int fracLength = 13-reg; if (fracLength<0){ //reg == 14, means rounding bits is exp and just the rest. if (f16>1) bitsMore = 1; } else frac = convertFractionP16 (f16, fracLength, &bitNPlusOne, &bitsMore); if (reg==14 && frac>0) { bitsMore = 1; frac=0; } if (reg>14) (regS) ? 
(uZ.ui= 32767): (uZ.ui=0x1); else{ uint_fast16_t regime = 1; if (regS) regime = ( (1< -1 ){ if (sign){ //Make negative numbers positive for easier computation f16 = -f16; } regS = 0; reg = 0; //regime while (f16<1){ f16 *= 4; reg++; } if (f16>=2){ f16/=2; exp++; } if (reg==14){ bitNPlusOne = exp; if (frac>1) bitsMore = 1; } else{ //only possible combination for reg=15 to reach here is 7FFF (maxpos) and FFFF (-minpos) //but since it should be caught on top, so no need to handle int_fast8_t fracLength = 13-reg; frac = convertFractionP16 (f16, fracLength, &bitNPlusOne, &bitsMore); } if (reg==14 && frac>0) { bitsMore = 1; frac=0; } if (reg>14) (regS) ? (uZ.ui= 32767): (uZ.ui=0x1); else{ uint_fast16_t regime = 1; if (regS) regime = ( (1<0) *bitsMore = 1; } uint_fast16_t convertQuadFractionP16(__float128 f16, uint_fast8_t fracLength, bool * bitsNPlusOne, bool * bitsMore ){ uint_fast16_t frac=0; if(f16==0) return 0; else if(f16==INFINITY) return 0x8000; f16 -= 1; //remove hidden bit if (fracLength==0) checkQuadExtraTwoBitsP16(f16, 1.0, bitsNPlusOne, bitsMore); else{ __float128 temp = 1; while (true){ temp /= 2; if (temp<=f16){ f16-=temp; fracLength--; frac = (frac<<1) + 1; //shift in one if (f16==0){ //put in the rest of the bits frac <<= (uint_fast8_t)fracLength; break; } if (fracLength == 0){ checkQuadExtraTwoBitsP16(f16, temp, bitsNPlusOne, bitsMore); break; } } else{ frac <<= 1; //shift in a zero fracLength--; if (fracLength == 0){ checkQuadExtraTwoBitsP16(f16, temp, bitsNPlusOne, bitsMore); break; } } } } return frac; } posit16_t convertQuadToP16(__float128 f16){ union ui16_p16 uZ; bool sign, regS; uint_fast16_t reg, frac=0; int_fast8_t exp=0; bool bitNPlusOne=0, bitsMore=0; (f16>=0) ? 
(sign=0) : (sign=1); if (f16 == 0 ){ uZ.ui = 0; return uZ.p; } else if(f16 == INFINITY || f16 == -INFINITY || f16 == NAN){ uZ.ui = 0x8000; return uZ.p; } else if (f16 == 1) { uZ.ui = 16384; return uZ.p; } else if (f16 == -1){ uZ.ui = 49152; return uZ.p; } else if (f16 >= 268435456){ //maxpos uZ.ui = 32767; return uZ.p; } else if (f16 <= -268435456){ // -maxpos uZ.ui = 32769; return uZ.p; } else if(f16 <= 3.725290298461914e-9 && !sign){ //minpos uZ.ui = 1; return uZ.p; } else if(f16 >= -3.725290298461914e-9 && sign){ //-minpos uZ.ui = 65535; return uZ.p; } else if (f16>1 || f16<-1){ if (sign){ //Make negative numbers positive for easier computation f16 = -f16; } regS = 1; reg = 1; //because k = m-1; so need to add back 1 // minpos if (f16 <= 3.725290298461914e-9){ uZ.ui = 1; } else{ //regime while (f16>=4){ f16 *=0.25; reg++; } if (f16>=2){ f16*=0.5; exp++; } int8_t fracLength = 13-reg; if (fracLength<0){ //reg == 14, means rounding bits is exp and just the rest. if (f16>1) bitsMore = 1; } else frac = convertQuadFractionP16 (f16, fracLength, &bitNPlusOne, &bitsMore); if (reg==14 && frac>0) { bitsMore = 1; frac=0; } if (reg>14) (regS) ? (uZ.ui= 32767): (uZ.ui=0x1); else{ uint_fast16_t regime = 1; if (regS) regime = ( (1< -1 ){ if (sign){ //Make negative numbers positive for easier computation f16 = -f16; } regS = 0; reg = 0; //regime while (f16<1){ f16 *= 4; reg++; } if (f16>=2){ f16/=2; exp++; } if (reg==14){ bitNPlusOne = exp; if (frac>1) bitsMore = 1; } else{ //only possible combination for reg=15 to reach here is 7FFF (maxpos) and FFFF (-minpos) //but since it should be caught on top, so no need to handle int_fast8_t fracLength = 13-reg; frac = convertQuadFractionP16 (f16, fracLength, &bitNPlusOne, &bitsMore); } if (reg==14 && frac>0) { bitsMore = 1; frac=0; } if (reg>14) (regS) ? 
(uZ.ui= 32767): (uZ.ui=0x1); else{ uint_fast16_t regime = 1; if (regS) regime = ( (1< #ifdef SOFTPOSIT_QUAD #include #endif #include "platform.h" #include "internals.h" #ifdef SOFTPOSIT_QUAD void checkQuadExtraP32TwoBits(__float128 f32, __float128 temp, bool * bitsNPlusOne, bool * bitsMore ){ temp /= 2; if (temp<=f32){ *bitsNPlusOne = 1; f32-=temp; } if (f32>0) *bitsMore = 1; } uint_fast32_t convertQuadFractionP32(__float128 f32, uint_fast16_t fracLength, bool * bitNPlusOne, bool * bitsMore ){ uint_fast32_t frac=0; if(f32==0) return 0; else if(f32==INFINITY) return 0x80000000; f32 -= 1; //remove hidden bit if (fracLength==0) checkQuadExtraP32TwoBits(f32, 1.0, bitNPlusOne, bitsMore); else{ __float128 temp = 1; while (true){ temp /= 2; if (temp<=f32){ f32-=temp; fracLength--; frac = (frac<<1) + 1; //shift in one if (f32==0){ frac <<= (uint_fast32_t)fracLength; break; } if (fracLength == 0){ checkQuadExtraP32TwoBits(f32, temp, bitNPlusOne, bitsMore); break; } } else{ frac <<= 1; //shift in a zero fracLength--; if (fracLength == 0){ checkQuadExtraP32TwoBits(f32, temp, bitNPlusOne, bitsMore); break; } } } } return frac; } posit32_t convertQuadToP32(__float128 f32){ union ui32_p32 uZ; bool sign, regS; uint_fast32_t reg, frac=0; int_fast32_t exp=0; bool bitNPlusOne=0, bitsMore=0; (f32>=0) ? 
(sign=0) : (sign=1); if (f32 == 0 ){ uZ.ui = 0; return uZ.p; } else if(f32 == INFINITY || f32 == -INFINITY || f32 == NAN){ uZ.ui = 0x80000000; return uZ.p; } else if (f32 == 1) { uZ.ui = 0x40000000; return uZ.p; } else if (f32 == -1){ uZ.ui = 0xC0000000; return uZ.p; } else if (f32 >= 1.329227995784916e+36){ //maxpos uZ.ui = 0x7FFFFFFF; return uZ.p; } else if (f32 <= -1.329227995784916e+36){ // -maxpos uZ.ui = 0x80000001; return uZ.p; } else if(f32 <= 7.52316384526264e-37 && !sign){ //minpos uZ.ui = 0x1; return uZ.p; } else if(f32 >= -7.52316384526264e-37 && sign){ //-minpos uZ.ui = 0xFFFFFFFF; return uZ.p; } else if (f32>1 || f32<-1){ if (sign){ //Make negative numbers positive for easier computation f32 = -f32; } regS = 1; reg = 1; //because k = m-1; so need to add back 1 // minpos if (f32 <= 7.52316384526264e-37){ uZ.ui = 1; } else{ //regime while (f32>=16){ f32 *=0.0625; // f32/=16; reg++; } while (f32>=2){ f32*=0.5; exp++; } int fracLength = 28-reg; if (fracLength<0){ //remove hidden bit //in both cases, reg=29 and 30, e is n+1 bit and frac are sticky bits if(reg==29){ bitNPlusOne = exp&0x1; exp>>=1; //taken care of by the pack algo } else{//reg=30 bitNPlusOne=exp>>1; bitsMore=exp&0x1; exp=0; } if (f32!=1){ bitsMore =1; frac=0; } } else frac = convertQuadFractionP32 (f32, fracLength, &bitNPlusOne, &bitsMore); if (reg>30 ){ (regS) ? 
(uZ.ui= 0x7FFFFFFF): (uZ.ui=0x1); } //rounding off fraction bits else{ uint_fast32_t regime = 1; if (regS) regime = ( (1< -1 ){ if (sign){ //Make negative numbers positive for easier computation f32 = -f32; } regS = 0; reg = 0; while (f32<1){ f32 *= 16; reg++; } while (f32>=2){ f32*=0.5; exp++; } //only possible combination for reg=15 to reach here is 7FFF (maxpos) and FFFF (-minpos) //but since it should be caught on top, so no need to handle int fracLength = 28-reg; if (fracLength<0){ //in both cases, reg=29 and 30, e is n+1 bit and frac are sticky bits if(reg==29){ bitNPlusOne = exp&0x1; exp>>=1; //taken care of by the pack algo } else{//reg=30 bitNPlusOne=exp>>1; bitsMore=exp&0x1; exp=0; } if (f32!=1){//because of hidden bit bitsMore =1; frac=0; } } else frac = convertQuadFractionP32 (f32, fracLength, &bitNPlusOne, &bitsMore); if (reg>30 ){ (regS) ? (uZ.ui= 0x7FFFFFFF): (uZ.ui=0x1); } //rounding off fraction bits else{ uint_fast32_t regime = 1; if (regS) regime = ( (1<=0) ? (sign=0) : (sign=1); if (f32 == 0 ){ uZ.ui = 0; return uZ.p; } else if(f32 == INFINITY || f32 == -INFINITY || f32 == NAN){ uZ.ui = 0x80000000; return uZ.p; } else if (f32 == 1) { uZ.ui = 0x40000000; return uZ.p; } else if (f32 == -1){ uZ.ui = 0xC0000000; return uZ.p; } /*else if (f32 >= 1.329227995784916e+36){ //maxpos uZ.ui = 0x7FFFFFFF; return uZ.p; } else if (f32 <= -1.329227995784916e+36){ // -maxpos uZ.ui = 0x80000001; return uZ.p; } else if(f32 <= 7.52316384526264e-37 && !sign){ //minpos uZ.ui = 0x1; return uZ.p; } else if(f32 >= -7.52316384526264e-37 && sign){ //-minpos uZ.ui = 0xFFFFFFFF; return uZ.p; }*/ else if (f32>1 || f32<-1){ if (sign){ //Make negative numbers positive for easier computation f32 = -f32; } regS = 1; reg = 1; //because k = m-1; so need to add back 1 // minpos if (x==32 && f32 <= 7.52316384526264e-37){ uZ.ui = 1; } else{ //regime while (f32>=16){ f32 *=0.0625; // f32/=16; reg++; } while (f32>=2){ f32*=0.5; exp++; } int fracLength = x-4-reg; if (fracLength<0){ //in 
both cases, reg=29 and 30, e is n+1 bit and frac are sticky bits if(reg==x-3){ bitNPlusOne = exp&0x1; //exp>>=1; //taken care of by the pack algo exp&=0x2; } else{//reg=30 bitNPlusOne=exp>>1; bitsMore=exp&0x1; exp=0; } if (f32!=1){//because of hidden bit bitsMore =1; frac=0; } } else frac = convertQuadFractionP32 (f32, fracLength, &bitNPlusOne, &bitsMore); if (reg>(x-2) ){ uZ.ui=(regS) ? (0x7FFFFFFF & ((int32_t)0x80000000>>(x-1)) ): (0x1 << (32-x)); } //rounding off fraction bits else{ uint_fast32_t regime = 1; if (regS) regime = ( (1<0){ uZ.ui = 0x1 << (32-x); } if (bitNPlusOne) uZ.ui += ( ((uZ.ui>>(32-x)) & 0x1) | bitsMore ) << (32-x); } if (sign) uZ.ui = -uZ.ui & 0xFFFFFFFF; } } else if (f32 < 1 || f32 > -1 ){ if (sign){ //Make negative numbers positive for easier computation f32 = -f32; } regS = 0; reg = 0; //regime while (f32<1){ f32 *= 16; reg++; } while (f32>=2){ f32*=0.5; exp++; } int fracLength = x-4-reg; if (fracLength<0){ //in both cases, reg=29 and 30, e is n+1 bit and frac are sticky bits if(reg==x-3){ bitNPlusOne = exp&0x1; //exp>>=1; //taken care of by the pack algo exp&=0x2; } else{//reg=30 bitNPlusOne=exp>>1; bitsMore=exp&0x1; exp=0; } if (f32!=1){//because of hidden bit bitsMore =1; frac=0; } } else frac = convertQuadFractionP32 (f32, fracLength, &bitNPlusOne, &bitsMore); if (reg>(x-2) ){ uZ.ui=(regS) ? 
(0x7FFFFFFF & ((int32_t)0x80000000>>(x-1)) ): (0x1 << (32-x)); } //rounding off fraction bits else{ uint_fast32_t regime = 1; if (regS) regime = ( (1<0){ uZ.ui = 0x1 << (32-x); } if (bitNPlusOne){ uZ.ui += ( ((uZ.ui>>(32-x)) & 0x1) | bitsMore ) << (32-x); } } if (sign) uZ.ui = -uZ.ui & 0xFFFFFFFF; } else { //NaR - for NaN, INF and all other combinations uZ.ui = 0x80000000; } return uZ.p; } #endif void checkExtraP32TwoBits(double f32, double temp, bool * bitsNPlusOne, bool * bitsMore ){ temp /= 2; if (temp<=f32){ *bitsNPlusOne = 1; f32-=temp; } if (f32>0) *bitsMore = 1; } uint_fast32_t convertFractionP32(double f32, uint_fast16_t fracLength, bool * bitsNPlusOne, bool * bitsMore ){ uint_fast32_t frac=0; if(f32==0) return 0; else if(f32==INFINITY) return 0x80000000; f32 -= 1; //remove hidden bit if (fracLength==0) checkExtraP32TwoBits(f32, 1.0, bitsNPlusOne, bitsMore); else{ double temp = 1; while (true){ temp /= 2; if (temp<=f32){ f32-=temp; fracLength--; frac = (frac<<1) + 1; //shift in one if (f32==0){ frac <<= (uint_fast16_t)fracLength; break; } if (fracLength == 0){ checkExtraP32TwoBits(f32, temp, bitsNPlusOne, bitsMore); break; } } else{ frac <<= 1; //shift in a zero fracLength--; if (fracLength == 0){ checkExtraP32TwoBits(f32, temp, bitsNPlusOne, bitsMore); break; } } } } return frac; } posit32_t convertDoubleToP32(double f32){ union ui32_p32 uZ; bool sign, regS; uint_fast32_t reg, frac=0; int_fast32_t exp=0; bool bitNPlusOne=0, bitsMore=0; (f32>=0) ? 
(sign=0) : (sign=1); if (f32 == 0 ){ uZ.ui = 0; return uZ.p; } else if(f32 == INFINITY || f32 == -INFINITY || f32 == NAN){ uZ.ui = 0x80000000; return uZ.p; } else if (f32 == 1) { uZ.ui = 0x40000000; return uZ.p; } else if (f32 == -1){ uZ.ui = 0xC0000000; return uZ.p; } else if (f32 >= 1.329227995784916e+36){ //maxpos uZ.ui = 0x7FFFFFFF; return uZ.p; } else if (f32 <= -1.329227995784916e+36){ // -maxpos uZ.ui = 0x80000001; return uZ.p; } else if(f32 <= 7.52316384526264e-37 && !sign){ //minpos uZ.ui = 0x1; return uZ.p; } else if(f32 >= -7.52316384526264e-37 && sign){ //-minpos uZ.ui = 0xFFFFFFFF; return uZ.p; } else if (f32>1 || f32<-1){ if (sign){ //Make negative numbers positive for easier computation f32 = -f32; } regS = 1; reg = 1; //because k = m-1; so need to add back 1 // minpos if (f32 <= 7.52316384526264e-37){ uZ.ui = 1; } else{ //regime while (f32>=16){ f32 *=0.0625; // f32/=16; reg++; } while (f32>=2){ f32*=0.5; exp++; } int8_t fracLength = 28-reg; if (fracLength<0){ //in both cases, reg=29 and 30, e is n+1 bit and frac are sticky bits if(reg==29){ bitNPlusOne = exp&0x1; exp>>=1; //taken care of by the pack algo } else{//reg=30 bitNPlusOne=exp>>1; bitsMore=exp&0x1; exp=0; } if (f32!=1){//because of hidden bit bitsMore =1; frac=0; } } else frac = convertFractionP32 (f32, fracLength, &bitNPlusOne, &bitsMore); if (reg>30 ){ (regS) ? 
(uZ.ui= 0x7FFFFFFF): (uZ.ui=0x1); } //rounding off fraction bits else{ uint_fast32_t regime = 1; if (regS) regime = ( (1< -1 ){ if (sign){ //Make negative numbers positive for easier computation f32 = -f32; } regS = 0; reg = 0; //regime while (f32<1){ f32 *= 16; reg++; } while (f32>=2){ f32*=0.5; exp++; } //only possible combination for reg=15 to reach here is 7FFF (maxpos) and FFFF (-minpos) //but since it should be caught on top, so no need to handle int_fast8_t fracLength = 28-reg; if (fracLength<0){ //in both cases, reg=29 and 30, e is n+1 bit and frac are sticky bits if(reg==29){ bitNPlusOne = exp&0x1; exp>>=1; //taken care of by the pack algo } else{//reg=30 bitNPlusOne=exp>>1; bitsMore=exp&0x1; exp=0; } if (f32!=1){//because of hidden bit bitsMore =1; frac=0; } } else frac = convertFractionP32 (f32, fracLength, &bitNPlusOne, &bitsMore); if (reg>30 ){ (regS) ? (uZ.ui= 0x7FFFFFFF): (uZ.ui=0x1); } //rounding off fraction bits else{ uint_fast32_t regime = 1; if (regS) regime = ( (1<=0) ? 
(sign=0) : (sign=1); if (f32 == 0 ){ uZ.ui = 0; return uZ.p; } else if(f32 == INFINITY || f32 == -INFINITY || f32 == NAN){ uZ.ui = 0x80000000; return uZ.p; } else if (f32 == 1) { uZ.ui = 0x40000000; return uZ.p; } else if (f32 == -1){ uZ.ui = 0xC0000000; return uZ.p; } /*else if (f32 >= 1.329227995784916e+36){ //maxpos uZ.ui = 0x7FFFFFFF; return uZ.p; } else if (f32 <= -1.329227995784916e+36){ // -maxpos uZ.ui = 0x80000001; return uZ.p; } else if(f32 <= 7.52316384526264e-37 && !sign){ //minpos uZ.ui = 0x1; return uZ.p; } else if(f32 >= -7.52316384526264e-37 && sign){ //-minpos uZ.ui = 0xFFFFFFFF; return uZ.p; }*/ else if (f32>1 || f32<-1){ if (sign){ //Make negative numbers positive for easier computation f32 = -f32; } regS = 1; reg = 1; //because k = m-1; so need to add back 1 // minpos if (x==32 && f32 <= 7.52316384526264e-37){ uZ.ui = 1; } else{ //regime while (f32>=16){ f32 *=0.0625; // f32/=16; reg++; } while (f32>=2){ f32*=0.5; exp++; } int fracLength = x-4-reg; if (fracLength<0){ //in both cases, reg=29 and 30, e is n+1 bit and frac are sticky bits if(reg==x-3){ bitNPlusOne = exp&0x1; //exp>>=1; //taken care of by the pack algo exp&=0x2; } else{//reg=30 bitNPlusOne=exp>>1; bitsMore=exp&0x1; exp=0; } if (f32!=1){//because of hidden bit bitsMore =1; frac=0; } } else frac = convertFractionP32 (f32, fracLength, &bitNPlusOne, &bitsMore); if (reg>(x-2) ){ uZ.ui=(regS) ? 
(0x7FFFFFFF & ((int32_t)0x80000000>>(x-1)) ): (0x1 << (32-x)); } //rounding off fraction bits else{ uint_fast32_t regime = 1; if (regS) regime = ( (1<>=1; else if (reg<=28) exp<<= (28-reg); uZ.ui = ((uint32_t) (regime) << (30-reg)) + ((uint32_t) exp ) + ((uint32_t)(frac<<(32-x))); //minpos if (uZ.ui==0 && frac>0){ uZ.ui = 0x1 << (32-x); } if (bitNPlusOne) uZ.ui += ( ((uZ.ui>>(32-x)) & 0x1) | bitsMore ) << (32-x); } if (sign) uZ.ui = -uZ.ui & 0xFFFFFFFF; } } else if (f32 < 1 || f32 > -1 ){ if (sign){ //Make negative numbers positive for easier computation f32 = -f32; } regS = 0; reg = 0; //regime while (f32<1){ f32 *= 16; reg++; } while (f32>=2){ f32*=0.5; exp++; } int fracLength = x-4-reg; if (fracLength<0){ //in both cases, reg=29 and 30, e is n+1 bit and frac are sticky bits if(reg==x-3){ bitNPlusOne = exp&0x1; //exp>>=1; //taken care of by the pack algo exp&=0x2; } else{//reg=30 bitNPlusOne=exp>>1; bitsMore=exp&0x1; exp=0; } if (f32!=1){//because of hidden bit bitsMore =1; frac=0; } } else frac = convertFractionP32 (f32, fracLength, &bitNPlusOne, &bitsMore); if (reg>(x-2) ){ uZ.ui=(regS) ? (0x7FFFFFFF & ((int32_t)0x80000000>>(x-1)) ): (0x1 << (32-x)); } //rounding off fraction bits else{ uint_fast32_t regime = 1; if (regS) regime = ( (1<>=1; else if (reg<=28) exp<<= (28-reg); uZ.ui = ((uint32_t) (regime) << (30-reg)) + ((uint32_t) exp ) + ((uint32_t)(frac<<(32-x))); //minpos if (uZ.ui==0 && frac>0){ uZ.ui = 0x1 << (32-x); } if (bitNPlusOne){ uZ.ui += ( ((uZ.ui>>(32-x)) & 0x1) | bitsMore ) << (32-x); } } if (sign) uZ.ui = -uZ.ui & 0xFFFFFFFF; } else { //NaR - for NaN, INF and all other combinations uZ.ui = 0x80000000; } return uZ.p; } luametatex-2.10.08/source/libraries/softposit/source/c_convertDecToPosit8.c000066400000000000000000000117651442250314700270310ustar00rootroot00000000000000/*============================================================================ This C source file is part of the SoftPosit Posit Arithmetic Package by S. H. Leong (Cerlane). 
Copyright 2017, 2018 A*STAR. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
=============================================================================*/ #include #include "platform.h" #include "internals.h" void checkExtraTwoBitsP8(double f8, double temp, bool * bitsNPlusOne, bool * bitsMore ){ temp /= 2; if (temp<=f8){ *bitsNPlusOne = 1; f8-=temp; } if (f8>0) *bitsMore = 1; } uint_fast16_t convertFractionP8(double f8, uint_fast8_t fracLength, bool * bitsNPlusOne, bool * bitsMore ){ uint_fast8_t frac=0; if(f8==0) return 0; else if(f8==INFINITY) return 0x80; f8 -= 1; //remove hidden bit if (fracLength==0) checkExtraTwoBitsP8(f8, 1.0, bitsNPlusOne, bitsMore); else{ double temp = 1; while (true){ temp /= 2; if (temp<=f8){ f8-=temp; fracLength--; frac = (frac<<1) + 1; //shift in one if (f8==0){ //put in the rest of the bits frac <<= (uint_fast8_t)fracLength; break; } if (fracLength == 0){ checkExtraTwoBitsP8(f8, temp, bitsNPlusOne, bitsMore); break; } } else{ frac <<= 1; //shift in a zero fracLength--; if (fracLength == 0){ checkExtraTwoBitsP8(f8, temp, bitsNPlusOne, bitsMore); break; } } } } //printf("convertfloat: frac:%d f16: %.26f bitsNPlusOne: %d, bitsMore: %d\n", frac, f16, bitsNPlusOne, bitsMore); return frac; } posit8_t convertDoubleToP8(double f8){ union ui8_p8 uZ; bool sign; uint_fast8_t reg, frac=0; bool bitNPlusOne=0, bitsMore=0; (f8>=0) ? 
(sign=0) : (sign=1); // sign: 1 bit, frac: 8 bits, mantisa: 23 bits //sign = a.parts.sign; //frac = a.parts.fraction; //exp = a.parts.exponent; if (f8 == 0 ){ uZ.ui = 0; return uZ.p; } else if(f8 == INFINITY || f8 == -INFINITY || f8 == NAN){ uZ.ui = 0x80; return uZ.p; } else if (f8 == 1) { uZ.ui = 0x40; return uZ.p; } else if (f8 == -1){ uZ.ui = 0xC0; return uZ.p; } else if (f8 >= 64){ //maxpos uZ.ui = 0x7F; return uZ.p; } else if (f8 <= -64){ // -maxpos uZ.ui = 0x81; return uZ.p; } else if(f8 <= 0.015625 && !sign){ //minpos uZ.ui = 0x1; return uZ.p; } else if(f8 >= -0.015625 && sign){ //-minpos uZ.ui = 0xFF; return uZ.p; } else if (f8>1 || f8<-1){ if (sign){ //Make negative numbers positive for easier computation f8 = -f8; } reg = 1; //because k = m-1; so need to add back 1 // minpos if (f8 <= 0.015625){ uZ.ui = 1; } else{ //regime while (f8>=2){ f8 *=0.5; reg++; } //rounding off regime bits if (reg>6 ) uZ.ui= 0x7F; else{ int8_t fracLength = 6-reg; frac = convertFractionP8 (f8, fracLength, &bitNPlusOne, &bitsMore); uint_fast8_t regime = 0x7F - (0x7F>>reg); uZ.ui = packToP8UI(regime, frac); if (bitNPlusOne) uZ.ui += ((uZ.ui&1) | bitsMore ); } if(sign) uZ.ui = -uZ.ui & 0xFF; } } else if (f8 < 1 || f8 > -1 ){ if (sign){ //Make negative numbers positive for easier computation f8 = -f8; } reg = 0; //regime //printf("here we go\n"); while (f8<1){ f8 *= 2; reg++; } //rounding off regime bits if (reg>6 ) uZ.ui=0x1; else{ int_fast8_t fracLength = 6-reg; frac = convertFractionP8 (f8, fracLength, &bitNPlusOne, &bitsMore); uint_fast8_t regime = 0x40>>reg; uZ.ui = packToP8UI(regime, frac); if (bitNPlusOne) uZ.ui += ((uZ.ui&1) | bitsMore ); } if(sign) uZ.ui = -uZ.ui & 0xFF; } else { //NaR - for NaN, INF and all other combinations uZ.ui = 0x80; } return uZ.p; } 
luametatex-2.10.08/source/libraries/softposit/source/c_convertDecToPositX1.c000066400000000000000000000241771442250314700271530ustar00rootroot00000000000000/*============================================================================ This C source file is part of the SoftPosit Posit Arithmetic Package by S. H. Leong (Cerlane). Copyright 2017, 2018 A*STAR. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
=============================================================================*/ #include #ifdef SOFTPOSIT_QUAD #include #endif #include "platform.h" #include "internals.h" #ifdef SOFTPOSIT_QUAD void checkQuadExtraPX1TwoBits(__float128 f32, __float128 temp, bool * bitsNPlusOne, bool * bitsMore ){ temp /= 2; if (temp<=f32){ *bitsNPlusOne = 1; f32-=temp; } if (f32>0) *bitsMore = 1; } uint_fast32_t convertQuadFractionPX1(__float128 f32, uint_fast16_t fracLength, bool * bitNPlusOne, bool * bitsMore ){ uint_fast32_t frac=0; if(f32==0) return 0; else if(f32==INFINITY) return 0x80000000; f32 -= 1; //remove hidden bit if (fracLength==0) checkQuadExtraPX1TwoBits(f32, 1.0, bitNPlusOne, bitsMore); else{ __float128 temp = 1; while (true){ temp /= 2; if (temp<=f32){ f32-=temp; fracLength--; frac = (frac<<1) + 1; //shift in one if (f32==0){ frac <<= (uint_fast32_t)fracLength; break; } if (fracLength == 0){ checkQuadExtraPX1TwoBits(f32, temp, bitNPlusOne, bitsMore); break; } } else{ frac <<= 1; //shift in a zero fracLength--; if (fracLength == 0){ checkQuadExtraPX1TwoBits(f32, temp, bitNPlusOne, bitsMore); break; } } } } return frac; } posit_1_t convertQuadToPX1(__float128 f32, int x){ union ui32_pX1 uZ; bool sign, regS; uint_fast32_t reg, frac=0; int_fast32_t exp=0; bool bitNPlusOne=0, bitsMore=0; (f32>=0) ? 
(sign=0) : (sign=1); if (f32 == 0 ){ uZ.ui = 0; return uZ.p; } else if(f32 == INFINITY || f32 == -INFINITY || f32 == NAN){ uZ.ui = 0x80000000; return uZ.p; } else if (f32 == 1) { uZ.ui = 0x40000000; return uZ.p; } else if (f32 == -1){ uZ.ui = 0xC0000000; return uZ.p; } else if (f32>1 || f32<-1){ if (sign){ //Make negative numbers positive for easier computation f32 = -f32; } regS = 1; reg = 1; //because k = m-1; so need to add back 1 // minpos if (x==32 && f32 <= 8.673617379884035e-19){ uZ.ui = 1; } else{ //regime while (f32>=4){ f32 *=0.25; // f32/=4; reg++; } if (f32>=2){ f32*=0.5; exp++; } int fracLength = x-3-reg; if (fracLength<0){ if (reg==x-2){ bitNPlusOne=exp; exp=0; } if(f32>1) bitsMore=1; } else frac = convertQuadFractionPX1 (f32, fracLength, &bitNPlusOne, &bitsMore); if (reg==30 && frac>0){ bitsMore = 1; frac = 0; } if (reg>(x-2) ){ uZ.ui=(regS) ? (0x7FFFFFFF & ((int32_t)0x80000000>>(x-1)) ): (0x1 << (32-x)); } //rounding off fraction bits else{ uint_fast32_t regime = 1; if (regS) regime = ( (1<0){ uZ.ui = 0x1 << (32-x); } if (bitNPlusOne) uZ.ui += ( ((uZ.ui>>(32-x)) & 0x1) | bitsMore ) << (32-x); } if (sign) uZ.ui = -uZ.ui & 0xFFFFFFFF; } } else if (f32 < 1 || f32 > -1 ){ if (sign){ //Make negative numbers positive for easier computation f32 = -f32; } regS = 0; reg = 0; //regime while (f32<1){ f32 *= 4; reg++; } if (f32>=2){ f32*=0.5; exp++; } int fracLength = x-3-reg; if (fracLength<0){ if (reg==x-2){ bitNPlusOne=exp; exp=0; } if(f32>1) bitsMore=1; } else frac = convertQuadFractionPX1 (f32, fracLength, &bitNPlusOne, &bitsMore); if (reg==30 && frac>0){ bitsMore = 1; frac = 0; } if (reg>(x-2) ){ uZ.ui=(regS) ? 
(0x7FFFFFFF & ((int32_t)0x80000000>>(x-1)) ): (0x1 << (32-x)); } //rounding off fraction bits else{ uint_fast32_t regime = 1; if (regS) regime = ( (1<0){ uZ.ui = 0x1 << (32-x); } if (bitNPlusOne) uZ.ui += ( ((uZ.ui>>(32-x)) & 0x1) | bitsMore ) << (32-x); } if (sign) uZ.ui = -uZ.ui & 0xFFFFFFFF; } else { //NaR - for NaN, INF and all other combinations uZ.ui = 0x80000000; } return uZ.p; } #endif void checkExtraPX1TwoBits(double f32, double temp, bool * bitsNPlusOne, bool * bitsMore ){ temp /= 2; if (temp<=f32){ *bitsNPlusOne = 1; f32-=temp; } if (f32>0) *bitsMore = 1; } uint_fast32_t convertFractionPX1(double f32, uint_fast16_t fracLength, bool * bitsNPlusOne, bool * bitsMore ){ uint_fast32_t frac=0; if(f32==0) return 0; else if(f32==INFINITY) return 0x80000000; f32 -= 1; //remove hidden bit if (fracLength==0) checkExtraPX1TwoBits(f32, 1.0, bitsNPlusOne, bitsMore); else{ double temp = 1; while (true){ temp /= 2; if (temp<=f32){ f32-=temp; fracLength--; frac = (frac<<1) + 1; //shift in one if (f32==0){ frac <<= (uint_fast16_t)fracLength; break; } if (fracLength == 0){ checkExtraPX1TwoBits(f32, temp, bitsNPlusOne, bitsMore); break; } } else{ frac <<= 1; //shift in a zero fracLength--; if (fracLength == 0){ checkExtraPX1TwoBits(f32, temp, bitsNPlusOne, bitsMore); break; } } } } return frac; } posit_1_t convertDoubleToPX1(double f32, int x){ union ui32_pX1 uZ; bool sign, regS; uint_fast32_t reg, frac=0; int_fast32_t exp=0; bool bitNPlusOne=0, bitsMore=0; (f32>=0) ? 
(sign=0) : (sign=1); if (f32 == 0 ){ uZ.ui = 0; return uZ.p; } else if(f32 == INFINITY || f32 == -INFINITY || f32 == NAN){ uZ.ui = 0x80000000; return uZ.p; } else if (f32 == 1) { uZ.ui = 0x40000000; return uZ.p; } else if (f32 == -1){ uZ.ui = 0xC0000000; return uZ.p; } else if (f32>1 || f32<-1){ if (sign){ //Make negative numbers positive for easier computation f32 = -f32; } regS = 1; reg = 1; //because k = m-1; so need to add back 1 // minpos if (x==32 && f32 <= 8.673617379884035e-19){ uZ.ui = 1; } else{ //regime while (f32>=4){ f32 *=0.25; // f32/=4; reg++; } if (f32>=2){ f32*=0.5; exp++; } //printf("reg: %d, exp: %d f32: %.26lf\n", reg, exp, f32); int fracLength = x-3-reg; if (fracLength<0){ if (reg==x-2){ bitNPlusOne=exp; exp=0; } if(f32>1) bitsMore=1; } else frac = convertFractionPX1(f32, fracLength, &bitNPlusOne, &bitsMore); if (reg==30 && frac>0){ bitsMore = 1; frac = 0; } if (reg>(x-2) ){ uZ.ui=(regS) ? (0x7FFFFFFF & ((int32_t)0x80000000>>(x-1)) ): (0x1 << (32-x)); } //rounding off fraction bits else{ uint_fast32_t regime = 1; if (regS) regime = ( (1<0){ uZ.ui = 0x1 << (32-x); } //printBinary(&uZ.ui, 32); //uint32_t tt = (uZ.ui>>(32-x)); //printBinary(&tt, 32); if (bitNPlusOne) uZ.ui += ( ((uZ.ui>>(32-x)) & 0x1) | bitsMore ) << (32-x); } if (sign) uZ.ui = -uZ.ui & 0xFFFFFFFF; } } else if (f32 < 1 || f32 > -1 ){ if (sign){ //Make negative numbers positive for easier computation f32 = -f32; } regS = 0; reg = 0; //regime while (f32<1){ f32 *= 4; reg++; } if (f32>=2){ f32*=0.5; exp++; } int fracLength = x-3-reg; if (fracLength<0){ if (reg==x-2){ bitNPlusOne=exp; exp=0; } if(f32>1) bitsMore=1; } else frac = convertFractionPX1 (f32, fracLength, &bitNPlusOne, &bitsMore); if (reg==30 && frac>0){ bitsMore = 1; frac = 0; } if (reg>(x-2) ){ uZ.ui=(regS) ? 
(0x7FFFFFFF & ((int32_t)0x80000000>>(x-1)) ): (0x1 << (32-x)); } //rounding off fraction bits else{ uint_fast32_t regime = 1; if (regS) regime = ( (1<0){ uZ.ui = 0x1 << (32-x); } if (bitNPlusOne) uZ.ui += ( ((uZ.ui>>(32-x)) & 0x1) | bitsMore ) << (32-x); } if (sign) uZ.ui = -uZ.ui & 0xFFFFFFFF; } else { //NaR - for NaN, INF and all other combinations uZ.ui = 0x80000000; } return uZ.p; } luametatex-2.10.08/source/libraries/softposit/source/c_convertPosit16ToDec.c000066400000000000000000000075651442250314700271130ustar00rootroot00000000000000 /*============================================================================ This C source file is part of the SoftPosit Posit Arithmetic Package by S. H. Leong (Cerlane). Copyright 2017, 2018 A*STAR. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. =============================================================================*/ #include #include "platform.h" #include "internals.h" double convertP16ToDouble(posit16_t a){ union ui16_p16 uZ; //posit16 p16; double d16; uZ.p = a; if (uZ.ui==0){ return 0; } else if(uZ.ui==0x7FFF){ //maxpos -> 32767 return 268435456; } else if (uZ.ui==0x8001){ //-maxpos -> 32769 return -268435456; } else if (uZ.ui == 0x8000){ //NaR -> 32768 return NAN; } bool regS, sign; uint_fast16_t reg, shift=2, frac; int_fast16_t k=0; int_fast8_t exp; double fraction_max; sign = signP16UI( uZ.ui ); if (sign) uZ.ui = -uZ.ui & 0xFFFF; regS = signregP16UI( uZ.ui ); uint_fast16_t tmp = (uZ.ui<<2) & 0xFFFF; if (regS){ while (tmp>>15){ k++; shift++; tmp= (tmp<<1) & 0xFFFF; } reg = k+1; } else{ k=-1; while (!(tmp>>15)){ k--; shift++; tmp= (tmp<<1) & 0xFFFF; } tmp&=0x7FFF; reg =-k; } exp = tmp>>14; frac = (tmp & 0x3FFF) >> shift; fraction_max = pow(2, 13-reg) ; d16 = (double)( pow(4, k)* pow(2, exp) * (1+((double)frac/fraction_max)) ); if (sign) d16 = -d16; return d16; } #ifdef SOFTPOSIT_QUAD __float128 convertP16ToQuadDec(posit16_t a){ union ui16_p16 uZ; __float128 p16; uZ.p = a; if (uZ.ui==0){ p16 = 0; return p16; } else if(uZ.ui==0x7FFF){ //maxpos -> 32767 p16 = 268435456; return p16; } else if (uZ.ui==0x8001){ //-maxpos -> 32769 p16 = -268435456; return p16; } else if (uZ.ui == 0x8000){ //NaR -> 32768 p16 = INFINITY; return p16; } bool regS, sign; uint_fast16_t reg, shift=2, frac; int_fast16_t k=0; 
int_fast8_t exp; __float128 fraction_max; sign = signP16UI( uZ.ui ); if (sign) uZ.ui = -uZ.ui & 0xFFFF; regS = signregP16UI( uZ.ui ); uint_fast16_t tmp = (uZ.ui<<2) & 0xFFFF; if (regS){ while (tmp>>15){ k++; shift++; tmp= (tmp<<1) & 0xFFFF; } reg = k+1; } else{ k=-1; while (!(tmp>>15)){ k--; shift++; tmp= (tmp<<1) & 0xFFFF; } tmp&=0x7FFF; reg =-k; } exp = tmp>>14; frac = (tmp & 0x3FFF) >> shift; fraction_max = pow(2, 13-reg) ; p16 = (__float128)( pow(4, k)* pow(2, exp) * (1+((__float128)frac/fraction_max)) ); if (sign) p16 = -p16; return p16; } #endif luametatex-2.10.08/source/libraries/softposit/source/c_convertPosit32ToDec.c000066400000000000000000000156261442250314700271060ustar00rootroot00000000000000 /*============================================================================ This C source file is part of the SoftPosit Posit Arithmetic Package by S. H. Leong (Cerlane). Copyright 2017, 2018 A*STAR. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. =============================================================================*/ #include #ifdef SOFTPOSIT_QUAD #include #endif #include "platform.h" #include "internals.h" #ifdef SOFTPOSIT_QUAD __float128 convertP32ToQuad(posit32_t a){ union ui32_p32 uZ; __float128 p32; uZ.p = a; if (uZ.ui==0){ p32 = 0; return p32; } else if(uZ.ui==0x7FFFFFFF){ //maxpos p32 = 1.329227995784916e+36; return p32; } else if (uZ.ui==0x80000001){ //-maxpos p32 = -1.329227995784916e+36; return p32; } else if (uZ.ui == 0x80000000){ p32 = NAN; return p32; } bool regS, sign; uint_fast32_t reg, shift=2, frac, tmp; int_fast32_t k=0; int_fast8_t exp; __float128 fraction_max; sign = signP32UI( uZ.ui ); if (sign) uZ.ui = -uZ.ui & 0xFFFFFFFF; regS = signregP32UI( uZ.ui ); tmp = tmp = (uZ.ui<<2)&0xFFFFFFFF; if (regS){ while (tmp>>31){ k++; shift++; tmp= (tmp<<1) & 0xFFFFFFFF; } reg = k+1; } else{ k=-1; while (!(tmp>>31)){ k--; shift++; tmp= (tmp<<1) & 0xFFFFFFFF; } tmp&=0x7FFFFFFF; reg =-k; } exp = tmp>>29; frac = (tmp & 0x1FFFFFFF) >> shift; (reg>28) ? 
(fraction_max=1) : (fraction_max = pow(2, 28-reg) ) ; p32 = (__float128)( pow(16, k)* pow(2, exp) * (1+((__float128)frac/fraction_max)) ); if (sign) p32 = -p32; return p32; } __float128 convertPX2ToQuad(posit_2_t a){ union ui32_pX2 uZ; __float128 p32; uZ.p = a; if (uZ.ui==0){ p32 = 0; return p32; } else if(uZ.ui==0x7FFFFFFF){ //maxpos p32 = 1.329227995784916e+36; return p32; } else if (uZ.ui==0x80000001){ //-maxpos p32 = -1.329227995784916e+36; return p32; } else if (uZ.ui == 0x80000000){ p32 = INFINITY; return p32; } bool regS, sign; uint_fast32_t reg, shift=2, frac, tmp; int_fast32_t k=0; int_fast8_t exp; __float128 fraction_max; sign = signP32UI( uZ.ui ); if (sign) uZ.ui = -uZ.ui & 0xFFFFFFFF; regS = signregP32UI( uZ.ui ); tmp = tmp = (uZ.ui<<2)&0xFFFFFFFF; if (regS){ while (tmp>>31){ k++; shift++; tmp= (tmp<<1) & 0xFFFFFFFF; } reg = k+1; } else{ k=-1; while (!(tmp>>31)){ k--; shift++; tmp= (tmp<<1) & 0xFFFFFFFF; } tmp&=0x7FFFFFFF; reg =-k; } exp = tmp>>29; frac = (tmp & 0x1FFFFFFF) >> shift; (reg>28) ? 
(fraction_max=1) : (fraction_max = pow(2, 28-reg) ) ; p32 = (__float128)( pow(16, k)* pow(2, exp) * (1+((__float128)frac/fraction_max)) ); if (sign) p32 = -p32; return p32; } #endif double convertP32ToDouble(posit32_t pA){ union ui32_p32 uA; union ui64_double uZ; uint_fast32_t uiA, tmp=0; uint_fast64_t expA=0, uiZ, fracA=0; bool signA=0, regSA; int_fast32_t scale, kA=0; uA.p = pA; uiA = uA.ui; if (uA.ui == 0) return 0; else if(uA.ui == 0x80000000) return NAN; else{ signA = signP32UI( uiA ); if(signA) uiA = (-uiA & 0xFFFFFFFF); regSA = signregP32UI(uiA); tmp = (uiA<<2)&0xFFFFFFFF; if (regSA){ while (tmp>>31){ kA++; tmp= (tmp<<1) & 0xFFFFFFFF; } } else{ kA=-1; while (!(tmp>>31)){ kA--; tmp= (tmp<<1) & 0xFFFFFFFF; } tmp&=0x7FFFFFFF; } expA = tmp>>29; //to get 2 bits fracA = (((uint64_t)tmp<<3) & 0xFFFFFFFF)<<20; expA = (((kA<<2)+expA) + 1023) << 52; uiZ = expA + fracA + (((uint64_t)signA&0x1)<<63); uZ.ui = uiZ; return uZ.d; } } /*double convertP32ToDoubleOld(posit32_t pA){ union ui32_p32 uZ; double d32; uZ.p = pA; if (uZ.ui==0){ return 0; } else if(uZ.ui==0x7FFFFFFF){ //maxpos return 1.329227995784916e+36; } else if (uZ.ui==0x80000001){ //-maxpos return -1.329227995784916e+36; } else if (uZ.ui == 0x80000000){ return INFINITY; } bool regS, sign; uint_fast32_t reg, shift=2, frac, tmp; int_fast32_t k=0; int_fast8_t exp; double fraction_max; sign = signP32UI( uZ.ui ); if (sign) uZ.ui = -uZ.ui & 0xFFFFFFFF; regS = signregP32UI( uZ.ui ); tmp = (uZ.ui<<2)&0xFFFFFFFF; if (regS){ while (tmp>>31){ k++; shift++; tmp= (tmp<<1) & 0xFFFFFFFF; } reg = k+1; } else{ k=-1; while (!(tmp>>31)){ k--; shift++; tmp= (tmp<<1) & 0xFFFFFFFF; } tmp&=0x7FFFFFFF; reg =-k; } exp = tmp>>29; frac = (tmp & 0x1FFFFFFF) >> shift; (reg>28) ? 
(fraction_max=1) : (fraction_max = pow(2, 28-reg) ) ; d32 = (double)( pow(16, k)* pow(2, exp) * (1+((double)frac/fraction_max)) ); if (sign) d32 = -d32; return d32; }*/ double convertPX2ToDouble(posit_2_t a){ union ui32_pX2 uZ; double d32; uZ.p = a; if (uZ.ui==0){ return 0; } else if(uZ.ui==0x7FFFFFFF){ //maxpos return 1.329227995784916e+36; } else if (uZ.ui==0x80000001){ //-maxpos return -1.329227995784916e+36; } else if (uZ.ui == 0x80000000){ return INFINITY; } bool regS, sign; uint_fast32_t reg, shift=2, frac, tmp; int_fast32_t k=0; int_fast8_t exp; double fraction_max; sign = signP32UI( uZ.ui ); if (sign) uZ.ui = -uZ.ui & 0xFFFFFFFF; regS = signregP32UI( uZ.ui ); tmp = (uZ.ui<<2)&0xFFFFFFFF; if (regS){ while (tmp>>31){ k++; shift++; tmp= (tmp<<1) & 0xFFFFFFFF; } reg = k+1; } else{ k=-1; while (!(tmp>>31)){ k--; shift++; tmp= (tmp<<1) & 0xFFFFFFFF; } tmp&=0x7FFFFFFF; reg =-k; } exp = tmp>>29; frac = (tmp & 0x1FFFFFFF) >> shift; (reg>28) ? (fraction_max=1) : (fraction_max = pow(2, 28-reg) ) ; d32 = (double)( pow(16, k)* pow(2, exp) * (1+((double)frac/fraction_max)) ); if (sign) d32 = -d32; return d32; } luametatex-2.10.08/source/libraries/softposit/source/c_convertPosit8ToDec.c000066400000000000000000000051521442250314700270220ustar00rootroot00000000000000 /*============================================================================ This C source file is part of the SoftPosit Posit Arithmetic Package by S. H. Leong (Cerlane). Copyright 2017, 2018 A*STAR. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. 
Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. =============================================================================*/ #include #include "platform.h" #include "internals.h" double convertP8ToDouble(posit8_t a){ union ui8_p8 uZ; double d8; uZ.p = a; if (uZ.ui==0){ return 0; } else if(uZ.ui==0x7F){ //maxpos return 64; } else if (uZ.ui==0x81){ //-maxpos return -64; } else if (uZ.ui == 0x80){ //NaR return NAN; } bool regS, sign; uint_fast8_t reg, shift=2, frac; int_fast8_t k=0; double fraction_max; sign = signP8UI( uZ.ui ); if (sign) uZ.ui = -uZ.ui & 0xFF; regS = signregP8UI( uZ.ui ); uint_fast8_t tmp = (uZ.ui<<2) & 0xFF; if (regS){ while (tmp>>7){ k++; shift++; tmp= (tmp<<1) & 0xFF; } reg = k+1; } else{ k=-1; while (!(tmp>>7)){ k--; shift++; tmp= (tmp<<1) & 0xFF; } tmp&=0x7F; reg =-k; } frac = (tmp & 0x7F) >> shift; fraction_max = pow(2, 6-reg) ; d8 = (double)( pow(2, k) * (1+((double)frac/fraction_max)) ); if (sign) d8 = -d8; return d8; } luametatex-2.10.08/source/libraries/softposit/source/c_convertPositX1ToDec.c000066400000000000000000000077051442250314700271510ustar00rootroot00000000000000 
/*============================================================================ This C source file is part of the SoftPosit Posit Arithmetic Package by S. H. Leong (Cerlane). Copyright 2017, 2018 A*STAR. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
=============================================================================*/ #include #ifdef SOFTPOSIT_QUAD #include #endif #include "platform.h" #include "internals.h" #ifdef SOFTPOSIT_QUAD __float128 convertPX1ToQuad(posit_1_t a){ union ui32_pX1 uZ; __float128 p32; uZ.p = a; if (uZ.ui==0){ p32 = 0; return p32; } else if(uZ.ui==0x7FFFFFFF){ //maxpos p32 = 1152921504606847000; return p32; } else if (uZ.ui==0x80000001){ //-maxpos p32 = -1152921504606847000; return p32; } else if (uZ.ui == 0x80000000){ p32 = NAN; return p32; } bool regS, sign; uint_fast32_t reg, shift=2, frac, tmp; int_fast32_t k=0; int_fast8_t exp; __float128 fraction_max; sign = signP32UI( uZ.ui ); if (sign) uZ.ui = -uZ.ui & 0xFFFFFFFF; regS = signregP32UI( uZ.ui ); tmp = tmp = (uZ.ui<<2)&0xFFFFFFFF; if (regS){ while (tmp>>31){ k++; shift++; tmp= (tmp<<1) & 0xFFFFFFFF; } reg = k+1; } else{ k=-1; while (!(tmp>>31)){ k--; shift++; tmp= (tmp<<1) & 0xFFFFFFFF; } tmp&=0x7FFFFFFF; reg =-k; } exp = tmp>>30; frac = (tmp & 0x1FFFFFFF) >> shift; (reg>29) ? (fraction_max=1) : (fraction_max = pow(2, 29-reg) ) ; p32 = (__float128)( pow(16, k)* pow(2, exp) * (1+((__float128)frac/fraction_max)) ); if (sign) p32 = -p32; return p32; } #endif double convertPX1ToDouble(posit_1_t a){ union ui32_pX1 uZ; double d32; uZ.p = a; if (uZ.ui==0){ return 0; } else if(uZ.ui==0x7FFFFFFF){ //maxpos return 1152921504606847000; } else if (uZ.ui==0x80000001){ //-maxpos return -1152921504606847000; } else if (uZ.ui == 0x80000000){ return NAN; } bool regS, sign; uint_fast32_t reg, shift=2, frac, tmp; int_fast32_t k=0; int_fast8_t exp; double fraction_max; sign = signP32UI( uZ.ui ); if (sign) uZ.ui = -uZ.ui & 0xFFFFFFFF; regS = signregP32UI( uZ.ui ); tmp = (uZ.ui<<2)&0xFFFFFFFF; if (regS){ while (tmp>>31){ k++; shift++; tmp= (tmp<<1) & 0xFFFFFFFF; } reg = k+1; } else{ k=-1; while (!(tmp>>31)){ k--; shift++; tmp= (tmp<<1) & 0xFFFFFFFF; } tmp&=0x7FFFFFFF; reg =-k; } exp = tmp>>30; frac = (tmp & 0x3FFFFFFF) >> shift; (reg>29) ? 
(fraction_max=1) : (fraction_max = pow(2, 29-reg) ) ; d32 = (double)( pow(4, k)* pow(2, exp) * (1+((double)frac/fraction_max)) ); if (sign) d32 = -d32; return d32; } luametatex-2.10.08/source/libraries/softposit/source/c_convertQuire16ToPosit16.c000066400000000000000000000075521442250314700276500ustar00rootroot00000000000000 /*============================================================================ This C source file is part of the SoftPosit Posit Arithmetic Package by S. H. Leong (Cerlane). Copyright 2017, 2018 A*STAR. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
=============================================================================*/ #include #include "platform.h" #include "internals.h" //TODO DEPRECATED posit16_t convertQ16ToP16(quire16_t qA){ return q16_to_p16(qA); } posit16_t q16_to_p16(quire16_t qA){ union ui128_q16 uZ; union ui16_p16 uA; uint_fast16_t regA, fracA = 0, shift=0, regime; uint_fast64_t frac64A; bool sign, regSA=0, bitNPlusOne=0, bitsMore=0; int_fast8_t expA = 0; if (isQ16Zero(qA)){ uA.ui=0; return uA.p; } //handle NaR else if (isNaRQ16(qA)){ uA.ui=0x8000; return uA.p; } uZ.q = qA; sign = uZ.ui[0]>>63; if(sign){ //probably need to do two's complement here before the rest. if (uZ.ui[1]==0){ uZ.ui[0] = -uZ.ui[0]; } else{ uZ.ui[1] = -uZ.ui[1]; uZ.ui[0] = ~(uZ.ui[0]); } } int noLZ =0; if (uZ.ui[0] == 0){ noLZ+=64; uint_fast64_t tmp = uZ.ui[1]; while(!(tmp>>63)){ noLZ++; tmp<<=1; } frac64A = tmp; } else{ uint_fast64_t tmp = uZ.ui[0]; int noLZtmp = 0; while(!(tmp>>63)){ noLZtmp++; tmp<<=1; } noLZ+=noLZtmp; frac64A = tmp; frac64A+= ( uZ.ui[1]>>(64-noLZtmp) ); if (uZ.ui[1]<> 1; expA = 71 - noLZ - (kA<<1) ; if(kA<0){ regA = (-kA & 0xFFFF); regSA = 0; regime = 0x4000>>regA; } else{ regA = kA+1; regSA=1; regime = 0x7FFF - (0x7FFF>>regA); } if(regA>14){ //max or min pos. exp and frac does not matter. (regSA) ? 
(uA.ui= 0x7FFF): (uA.ui=0x1); } else{ //remove hidden bit frac64A&=0x7FFFFFFFFFFFFFFF; shift = regA+50; //1 es bit, 1 sign bit and 1 r terminating bit , 16+31+3 fracA = frac64A>>shift; if (regA!=14){ bitNPlusOne = (frac64A>>(shift-1)) & 0x1; unsigned long long tmp = frac64A<<(65-shift); if(frac64A<<(65-shift)) bitsMore = 1; } else if (frac64A>0){ fracA=0; bitsMore=1; } if (regA==14 && expA) bitNPlusOne = 1; uA.ui = packToP16UI(regime, regA, expA, fracA); if (bitNPlusOne){ uA.ui += (uA.ui&1) | bitsMore; } } if (sign) uA.ui = -uA.ui & 0xFFFF; return uA.p; } luametatex-2.10.08/source/libraries/softposit/source/c_convertQuire32ToPosit32.c000066400000000000000000000101311442250314700276270ustar00rootroot00000000000000 /*============================================================================ This C source file is part of the SoftPosit Posit Arithmetic Package by S. H. Leong (Cerlane). Copyright 2017, 2018 A*STAR. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. =============================================================================*/ #include #include "platform.h" #include "internals.h" posit32_t q32_to_p32(quire32_t qA){ union ui512_q32 uZ; union ui32_p32 uA; uint_fast32_t regA, fracA = 0, shift=0, regime; uint_fast64_t frac64A; bool sign, regSA=0, bitNPlusOne=0, bitsMore=0; int_fast32_t expA = 0; int i; if (isQ32Zero(qA)){ uA.ui=0; return uA.p; } //handle NaR else if (isNaRQ32(qA)){ uA.ui=0x80000000; return uA.p; } uZ.q = qA; sign = uZ.ui[0]>>63; if(sign){ for (i=7; i>=0; i--){ if (uZ.ui[i]>0){ uZ.ui[i] = - uZ.ui[i]; i--; while(i>=0){ uZ.ui[i] = ~uZ.ui[i]; i--; } break; } } } //minpos and maxpos int noLZ =0; for (i=0; i<8; i++){ if (uZ.ui[i]==0){ noLZ+=64; } else{ uint_fast64_t tmp = uZ.ui[i]; int noLZtmp = 0; while (!(tmp>>63)){ noLZtmp++; tmp= (tmp<<1); } noLZ+=noLZtmp; frac64A = tmp; if (i!=7 && noLZtmp!=0){ frac64A+= ( uZ.ui[i+1]>>(64-noLZtmp) ); if( uZ.ui[i+1] & (((uint64_t)0x1<<(64-noLZtmp))-1) ) bitsMore=1; i++; } i++; while(i<8){ if (uZ.ui[i]>0){ bitsMore = 1;; break; } i++; } break; } } //default dot is between bit 271 and 272, extreme left bit is bit 0. Last right bit is bit 511. 
//Equations derived from quire32_mult last_pos = 271 - (kA<<2) - expA and first_pos = last_pos - frac_len int kA=(271-noLZ) >> 2; expA = 271 - noLZ - (kA<<2) ; if(kA<0){ //regA = (-kA & 0xFFFF); regA = -kA; regSA = 0; regime = 0x40000000>>regA; } else{ regA = kA+1; regSA=1; regime = 0x7FFFFFFF - (0x7FFFFFFF>>regA); } if(regA>30){ //max or min pos. exp and frac does not matter. (regSA) ? (uA.ui= 0x7FFFFFFF): (uA.ui=0x1); } else{ //remove hidden bit frac64A&=0x7FFFFFFFFFFFFFFF; shift = regA+35; //2 es bit, 1 sign bit and 1 r terminating bit , 31+4 fracA = frac64A>>shift; if (regA<=28){ bitNPlusOne = (frac64A>>(shift-1)) & 0x1; expA<<= (28-regA); if (frac64A<<(65-shift)) bitsMore=1; } else { if (regA==30){ bitNPlusOne = expA&0x2; bitsMore = (expA&0x1); expA = 0; } else if (regA==29){ bitNPlusOne = expA&0x1; expA>>=1; //taken care of by the pack algo } if (frac64A>0){ fracA=0; bitsMore =1; } } uA.ui = packToP32UI(regime, expA, fracA); if (bitNPlusOne) uA.ui += (uA.ui&1) | bitsMore; } if (sign) uA.ui = -uA.ui & 0xFFFFFFFF; return uA.p; } luametatex-2.10.08/source/libraries/softposit/source/c_convertQuire8ToPosit8.c000066400000000000000000000060661442250314700275110ustar00rootroot00000000000000 /*============================================================================ This C source file is part of the SoftPosit Posit Arithmetic Package by S. H. Leong (Cerlane). Copyright 2017, 2018 A*STAR. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. 
Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. =============================================================================*/ #include #include #include "platform.h" #include "internals.h" posit8_t q8_to_p8(quire8_t qA){ union ui32_q8 uZ; union ui8_p8 uA; uint_fast8_t regA, fracA = 0, shift=0, regime; uint_fast32_t frac32A; bool sign=0, regSA=0, bitNPlusOne=0, bitsMore=0; if (isQ8Zero(qA)){ uA.ui=0; return uA.p; } //handle NaR else if (isNaRQ8(qA)){ uA.ui=0x80; return uA.p; } uZ.q = qA; sign = uZ.ui>>31; if(sign){ uZ.ui = -uZ.ui & 0xFFFFFFFF; } int noLZ =0; uint_fast32_t tmp = uZ.ui; while (!(tmp>>31)){//==0 noLZ++; tmp<<=1; } frac32A = tmp; //default dot is between bit 19 and 20, extreme left bit is bit 0. Last right bit is bit 31. //Scale = k int kA=(19-noLZ); if(kA<0){ regA = (-kA & 0xFF); regSA = 0; regime = 0x40>>regA; } else{ regA = kA+1; regSA=1; regime = 0x7F-(0x7F>>regA); } if(regA>6){ //max or min pos. exp and frac does not matter. (regSA) ? 
(uA.ui= 0x7F): (uA.ui=0x1); } else{ //remove hidden bit frac32A&=0x7FFFFFFF; shift = regA+25; // 1 sign bit and 1 r terminating bit , 16+7+2 fracA = frac32A>>shift; bitNPlusOne = (frac32A>>(shift-1))&0x1 ; uA.ui = packToP8UI(regime, fracA); if (bitNPlusOne){ ( (frac32A <<(33-shift)) & 0xFFFFFFFF ) ? (bitsMore=1) : (bitsMore=0); uA.ui += (uA.ui&1) | bitsMore; } } if (sign) uA.ui = -uA.ui & 0xFF; return uA.p; } luametatex-2.10.08/source/libraries/softposit/source/c_convertQuireX2ToPositX2.c000066400000000000000000000107071442250314700277520ustar00rootroot00000000000000 /*============================================================================ This C source file is part of the SoftPosit Posit Arithmetic Package by S. H. Leong (Cerlane). Copyright 2017, 2018 A*STAR. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. =============================================================================*/ #include #include "platform.h" #include "internals.h" posit_2_t qX2_to_pX2(quire_2_t qA, int x){ union ui512_qX2 uZ; union ui32_pX2 uA; uint_fast32_t fracA = 0, shift=0, regime; uint_fast64_t frac64A; bool sign, regSA=0, bitNPlusOne=0, bitsMore=0; int_fast32_t regA, expA = 0; int i; if (isQX2Zero(qA)){ uA.ui=0; return uA.p; } //handle NaR else if (isNaRQX2(qA)){ uA.ui=0x80000000; return uA.p; } uZ.q = qA; sign = uZ.ui[0]>>63; if(sign){ for (i=7; i>=0; i--){ if (uZ.ui[i]>0){ uZ.ui[i] = - uZ.ui[i]; i--; while(i>=0){ uZ.ui[i] = ~uZ.ui[i]; i--; } break; } } } //minpos and maxpos int noLZ =0; for (i=0; i<8; i++){ if (uZ.ui[i]==0){ noLZ+=64; } else{ uint_fast64_t tmp = uZ.ui[i]; int noLZtmp = 0; while (!(tmp>>63)){ noLZtmp++; tmp= (tmp<<1); } noLZ+=noLZtmp; frac64A = tmp; if (i!=7 && noLZtmp!=0){ frac64A+= ( uZ.ui[i+1]>>(64-noLZtmp) ); if( uZ.ui[i+1] & (((uint64_t)0x1<<(64-noLZtmp))-1) ) bitsMore=1; i++; } i++; while(i<8){ if (uZ.ui[i]>0){ bitsMore = 1;; break; } i++; } break; } } //default dot is between bit 271 and 272, extreme left bit is bit 0. Last right bit is bit 511. 
//Equations derived from quire32_mult last_pos = 271 - (kA<<2) - expA and first_pos = last_pos - frac_len int kA=(271-noLZ) >> 2; expA = 271 - noLZ - (kA<<2) ; if(kA<0){ //regA = (-kA & 0xFFFF); regA = -kA; regSA = 0; regime = 0x40000000>>regA; } else{ regA = kA+1; regSA=1; regime = 0x7FFFFFFF - (0x7FFFFFFF>>regA); } if(regA>(x-2)){ //max or min pos. exp and frac does not matter. uA.ui=(regSA) ? (0x7FFFFFFF & ((int32_t)0x80000000>>(x-1)) ): (0x1 << (32-x)); } else{ //remove hidden bit frac64A&=0x7FFFFFFFFFFFFFFF; shift = regA+35; //2 es bit, 1 sign bit and 1 r terminating bit , 31+4 fracA = ((uint32_t) (frac64A>>shift)); //regime length is smaller than length of posit if (regA>(shift+31-x)) & 0x1; if ((frac64A<<(33-shift+x)) !=0) bitsMore=1; } else { if (regA==(x-2)){ bitNPlusOne = expA&0x2; bitsMore = (expA&0x1); expA = 0; } else if (regA==(x-3)){ bitNPlusOne = expA&0x1; //expA>>=1; //taken care of by the pack algo expA &=0x2; } if (frac64A>0){ fracA=0; bitsMore =1; } } } else{ regime=(regSA) ? (regime & ((int32_t)0x80000000>>(x-1)) ): (regime << (32-x)); expA=0; fracA=0; } expA <<= (28-regA); uA.ui = packToP32UI(regime, expA, fracA) & ((int32_t)0x80000000>>(x-1)); if (bitNPlusOne) uA.ui += (((uA.ui>>(32-x)) &0x1) | (uint32_t)bitsMore )<< (32-x); } if (sign) uA.ui = -uA.ui & 0xFFFFFFFF; return uA.p; } luametatex-2.10.08/source/libraries/softposit/source/c_int.c000066400000000000000000000101301442250314700240760ustar00rootroot00000000000000 /*============================================================================ This C source file is part of the SoftPosit Posit Arithmetic Package by S. H. Leong (Cerlane). Copyright 2017 2018 A*STAR. All rights reserved. This C source file was based on SoftFloat IEEE Floating-Point Arithmetic Package, Release 3d, by John R. Hauser. Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the University of California. All rights reserved. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
=============================================================================*/ #include "platform.h" #include "internals.h" int_fast64_t p8_int( posit8_t pA ) { union ui8_p8 uA; int_fast64_t iZ; uint_fast8_t scale = 0, uiA; bool sign; uA.p = pA; uiA = uA.ui; //NaR if (uiA==0x80) return 0x8000000000000000LL; sign = uiA>>7; if (sign) uiA = -uiA & 0xFF; if (uiA < 0x40) return 0; else if (uiA < 0x60) iZ = 1; else { uiA -= 0x40; while (0x20 & uiA) { scale ++; uiA = (uiA - 0x20) << 1; } uiA <<= 1; iZ = ((uint64_t)uiA | 0x40) >> (6 - scale); } if (sign) iZ = -iZ; return iZ; } int_fast64_t p16_int( posit16_t pA ){ union ui16_p16 uA; int_fast64_t iZ; uint_fast16_t scale = 0, uiA; bool sign; uA.p = pA; uiA = uA.ui; // NaR if (uiA==0x8000) return 0x8000000000000000LL; sign = uiA>>15; if (sign) uiA = -uiA & 0xFFFF; if (uiA < 0x4000) return 0; else if (uiA < 0x5000) iZ = 1; else if (uiA < 0x5800) iZ = 2; else{ uiA -= 0x4000; while (0x2000 & uiA) { scale += 2; uiA = (uiA - 0x2000) << 1; } uiA <<= 1; if (0x2000 & uiA) scale++; iZ = ((uint64_t)uiA | 0x2000) >> (13 - scale); } if (sign) iZ = -iZ; return iZ; } int64_t p32_int( posit32_t pA ){ union ui32_p32 uA; int_fast64_t iZ; uint_fast32_t scale = 0, uiA; bool sign; uA.p = pA; uiA = uA.ui; if (uiA==0x80000000) return 0x8000000000000000; sign = uiA>>31; if (sign) uiA = -uiA & 0xFFFFFFFF; if (uiA < 0x40000000) return 0; else if (uiA < 0x48000000) iZ = 1; else if (uiA < 0x4C000000) iZ = 2; else if(uiA>0x7FFFAFFF) iZ= 0x7FFFFFFFFFFFFFFF; else{ uiA -= 0x40000000; while (0x20000000 & uiA) { scale += 4; uiA = (uiA - 0x20000000) << 1; } uiA <<= 1; // Skip over termination bit, which is 0. if (0x20000000 & uiA) scale+=2; // If first exponent bit is 1, increment the scale. if (0x10000000 & uiA) scale++; iZ = ((uiA | 0x10000000ULL)&0x1FFFFFFFULL) << 34; // Left-justify fraction in 32-bit result (one left bit padding) iZ = (scale<62) ? 
((uiA | 0x10000000ULL)&0x1FFFFFFFULL) >> (28-scale): ((uiA | 0x10000000ULL)&0x1FFFFFFFULL) << (scale-28); } if (sign) iZ = -iZ ; return iZ; } int64_t pX2_int( posit_2_t pA ){ posit32_t p32 = {.v = pA.v}; return p32_int(p32); } luametatex-2.10.08/source/libraries/softposit/source/i32_to_p16.c000066400000000000000000000061271442250314700246020ustar00rootroot00000000000000 /*============================================================================ This C source file is part of the SoftPosit Posit Arithmetic Package by S. H. Leong (Cerlane) and John Gustafson. Copyright 2017 2018 A*STAR. All rights reserved. This C source file was based on SoftFloat IEEE Floating-Point Arithmetic Package, Release 3d, by John R. Hauser. Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of California. All Rights Reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. =============================================================================*/ #include "platform.h" #include "internals.h" posit16_t i32_to_p16( int32_t iA ){ int_fast8_t k, log2 = 25; union ui16_p16 uZ; uint_fast16_t uiA; uint_fast32_t expA, mask = 0x02000000, fracA; bool sign; if (iA < -134217728){ //-2147483648 to -134217729 rounds to P32 value -268435456 uZ.ui = 0x8001; //-maxpos return uZ.p; } sign = iA>>31; if(sign){ iA = -iA &0xFFFFFFFF; } if( iA > 134217728 ) { //134217729 to 2147483647 rounds to P32 value 268435456 uiA = 0x7FFF; //maxpos } else if ( iA > 0x02FFFFFF ){ uiA = 0x7FFE; } else if ( iA < 2 ){ uiA = (iA << 14); } else { fracA = iA; while ( !(fracA & mask) ) { log2--; fracA <<= 1; } k = log2 >> 1; expA = (log2 & 0x1) << (12 - k); fracA = (fracA ^ mask); uiA = (0x7FFF ^ (0x3FFF >> k)) | expA | ( fracA >> (k + 13)); mask = 0x1000 << k; //bitNPlusOne if (mask & fracA) { if (((mask - 1) & fracA) | ((mask << 1) & fracA)) uiA++; } } (sign) ? (uZ.ui = -uiA &0xFFFF) : (uZ.ui = uiA); return uZ.p; } luametatex-2.10.08/source/libraries/softposit/source/i32_to_p32.c000066400000000000000000000057421442250314700246020ustar00rootroot00000000000000 /*============================================================================ This C source file is part of the SoftPosit Posit Arithmetic Package by S. H. Leong (Cerlane). Copyright 2017 2018 A*STAR. All rights reserved. 
This C source file was based on SoftFloat IEEE Floating-Point Arithmetic Package, Release 3d, by John R. Hauser. Copyright 2011, 2012, 2013, 2014 The Regents of the University of California. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. =============================================================================*/ #include "platform.h" #include "internals.h" posit32_t i32_to_p32( int32_t iA ) { int_fast8_t k, log2 = 31;//length of bit (e.g. 
4294966271) in int (32 but because we have only 32 bits, so one bit off to accommodate that fact) union ui32_p32 uZ; uint_fast32_t uiA; uint_fast32_t expA, mask = 0x80000000, fracA; bool sign; if (iA < -2147483135){ //-2147483648 to -2147483136 rounds to P32 value -2147483648 uZ.ui = 0x80500000; return uZ.p; } sign = iA>>31; if(sign) iA = -iA &0xFFFFFFFF; if ( iA >2147483135)//2147483136 to 2147483647 rounds to P32 value (2147483648)=> 0x7FB00000 uiA = 0x7FB00000; else if ( iA < 0x2 ) uiA = (iA << 30); else { fracA = iA; while ( !(fracA & mask) ) { log2--; fracA <<= 1; } k = (log2 >> 2); expA = (log2 & 0x3) << (27 - k); fracA = (fracA ^ mask); uiA = (0x7FFFFFFF ^ (0x3FFFFFFF >> k)) | expA | fracA>>(k+4); mask = 0x8 << k; //bitNPlusOne if (mask & fracA) if (((mask - 1) & fracA) | ((mask << 1) & fracA)) uiA++; } (sign) ? (uZ.ui = -uiA &0xFFFFFFFF) : (uZ.ui = uiA); return uZ.p; } luametatex-2.10.08/source/libraries/softposit/source/i32_to_p8.c000066400000000000000000000056171442250314700245260ustar00rootroot00000000000000 /*============================================================================ This C source file is part of the SoftPosit Posit Arithmetic Package by S. H. Leong (Cerlane) and John Gustafson. Copyright 2017 2018 A*STAR. All rights reserved. This C source file was based on SoftFloat IEEE Floating-Point Arithmetic Package, Release 3d, by John R. Hauser. Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of California. All Rights Reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. 
Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. =============================================================================*/ #include #include "platform.h" #include "internals.h" posit8_t i32_to_p8( int32_t iA ){ int_fast8_t k, log2 = 6;//length of bit union ui8_p8 uZ; uint_fast8_t uiA; uint_fast32_t mask = 0x40, fracA; bool sign; if (iA < -48){ //-48 to -MAX_INT rounds to P32 value -268435456 uZ.ui = 0x81; //-maxpos return uZ.p; } sign = iA>>31; if(sign){ iA = -iA &0xFFFFFFFF; } if ( iA > 48 ) { uiA = 0x7F; } else if ( iA < 2 ){ uiA = (iA << 6); } else { fracA = iA; while ( !(fracA & mask) ) { log2--; fracA <<= 1; } k = log2; fracA = (fracA ^ mask); uiA = (0x7F ^ (0x3F >> k)) | ( fracA >> (k+1) ) ; mask = 0x1 << k; //bitNPlusOne if (mask & fracA) { if (((mask - 1) & fracA) | ((mask << 1) & fracA)) uiA++; } } (sign) ? 
(uZ.ui = -uiA &0xFF) : (uZ.ui = uiA); return uZ.p; } luametatex-2.10.08/source/libraries/softposit/source/i32_to_pX1.c000066400000000000000000000074721442250314700246500ustar00rootroot00000000000000 /*============================================================================ This C source file is part of the SoftPosit Posit Arithmetic Package by S. H. Leong (Cerlane). Copyright 2017 2018 A*STAR. All rights reserved. This C source file was based on SoftFloat IEEE Floating-Point Arithmetic Package, Release 3d, by John R. Hauser. Copyright 2011, 2012, 2013, 2014 The Regents of the University of California. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. =============================================================================*/ #include "platform.h" #include "internals.h" posit_1_t i32_to_pX1( int32_t a, int x ) { int_fast8_t k, log2 = 31;//length of bit (e.g. 2147418111) in int (32 but because we have only 32 bits, so one bit off to accommodate that fact) union ui32_pX1 uZ; uint_fast32_t uiA=0; uint_fast32_t expA, mask = 0x80000000, fracA; bool sign; sign = a>>31; if(sign) a = -a &0xFFFFFFFF; //NaR if (a == 0x80000000 || x<2 || x>32) uiA = 0x80000000; else if (x==2){ if (a>0) uiA=0x40000000; } else if ( a > 2147418111){ uiA = 0x7FFF9FFF; // 2147483648 //if (x<12) uiA&=((int32_t)0x80000000>>(x-1)); } else if ( a < 0x2 ) uiA = (a << 30); else { fracA = a; while ( !(fracA & mask) ) { log2--; fracA <<= 1; } k = (log2 >> 1); expA = (log2 & 0x1) << (28-k); fracA = (fracA ^ mask); if(k>=(x-2)){//maxpos uiA= 0x7FFFFFFF & ((int32_t)0x80000000>>(x-1)); } else if (k==(x-3)){//bitNPlusOne-> first exp bit //bitLast is zero uiA = (0x7FFFFFFF ^ (0x3FFFFFFF >> k)); if( (expA & 0x2) && ((expA&0x1) | fracA) ) //bitNPlusOne //bitsMore uiA |= ((uint32_t)0x80000000>>(x-1)); } else if (k==(x-4)){ uiA = (0x7FFFFFFF ^ (0x3FFFFFFF >> k)) | ((expA &0x2)<< (27 - k)); if(expA&0x1){ if( (((uint32_t)0x80000000>>(x-1)) & uiA)| fracA) uiA += ((uint32_t)0x80000000>>(x-1)); } } else if (k==(x-5)){ uiA = (0x7FFFFFFF ^ (0x3FFFFFFF >> k)) | (expA<< (27 - k)); mask = 0x8 << (k -x); if (mask & fracA){ //bitNPlusOne if (((mask - 1) & fracA) | 
(expA&0x1)) { uiA+= ((uint32_t)0x80000000>>(x-1)); } } } else{ uiA = ((0x7FFFFFFF ^ (0x3FFFFFFF >> k)) | (expA<< (27 - k)) | fracA>>(k+4)) & ((int32_t)0x80000000>>(x-1));; mask = 0x8 << (k-x); //bitNPlusOne if (mask & fracA) if (((mask - 1) & fracA) | ((mask << 1) & fracA)) uiA+= ((uint32_t)0x80000000>>(x-1)); } } (sign) ? (uZ.ui = -uiA &0xFFFFFFFF) : (uZ.ui = uiA); return uZ.p; } luametatex-2.10.08/source/libraries/softposit/source/i32_to_pX2.c000066400000000000000000000077531442250314700246530ustar00rootroot00000000000000 /*============================================================================ This C source file is part of the SoftPosit Posit Arithmetic Package by S. H. Leong (Cerlane). Copyright 2017 2018 A*STAR. All rights reserved. This C source file was based on SoftFloat IEEE Floating-Point Arithmetic Package, Release 3d, by John R. Hauser. Copyright 2011, 2012, 2013, 2014 The Regents of the University of California. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. =============================================================================*/ #include "platform.h" #include "internals.h" posit_2_t i32_to_pX2( int32_t iA, int x ) { int_fast8_t k, log2 = 31;//length of bit (e.g. 4294966271) in int (32 but because we have only 32 bits, so one bit off to accomdate that fact) union ui32_pX2 uZ; uint_fast32_t uiA=0; uint_fast32_t expA, mask = 0x80000000, fracA; bool sign; if (iA < -2147483135){ uZ.ui = 0x80500000; return uZ.p; } sign = iA>>31; if(sign) iA = -iA &0xFFFFFFFF; //NaR if (x<2 || x>32) uiA = 0x80000000; else if (x==2){ if (iA>0) uiA=0x40000000; } else if ( iA > 2147483135){//2147483136 to 2147483647 rounds to P32 value (2147483648)=> 0x7FB00000 uiA = 0x7FB00000; // 2147483648 if (x<10) uiA&=((int32_t)0x80000000>>(x-1)); else if (x<12) uiA = 0x7FF00000&((int32_t)0x80000000>>(x-1)); } else if ( iA < 0x2 ) uiA = (iA << 30); else { fracA = iA; while ( !(fracA & mask) ) { log2--; fracA <<= 1; } k = (log2 >> 2); expA = (log2 & 0x3); fracA = (fracA ^ mask); if(k>=(x-2)){//maxpos uiA= 0x7FFFFFFF & ((int32_t)0x80000000>>(x-1)); } else if (k==(x-3)){//bitNPlusOne-> first exp bit //bitLast is zero uiA = (0x7FFFFFFF ^ (0x3FFFFFFF >> k)); if( (expA & 0x2) && ((expA&0x1) | fracA) ) //bitNPlusOne //bitsMore uiA |= ((uint32_t)0x80000000>>(x-1)); } else if (k==(x-4)){ uiA = (0x7FFFFFFF ^ (0x3FFFFFFF >> k)) | ((expA &0x2)<< (27 - k)); if(expA&0x1){ if( (((uint32_t)0x80000000>>(x-1)) & uiA)| fracA) uiA += ((uint32_t)0x80000000>>(x-1)); } } 
else if (k==(x-5)){ uiA = (0x7FFFFFFF ^ (0x3FFFFFFF >> k)) | (expA<< (27 - k)); mask = 0x8 << (k -x); if (mask & fracA){ //bitNPlusOne if (((mask - 1) & fracA) | (expA&0x1)) { uiA+= ((uint32_t)0x80000000>>(x-1)); } } } else{ uiA = ((0x7FFFFFFF ^ (0x3FFFFFFF >> k)) | (expA<< (27 - k)) | fracA>>(k+4)) & ((int32_t)0x80000000>>(x-1));; mask = 0x8 << (k-x); //bitNPlusOne if (mask & fracA) if (((mask - 1) & fracA) | ((mask << 1) & fracA)) uiA+= ((uint32_t)0x80000000>>(x-1)); } } (sign) ? (uZ.ui = -uiA &0xFFFFFFFF) : (uZ.ui = uiA); return uZ.p; } luametatex-2.10.08/source/libraries/softposit/source/i64_to_p16.c000066400000000000000000000057551442250314700246150ustar00rootroot00000000000000 /*============================================================================ This C source file is part of the SoftPosit Posit Arithmetic Package by S. H. Leong (Cerlane) and John Gustafson. Copyright 2017 2018 A*STAR. All rights reserved. This C source file was based on SoftFloat IEEE Floating-Point Arithmetic Package, Release 3d, by John R. Hauser. Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of California. All Rights Reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. =============================================================================*/ #include "platform.h" #include "internals.h" posit16_t i64_to_p16( int64_t iA ) { int_fast8_t k, log2 = 25; union ui16_p16 uZ; uint_fast16_t uiA; uint_fast64_t expA, mask = 0x0000000002000000, fracA; bool sign; if (iA < -134217728){ //-9223372036854775808 to -134217729 rounds to P32 value -268435456 uZ.ui = 0x8001; //-maxpos return uZ.p; } sign = iA>>63; if (sign) iA = -iA; if( iA > 134217728 ) { //134217729 to 9223372036854775807 rounds to P32 value 268435456 uiA = 0x7FFF; //maxpos } else if ( iA > 0x0000000008000000 ) { uiA = 0x7FFF; } else if ( iA > 0x0000000002FFFFFF ){ uiA = 0x7FFE; } else if ( iA < 2 ){ uiA = (iA << 14); } else { fracA = iA; while ( !(fracA & mask) ) { log2--; fracA <<= 1; } k = log2 >> 1; expA = (log2 & 0x1) << (12 - k); fracA = fracA ^ mask; uiA = (0x7FFF ^ (0x3FFF >> k)) | expA | (fracA >> (k + 13)); mask = 0x1000 << k; if (mask & fracA) { if ( ((mask - 1) & fracA) | ((mask << 1) & fracA) ) uiA++; } } (sign) ? 
(uZ.ui = -uiA) : (uZ.ui = uiA); return uZ.p; } luametatex-2.10.08/source/libraries/softposit/source/i64_to_p32.c000066400000000000000000000061471442250314700246070ustar00rootroot00000000000000 /*============================================================================ This C source file is part of the SoftPosit Posit Arithmetic Package by S. H. Leong (Cerlane). Copyright 2017 2018 A*STAR. All rights reserved. This C source file was based on SoftFloat IEEE Floating-Point Arithmetic Package, Release 3d, by John R. Hauser. Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of California. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. =============================================================================*/ #include "platform.h" #include "internals.h" posit32_t i64_to_p32( int64_t iA ) { int_fast8_t k, log2 = 63;//length of bit (e.g. 9222809086901354496) in int (64 but because we have only 64 bits, so one bit off to accomdate that fact) union ui32_p32 uZ; uint_fast64_t uiA; uint_fast64_t mask = 0x8000000000000000, fracA; uint_fast32_t expA; bool sign; if (iA < -9222809086901354495){//-9222809086901354496 to -9223372036854775808 will be P32 value -9223372036854775808 uZ.ui = 0x80005000; return uZ.p; } sign = iA>>63; if(sign) iA = -iA; if ( iA >9222809086901354495)//9222809086901354495 bcos 9222809086901354496 to 9223372036854775807 will be P32 value 9223372036854775808 uiA = 0x7FFFB000; // P32: 9223372036854775808 else if ( iA < 0x2 ) uiA = (iA << 30); else { fracA = iA; while ( !(fracA & mask) ) { log2--; fracA <<= 1; } k = (log2 >> 2); expA = (log2 & 0x3) << (27 - k); fracA = (fracA ^ mask); uiA = (0x7FFFFFFF ^ (0x3FFFFFFF >> k)) | expA | fracA>>(k+36); mask = 0x800000000 << k; //bitNPlusOne if (mask & fracA) { if (((mask - 1) & fracA) | ((mask << 1) & fracA)) uiA++; } } (sign) ? 
(uZ.ui = -uiA) : (uZ.ui = uiA); return uZ.p; } luametatex-2.10.08/source/libraries/softposit/source/i64_to_p8.c000066400000000000000000000056051442250314700245300ustar00rootroot00000000000000 /*============================================================================ This C source file is part of the SoftPosit Posit Arithmetic Package by S. H. Leong (Cerlane) and John Gustafson. Copyright 2017 2018 A*STAR. All rights reserved. This C source file was based on SoftFloat IEEE Floating-Point Arithmetic Package, Release 3d, by John R. Hauser. Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of California. All Rights Reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. =============================================================================*/ #include #include "platform.h" #include "internals.h" posit8_t i64_to_p8( int64_t iA ){ int_fast8_t k, log2 = 6;//length of bit union ui8_p8 uZ; uint_fast8_t uiA; uint_fast64_t mask = 0x40, fracA; bool sign; if (iA < -48){ //-48 to -MAX_INT rounds to P32 value -268435456 uZ.ui = 0x81; //-maxpos return uZ.p; } sign = iA>>63; if(sign){ iA = -iA; } if ( iA > 48 ) { uiA = 0x7F; } else if ( iA < 2 ){ uiA = (iA << 6); } else { fracA = iA; while ( !(fracA & mask) ) { log2--; fracA <<= 1; } k = log2; fracA = (fracA ^ mask); uiA = (0x7F ^ (0x3F >> k)) | ( fracA >> (k+1) ) ; mask = 0x1 << k; //bitNPlusOne if (mask & fracA) { if (((mask - 1) & fracA) | ((mask << 1) & fracA)) uiA++; } } (sign) ? (uZ.ui = -uiA &0xFF) : (uZ.ui = uiA); return uZ.p; } luametatex-2.10.08/source/libraries/softposit/source/i64_to_pX2.c000066400000000000000000000077311442250314700246540ustar00rootroot00000000000000 /*============================================================================ This C source file is part of the SoftPosit Posit Arithmetic Package by S. H. Leong (Cerlane). Copyright 2017 2018 A*STAR. All rights reserved. This C source file was based on SoftFloat IEEE Floating-Point Arithmetic Package, Release 3d, by John R. Hauser. Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of California. All rights reserved. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. =============================================================================*/ #include "platform.h" #include "internals.h" posit_2_t i64_to_pX2( int64_t a, int x ) { int_fast8_t k, log2 = 63;//length of bit (e.g. 
18445618173802707967) in int (64 but because we have only 64 bits, so one bit off to accomdate that fact) union ui32_pX2 uZ; uint_fast64_t uiA=0; uint_fast64_t mask = 0x8000000000000000, frac64A; uint_fast32_t expA; bool sign; sign = a>>63; if(sign) a = -a; //NaR if (a == 0x8000000000000000 || x<2 || x>32 ) uiA = 0x80000000; else if (x==2){ if (a>0) uiA=0x40000000; } else if ( a > 0x7FFDFFFFFFFFFFFF){//9222809086901354495 uiA = 0x7FFFB000; // P32: 9223372036854775808 if (x<18) uiA&=((int32_t)0x80000000>>(x-1)); } else if ( a < 0x2 ) uiA = (a << 30); else { frac64A = a; while ( !(frac64A & mask) ) { log2--; frac64A <<= 1; } k = (log2 >> 2); expA = (log2 & 0x3); frac64A = (frac64A ^ mask); if(k>=(x-2)){//maxpos uiA= 0x7FFFFFFF & ((int32_t)0x80000000>>(x-1)); } else if (k==(x-3)){//bitNPlusOne-> first exp bit //bitLast is zero uiA = (0x7FFFFFFF ^ (0x3FFFFFFF >> k)); if( (expA & 0x2) && ((expA&0x1) | frac64A) ) //bitNPlusOne //bitsMore uiA |= ((uint32_t)0x80000000>>(x-1)); } else if (k==(x-4)){ uiA = (0x7FFFFFFF ^ (0x3FFFFFFF >> k)) | ((expA &0x2)<< (27 - k)); if(expA&0x1){ if( (((uint32_t)0x80000000>>(x-1)) & uiA)|| frac64A) uiA += ((uint32_t)0x80000000>>(x-1)); } } else if (k==(x-5)){ uiA = (0x7FFFFFFF ^ (0x3FFFFFFF >> k)) | (expA<< (27 - k)); mask = (uint64_t)0x800000000 << (k + 32-x); if (mask & frac64A){ //bitNPlusOne if (((mask - 1) & frac64A) | (expA&0x1)) { uiA+= ((uint32_t)0x80000000>>(x-1)); } } } else{ uiA = (0x7FFFFFFF ^ (0x3FFFFFFF >> k)) | (expA<< (27 - k)) | ((frac64A>>(k+36)) & ((int32_t)0x80000000>>(x-1))); mask = (uint64_t)0x800000000 << (k + 32-x); //bitNPlusOne position if (mask & frac64A) { if (((mask - 1) & frac64A) | ((mask << 1) & frac64A)) { uiA+= ((uint32_t)0x80000000>>(x-1)); } } } } (sign) ? 
(uZ.ui = -uiA) : (uZ.ui = uiA); return uZ.p; } luametatex-2.10.08/source/libraries/softposit/source/include/000077500000000000000000000000001442250314700242665ustar00rootroot00000000000000luametatex-2.10.08/source/libraries/softposit/source/include/internals.h000066400000000000000000000154001442250314700264360ustar00rootroot00000000000000 /*============================================================================ This C header file is part of the SoftPosit Posit Arithmetic Package by S. H. Leong (Cerlane). Copyright 2017, 2018 A*STAR. All rights reserved. This C header file is part of the SoftFloat IEEE Floating-Point Arithmetic Package, Release 3d, by John R. Hauser. Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the University of California. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. =============================================================================*/ #ifndef internals_h #define internals_h 1 #ifdef __cplusplus extern "C"{ #endif #include "primitives.h" #include "softposit.h" #include "softposit_types.h" #include #ifdef SOFTPOSIT_QUAD #include #endif enum { softposit_mulAdd_subC = 1, softposit_mulAdd_subProd = 2 }; /*---------------------------------------------------------------------------- *----------------------------------------------------------------------------*/ #define signP8UI( a ) ((bool) ((uint8_t) (a)>>7)) #define signregP8UI( a ) ((bool) (((uint8_t) (a)>>6) & 0x1)) #define packToP8UI( regime, fracA) ((uint8_t) regime + ((uint8_t)(fracA)) ) posit8_t softposit_addMagsP8( uint_fast8_t, uint_fast8_t ); posit8_t softposit_subMagsP8( uint_fast8_t, uint_fast8_t ); posit8_t softposit_mulAddP8( uint_fast8_t, uint_fast8_t, uint_fast8_t, uint_fast8_t ); /*---------------------------------------------------------------------------- *----------------------------------------------------------------------------*/ #define signP16UI( a ) ( (bool) ( ( uint16_t ) (a)>>15 ) ) #define signregP16UI( a ) ( (bool) (((uint16_t) (a)>>14) & 0x1) ) #define expP16UI( a, regA ) ((int_fast8_t) ((a)>>(13-regA) & 0x0001)) #define packToP16UI( regime, regA, expA, fracA) ((uint16_t) regime + ((uint16_t) (expA)<< (13-regA)) + ((uint16_t)(fracA)) ) posit16_t softposit_addMagsP16( uint_fast16_t, uint_fast16_t ); posit16_t softposit_subMagsP16( uint_fast16_t, 
uint_fast16_t ); posit16_t softposit_mulAddP16( uint_fast16_t, uint_fast16_t, uint_fast16_t, uint_fast16_t ); /*---------------------------------------------------------------------------- *----------------------------------------------------------------------------*/ #define signP32UI( a ) ((bool) ((uint32_t) (a)>>31)) #define signregP32UI( a ) ((bool) (((uint32_t) (a)>>30) & 0x1)) #define packToP32UI(regime, expA, fracA) ( (uint32_t) regime + (uint32_t) expA + ((uint32_t)(fracA)) ) posit32_t softposit_addMagsP32( uint_fast32_t, uint_fast32_t ); posit32_t softposit_subMagsP32( uint_fast32_t, uint_fast32_t ); posit32_t softposit_mulAddP32( uint_fast32_t, uint_fast32_t, uint_fast32_t, uint_fast32_t ); /*---------------------------------------------------------------------------- *----------------------------------------------------------------------------*/ posit_2_t softposit_addMagsPX2( uint_fast32_t, uint_fast32_t, int ); posit_2_t softposit_subMagsPX2( uint_fast32_t, uint_fast32_t, int ); posit_2_t softposit_mulAddPX2( uint_fast32_t, uint_fast32_t, uint_fast32_t, uint_fast32_t, int ); /*---------------------------------------------------------------------------- *----------------------------------------------------------------------------*/ posit_1_t softposit_addMagsPX1( uint_fast32_t, uint_fast32_t, int); posit_1_t softposit_subMagsPX1( uint_fast32_t, uint_fast32_t, int); posit_1_t softposit_mulAddPX1( uint_fast32_t, uint_fast32_t, uint_fast32_t, uint_fast32_t, int ); /*uint_fast16_t reglengthP32UI (uint32_t); int_fast16_t regkP32UI(bool, uint_fast32_t); #define expP32UI( a, regA ) ((int_fast16_t) ((a>>(28-regA)) & 0x2)) #define regP32UI( a, regLen ) ( ((( uint_fast32_t ) (a) & (0x7FFFFFFF)) >> (30-regLen))) ) #define isNaRP32UI( a ) ( ((a) ^ 0x80000000) == 0 ) #define useed32P 16; //int_fast16_t expP32UI(uint32_t); #define expP32sizeUI 2; uint_fast32_t fracP32UI(uint_fast32_t, uint_fast16_t);*/ /*posit32_t convertDecToP32(posit32); posit32_t 
convertfloatToP32(float); posit32_t convertdoubleToP32(double ); //posit32_t convertQuadToP32(__float128); //__float128 convertP32ToQuadDec(posit32_t); //posit32_t c_roundPackToP32( bool, bool, int_fast16_t, int_fast16_t, uint_fast16_t, bool, bool ); //#define isNaNP32UI( a ) (((~(a) & 0x7F800000) == 0) && ((a) & 0x007FFFFF)) //posit32_t softposit_roundPackToP32( bool, int_fast16_t, uint_fast32_t ); //posit32_t softposit_normRoundPackToP32( bool, int_fast16_t, uint_fast32_t ); posit32_t softposit_addMagsP32( uint_fast32_t, uint_fast32_t ); posit32_t softposit_subMagsP32( uint_fast32_t, uint_fast32_t ); posit32_t softposit_mulAddP32(uint_fast32_t, uint_fast32_t, uint_fast32_t, uint_fast16_t); //quire32_t quire32_add(quire32_t, quire32_t); //quire32_t quire32_sub(quire32_t, quire32_t); quire32_t quire32_mul(posit32_t, posit32_t); quire32_t q32_fdp_add(quire32_t, posit32_t, posit32_t); quire32_t q32_fdp_sub(quire32_t, posit32_t, posit32_t); posit32_t convertQ32ToP32(quire32_t); #define isNaRQ32( q ) ( q.v[0]==0x8000000000000000ULL && q.v[1]==0 && q.v[2]==0 && q.v[3]==0 && q.v[4]==0 && q.v[5]==0 && q.v[6]==0 && q.v[7]==0) #define isQ32Zero(q) (q.v[0]==0 && q.v[1]==0 && q.v[2]==0 && q.v[3]==0 && q.v[4]==0 && q.v[5]==0 && q.v[6]==0 && q.v[7]==0) quire32_t q32_TwosComplement(quire32_t); #define q32_clr(q) ({\ q.v[0]=0;\ q.v[1]=0;\ q.v[2]=0;\ q.v[3]=0;\ q.v[4]=0;\ q.v[5]=0;\ q.v[6]=0;\ q.v[7]=0;\ q;\ }) */ #ifdef __cplusplus } #endif #endif luametatex-2.10.08/source/libraries/softposit/source/include/primitives.h000066400000000000000000000042171442250314700266360ustar00rootroot00000000000000 /*============================================================================ This C header file is part of the SoftPosit Posit Arithmetic Package by S. H. Leong (Cerlane). Copyright 2017, 2018 A*STAR. All rights reserved. This C header file is part of the SoftFloat IEEE Floating-Point Arithmetic Package, Release 3d, by John R. Hauser. 
Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the University of California. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
=============================================================================*/ #ifndef primitives_h #define primitives_h 1 #include #include extern const uint_fast16_t softposit_approxRecipSqrt0[16]; extern const uint_fast16_t softposit_approxRecipSqrt1[16]; #endif luametatex-2.10.08/source/libraries/softposit/source/include/softposit.h000066400000000000000000000406661442250314700265050ustar00rootroot00000000000000/*============================================================================ This C header file is part of the SoftPosit Posit Arithmetic Package by S. H. Leong (Cerlane). Copyright 2017, 2018 A*STAR. All rights reserved. This C header file was based on SoftFloat IEEE Floating-Point Arithmetic Package, Release 3d, by John R. Hauser. Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the University of California. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. =============================================================================*/ /*============================================================================ | Note: If SoftPosit is modified from SoftFloat and is made available as a | general library for programs to use, it is strongly recommended that a | platform-specific version of this header, "softposit.h", be created that | folds in "softposit_types.h" and that eliminates all dependencies on | compile-time macros. *============================================================================*/ #ifndef softposit_h #define softposit_h 1 #ifdef __cplusplus extern "C"{ #endif #include #include #ifdef SOFTPOSIT_QUAD #include #endif #include "softposit_types.h" #include #ifndef THREAD_LOCAL #define THREAD_LOCAL #endif #define castUI( a ) ( (a).v ) /*---------------------------------------------------------------------------- | Integer-to-posit conversion routines. 
*----------------------------------------------------------------------------*/ posit8_t ui32_to_p8( uint32_t ); posit16_t ui32_to_p16( uint32_t ); posit32_t ui32_to_p32( uint32_t ); //posit64_t ui32_to_p64( uint32_t ); posit8_t ui64_to_p8( uint64_t ); posit16_t ui64_to_p16( uint64_t ); posit32_t ui64_to_p32( uint64_t ); //posit64_t ui64_to_p64( uint64_t ); posit8_t i32_to_p8( int32_t ); posit16_t i32_to_p16( int32_t ); posit32_t i32_to_p32( int32_t ); //posit64_t i32_to_p64( int32_t ); posit8_t i64_to_p8( int64_t ); posit16_t i64_to_p16( int64_t ); posit32_t i64_to_p32( int64_t ); //posit64_t i64_to_p64( int64_t ); /*---------------------------------------------------------------------------- | 8-bit (quad-precision) posit operations. *----------------------------------------------------------------------------*/ #define isNaRP8UI( a ) ( ((a) ^ 0x80) == 0 ) uint_fast32_t p8_to_ui32( posit8_t ); uint_fast64_t p8_to_ui64( posit8_t ); int_fast32_t p8_to_i32( posit8_t); int_fast64_t p8_to_i64( posit8_t); posit16_t p8_to_p16( posit8_t ); posit32_t p8_to_p32( posit8_t ); //posit64_t p8_to_p64( posit8_t ); posit_1_t p8_to_pX1( posit8_t, int ); posit_2_t p8_to_pX2( posit8_t, int ); posit8_t p8_roundToInt( posit8_t ); posit8_t p8_add( posit8_t, posit8_t ); posit8_t p8_sub( posit8_t, posit8_t ); posit8_t p8_mul( posit8_t, posit8_t ); posit8_t p8_mulAdd( posit8_t, posit8_t, posit8_t ); posit8_t p8_div( posit8_t, posit8_t ); posit8_t p8_sqrt( posit8_t ); bool p8_eq( posit8_t, posit8_t ); bool p8_le( posit8_t, posit8_t ); bool p8_lt( posit8_t, posit8_t ); //Quire 8 quire8_t q8_fdp_add(quire8_t, posit8_t, posit8_t); quire8_t q8_fdp_sub(quire8_t, posit8_t, posit8_t); posit8_t q8_to_p8(quire8_t); #define isNaRQ8( q ) ( (q).v==0x80000000 ) #define isQ8Zero(q) ( (q).v==0 ) int_fast64_t p8_int( posit8_t ); #define q8_clr(q) ({\ (q).v=0;\ q;\ }) static inline quire8_t q8Clr(){ quire8_t q; q.v=0; return q; } #define castQ8(a)({\ union ui32_q8 uA;\ uA.ui = (a);\ uA.q;\ }) #define 
castP8(a)({\ union ui8_p8 uA;\ uA.ui = (a);\ uA.p;\ }) #define negP8(a)({\ union ui8_p8 uA;\ uA.p = (a);\ uA.ui = -uA.ui&0xFF;\ uA.p; \ }) #define absP8(a)({\ union ui8_p8 uA;\ uA.p = (a);\ int mask = uA.ui >> 7;\ uA.ui = ((uA.ui + mask) ^ mask)&0xFF;\ uA.p; \ }) //Helper double convertP8ToDouble(posit8_t); posit8_t convertDoubleToP8(double); /*---------------------------------------------------------------------------- | 16-bit (half-precision) posit operations. *----------------------------------------------------------------------------*/ #define isNaRP16UI( a ) ( ((a) ^ 0x8000) == 0 ) uint_fast32_t p16_to_ui32( posit16_t ); uint_fast64_t p16_to_ui64( posit16_t ); int_fast32_t p16_to_i32( posit16_t); int_fast64_t p16_to_i64( posit16_t ); posit8_t p16_to_p8( posit16_t ); posit32_t p16_to_p32( posit16_t ); //posit64_t p16_to_p64( posit16_t ); posit_1_t p16_to_pX1( posit16_t, int ); posit_2_t p16_to_pX2( posit16_t, int ); posit16_t p16_roundToInt( posit16_t); posit16_t p16_add( posit16_t, posit16_t ); posit16_t p16_sub( posit16_t, posit16_t ); posit16_t p16_mul( posit16_t, posit16_t ); posit16_t p16_mulAdd( posit16_t, posit16_t, posit16_t ); posit16_t p16_div( posit16_t, posit16_t ); posit16_t p16_sqrt( posit16_t ); bool p16_eq( posit16_t, posit16_t ); bool p16_le( posit16_t, posit16_t ); bool p16_lt( posit16_t, posit16_t ); #ifdef SOFTPOSIT_QUAD __float128 convertP16ToQuadDec(posit16_t); posit16_t convertQuadToP16(__float128); #endif //Quire 16 quire16_t q16_fdp_add(quire16_t, posit16_t, posit16_t); quire16_t q16_fdp_sub(quire16_t, posit16_t, posit16_t); posit16_t convertQ16ToP16(quire16_t); posit16_t q16_to_p16(quire16_t); #define isNaRQ16( q ) ( (q).v[0]==0x8000000000000000ULL && (q).v[1]==0 ) #define isQ16Zero(q) (q.v[0]==0 && q.v[1]==0) quire16_t q16_TwosComplement(quire16_t); int_fast64_t p16_int( posit16_t); void printBinary(uint64_t*, int); void printBinaryPX(uint32_t*, int); void printHex(uint64_t); void printHex64(uint64_t); void printHexPX(uint32_t, 
int); #define q16_clr(q) ({\ (q).v[0]=0;\ (q).v[1]=0;\ q;\ }) static inline quire16_t q16Clr(){ quire16_t q; q.v[0]=0; q.v[1]=0; return q; } #define castQ16(l, r)({\ union ui128_q16 uA;\ uA.ui[0] = l; \ uA.ui[1] = r; \ uA.q;\ }) #define castP16(a)({\ union ui16_p16 uA;\ uA.ui = (a);\ uA.p;\ }) #define negP16(a)({\ union ui16_p16 uA;\ uA.p = (a);\ uA.ui = -uA.ui&0xFFFF;\ uA.p; \ }) #define absP16(a)({\ union ui16_p16 uA;\ uA.p = (a);\ int mask = uA.ui >> 15;\ uA.ui = ((uA.ui + mask) ^ mask)&0xFFFF;\ uA.p; \ }) //Helper double convertP16ToDouble(posit16_t); posit16_t convertFloatToP16(float); posit16_t convertDoubleToP16(double); /*---------------------------------------------------------------------------- | 32-bit (single-precision) posit operations. *----------------------------------------------------------------------------*/ uint_fast32_t p32_to_ui32( posit32_t ); uint_fast64_t p32_to_ui64( posit32_t); int_fast32_t p32_to_i32( posit32_t ); int_fast64_t p32_to_i64( posit32_t ); posit8_t p32_to_p8( posit32_t ); posit16_t p32_to_p16( posit32_t ); //posit64_t p32_to_p64( posit32_t ); posit32_t p32_roundToInt( posit32_t ); posit32_t p32_add( posit32_t, posit32_t ); posit32_t p32_sub( posit32_t, posit32_t ); posit32_t p32_mul( posit32_t, posit32_t ); posit32_t p32_mulAdd( posit32_t, posit32_t, posit32_t ); posit32_t p32_div( posit32_t, posit32_t ); posit32_t p32_sqrt( posit32_t ); bool p32_eq( posit32_t, posit32_t ); bool p32_le( posit32_t, posit32_t ); bool p32_lt( posit32_t, posit32_t ); posit_1_t p32_to_pX1( posit32_t, int); posit_2_t p32_to_pX2( posit32_t, int ); #define isNaRP32UI( a ) ( ((a) ^ 0x80000000) == 0 ) int64_t p32_int( posit32_t); #ifdef SOFTPOSIT_QUAD __float128 convertP32ToQuad(posit32_t); posit32_t convertQuadToP32(__float128); #endif quire32_t q32_fdp_add(quire32_t, posit32_t, posit32_t); quire32_t q32_fdp_sub(quire32_t, posit32_t, posit32_t); posit32_t q32_to_p32(quire32_t); #define isNaRQ32( q ) ( q.v[0]==0x8000000000000000ULL && q.v[1]==0 && 
q.v[2]==0 && q.v[3]==0 && q.v[4]==0 && q.v[5]==0 && q.v[6]==0 && q.v[7]==0) #define isQ32Zero(q) (q.v[0]==0 && q.v[1]==0 && q.v[2]==0 && q.v[3]==0 && q.v[4]==0 && q.v[5]==0 && q.v[6]==0 && q.v[7]==0) quire32_t q32_TwosComplement(quire32_t); #define q32_clr(q) ({\ q.v[0]=0;\ q.v[1]=0;\ q.v[2]=0;\ q.v[3]=0;\ q.v[4]=0;\ q.v[5]=0;\ q.v[6]=0;\ q.v[7]=0;\ q;\ }) static inline quire32_t q32Clr(){ quire32_t q; q.v[0]=0; q.v[1]=0; q.v[2]=0; q.v[3]=0; q.v[4]=0; q.v[5]=0; q.v[6]=0; q.v[7]=0; return q; } #define castQ32(l0, l1, l2, l3, l4, l5, l6, l7)({\ union ui512_q32 uA;\ uA.ui[0] = l0; \ uA.ui[1] = l1; \ uA.ui[2] = l2; \ uA.ui[3] = l3; \ uA.ui[4] = l4; \ uA.ui[5] = l5; \ uA.ui[6] = l6; \ uA.ui[7] = l7; \ uA.q;\ }) #define castP32(a)({\ posit32_t pA = {.v = (a)};\ pA; \ }) #define negP32(a)({\ union ui32_p32 uA;\ uA.p = (a);\ uA.ui = -uA.ui&0xFFFFFFFF;\ uA.p; \ }) #define absP32(a)({\ union ui32_p32 uA;\ uA.p = (a);\ int mask = uA.ui >> 31; \ uA.ui = ((uA.ui + mask) ^ mask)&0xFFFFFFFF; \ uA.p; \ }) //Helper double convertP32ToDouble(posit32_t); posit32_t convertFloatToP32(float); posit32_t convertDoubleToP32(double); /*---------------------------------------------------------------------------- | Dyanamic 2 to 32-bit Posits for es = 2 *----------------------------------------------------------------------------*/ posit_2_t pX2_add( posit_2_t, posit_2_t, int); posit_2_t pX2_sub( posit_2_t, posit_2_t, int); posit_2_t pX2_mul( posit_2_t, posit_2_t, int); posit_2_t pX2_div( posit_2_t, posit_2_t, int); posit_2_t pX2_mulAdd( posit_2_t, posit_2_t, posit_2_t, int); posit_2_t pX2_roundToInt( posit_2_t, int ); posit_2_t ui32_to_pX2( uint32_t, int ); posit_2_t ui64_to_pX2( uint64_t, int ); posit_2_t i32_to_pX2( int32_t, int ); posit_2_t i64_to_pX2( int64_t, int ); posit_2_t pX2_sqrt( posit_2_t, int ); uint_fast32_t pX2_to_ui32( posit_2_t ); uint_fast64_t pX2_to_ui64( posit_2_t ); int_fast32_t pX2_to_i32( posit_2_t ); int_fast64_t pX2_to_i64( posit_2_t ); int64_t pX2_int( posit_2_t ); 
bool pX2_eq( posit_2_t, posit_2_t); bool pX2_le( posit_2_t, posit_2_t); bool pX2_lt( posit_2_t, posit_2_t); posit8_t pX2_to_p8( posit_2_t ); posit16_t pX2_to_p16( posit_2_t ); posit_2_t pX2_to_pX2( posit_2_t, int); posit_1_t pX2_to_pX1( posit_2_t, int); static inline posit32_t pX2_to_p32(posit_2_t pA){ posit32_t p32 = {.v = pA.v}; return p32; } #define isNaRPX2UI( a ) ( ((a) ^ 0x80000000) == 0 ) //Helper posit_2_t convertDoubleToPX2(double, int); double convertPX2ToDouble(posit_2_t); #ifdef SOFTPOSIT_QUAD __float128 convertPX2ToQuad(posit_2_t); posit_2_t convertQuadToPX2(__float128, int); #endif quire_2_t qX2_fdp_add( quire_2_t q, posit_2_t pA, posit_2_t ); quire_2_t qX2_fdp_sub( quire_2_t q, posit_2_t pA, posit_2_t ); posit_2_t qX2_to_pX2(quire_2_t, int); #define isNaRQX2( q ) ( q.v[0]==0x8000000000000000ULL && q.v[1]==0 && q.v[2]==0 && q.v[3]==0 && q.v[4]==0 && q.v[5]==0 && q.v[6]==0 && q.v[7]==0) #define isQX2Zero(q) (q.v[0]==0 && q.v[1]==0 && q.v[2]==0 && q.v[3]==0 && q.v[4]==0 && q.v[5]==0 && q.v[6]==0 && q.v[7]==0) quire_2_t qX2_TwosComplement(quire_2_t); #define qX2_clr(q) ({\ q.v[0]=0;\ q.v[1]=0;\ q.v[2]=0;\ q.v[3]=0;\ q.v[4]=0;\ q.v[5]=0;\ q.v[6]=0;\ q.v[7]=0;\ q;\ }) static inline quire_2_t qX2Clr(){ quire_2_t q; q.v[0]=0; q.v[1]=0; q.v[2]=0; q.v[3]=0; q.v[4]=0; q.v[5]=0; q.v[6]=0; q.v[7]=0; return q; } #define castQX2(l0, l1, l2, l3, l4, l5, l6, l7)({\ union ui512_qX2 uA;\ uA.ui[0] = l0; \ uA.ui[1] = l1; \ uA.ui[2] = l2; \ uA.ui[3] = l3; \ uA.ui[4] = l4; \ uA.ui[5] = l5; \ uA.ui[6] = l6; \ uA.ui[7] = l7; \ uA.q;\ }) #define castPX2(a)({\ posit_2_t pA = {.v = (a)};\ pA; \ }) #define negPX2(a)({\ union ui32_pX2 uA;\ uA.p = (a);\ uA.ui = -uA.ui&0xFFFFFFFF;\ uA.p; \ }) #define absPX2(a)({\ union ui32_pX2 uA;\ uA.p = (a);\ int mask = uA.ui >> 31; \ uA.ui = ((uA.ui + mask) ^ mask)&0xFFFFFFFF; \ uA.p; \ }) /*---------------------------------------------------------------------------- | Dyanamic 2 to 32-bit Posits for es = 1 
*----------------------------------------------------------------------------*/ posit_1_t pX1_add( posit_1_t, posit_1_t, int); posit_1_t pX1_sub( posit_1_t, posit_1_t, int); posit_1_t pX1_mul( posit_1_t, posit_1_t, int); posit_1_t pX1_div( posit_1_t, posit_1_t, int); posit_1_t pX1_mulAdd( posit_1_t, posit_1_t, posit_1_t, int); posit_1_t pX1_roundToInt( posit_1_t, int ); posit_1_t ui32_to_pX1( uint32_t, int ); posit_1_t ui64_to_pX1( uint64_t, int ); posit_1_t i32_to_pX1( int32_t, int ); posit_1_t i64_to_pX1( int64_t, int ); posit_1_t pX1_sqrt( posit_1_t, int ); uint_fast32_t pX1_to_ui32( posit_1_t ); uint_fast64_t pX1_to_ui64( posit_1_t ); int_fast32_t pX1_to_i32( posit_1_t ); int_fast64_t pX1_to_i64( posit_1_t ); int64_t pX1_int( posit_1_t ); bool pX1_eq( posit_1_t, posit_1_t); bool pX1_le( posit_1_t, posit_1_t); bool pX1_lt( posit_1_t, posit_1_t); posit8_t pX1_to_p8( posit_1_t ); posit16_t pX1_to_p16( posit_1_t ); posit32_t pX1_to_p32( posit_1_t ); posit_1_t pX1_to_pX1( posit_1_t, int); posit_2_t pX1_to_pX2( posit_1_t, int); #define isNaRpX1UI( a ) ( ((a) ^ 0x80000000) == 0 ) //Helper posit_1_t convertDoubleToPX1(double, int); double convertPX1ToDouble(posit_1_t); #ifdef SOFTPOSIT_QUAD __float128 convertPX1ToQuad(posit_1_t); posit_1_t convertQuadToPX1(__float128, int); #endif quire_1_t qX1_fdp_add( quire_1_t q, posit_1_t pA, posit_1_t ); quire_1_t qX1_fdp_sub( quire_1_t q, posit_1_t pA, posit_1_t ); posit_1_t qX1_to_pX1(quire_1_t, int); #define isNaRqX1( q ) ( q.v[0]==0x8000000000000000ULL && q.v[1]==0 && q.v[2]==0 && q.v[3]==0 && q.v[4]==0 && q.v[5]==0 && q.v[6]==0 && q.v[7]==0) #define isqX1Zero(q) (q.v[0]==0 && q.v[1]==0 && q.v[2]==0 && q.v[3]==0 && q.v[4]==0 && q.v[5]==0 && q.v[6]==0 && q.v[7]==0) quire_1_t qX1_TwosComplement(quire_1_t); #define qX1_clr(q) ({\ q.v[0]=0;\ q.v[1]=0;\ q.v[2]=0;\ q.v[3]=0;\ q.v[4]=0;\ q.v[5]=0;\ q.v[6]=0;\ q.v[7]=0;\ q;\ }) static inline quire_1_t qX1Clr(){ quire_1_t q; q.v[0]=0; q.v[1]=0; q.v[2]=0; q.v[3]=0; q.v[4]=0; q.v[5]=0; 
q.v[6]=0; q.v[7]=0; return q; } #define castqX1(l0, l1, l2, l3, l4, l5, l6, l7)({\ union ui512_qX1 uA;\ uA.ui[0] = l0; \ uA.ui[1] = l1; \ uA.ui[2] = l2; \ uA.ui[3] = l3; \ uA.ui[4] = l4; \ uA.ui[5] = l5; \ uA.ui[6] = l6; \ uA.ui[7] = l7; \ uA.q;\ }) #define castpX1(a)({\ posit_1_t pA = {.v = (a)};\ pA; \ }) #define negpX1(a)({\ union ui32_pX1 uA;\ uA.p = (a);\ uA.ui = -uA.ui&0xFFFFFFFF;\ uA.p; \ }) #define absPX1(a)({\ union ui32_pX1 uA;\ uA.p = (a);\ int mask = uA.ui >> 31; \ uA.ui = ((uA.ui + mask) ^ mask)&0xFFFFFFFF;\ uA.p; \ }) /*---------------------------------------------------------------------------- | 64-bit (double-precision) floating-point operations. *----------------------------------------------------------------------------*/ /*uint_fast32_t p64_to_ui32( posit64_t, uint_fast16_t, bool ); uint_fast64_t p64_to_ui64( posit64_t, uint_fast16_t, bool ); int_fast32_t p64_to_i32( posit64_t, uint_fast16_t, bool ); int_fast64_t p64_to_i64( posit64_t, uint_fast16_t, bool ); posit8_t p64_to_p8( posit64_t ); posit16_t p64_to_p16( posit64_t ); posit32_t p64_to_p32( posit64_t ); posit64_t p64_roundToInt( posit64_t, uint_fast16_t, bool ); posit64_t p64_add( posit64_t, posit64_t ); posit64_t p64_sub( posit64_t, posit64_t ); posit64_t p64_mul( posit64_t, posit64_t ); posit64_t p64_mulAdd( posit64_t, posit64_t, posit64_t ); posit64_t p64_div( posit64_t, posit64_t ); posit64_t p64_rem( posit64_t, posit64_t ); posit64_t p64_sqrt( posit64_t ); bool p64_eq( posit64_t, posit64_t ); bool p64_le( posit64_t, posit64_t );*/ #ifdef __cplusplus } #endif #endif luametatex-2.10.08/source/libraries/softposit/source/include/softposit_cpp.h000066400000000000000000001154041442250314700273400ustar00rootroot00000000000000/* Author: S.H. 
Leong (Cerlane) Copyright (c) 2018 Next Generation Arithmetic Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ #ifndef INCLUDE_SOFTPOSIT_CPP_H_ #define INCLUDE_SOFTPOSIT_CPP_H_ #include #include "softposit.h" #include "math.h" //#include "positMath.h" #ifdef __cplusplus struct posit8{ uint8_t value; posit8(double x=0) : value(castUI(convertDoubleToP8(x))) { } //Equal posit8& operator=(const double a) { value = castUI(convertDoubleToP8(a)); return *this; } posit8& operator=(const int a) { value = castUI(i32_to_p8(a)); return *this; } //Add posit8 operator+(const posit8 &a) const{ posit8 ans; ans.value = castUI(p8_add(castP8(value), castP8(a.value))); return ans; } //Add equal posit8& operator+=(const posit8 &a) { value = castUI(p8_add(castP8(value), castP8(a.value))); return *this; } //Subtract posit8 operator-(const posit8 &a) const{ posit8 ans; ans.value = castUI(p8_sub(castP8(value), castP8(a.value))); return ans; } //Subtract equal posit8& operator-=(const posit8 &a) { value = castUI(p8_sub(castP8(value), castP8(a.value))); return *this; } //Multiply posit8 operator*(const posit8 &a) const{ posit8 ans; ans.value = castUI(p8_mul(castP8(value), castP8(a.value))); return ans; } //Multiply equal posit8& operator*=(const posit8 &a) { value = castUI(p8_mul(castP8(value), castP8(a.value))); return *this; } //Divide posit8 operator/(const posit8 &a) const{ posit8 ans; ans.value = castUI(p8_div(castP8(value), castP8(a.value))); return ans; } //Divide equal posit8& operator/=(const posit8 &a) { value = castUI(p8_div(castP8(value), castP8(a.value))); return *this; } //less than bool operator<(const posit8 &a) const{ return p8_lt(castP8(value), castP8(a.value)); } //less than equal bool operator<=(const posit8 &a) const{ return p8_le(castP8(value), castP8(a.value)); } //equal bool operator==(const posit8 &a) const{ return p8_eq(castP8(value), castP8(a.value)); } //Not equalCPP bool operator!=(const posit8 &a) const{ return !p8_eq(castP8(value), castP8(a.value)); } //greater than bool operator>(const posit8 &a) const{ return p8_lt(castP8(a.value), castP8(value)); } //greater than 
equal bool operator>=(const posit8 &a) const{ return p8_le(castP8(a.value), castP8(value)); } //plus plus posit8& operator++() { value = castUI(p8_add(castP8(value), castP8(0x40))); return *this; } //minus minus posit8& operator--() { value = castUI(p8_sub(castP8(value), castP8(0x40))); return *this; } //Binary operators posit8 operator>>(const int &x) { posit8 ans; ans.value = value>>x; return ans; } posit8& operator>>=(const int &x) { value = value>>x; return *this; } posit8 operator<<(const int &x) { posit8 ans; ans.value = (value<(const posit16 &a) const{ return p16_lt(castP16(a.value), castP16(value)); } //greater than equal bool operator>=(const posit16 &a) const{ return p16_le(castP16(a.value), castP16(value)); } //plus plus posit16& operator++() { value = castUI(p16_add(castP16(value), castP16(0x4000))); return *this; } //minus minus posit16& operator--() { value = castUI(p16_sub(castP16(value), castP16(0x4000))); return *this; } //Binary operators posit16 operator>>(const int &x) { posit16 ans; ans.value = value>>x; return ans; } posit16& operator>>=(const int &x) { value = value>>x; return *this; } posit16 operator<<(const int &x) { posit16 ans; ans.value = (value<(const posit32 &a) const{ return p32_lt(castP32(a.value), castP32(value)); } //greater than equal bool operator>=(const posit32 &a) const{ return p32_le(castP32(a.value), castP32(value)); } //plus plus posit32& operator++() { value = castUI(p32_add(castP32(value), castP32(0x40000000))); return *this; } //minus minus posit32& operator--() { value = castUI(p32_sub(castP32(value), castP32(0x40000000))); return *this; } //Binary operators posit32 operator>>(const int &x) { posit32 ans; ans.value = value>>x; return ans; } posit32& operator>>=(const int &x) { value = value>>x; return *this; } posit32 operator<<(const int &x) { posit32 ans; ans.value = (value<(const posit_2 &a) const{ return pX2_lt(castPX2(a.value), castPX2(value)); } //greater than equal bool operator>=(const posit_2 &a) const{ return 
pX2_le(castPX2(a.value), castPX2(value)); } //plus plus posit_2& operator++() { value = castUI(pX2_add(castPX2(value), castPX2(0x40000000), x)); return *this; } //minus minus posit_2& operator--() { value = castUI(pX2_sub(castPX2(value), castPX2(0x40000000), x)); return *this; } //Binary operators posit_2 operator>>(const int &x) { posit_2 ans; ans.value = (value>>x) & ((int32_t)0x80000000>>(x-1)); ans.x = x; return ans; } posit_2& operator>>=(const int &x) { value = (value>>x) & ((int32_t)0x80000000>>(x-1)); return *this; } posit_2 operator<<(const int &x) { posit_2 ans; ans.value = (value< /*---------------------------------------------------------------------------- | Types used to pass 16-bit, 32-bit, 64-bit, and 128-bit floating-point | arguments and results to/from functions. These types must be exactly | 16 bits, 32 bits, 64 bits, and 128 bits in size, respectively. Where a | platform has "native" support for IEEE-Standard floating-point formats, | the types below may, if desired, be defined as aliases for the native types | (typically 'float' and 'double', and possibly 'long double'). 
*----------------------------------------------------------------------------*/ #ifdef SOFTPOSIT_EXACT typedef struct { uint8_t v; bool exact; } posit8_t; typedef struct { uint_fast16_t v; bool exact; } posit16_t; typedef struct { uint32_t v; bool exact; } posit32_t; typedef struct { uint64_t v; bool exact; } posit64_t; typedef struct { uint64_t v[2]; bool exact; } posit128_t; typedef struct { uint64_t v[2]; bool exact; } quire16_t; #else typedef struct { uint8_t v; } posit8_t; typedef struct { uint16_t v; } posit16_t; typedef struct { uint32_t v; } posit32_t; typedef struct { uint64_t v; } posit64_t; typedef struct { uint64_t v[2]; } posit128_t; typedef struct { uint32_t v; } quire8_t; typedef struct { uint64_t v[2]; } quire16_t; typedef struct { uint64_t v[8]; } quire32_t; typedef struct { uint32_t v; } posit_2_t; typedef struct { uint32_t v; } posit_1_t; typedef struct { uint32_t v; } posit_0_t; typedef struct { uint64_t v[8]; } quire_2_t; typedef struct { uint64_t v[8]; } quire_1_t; typedef struct { uint64_t v[8]; } quire_0_t; #endif #ifdef SOFTPOSIT_EXACT typedef struct { uint8_t v; bool exact; } uint8e_t; typedef struct { uint16_t v; bool exact; } uint16e_t; typedef struct { uint32_t v; bool exact; } uint32e_t; typedef struct { uint64_t v; bool exact; } uint64e_t; typedef struct { uint64_t v[2]; bool exact; } uint128e_t; union ui8_p8 { uint8e_t ui; posit8_t p; }; union ui16_p16 { uint16e_t ui; posit16_t p; }; union ui32_p32 { uint32e_t ui; posit32_t p; }; union ui64_p64 { uint64e_t ui; posit64_t p; }; union ui128_q16 { uint64_t ui[2]; quire16_t q; }; #else union ui8_p8 { uint8_t ui; posit8_t p; }; union ui16_p16 { uint16_t ui; posit16_t p; }; union ui32_p32 { uint32_t ui; posit32_t p; }; union ui64_p64 { uint64_t ui; posit64_t p; }; union ui128_p128c {uint64_t ui[2]; posit128_t p;}; //c to differentiate from original implementation union ui32_pX2 { uint32_t ui; posit_2_t p; }; union ui32_pX1 { uint32_t ui; posit_1_t p; }; union ui32_pX0 { uint32_t ui; 
posit_1_t p; }; union ui64_double { uint64_t ui; double d; }; union ui32_q8 { uint32_t ui; // =0; // patched by HH because the compilers don't like this quire8_t q; }; union ui128_q16 { uint64_t ui[2]; // ={0,0}; // idem quire16_t q; }; union ui512_q32 { uint64_t ui[8]; // ={0,0,0,0, 0,0,0,0}; // idme quire32_t q; }; union ui512_qX2 { uint64_t ui[8]; // ={0,0,0,0, 0,0,0,0}; // idem quire_2_t q; }; union ui512_qX1 { uint64_t ui[8]; // ={0,0,0,0, 0,0,0,0}; // idem quire_1_t q; }; #endif #endif luametatex-2.10.08/source/libraries/softposit/source/p16_add.c000066400000000000000000000047471442250314700242410ustar00rootroot00000000000000 /*============================================================================ This C source file is part of the SoftPosit Posit Arithmetic Package by S. H. Leong (Cerlane). Copyright 2017, 2018 A*STAR. All rights reserved. This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic Package, Release 3d, by John R. Hauser. Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of California. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. =============================================================================*/ #include "platform.h" #include "internals.h" posit16_t p16_add( posit16_t a, posit16_t b ){ union ui16_p16 uA, uB; uint_fast16_t uiA, uiB; union ui16_p16 uZ; uA.p = a; uiA = uA.ui; uB.p = b; uiB = uB.ui; //Zero or infinity if (uiA==0 || uiB==0){ // Not required but put here for speed uZ.ui = uiA | uiB; return uZ.p; } else if ( uiA==0x8000 || uiB==0x8000 ){ uZ.ui = 0x8000; return uZ.p; } //different signs if ((uiA^uiB)>>15) return softposit_subMagsP16(uiA, uiB); else return softposit_addMagsP16(uiA, uiB); } luametatex-2.10.08/source/libraries/softposit/source/p16_div.c000066400000000000000000000111731442250314700242620ustar00rootroot00000000000000 /*============================================================================ This C source file is part of the SoftPosit Posit Arithmetic Package by S. H. Leong (Cerlane). Copyright 2017, 2018 A*STAR. All rights reserved. This C source file was based on SoftFloat IEEE Floating-Point Arithmetic Package, Release 3d, by John R. Hauser. Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the University of California. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. 2. 
Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
=============================================================================*/ #include #include "platform.h" #include "internals.h" posit16_t p16_div( posit16_t pA, posit16_t pB ) { union ui16_p16 uA, uB, uZ; uint_fast16_t uiA, uiB, fracA, fracB, regA, regime, tmp; bool signA, signB, signZ, regSA, regSB, bitNPlusOne=0, bitsMore=0, rcarry; int_fast8_t expA, kA=0; uint_fast32_t frac32A, frac32Z, rem; div_t divresult; uA.p = pA; uiA = uA.ui; uB.p = pB; uiB = uB.ui; //Zero or infinity if ( uiA==0x8000 || uiB==0x8000 || uiB==0){ #ifdef SOFTPOSIT_EXACT uZ.ui.v = 0x8000; uZ.ui.exact = 0; #else uZ.ui = 0x8000; #endif return uZ.p; } else if (uiA==0){ #ifdef SOFTPOSIT_EXACT uZ.ui.v = 0; if ( (uiA==0 && uiA.ui.exact) || (uiB==0 && uiB.ui.exact) ) uZ.ui.exact = 1; else uZ.ui.exact = 0; #else uZ.ui = 0; #endif return uZ.p; } signA = signP16UI( uiA ); signB = signP16UI( uiB ); signZ = signA ^ signB; if(signA) uiA = (-uiA & 0xFFFF); if(signB) uiB = (-uiB & 0xFFFF); regSA = signregP16UI(uiA); regSB = signregP16UI(uiB); tmp = (uiA<<2) & 0xFFFF; if (regSA){ while (tmp>>15){ kA++; tmp= (tmp<<1) & 0xFFFF; } } else{ kA=-1; while (!(tmp>>15)){ kA--; tmp= (tmp<<1) & 0xFFFF; } tmp&=0x7FFF; } expA = tmp>>14; fracA = (0x4000 | tmp); frac32A = fracA<<14; tmp = (uiB<<2) & 0xFFFF; if (regSB){ while (tmp>>15){ kA--; tmp= (tmp<<1) & 0xFFFF; } fracB = (0x4000 | tmp); } else{ kA++; while (!(tmp>>15)){ kA++; tmp= (tmp<<1) & 0xFFFF; } tmp&=0x7FFF; fracB = (0x4000 | (0x7FFF & tmp)); } expA -= tmp>>14; divresult = div (frac32A,fracB); frac32Z = divresult.quot; rem = divresult.rem; if (expA<0){ expA=1; kA--; } if (frac32Z!=0){ rcarry = frac32Z >> 14; // this is the hidden bit (14th bit) , extreme right bit is bit 0 if (!rcarry){ if (expA==0) kA --; expA^=1; frac32Z<<=1; } } if(kA<0){ regA = (-kA & 0xFFFF); regSA = 0; regime = 0x4000>>regA; } else{ regA = kA+1; regSA=1; regime = 0x7FFF - (0x7FFF>>regA); } if(regA>14){ //max or min pos. exp and frac does not matter. (regSA) ? 
(uZ.ui= 0x7FFF): (uZ.ui=0x1); } else{ //remove carry and rcarry bits and shift to correct position frac32Z &= 0x3FFF; fracA = (uint_fast16_t)frac32Z >> (regA+1); if (regA!=14) bitNPlusOne = (frac32Z >> regA) & 0x1; else if (fracA>0){ fracA=0; bitsMore =1; } if (regA==14 && expA) bitNPlusOne = 1; //sign is always zero uZ.ui = packToP16UI(regime, regA, expA, fracA); if (bitNPlusOne){ ( ((1<>15){ kA++; tmp= (tmp<<1) & 0xFFFF; } } else{ kA=-1; while (!(tmp>>15)){ kA--; tmp= (tmp<<1) & 0xFFFF; } tmp&=0x7FFF; } expA = tmp>>14; fracA = (0x4000 | tmp); tmp = (uiB<<2) & 0xFFFF; if (regSB){ while (tmp>>15){ kA++; tmp= (tmp<<1) & 0xFFFF; } } else{ kA--; while (!(tmp>>15)){ kA--; tmp= (tmp<<1) & 0xFFFF; } tmp&=0x7FFF; } expA += tmp>>14; frac32Z = (uint_fast32_t) fracA * (0x4000 | tmp); if (expA>1){ kA++; expA ^=0x2; } rcarry = frac32Z>>29;//3rd bit of frac32Z if (rcarry){ if (expA) kA ++; expA^=1; frac32Z>>=1; } if(kA<0){ regA = (-kA & 0xFFFF); regSA = 0; regime = 0x4000>>regA; } else{ regA = kA+1; regSA=1; regime = 0x7FFF - (0x7FFF>>regA); } if(regA>14){ //max or min pos. exp and frac does not matter. (regSA) ? (uZ.ui= 0x7FFF): (uZ.ui=0x1); } else{ //remove carry and rcarry bits and shift to correct position frac32Z = (frac32Z&0xFFFFFFF) >> (regA-1); fracA = (uint_fast16_t) (frac32Z>>16); if (regA!=14) bitNPlusOne |= (0x8000 & frac32Z) ; else if (fracA>0){ fracA=0; bitsMore =1; } if (regA==14 && expA) bitNPlusOne = 1; //sign is always zero uZ.ui = packToP16UI(regime, regA, expA, fracA); //n+1 frac bit is 1. 
Need to check if another bit is 1 too if not round to even if (bitNPlusOne){ if (0x7FFF & frac32Z) bitsMore=1; uZ.ui += (uZ.ui&1) | bitsMore; } } if (signZ) uZ.ui = -uZ.ui & 0xFFFF; return uZ.p; } luametatex-2.10.08/source/libraries/softposit/source/p16_mulAdd.c000066400000000000000000000045051442250314700247070ustar00rootroot00000000000000 /*============================================================================ This C source file is part of the SoftPosit Posit Arithmetic Package by S. H. Leong (Cerlane). Copyright 2017, 2018 A*STAR. All rights reserved. This C source file was based on SoftFloat IEEE Floating-Point Arithmetic Package, Release 3d, by John R. Hauser. Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the University of California. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. =============================================================================*/ #include "platform.h" #include "internals.h" posit16_t p16_mulAdd( posit16_t a, posit16_t b, posit16_t c ) { union ui16_p16 uA; uint_fast16_t uiA; union ui16_p16 uB; uint_fast16_t uiB; union ui16_p16 uC; uint_fast16_t uiC; uA.p = a; uiA = uA.ui; uB.p = b; uiB = uB.ui; uC.p = c; uiC = uC.ui; return softposit_mulAddP16( uiA, uiB, uiC, 0 ); } luametatex-2.10.08/source/libraries/softposit/source/p16_roundToInt.c000066400000000000000000000077661442250314700256220ustar00rootroot00000000000000 /*============================================================================ This C source file is part of the SoftPosit Posit Arithmetic Package by S. H. Leong (Cerlane) and John Gustafson. Copyright 2017, 2018 A*STAR. All rights reserved. This C source file was based on SoftFloat IEEE Floating-Point Arithmetic Package, Release 3d, by John R. Hauser. Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the University of California. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. 2. 
Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. =============================================================================*/ #include "platform.h" #include "internals.h" posit16_t p16_roundToInt( posit16_t pA ) { union ui16_p16 uA; uint_fast16_t mask = 0x2000, scale=0, tmp=0, uiA; bool bitLast, bitNPlusOne, sign; uA.p = pA; uiA = uA.ui; // Copy of the input. sign = (uiA > 0x8000); // sign is True if pA > NaR. if (sign) uiA = -uiA & 0xFFFF; // A is now |A|. if (uiA <= 0x3000) { // 0 <= |pA| <= 1/2 rounds to zero. uA.ui = 0; return uA.p; } else if (uiA < 0x4800) { // 1/2 < x < 3/2 rounds to 1. uA.ui = 0x4000; } else if (uiA <= 0x5400) { // 3/2 <= x <= 5/2 rounds to 2. uA.ui = 0x5000; } else if (uiA >= 0x7C00) { // If |A| is 256 or greater, leave it unchanged. return uA.p; // This also takes care of the NaR case, 0x8000. } else { // 34% of the cases, we have to decode the posit. 
while (mask & uiA) { // Increment scale by 2 for each regime sign bit. scale += 2; // Regime sign bit is always 1 in this range. mask >>= 1; // Move the mask right, to the next bit. } mask >>= 1; // Skip over termination bit. if (mask & uiA) scale++; // If exponent is 1, increment the scale. mask >>= scale; // Point to the last bit of the integer part. bitLast = (uiA & mask); // Extract the bit, without shifting it. mask >>= 1; tmp = (uiA & mask); bitNPlusOne = tmp; // "True" if nonzero. uiA ^= tmp; // Erase the bit, if it was set. tmp = uiA & (mask - 1); // tmp has any remaining bits. uiA ^= tmp; // Erase those bits, if any were set. if (bitNPlusOne) { // logic for round to nearest, tie to even if (bitLast | tmp) uiA += (mask << 1); } uA.ui = uiA; } if (sign) uA.ui = -uA.ui & 0xFFFF; // Apply the sign of Z. return uA.p; } luametatex-2.10.08/source/libraries/softposit/source/p16_sqrt.c000066400000000000000000000113531442250314700244710ustar00rootroot00000000000000 /*============================================================================ This C source file is part of the SoftPosit Posit Arithmetic Package by S. H. Leong (Cerlane) and John Gustafson. Copyright 2017, 2018 A*STAR. All rights reserved. This C source file was based on SoftFloat IEEE Floating-Point Arithmetic Package, Release 3d, by John R. Hauser. Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of California. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. 
Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. =============================================================================*/ #include "platform.h" #include "internals.h" extern const uint_fast16_t softposit_approxRecipSqrt0[]; extern const uint_fast16_t softposit_approxRecipSqrt1[]; posit16_t p16_sqrt( posit16_t pA ) { union ui16_p16 uA; uint_fast16_t expA, fracA, index, r0, shift, sigma0, uiA, uiZ; uint_fast32_t eSqrR0, fracZ, negRem, recipSqrt, shiftedFracZ; int_fast16_t kZ; bool bitNPlusOne; uA.p = pA; uiA = uA.ui; // If sign bit is set, return NaR. if (uiA>>15) { uA.ui = 0x8000; return uA.p; } // If the argument is zero, return zero. if (uiA==0) { uA.ui = 0; return uA.p; } // Compute the square root. Here, kZ is the net power-of-2 scaling of the result. 
// Decode the regime and exponent bit; scale the input to be in the range 1 to 4: if (uiA >> 14) { kZ = -1; while (uiA & 0x4000) { kZ++; uiA= (uiA<<1) & 0xFFFF; } } else { kZ = 0; while (!(uiA & 0x4000)) { kZ--; uiA= (uiA<<1) & 0xFFFF; } } uiA &= 0x3fff; expA = 1 - (uiA >> 13); fracA = (uiA | 0x2000) >> 1; // Use table look-up of first four bits for piecewise linear approx. of 1/sqrt: index = ((fracA >> 8) & 0xE) + expA; r0 = softposit_approxRecipSqrt0[index] - (((uint_fast32_t) softposit_approxRecipSqrt1[index] * (fracA & 0x1FF)) >> 13); // Use Newton-Raphson refinement to get more accuracy for 1/sqrt: eSqrR0 = ((uint_fast32_t) r0 * r0) >> 1; if (expA) eSqrR0 >>= 1; sigma0 = 0xFFFF ^ (0xFFFF & (((uint64_t)eSqrR0 * (uint64_t)fracA) >> 18));//~(uint_fast16_t) ((eSqrR0 * fracA) >> 18); recipSqrt = ((uint_fast32_t) r0 << 2) + (((uint_fast32_t) r0 * sigma0) >> 23); // We need 17 bits of accuracy for posit16 square root approximation. // Multiplying 16 bits and 18 bits needs 64-bit scratch before the right shift: fracZ = (((uint_fast64_t) fracA) * recipSqrt) >> 13; // Figure out the regime and the resulting right shift of the fraction: if (kZ < 0) { shift = (-1 - kZ) >> 1; uiZ = 0x2000 >> shift; } else { shift = kZ >> 1; uiZ = 0x7fff - (0x7FFF >> (shift + 1)); } // Set the exponent bit in the answer, if it is nonzero: if (kZ & 1) uiZ |= (0x1000 >> shift); // Right-shift fraction bits, accounting for 1 <= a < 2 versus 2 <= a < 4: fracZ = fracZ >> (expA + shift); // Trick for eliminating off-by-one cases that only uses one multiply: fracZ++; if (!(fracZ & 7)) { shiftedFracZ = fracZ >> 1; negRem = (shiftedFracZ * shiftedFracZ) & 0x3FFFF; if (negRem & 0x20000) { fracZ |= 1; } else { if (negRem) fracZ--; } } // Strip off the hidden bit and round-to-nearest using last 4 bits. fracZ -= (0x10000 >> shift); bitNPlusOne = (fracZ >> 3) & 1; if (bitNPlusOne) { if (((fracZ >> 4) & 1) | (fracZ & 7)) fracZ += 0x10; } // Assemble the result and return it. 
uA.ui = uiZ | (fracZ >> 4); return uA.p; } luametatex-2.10.08/source/libraries/softposit/source/p16_sub.c000066400000000000000000000053061442250314700242720ustar00rootroot00000000000000 /*============================================================================ This C source file is part of the SoftPosit Posit Arithmetic Package by S. H. Leong (Cerlane). Copyright 2017 A*STAR. All rights reserved. This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic Package, Release 3d, by John R. Hauser. Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of California. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. =============================================================================*/ #include "platform.h" #include "internals.h" posit16_t p16_sub( posit16_t a, posit16_t b ){ union ui16_p16 uA, uB; uint_fast16_t uiA, uiB; union ui16_p16 uZ; uA.p = a; uiA = uA.ui; uB.p = b; uiB = uB.ui; #ifdef SOFTPOSIT_EXACT uZ.ui.exact = (uiA.ui.exact & uiB.ui.exact); #endif //infinity if ( uiA==0x8000 || uiB==0x8000 ){ #ifdef SOFTPOSIT_EXACT uZ.ui.v = 0x8000; uZ.ui.exact = 0; #else uZ.ui = 0x8000; #endif return uZ.p; } //Zero else if ( uiA==0 || uiB==0 ){ #ifdef SOFTPOSIT_EXACT uZ.ui.v = (uiA | -uiB); uZ.ui.exact = 0; #else uZ.ui = (uiA | -uiB); #endif return uZ.p; } //different signs if ((uiA^uiB)>>15) return softposit_addMagsP16(uiA, (-uiB & 0xFFFF)); else return softposit_subMagsP16(uiA, (-uiB & 0xFFFF)); } luametatex-2.10.08/source/libraries/softposit/source/p16_to_i32.c000066400000000000000000000101651442250314700245770ustar00rootroot00000000000000 /*============================================================================ This C source file is part of the SoftPosit Posit Arithmetic Package by S. H. Leong (Cerlane) and John Gustafson. Copyright 2017, 2018 A*STAR. All rights reserved. This C source file was based on SoftFloat IEEE Floating-Point Arithmetic Package, Release 3d, by John R. Hauser. Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the University of California. All rights reserved. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. =============================================================================*/ #include "platform.h" #include "internals.h" int_fast32_t p16_to_i32( posit16_t pA ){ union ui16_p16 uA; int_fast32_t mask, iZ, tmp; uint_fast16_t scale = 0, uiA; bool bitLast, bitNPlusOne, sign; uA.p = pA; uiA = uA.ui; // Copy of the input. //NaR if (uiA==0x8000) return 0; sign = (uiA > 0x8000); // sign is True if pA > NaR. if (sign) uiA = -uiA & 0xFFFF; // A is now |A|. if (uiA <= 0x3000) { // 0 <= |pA| <= 1/2 rounds to zero. return 0; } else if (uiA < 0x4800) { // 1/2 < x < 3/2 rounds to 1. 
iZ = 1; } else if (uiA <= 0x5400) { // 3/2 <= x <= 5/2 rounds to 2. iZ = 2; } else { // Decode the posit, left-justifying as we go. uiA -= 0x4000; // Strip off first regime bit (which is a 1). while (0x2000 & uiA) { // Increment scale by 2 for each regime sign bit. scale += 2; // Regime sign bit is always 1 in this range. uiA = (uiA - 0x2000) << 1; // Remove the bit; line up the next regime bit. } uiA <<= 1; // Skip over termination bit, which is 0. if (0x2000 & uiA) scale++; // If exponent is 1, increment the scale. iZ = ((uint32_t)uiA | 0x2000) << 17; // Left-justify fraction in 32-bit result (one left bit padding) mask = 0x40000000 >> scale; // Point to the last bit of the integer part. bitLast = (iZ & mask); // Extract the bit, without shifting it. mask >>= 1; tmp = (iZ & mask); bitNPlusOne = tmp; // "True" if nonzero. iZ ^= tmp; // Erase the bit, if it was set. tmp = iZ & (mask - 1); // tmp has any remaining bits. // This is bitsMore iZ ^= tmp; // Erase those bits, if any were set. if (bitNPlusOne) { // logic for round to nearest, tie to even if (bitLast | tmp) iZ += (mask << 1); } iZ = (uint32_t)iZ >> (30 - scale); // Right-justify the integer. } if (sign) iZ = -iZ; // Apply the sign of the input. return iZ; } luametatex-2.10.08/source/libraries/softposit/source/p16_to_i64.c000066400000000000000000000055451442250314700246120ustar00rootroot00000000000000 /*============================================================================ This C source file is part of the SoftPosit Posit Arithmetic Package by S. H. Leong (Cerlane) and John Gustafson. Copyright 2017, 2018 A*STAR. All rights reserved. This C source file was based on SoftFloat IEEE Floating-Point Arithmetic Package, Release 3d, by John R. Hauser. Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the University of California. All rights reserved. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
=============================================================================*/ #include "platform.h" #include "internals.h" int_fast64_t p16_to_i64( posit16_t pA ){ union ui16_p16 uA; int_fast64_t mask, tmp, iZ; uint_fast16_t scale = 0, uiA; bool sign, bitLast, bitNPlusOne; uA.p = pA; uiA = uA.ui; // NaR if (uiA==0x8000) return 0; sign = uiA>>15; if (sign) uiA = -uiA & 0xFFFF; if (uiA <= 0x3000) return 0; else if (uiA < 0x4800) iZ = 1; else if (uiA <= 0x5400) iZ = 2; else{ uiA -= 0x4000; while (0x2000 & uiA) { scale += 2; uiA = (uiA - 0x2000) << 1; } uiA <<= 1; if (0x2000 & uiA) scale++; iZ = ((uint64_t)uiA | 0x2000) << 49; mask = 0x4000000000000000 >> scale; bitLast = (iZ & mask); mask >>= 1; tmp = (iZ & mask); bitNPlusOne = tmp; iZ ^= tmp; tmp = iZ & (mask - 1); // bitsMore iZ ^= tmp; if (bitNPlusOne) if (bitLast | tmp) iZ += (mask << 1); iZ = (uint64_t)iZ >> (62 - scale); } if (sign) iZ = -iZ; return iZ; } luametatex-2.10.08/source/libraries/softposit/source/p16_to_p32.c000066400000000000000000000061241442250314700246060ustar00rootroot00000000000000/*============================================================================ This C source file is part of the SoftPosit Posit Arithmetic Package by S. H. Leong (Cerlane). Copyright 2017 2018 A*STAR. All rights reserved. This C source file was based on SoftFloat IEEE Floating-Point Arithmetic Package, Release 3d, by John R. Hauser. Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the University of California. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. 2. 
Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
=============================================================================*/ #include "platform.h" #include "internals.h" posit32_t p16_to_p32( posit16_t pA ) { union ui16_p16 uA; union ui32_p32 uZ; uint_fast16_t uiA, tmp; uint_fast32_t exp_frac32A=0, regime; bool sign, regSA; int_fast8_t kA=0, regA; uA.p = pA; uiA = uA.ui; if (uiA==0x8000 || uiA==0 ){ uZ.ui = (uint32_t)uiA<<16; return uZ.p; } sign = signP16UI( uiA ); if (sign) uiA = -uiA & 0xFFFF; regSA = signregP16UI(uiA); tmp = (uiA<<2) & 0xFFFF; if (regSA){ while (tmp>>15){ kA++; tmp= (tmp<<1) & 0xFFFF; } } else{ kA=-1; while (!(tmp>>15)){ kA--; tmp= (tmp<<1) & 0xFFFF; } tmp&=0x7FFF; } exp_frac32A = (uint32_t) tmp<<16; if(kA<0){ regA = -kA; //if (regA&0x1) exp_frac32A |= 0x80000000; exp_frac32A |= ((uint32_t)(regA&0x1)<<31); regA = (regA+1)>>1; if (regA==0) regA=1; regSA = 0; regime = 0x40000000>>regA; } else{ exp_frac32A |= ((uint32_t)(kA&0x1)<<31); (kA==0) ? (regA=1) : (regA = (kA+2)>>1); regSA=1; regime = 0x7FFFFFFF - (0x7FFFFFFF>>regA); } exp_frac32A >>=(regA+2); //2 because of sign and regime terminating bit uZ.ui = regime + exp_frac32A; if (sign) uZ.ui = -uZ.ui & 0xFFFFFFFF; return uZ.p; } luametatex-2.10.08/source/libraries/softposit/source/p16_to_p8.c000066400000000000000000000065121442250314700245320ustar00rootroot00000000000000/*============================================================================ This C source file is part of the SoftPosit Posit Arithmetic Package by S. H. Leong (Cerlane). Copyright 2017 2018 A*STAR. All rights reserved. This C source file was based on SoftFloat IEEE Floating-Point Arithmetic Package, Release 3d, by John R. Hauser. Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the University of California. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. 
Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. =============================================================================*/ #include "platform.h" #include "internals.h" posit8_t p16_to_p8( posit16_t pA ) { union ui16_p16 uA; union ui8_p8 uZ; uint_fast16_t uiA, tmp, regime; uint_fast16_t exp_frac16A=0; bool sign, regSA, bitsMore=0; int_fast8_t kA=0, regA; uA.p = pA; uiA = uA.ui; if (uiA==0x8000 || uiA==0 ){ uZ.ui = (uiA>>8) &0xFF; return uZ.p; } sign = signP16UI( uiA ); if (sign) uiA = -uiA & 0xFFFF; regSA = signregP16UI(uiA); tmp = (uiA<<2) & 0xFFFF; if (regSA){ while (tmp>>15){ kA++; tmp= (tmp<<1) & 0xFFFF; } } else{ kA=-1; while (!(tmp>>15)){ kA--; tmp= (tmp<<1) & 0xFFFF; } tmp&=0x7FFF; } if (kA<-3 || kA>=3){ (kA<0) ? 
(uZ.ui=0x1):(uZ.ui= 0x7F); } else{ //2nd bit exp exp_frac16A = tmp; if(kA<0){ regA = ((-kA)<<1) - (exp_frac16A>>14); if (regA==0) regA=1; regSA = 0; regime = 0x40>>regA; } else{ (kA==0)?(regA=1 + (exp_frac16A>>14)): (regA = ((kA+1)<<1) + (exp_frac16A>>14) -1); regSA=1; regime = 0x7F - (0x7F>>regA); } if (regA>5){ uZ.ui = regime; } else{ //int shift = regA+8; //exp_frac16A= ((exp_frac16A)&0x3FFF) >> shift; //first 2 bits already empty (for sign and regime terminating bit) uZ.ui = regime + ( ((exp_frac16A)&0x3FFF)>>(regA+8) ); } } if ( exp_frac16A & (0x80<>(9-regA)); uZ.ui += (uZ.ui&1) | bitsMore; } if (sign) uZ.ui = -uZ.ui & 0xFF; return uZ.p; } luametatex-2.10.08/source/libraries/softposit/source/p16_to_pX1.c000066400000000000000000000056261442250314700246600ustar00rootroot00000000000000/*============================================================================ This C source file is part of the SoftPosit Posit Arithmetic Package by S. H. Leong (Cerlane). Copyright 2017 2018 A*STAR. All rights reserved. This C source file was based on SoftFloat IEEE Floating-Point Arithmetic Package, Release 3d, by John R. Hauser. Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the University of California. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. =============================================================================*/ #include "platform.h" #include "internals.h" posit_1_t p16_to_pX1( posit16_t pA, int x ) { union ui16_p16 uA; union ui32_pX1 uZ; uint_fast16_t tmp; uint_fast32_t uiA, exp_frac32A=0, regime; bool sign, regSA; int_fast8_t kA=0, regA; if (x<2 || x>32){ uZ.ui = 0x80000000; return uZ.p; } uA.p = pA; uiA = ((uint32_t) uA.ui<<16) &0xFFFFFFFF; if (uiA==0x80000000 || uiA==0 ){ uZ.ui = uiA; return uZ.p; } sign = signP32UI( uiA ); if (sign) uiA = -uiA & 0xFFFFFFFF; if (x==2){ uZ.ui=(uiA>0)?(0x40000000):(0); } else if (x==32 || (((uint32_t)0xFFFFFFFF>>x) & uiA)==0 ){ uZ.ui = uiA; } else { int shift = 32-x; if( (uiA>>shift)!=(0x7FFFFFFF>>shift) ){ if( ((uint32_t)0x80000000>>x) & uiA){ if ( ( ((uint32_t)0x80000000>>(x-1)) & uiA) || (((uint32_t)0x7FFFFFFF>>x) & uiA) ) uiA += (0x1<>(x-1)); if (uZ.ui==0) uZ.ui = 0x1<32){ uZ.ui = 0x80000000; return uZ.p; } uA.p = pA; uiA = uA.ui; if (uiA==0x8000 || uiA==0 ){ uZ.ui = (uint32_t)uiA<<16; return uZ.p; } sign = signP16UI( uiA ); if (sign) uiA = -uiA & 0xFFFF; if(x==2){ uZ.ui=(uiA>0)?(0x40000000):(0); } else{ regSA = signregP16UI(uiA); tmp = (uiA<<2) & 0xFFFF; if (regSA){ while (tmp>>15){ kA++; tmp= (tmp<<1) & 0xFFFF; } } else{ kA=-1; 
while (!(tmp>>15)){ kA--; tmp= (tmp<<1) & 0xFFFF; } tmp&=0x7FFF; } exp_frac32A = (uint32_t) tmp<<16; if(kA<0){ regA = -kA; //if (regA&0x1) exp_frac32A |= 0x80000000; exp_frac32A |= ((uint32_t)(regA&0x1)<<31); regA = (regA+1)>>1; if (regA==0) regA=1; regSA = 0; regime = 0x40000000>>regA; } else{ exp_frac32A |= ((uint32_t)(kA&0x1)<<31); (kA==0) ? (regA=1) : (regA = (kA+2)>>1); regSA=1; regime = 0x7FFFFFFF - (0x7FFFFFFF>>regA); } if(regA>(x-2)){ //max or min pos. exp and frac does not matter. uZ.ui=(regSA) ? (0x7FFFFFFF & ((int32_t)0x80000000>>(x-1)) ): (0x1 << (32-x)); } else{ exp_frac32A >>=(regA+2); //2 because of sign and regime terminating bit uZ.ui = regime + exp_frac32A; int shift = 32-x; if( (uZ.ui>>shift)!=(0x7FFFFFFF>>shift) ){ if( ((uint32_t)0x80000000>>x) & uZ.ui){ if ( ( ((uint32_t)0x80000000>>(x-1)) & uZ.ui) || (((uint32_t)0x7FFFFFFF>>x) & uZ.ui) ) uZ.ui += (0x1<>(x-1)); if (uZ.ui==0) uZ.ui = 0x1<=0x8000) return 0; //negative if (uiA <= 0x3000) { // 0 <= |pA| <= 1/2 rounds to zero. return 0; } else if (uiA < 0x4800) { // 1/2 < x < 3/2 rounds to 1. iZ = 1; } else if (uiA <= 0x5400) { // 3/2 <= x <= 5/2 rounds to 2. iZ = 2; } else { // Decode the posit, left-justifying as we go. uiA -= 0x4000; // Strip off first regime bit (which is a 1). while (0x2000 & uiA) { // Increment scale by 2 for each regime sign bit. scale += 2; // Regime sign bit is always 1 in this range. uiA = (uiA - 0x2000) << 1; // Remove the bit; line up the next regime bit. } uiA <<= 1; // Skip over termination bit, which is 0. if (0x2000 & uiA) scale++; // If exponent is 1, increment the scale. iZ = ((uint32_t)uiA | 0x2000) << 17; // Left-justify fraction in 32-bit result (one left bit padding) mask = 0x40000000 >> scale; // Point to the last bit of the integer part. bitLast = (iZ & mask); // Extract the bit, without shifting it. mask >>= 1; tmp = (iZ & mask); bitNPlusOne = tmp; // "True" if nonzero. iZ ^= tmp; // Erase the bit, if it was set. 
tmp = iZ & (mask - 1); // tmp has any remaining bits. // This is bitsMore iZ ^= tmp; // Erase those bits, if any were set. if (bitNPlusOne) { // logic for round to nearest, tie to even if (bitLast | tmp) iZ += (mask << 1); } iZ = (uint32_t)iZ >> (30 - scale); // Right-justify the integer. } return iZ; } luametatex-2.10.08/source/libraries/softposit/source/p16_to_ui64.c000066400000000000000000000055211442250314700247710ustar00rootroot00000000000000 /*============================================================================ This C source file is part of the SoftPosit Posit Arithmetic Package by S. H. Leong (Cerlane) and John Gustafson. Copyright 2017 2018 A*STAR. All rights reserved. This C source file was based on SoftFloat IEEE Floating-Point Arithmetic Package, Release 3d, by John R. Hauser. Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the University of California. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. =============================================================================*/ #include "platform.h" #include "internals.h" uint_fast64_t p16_to_ui64( posit16_t pA ) { union ui16_p16 uA; uint_fast64_t mask, iZ, tmp; uint_fast16_t scale = 0, uiA; bool bitLast, bitNPlusOne; uA.p = pA; uiA = uA.ui; //NaR //if (uiA==0x8000) return 0; //negative if (uiA>=0x8000) return 0; if (uiA <= 0x3000) { return 0; } else if (uiA < 0x4800) { iZ = 1; } else if (uiA <= 0x5400) { iZ = 2; } else { uiA -= 0x4000; while (0x2000 & uiA) { scale += 2; uiA = (uiA - 0x2000) << 1; } uiA <<= 1; if (0x2000 & uiA) scale++; iZ = ((uint64_t)uiA | 0x2000) << 49; mask = 0x4000000000000000 >> scale; bitLast = (iZ & mask); mask >>= 1; tmp = (iZ & mask); bitNPlusOne = tmp; iZ ^= tmp; tmp = iZ & (mask - 1); // bitsMore iZ ^= tmp; if (bitNPlusOne) if (bitLast | tmp) iZ += (mask << 1); iZ = (uint64_t)iZ >> (62 - scale); } return iZ; } luametatex-2.10.08/source/libraries/softposit/source/p32_add.c000066400000000000000000000054001442250314700242220ustar00rootroot00000000000000 /*============================================================================ This C source file is part of the SoftPosit Posit Arithmetic Package by S. H. Leong (Cerlane). Copyright 2017, 2018 A*STAR. All rights reserved. This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic Package, Release 3d, by John R. Hauser. Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of California. 
All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
=============================================================================*/ #include "platform.h" #include "internals.h" posit32_t p32_add( posit32_t a, posit32_t b ){ union ui32_p32 uA, uB, uZ; uint_fast32_t uiA, uiB; uA.p = a; uiA = uA.ui; uB.p = b; uiB = uB.ui; #ifdef SOFTPOSIT_EXACT uZ.ui.exact = (uiA.ui.exact & uiB.ui.exact); #endif //Zero or infinity if (uiA==0 || uiB==0){ // Not required but put here for speed #ifdef SOFTPOSIT_EXACT uZ.ui.v = uiA | uiB; uZ.ui.exact = (uiA.ui.exact & uiB.ui.exact); #else uZ.ui = uiA | uiB; #endif return uZ.p; } else if ( uiA==0x80000000 || uiB==0x80000000 ){ //printf("in infinity\n"); #ifdef SOFTPOSIT_EXACT uZ.ui.v = 0x80000000; uZ.ui.exact = 0; #else uZ.ui = 0x80000000; #endif return uZ.p; } //different signs if ((uiA^uiB)>>31) return softposit_subMagsP32(uiA, uiB); else return softposit_addMagsP32(uiA, uiB); } luametatex-2.10.08/source/libraries/softposit/source/p32_div.c000066400000000000000000000116231442250314700242600ustar00rootroot00000000000000/*============================================================================ This C source file is part of the SoftPosit Posit Arithmetic Package by S. H. Leong (Cerlane). Copyright 2017, 2018 A*STAR. All rights reserved. This C source file was based on SoftFloat IEEE Floating-Point Arithmetic Package, Release 3d, by John R. Hauser. Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the University of California. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. 
Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. =============================================================================*/ #include #include "platform.h" #include "internals.h" posit32_t p32_div( posit32_t pA, posit32_t pB ) { union ui32_p32 uA, uB, uZ; uint_fast32_t uiA, uiB, fracA, fracB, regA, regime, regB, tmp; bool signA, signB, signZ, regSA, regSB, bitNPlusOne=0, bitsMore=0, rcarry; int_fast8_t kA=0; int_fast32_t expA; uint_fast64_t frac64A, frac64Z, rem; lldiv_t divresult; uA.p = pA; uiA = uA.ui; uB.p = pB; uiB = uB.ui; //Zero or infinity if ( uiA==0x80000000 || uiB==0x80000000 || uiB==0){ #ifdef SOFTPOSIT_EXACT uZ.ui.v = 0x80000000; uZ.ui.exact = 0; #else uZ.ui = 0x80000000; #endif return uZ.p; } else if (uiA==0){ #ifdef SOFTPOSIT_EXACT uZ.ui.v = 0; if ( (uiA==0 && uiA.ui.exact) || (uiB==0 && uiB.ui.exact) ) uZ.ui.exact = 1; else uZ.ui.exact = 0; #else uZ.ui = 0; #endif return uZ.p; } signA = signP32UI( uiA ); signB = signP32UI( uiB ); signZ = signA ^ signB; if(signA) uiA = (-uiA & 0xFFFFFFFF); if(signB) uiB = (-uiB & 0xFFFFFFFF); regSA = signregP32UI(uiA); regSB = signregP32UI(uiB); tmp = 
(uiA<<2)&0xFFFFFFFF; if (regSA){ while (tmp>>31){ kA++; tmp= (tmp<<1) & 0xFFFFFFFF; } } else{ kA=-1; while (!(tmp>>31)){ kA--; tmp= (tmp<<1) & 0xFFFFFFFF; } tmp&=0x7FFFFFFF; } expA = tmp>>29; //to get 2 bits fracA = ((tmp<<1) | 0x40000000) & 0x7FFFFFFF; frac64A = (uint64_t) fracA << 30; tmp = (uiB<<2)&0xFFFFFFFF; if (regSB){ while (tmp>>31){ kA--; tmp= (tmp<<1) & 0xFFFFFFFF; } } else{ kA++; while (!(tmp>>31)){ kA++; tmp= (tmp<<1) & 0xFFFFFFFF; } tmp&=0x7FFFFFFF; } expA -= tmp>>29; fracB = ((tmp<<1) | 0x40000000) & 0x7FFFFFFF; divresult = lldiv (frac64A,(uint_fast64_t)fracB); frac64Z = divresult.quot; rem = divresult.rem; if (expA<0){ expA+=4; kA--; } if (frac64Z!=0){ rcarry = frac64Z >> 30; // this is the hidden bit (14th bit) , extreme right bit is bit 0 if (!rcarry){ if (expA==0){ kA--; expA=3; } else expA--; frac64Z<<=1; } } if(kA<0){ regA = -kA; regSA = 0; regime = 0x40000000>>regA; } else{ regA = kA+1; regSA=1; regime = 0x7FFFFFFF - (0x7FFFFFFF>>regA); } if(regA>30){ //max or min pos. exp and frac does not matter. (regSA) ? (uZ.ui= 0x7FFFFFFF): (uZ.ui=0x1); } else{ //remove carry and rcarry bits and shift to correct position frac64Z &= 0x3FFFFFFF; fracA = (uint_fast32_t)frac64Z >> (regA+2); if (regA<=28){ bitNPlusOne = (frac64Z >> (regA +1)) & 0x1; expA<<= (28-regA); if (bitNPlusOne) ( ((1<<(regA+1))-1) & frac64Z ) ? 
(bitsMore=1) : (bitsMore=0); } else { if (regA==30){ bitNPlusOne = expA&0x2; bitsMore = (expA&0x1); expA = 0; } else if (regA==29){ bitNPlusOne = expA&0x1; expA>>=1; //taken care of by the pack algo } if (frac64Z>0){ fracA=0; bitsMore =1; } } if (rem) bitsMore =1; uZ.ui = packToP32UI(regime, expA, fracA); if (bitNPlusOne) uZ.ui += (uZ.ui&1) | bitsMore; } if (signZ) uZ.ui = -uZ.ui & 0xFFFFFFFF; return uZ.p; } luametatex-2.10.08/source/libraries/softposit/source/p32_eq.c000066400000000000000000000042441442250314700241040ustar00rootroot00000000000000 /*============================================================================ This C source file is part of the SoftPosit Posit Arithmetic Package by S. H. Leong (Cerlane). Copyright 2017 2018 A*STAR. All rights reserved. This C source file was based on SoftFloat IEEE Floating-Point Arithmetic Package, Release 3d, by John R. Hauser. Copyright 2011, 2012, 2013, 2014 The Regents of the University of California. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. =============================================================================*/ #include "platform.h" #include "internals.h" bool p32_eq( posit32_t a, posit32_t b ) { union ui32_p32 uA, uB; int32_t uiA, uiB; uA.p = a; uiA = (int32_t) uA.ui; uB.p = b; uiB = (int32_t)uB.ui; if(uiA==uiB) return true; else return false; } luametatex-2.10.08/source/libraries/softposit/source/p32_le.c000066400000000000000000000042431442250314700240760ustar00rootroot00000000000000 /*============================================================================ This C source file is part of the SoftPosit Posit Arithmetic Package by S. H. Leong (Cerlane). Copyright 2017 2018 A*STAR. All rights reserved. This C source file was based on SoftFloat IEEE Floating-Point Arithmetic Package, Release 3d, by John R. Hauser. Copyright 2011, 2012, 2013, 2014 The Regents of the University of California. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. 
Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. =============================================================================*/ #include "platform.h" #include "internals.h" bool p32_le( posit32_t a, posit32_t b ) { union ui32_p32 uA, uB; int32_t uiA, uiB; uA.p = a; uiA = (int32_t) uA.ui; uB.p = b; uiB = (int32_t)uB.ui; if(uiA<=uiB) return true; else return false; } luametatex-2.10.08/source/libraries/softposit/source/p32_lt.c000066400000000000000000000042421442250314700241140ustar00rootroot00000000000000 /*============================================================================ This C source file is part of the SoftPosit Posit Arithmetic Package by S. H. Leong (Cerlane). Copyright 2017 2018 A*STAR. All rights reserved. This C source file was based on SoftFloat IEEE Floating-Point Arithmetic Package, Release 3d, by John R. Hauser. Copyright 2011, 2012, 2013, 2014 The Regents of the University of California. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. 
Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
=============================================================================*/ #include "platform.h" #include "internals.h" bool p32_lt( posit32_t a, posit32_t b ) { union ui32_p32 uA, uB; int32_t uiA, uiB; uA.p = a; uiA = (int32_t) uA.ui; uB.p = b; uiB = (int32_t)uB.ui; if(uiA>31){ kA++; tmp= (tmp<<1) & 0xFFFFFFFF; } } else{ kA=-1; while (!(tmp>>31)){ kA--; tmp= (tmp<<1) & 0xFFFFFFFF; } tmp&=0x7FFFFFFF; } expA = tmp>>29; //to get 2 bits fracA = ((tmp<<1) | 0x40000000) & 0x7FFFFFFF; tmp = (uiB<<2)&0xFFFFFFFF; if (regSB){ while (tmp>>31){ kA++; tmp= (tmp<<1) & 0xFFFFFFFF; } } else{ kA--; while (!(tmp>>31)){ kA--; tmp= (tmp<<1) & 0xFFFFFFFF; } tmp&=0x7FFFFFFF; } expA += tmp>>29; frac64Z = (uint_fast64_t) fracA * (((tmp<<1) | 0x40000000) & 0x7FFFFFFF); if (expA>3){ kA++; expA&=0x3; // -=4 } rcarry = frac64Z>>61;//3rd bit of frac64Z if (rcarry){ expA++; if (expA>3){ kA ++; expA&=0x3; } frac64Z>>=1; } if(kA<0){ regA = -kA; regSA = 0; regime = 0x40000000>>regA; } else{ regA = kA+1; regSA=1; regime = 0x7FFFFFFF - (0x7FFFFFFF>>regA); } if(regA>30){ //max or min pos. exp and frac does not matter. (regSA) ? (uZ.ui= 0x7FFFFFFF): (uZ.ui=0x1); } else{ //remove carry and rcarry bits and shift to correct position (2 bits exp, so + 1 than 16 bits) frac64Z = (frac64Z&0xFFFFFFFFFFFFFFF) >> regA; fracA = (uint_fast32_t) (frac64Z>>32); if (regA<=28){ bitNPlusOne |= (0x80000000 & frac64Z); expA<<= (28-regA); } else { if (regA==30){ bitNPlusOne = expA&0x2; bitsMore = (expA&0x1); expA = 0; } else if (regA==29){ bitNPlusOne = expA&0x1; expA>>=1; //taken care of by the pack algo } if (fracA>0){ fracA=0; bitsMore =1; } } //sign is always zero uZ.ui = packToP32UI(regime, expA, fracA); //n+1 frac bit is 1. 
Need to check if another bit is 1 too if not round to even if (bitNPlusOne){ if (0x7FFFFFFF & frac64Z) bitsMore=1; uZ.ui += (uZ.ui&1) | bitsMore; } } if (signZ) uZ.ui = -uZ.ui & 0xFFFFFFFF; return uZ.p; } luametatex-2.10.08/source/libraries/softposit/source/p32_mulAdd.c000066400000000000000000000041341442250314700247030ustar00rootroot00000000000000/*============================================================================ This C source file is part of the SoftPosit Posit Arithmetic Package by S. H. Leong (Cerlane). Copyright 2017, 2018 A*STAR. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
=============================================================================*/ #include "platform.h" #include "internals.h" posit32_t p32_mulAdd( posit32_t a, posit32_t b, posit32_t c ) { union ui32_p32 uA; uint_fast32_t uiA; union ui32_p32 uB; uint_fast32_t uiB; union ui32_p32 uC; uint_fast32_t uiC; uA.p = a; uiA = uA.ui; uB.p = b; uiB = uB.ui; uC.p = c; uiC = uC.ui; return softposit_mulAddP32( uiA, uiB, uiC, 0 ); } luametatex-2.10.08/source/libraries/softposit/source/p32_roundToInt.c000066400000000000000000000067551442250314700256150ustar00rootroot00000000000000 /*============================================================================ This C source file is part of the SoftPosit Posit Arithmetic Package by S. H. Leong (Cerlane) and John Gustafson. Copyright 2017 2018 A*STAR. All rights reserved. This C source file was based on SoftFloat IEEE Floating-Point Arithmetic Package, Release 3d, by John R. Hauser. Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the University of California. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. =============================================================================*/ #include "platform.h" #include "internals.h" posit32_t p32_roundToInt( posit32_t pA ){ union ui32_p32 uA; uint_fast32_t mask = 0x20000000, scale=0, tmp=0, uiA, uiZ; bool bitLast, bitNPlusOne, sign; uA.p = pA; uiA = uA.ui; sign = uiA>>31; // sign is True if pA > NaR. if (sign) uiA = -uiA & 0xFFFFFFFF; // A is now |A|. if (uiA <= 0x38000000) { // 0 <= |pA| <= 1/2 rounds to zero. uA.ui = 0; return uA.p; } else if (uiA < 0x44000000) { // 1/2 < x < 3/2 rounds to 1. uA.ui = 0x40000000; } else if (uiA <= 0x4A000000) { // 3/2 <= x <= 5/2 rounds to 2. uA.ui = 0x48000000; } else if (uiA >= 0x7E800000) { // If |A| is 0x7E800000 (posit is pure integer value), leave it unchanged. return uA.p; // This also takes care of the NaR case, 0x80000000. } else { // 34% of the cases, we have to decode the posit. while (mask & uiA) { scale += 4; mask >>= 1; } mask >>= 1; //Exponential (2 bits) if (mask & uiA) scale+=2; mask >>= 1; if (mask & uiA) scale++; mask >>= scale; //the rest of the bits bitLast = (uiA & mask); mask >>= 1; tmp = (uiA & mask); bitNPlusOne = tmp; uiA ^= tmp; // Erase the bit, if it was set. 
tmp = uiA & (mask - 1); // this is actually bitsMore uiA ^= tmp; if (bitNPlusOne) { if (bitLast | tmp) uiA += (mask << 1); } uA.ui = uiA; } if (sign) uA.ui = -uA.ui & 0xFFFFFFFF; return uA.p; } luametatex-2.10.08/source/libraries/softposit/source/p32_sqrt.c000066400000000000000000000115431442250314700244700ustar00rootroot00000000000000 /*============================================================================ This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic Package, Release 3d, by John R. Hauser. Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of California. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. =============================================================================*/ #include "platform.h" #include "internals.h" extern const uint_fast16_t softposit_approxRecipSqrt0[]; extern const uint_fast16_t softposit_approxRecipSqrt1[]; posit32_t p32_sqrt( posit32_t pA ) { union ui32_p32 uA; uint_fast32_t index, r0, shift, fracA, expZ, expA; uint_fast32_t mask, uiA, uiZ; uint_fast64_t eSqrR0, fracZ, negRem, recipSqrt, shiftedFracZ, sigma0, sqrSigma0; int_fast32_t eps, shiftZ; uA.p = pA; uiA = uA.ui; // If NaR or a negative number, return NaR. if (uiA & 0x80000000) { uA.ui = 0x80000000; return uA.p; } // If the argument is zero, return zero. else if (!uiA) { return uA.p; } // Compute the square root; shiftZ is the power-of-2 scaling of the result. // Decode regime and exponent; scale the input to be in the range 1 to 4: if (uiA & 0x40000000) { shiftZ = -2; while (uiA & 0x40000000) { shiftZ += 2; uiA = (uiA << 1) & 0xFFFFFFFF; } } else { shiftZ = 0; while (!(uiA & 0x40000000)) { shiftZ -= 2; uiA = (uiA << 1) & 0xFFFFFFFF; } } uiA &= 0x3FFFFFFF; expA = (uiA >> 28); shiftZ += (expA >> 1); expA = (0x1 ^ (expA & 0x1)); uiA &= 0x0FFFFFFF; fracA = (uiA | 0x10000000); // Use table look-up of first 4 bits for piecewise linear approx. 
of 1/sqrt: index = ((fracA >> 24) & 0xE) + expA; eps = ((fracA >> 9) & 0xFFFF); r0 = softposit_approxRecipSqrt0[index] - (((uint_fast32_t) softposit_approxRecipSqrt1[index] * eps) >> 20); // Use Newton-Raphson refinement to get 33 bits of accuracy for 1/sqrt: eSqrR0 = (uint_fast64_t) r0 * r0; if (!expA) eSqrR0 <<= 1; sigma0 = 0xFFFFFFFF & (0xFFFFFFFF ^ ((eSqrR0 * (uint64_t)fracA) >> 20)); recipSqrt = ((uint_fast64_t) r0 << 20) + (((uint_fast64_t) r0 * sigma0) >> 21); sqrSigma0 = ((sigma0 * sigma0) >> 35); recipSqrt += ( (( recipSqrt + (recipSqrt >> 2) - ((uint_fast64_t)r0 << 19) ) * sqrSigma0) >> 46 ); fracZ = (((uint_fast64_t) fracA) * recipSqrt) >> 31; if (expA) fracZ = (fracZ >> 1); // Find the exponent of Z and encode the regime bits. expZ = shiftZ & 0x3; if (shiftZ < 0) { shift = (-1 - shiftZ) >> 2; uiZ = 0x20000000 >> shift; } else { shift = shiftZ >> 2; uiZ = 0x7FFFFFFF - (0x3FFFFFFF >> shift); } // Trick for eliminating off-by-one cases that only uses one multiply: fracZ++; if (!(fracZ & 0xF)) { shiftedFracZ = fracZ >> 1; negRem = (shiftedFracZ * shiftedFracZ) & 0x1FFFFFFFF; if (negRem & 0x100000000) { fracZ |= 1; } else { if (negRem) fracZ--; } } // Strip off the hidden bit and round-to-nearest using last shift+5 bits. fracZ &= 0xFFFFFFFF; mask = (1 << (4 + shift)); if (mask & fracZ) { if ( ((mask - 1) & fracZ) | ((mask << 1) & fracZ) ) fracZ += (mask << 1); } // Assemble the result and return it. uA.ui = uiZ | (expZ << (27 - shift)) | (fracZ >> (5 + shift)); return uA.p; } luametatex-2.10.08/source/libraries/softposit/source/p32_sub.c000066400000000000000000000050551442250314700242710ustar00rootroot00000000000000 /*============================================================================ This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic Package, Release 3d, by John R. Hauser. Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of California. All rights reserved. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
=============================================================================*/ #include "platform.h" #include "internals.h" posit32_t p32_sub( posit32_t a, posit32_t b ) { union ui32_p32 uA, uB, uZ; uint_fast32_t uiA, uiB; uA.p = a; uiA = uA.ui; uB.p = b; uiB = uB.ui; #ifdef SOFTPOSIT_EXACT uZ.ui.exact = (uiA.ui.exact & uiB.ui.exact); #endif //infinity if ( uiA==0x80000000 || uiB==0x80000000 ){ #ifdef SOFTPOSIT_EXACT uZ.ui.v = 0x80000000; uZ.ui.exact = 0; #else uZ.ui = 0x80000000; #endif return uZ.p; } //Zero else if ( uiA==0 || uiB==0 ){ #ifdef SOFTPOSIT_EXACT uZ.ui.v = (uiA | -uiB); uZ.ui.exact = 0; #else uZ.ui = (uiA | -uiB); #endif return uZ.p; } //different signs if ((uiA^uiB)>>31) return softposit_addMagsP32(uiA, (-uiB & 0xFFFFFFFF)); else return softposit_subMagsP32(uiA, (-uiB & 0xFFFFFFFF)); } luametatex-2.10.08/source/libraries/softposit/source/p32_to_i32.c000066400000000000000000000076721442250314700246060ustar00rootroot00000000000000 /*============================================================================ This C source file is part of the SoftPosit Posit Arithmetic Package by S. H. Leong (Cerlane). Copyright 2017 2018 A*STAR. All rights reserved. This C source file was based on SoftFloat IEEE Floating-Point Arithmetic Package, Release 3d, by John R. Hauser. Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the University of California. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. 
Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. =============================================================================*/ #include "platform.h" #include "internals.h" int_fast32_t pX2_to_i32( posit_2_t pA ){ posit32_t p32 = {.v = pA.v}; return p32_to_i32(p32); } int_fast32_t p32_to_i32( posit32_t pA ){ union ui32_p32 uA; uint_fast64_t iZ64, mask, tmp; int_fast32_t iZ; uint_fast32_t scale = 0, uiA; bool bitLast, bitNPlusOne, bitsMore, sign; uA.p = pA; uiA = uA.ui; if (uiA==0x80000000) return 0; sign = uiA>>31; if (sign) uiA = -uiA & 0xFFFFFFFF; if (uiA <= 0x38000000) return 0; // 0 <= |pA| <= 1/2 rounds to zero. else if (uiA < 0x44000000) iZ = 1; // 1/2 < x < 3/2 rounds to 1. else if (uiA <= 0x4A000000) iZ = 2; // 3/2 <= x <= 5/2 rounds to 2. // For speed. Can be commented out //overflow so return max integer value else if(uiA>0x7FAFFFFF) return (sign) ? (-2147483648) : (2147483647); //return INT_MAX else{ uiA -= 0x40000000; while (0x20000000 & uiA) { scale += 4; uiA = (uiA - 0x20000000) << 1; } uiA <<= 1; // Skip over termination bit, which is 0. 
if (0x20000000 & uiA) scale+=2; // If first exponent bit is 1, increment the scale. if (0x10000000 & uiA) scale++; iZ64 = (((uint64_t)uiA | 0x10000000ULL)&0x1FFFFFFFULL) << 34; // Left-justify fraction in 32-bit result (one left bit padding) mask = 0x4000000000000000 >> scale; // Point to the last bit of the integer part. bitLast = (iZ64 & mask); // Extract the bit, without shifting it. mask >>= 1; tmp = (iZ64 & mask); bitNPlusOne = tmp; // "True" if nonzero. iZ64 ^= tmp; // Erase the bit, if it was set. tmp = iZ64 & (mask - 1); // tmp has any remaining bits. // This is bitsMore iZ64 ^= tmp; // Erase those bits, if any were set. if (bitNPlusOne) { // logic for round to nearest, tie to even if (bitLast | tmp) iZ64 += (mask << 1); } iZ = (uint64_t)iZ64 >> (62 - scale); // Right-justify the integer. } if (sign){ iZ = (-iZ & 0xFFFFFFFF); } return iZ; } luametatex-2.10.08/source/libraries/softposit/source/p32_to_i64.c000066400000000000000000000077011442250314700246040ustar00rootroot00000000000000 /*============================================================================ This C source file is part of the SoftPosit Posit Arithmetic Package by S. H. Leong (Cerlane). Copyright 2017 2018 A*STAR. All rights reserved. This C source file was based on SoftFloat IEEE Floating-Point Arithmetic Package, Release 3d, by John R. Hauser. Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the University of California. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. 
Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. =============================================================================*/ #include "platform.h" #include "internals.h" int_fast64_t pX2_to_i64( posit_2_t pA ){ posit32_t p32 = {.v = pA.v}; return p32_to_i64(p32); } int_fast64_t p32_to_i64( posit32_t pA ){ union ui32_p32 uA; uint_fast64_t mask, tmp; int_fast64_t iZ; uint_fast32_t scale = 0, uiA; bool bitLast, bitNPlusOne, bitsMore, sign; uA.p = pA; uiA = uA.ui; if (uiA==0x80000000) return 0; sign = uiA>>31; if (sign) uiA = -uiA & 0xFFFFFFFF; if (uiA <= 0x38000000) return 0; // 0 <= |pA| <= 1/2 rounds to zero. else if (uiA < 0x44000000) iZ = 1; // 1/2 < x < 3/2 rounds to 1. else if (uiA <= 0x4A000000) iZ = 2; // 3/2 <= x <= 5/2 rounds to 2. //overflow so return max integer value else if(uiA>0x7FFFAFFF) return (sign) ? (-9223372036854775808) : (0x7FFFFFFFFFFFFFFF); else{ uiA -= 0x40000000; while (0x20000000 & uiA) { scale += 4; uiA = (uiA - 0x20000000) << 1; } uiA <<= 1; // Skip over termination bit, which is 0. if (0x20000000 & uiA) scale+=2; // If first exponent bit is 1, increment the scale. 
if (0x10000000 & uiA) scale++; iZ = ((uiA | 0x10000000ULL)&0x1FFFFFFFULL) << 34; // Left-justify fraction in 32-bit result (one left bit padding) if(scale<62){ mask = 0x4000000000000000 >> scale; // Point to the last bit of the integer part. bitLast = (iZ & mask); // Extract the bit, without shifting it. mask >>= 1; tmp = (iZ & mask); bitNPlusOne = tmp; // "True" if nonzero. iZ ^= tmp; // Erase the bit, if it was set. tmp = iZ & (mask - 1); // tmp has any remaining bits. // This is bitsMore iZ ^= tmp; // Erase those bits, if any were set. if (bitNPlusOne) { // logic for round to nearest, tie to even if (bitLast | tmp) iZ += (mask << 1); } iZ = ((uint64_t)iZ) >> (62 - scale); // Right-justify the integer. } else if (scale>62) iZ = (uint64_t)iZ << (scale-62); } if (sign) iZ = -iZ ; return iZ; } luametatex-2.10.08/source/libraries/softposit/source/p32_to_p16.c000066400000000000000000000067151442250314700246140ustar00rootroot00000000000000 /*============================================================================ This C source file is part of the SoftPosit Posit Arithmetic Package by S. H. Leong (Cerlane). Copyright 2017 2018 A*STAR. All rights reserved. This C source file was based on SoftFloat IEEE Floating-Point Arithmetic Package, Release 3d, by John R. Hauser. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. =============================================================================*/ #include "platform.h" #include "internals.h" posit16_t pX2_to_p16( posit_2_t pA ){ posit32_t p32 = {.v = pA.v}; return p32_to_p16(p32); } posit16_t p32_to_p16( posit32_t pA ){ union ui32_p32 uA; union ui16_p16 uZ; uint_fast32_t uiA, tmp=0, exp_frac32A; uint_fast16_t regime, exp_frac=0; bool sign, regSA, bitsMore=0, bitNPlusOne=0; int_fast16_t kA=0, regA; uA.p = pA; uiA = uA.ui; if (uiA==0x80000000 || uiA==0 ){ uZ.ui = (uint16_t)(uiA>>16); return uZ.p; } sign = signP32UI( uiA ); if (sign) uiA = -uiA & 0xFFFFFFFF; if (uiA>0x7F600000) uZ.ui = 0x7FFF; else if (uiA<0x00A00000) uZ.ui = 0x1; else{ regSA = signregP32UI(uiA); //regime tmp = (uiA<<2)&0xFFFFFFFF; if (regSA){ while (tmp>>31){ kA++; tmp= (tmp<<1) & 0xFFFFFFFF; } } else{ kA=-1; while (!(tmp>>31)){ kA--; tmp= (tmp<<1) & 0xFFFFFFFF; } tmp&=0x7FFFFFFF; } //exp and frac exp_frac32A = tmp<<1; printBinary(&exp_frac32A, 32); printf("kA: %d\n", kA); if(kA<0){ regA = (-kA)<<1; if (exp_frac32A&0x80000000) regA--; exp_frac32A = (exp_frac32A<<1) &0xFFFFFFFF; regSA = 0; regime = 0x4000>>regA; } else{ regA = (kA<<1)+1; if (exp_frac32A&0x80000000) regA++; exp_frac32A = (exp_frac32A<<1) &0xFFFFFFFF; regSA=1; regime = 0x7FFF 
- (0x7FFF>>regA); } if ((exp_frac32A>>(17+regA)) & 0x1) bitNPlusOne = 1; if (regA<14) exp_frac = (uint16_t) (exp_frac32A>>(18+regA)); uZ.ui = regime + exp_frac; if (bitNPlusOne){ if ((exp_frac32A<<(15-regA)) & 0xFFFFFFFF) bitsMore=1; uZ.ui += (bitNPlusOne & (uZ.ui&1)) | ( bitNPlusOne & bitsMore); } } if (sign) uZ.ui = (-uZ.ui & 0xFFFF); return uZ.p; } luametatex-2.10.08/source/libraries/softposit/source/p32_to_p8.c000066400000000000000000000066661442250314700245420ustar00rootroot00000000000000 /*============================================================================ This C source file is part of the SoftPosit Posit Arithmetic Package by S. H. Leong (Cerlane). Copyright 2017 2018 A*STAR. All rights reserved. This C source file was based on SoftFloat IEEE Floating-Point Arithmetic Package, Release 3d, by John R. Hauser. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. =============================================================================*/ #include "platform.h" #include "internals.h" posit8_t pX2_to_p8( posit_2_t pA ){ posit32_t p32 = {.v = pA.v}; return p32_to_p8(p32); } posit8_t p32_to_p8( posit32_t pA ){ union ui32_p32 uA; union ui8_p8 uZ; uint_fast32_t uiA, tmp=0, regime; uint_fast32_t exp_frac32A=0; bool sign, regSA, bitsMore=0, bitNPlusOne=0; int_fast8_t kA=0, regA; uA.p = pA; uiA = uA.ui; if (uiA==0x80000000 || uiA==0 ){ uZ.ui = (uint8_t)(uiA>>24) &0xFF; return uZ.p; } sign = signP32UI( uiA ); if (sign) uiA = -uiA & 0xFFFFFFFF; if (uiA>0x66000000) uZ.ui = 0x7F; else if (uiA<0x1A000000) uZ.ui = 0x1; else{ regSA = signregP32UI(uiA); //regime tmp = (uiA<<2)&0xFFFFFFFF; if (regSA){ while (tmp>>31){ kA++; tmp= (tmp<<1) & 0xFFFFFFFF; } } else{ kA=-1; while (!(tmp>>31)){ kA--; tmp= (tmp<<1) & 0xFFFFFFFF; } tmp&=0x7FFFFFFF; } //2nd and 3rd bit exp exp_frac32A = tmp; if(kA<0){ regA = ((-kA)<<2) - (exp_frac32A>>29); if (regA==0) regA=1; regSA = 0; regime = (regA>6) ? 
(0x1) : (0x40>>regA); } else{ (kA==0)?(regA=1 + (exp_frac32A>>29)): (regA = (kA<<2) + (exp_frac32A>>29) +1); regSA=1; regime = 0x7F - (0x7F>>regA); } exp_frac32A = (exp_frac32A<<3) &0xFFFFFFFF; if (regA>5){ uZ.ui = regime; } else{ //exp_frac32A= ((exp_frac32A)&0x3F) >> shift; //first 2 bits already empty (for sign and regime terminating bit) uZ.ui = regime | ( exp_frac32A>>(regA+26) ); } if ( exp_frac32A & (0x2000000<>(7-regA)); uZ.ui += (uZ.ui&1) | bitsMore; } } if (sign) uZ.ui = -uZ.ui & 0xFF; return uZ.p; } luametatex-2.10.08/source/libraries/softposit/source/p32_to_pX1.c000066400000000000000000000072101442250314700246450ustar00rootroot00000000000000 /*============================================================================ This C source file is part of the SoftPosit Posit Arithmetic Package by S. H. Leong (Cerlane). Copyright 2017 2018 A*STAR. All rights reserved. This C source file was based on SoftFloat IEEE Floating-Point Arithmetic Package, Release 3d, by John R. Hauser. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. =============================================================================*/ #include "platform.h" #include "internals.h" posit_1_t p32_to_pX1( posit32_t pA, int x ){ union ui32_p32 uA; union ui32_pX1 uZ; uint_fast32_t uiA, tmp, regime; uint_fast32_t exp_frac32A=0; bool sign, regSA, bitNPlusOne, bitsMore; int_fast8_t kA=0, regA; if (x<2 || x>32){ uZ.ui = 0x80000000; return uZ.p; } uA.p = pA; uiA = uA.ui; if (uiA==0x80000000 || uiA==0 ){ uZ.ui = uiA; return uZ.p; } sign = signP32UI( uiA ); if (sign) uiA = -uiA & 0xFFFFFFFF; if (x==2){ uZ.ui=(uiA>0)?(0x40000000):(0); } else { regSA = signregP32UI(uiA); tmp = (uiA<<2)&0xFFFFFFFF; if (regSA){ while (tmp>>31){ kA++; tmp= (tmp<<1) & 0xFFFFFFFF; } } else{ kA=-1; while (!(tmp>>31)){ kA--; tmp= (tmp<<1) & 0xFFFFFFFF; } tmp&=0x7FFFFFFF; } //exp and frac exp_frac32A = tmp<<1; //printf("kA: %d\n", kA); //printBinary(&exp_frac32A, 32); if(kA<0){ regA = (-kA)<<1; if (exp_frac32A&0x80000000) regA--; exp_frac32A = (exp_frac32A<<1) &0xFFFFFFFF; regSA = 0; regime = 0x40000000>>regA; } else{ regA = (kA<<1)+1; if (exp_frac32A&0x80000000) regA++; exp_frac32A = (exp_frac32A<<1) &0xFFFFFFFF; regSA=1; regime = 0x7FFFFFFF - (0x7FFFFFFF>>regA); } if(regA>(x-2)){ //max or min pos. exp and frac does not matter. uZ.ui=(regSA) ? 
(0x7FFFFFFF & ((int32_t)0x80000000>>(x-1)) ): (0x1 << (32-x)); } else{ bitNPlusOne = (exp_frac32A >>(regA+33-x))&0x1; bitsMore = exp_frac32A&(0x7FFFFFFF>>(x-regA-2)); if (regA<30) exp_frac32A >>=(2+regA); else exp_frac32A=0; uZ.ui = regime + (exp_frac32A & ((int32_t)0x80000000>>(x-1)) ); if (uZ.ui==0) uZ.ui = 0x1<<(32-x); else if (bitNPlusOne){ uZ.ui += (uint32_t)(((uZ.ui>>(32-x))&1) | bitsMore) << (32-x) ; } } } if (sign) uZ.ui = (-uZ.ui & 0xFFFFFFFF); return uZ.p; } luametatex-2.10.08/source/libraries/softposit/source/p32_to_pX2.c000066400000000000000000000055031442250314700246510ustar00rootroot00000000000000 /*============================================================================ This C source file is part of the SoftPosit Posit Arithmetic Package by S. H. Leong (Cerlane). Copyright 2017 2018 A*STAR. All rights reserved. This C source file was based on SoftFloat IEEE Floating-Point Arithmetic Package, Release 3d, by John R. Hauser. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. =============================================================================*/ #include "platform.h" #include "internals.h" posit_2_t pX2_to_pX2( posit_2_t pA, int x ){ posit32_t p32 = {.v = pA.v}; return p32_to_pX2(p32, x); } posit_2_t p32_to_pX2( posit32_t pA, int x ){ union ui32_p32 uA; union ui32_pX2 uZ; uint_fast32_t uiA; bool sign; if (x<2 || x>32){ uZ.ui = 0x80000000; return uZ.p; } uA.p = pA; uiA = uA.ui; if (uiA==0x80000000 || uiA==0 ){ uZ.ui = uiA; return uZ.p; } sign = signP32UI( uiA ); if (sign) uiA = -uiA & 0xFFFFFFFF; if (x==2){ uZ.ui=(uiA>0)?(0x40000000):(0); } else if (x==32 || (((uint32_t)0xFFFFFFFF>>x) & uiA)==0 ){ uZ.ui = uiA; } else { int shift = 32-x; if( (uiA>>shift)!=(0x7FFFFFFF>>shift) ){ if( ((uint32_t)0x80000000>>x) & uiA){ if ( ( ((uint32_t)0x80000000>>(x-1)) & uiA) || (((uint32_t)0x7FFFFFFF>>x) & uiA) ) uiA += (0x1<>(x-1)); if (uZ.ui==0) uZ.ui = 0x1<=0x80000000) return 0; if (uiA <= 0x38000000) { // 0 <= |pA| <= 1/2 rounds to zero. return 0; } else if (uiA < 0x44000000) { // 1/2 < x < 3/2 rounds to 1. return 1; } else if (uiA <= 0x4A000000) { // 3/2 <= x <= 5/2 rounds to 2. return 2; } //overflow so return max integer value else if(uiA>0x7FBFFFFF){ return 0xFFFFFFFF; } else { uiA -= 0x40000000; while (0x20000000 & uiA) { scale += 4; uiA = (uiA - 0x20000000) << 1; } uiA <<= 1; // Skip over termination bit, which is 0. if (0x20000000 & uiA) scale+=2; // If first exponent bit is 1, increment the scale. 
if (0x10000000 & uiA) scale++; iZ64 = (((uint64_t)uiA | 0x10000000ULL)&0x1FFFFFFFULL) << 34; // Left-justify fraction in 32-bit result (one left bit padding) mask = 0x4000000000000000 >> scale; // Point to the last bit of the integer part. bitLast = (iZ64 & mask); // Extract the bit, without shifting it. mask >>= 1; tmp = (iZ64 & mask); bitNPlusOne = tmp; // "True" if nonzero. iZ64 ^= tmp; // Erase the bit, if it was set. tmp = iZ64 & (mask - 1); // tmp has any remaining bits. // This is bitsMore iZ64 ^= tmp; // Erase those bits, if any were set. if (bitNPlusOne) { // logic for round to nearest, tie to even if (bitLast | tmp) iZ64 += (mask << 1); } iZ = (uint64_t)iZ64 >> (62 - scale); // Right-justify the integer. } return iZ; } luametatex-2.10.08/source/libraries/softposit/source/p32_to_ui64.c000066400000000000000000000077171442250314700250000ustar00rootroot00000000000000 /*============================================================================ This C source file is part of the SoftPosit Posit Arithmetic Package by S. H. Leong (Cerlane) and John Gustafson. Copyright 2017 2018 A*STAR. All rights reserved. This C source file was based on SoftFloat IEEE Floating-Point Arithmetic Package, Release 3d, by John R. Hauser. Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the University of California. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. 
Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. =============================================================================*/ #include "platform.h" #include "internals.h" uint_fast64_t pX2_to_ui64( posit_2_t pA ) { posit32_t p32 = {.v = pA.v}; return p32_to_ui64(p32); } uint_fast64_t p32_to_ui64( posit32_t pA ) { union ui32_p32 uA; uint_fast64_t mask, iZ, tmp; uint_fast32_t scale = 0, uiA; bool bitLast, bitNPlusOne, bitsMore; uA.p = pA; uiA = uA.ui; //NaR //if (uiA==0x80000000) return 0; //negative if (uiA>=0x80000000) return 0; if (uiA <= 0x38000000) { // 0 <= |pA| <= 1/2 rounds to zero. return 0; } else if (uiA < 0x44000000) { // 1/2 < x < 3/2 rounds to 1. return 1; } else if (uiA <= 0x4A000000) { // 3/2 <= x <= 5/2 rounds to 2. return 2; } else if (uiA>0x7FFFBFFF){ return 0xFFFFFFFFFFFFFFFFULL; } else { uiA -= 0x40000000; while (0x20000000 & uiA) { scale += 4; uiA = (uiA - 0x20000000) << 1; } uiA <<= 1; // Skip over termination bit, which is 0. if (0x20000000 & uiA) scale+=2; // If first exponent bit is 1, increment the scale. 
if (0x10000000 & uiA) scale++; iZ = (((uint64_t)uiA | 0x10000000ULL)&0x1FFFFFFFULL) << 34; // Left-justify fraction in 32-bit result (one left bit padding) if(scale<62){ mask = 0x4000000000000000 >> scale; // Point to the last bit of the integer part. bitLast = (iZ & mask); // Extract the bit, without shifting it. mask >>= 1; tmp = (iZ & mask); bitNPlusOne = tmp; // "True" if nonzero. iZ ^= tmp; // Erase the bit, if it was set. tmp = iZ & (mask - 1); // tmp has any remaining bits. // This is bitsMore iZ ^= tmp; // Erase those bits, if any were set. if (bitNPlusOne) { // logic for round to nearest, tie to even if (bitLast | tmp) iZ += (mask << 1); } iZ = (uint64_t)iZ >> (62 - scale); // Right-justify the integer. } else if (scale>62){ iZ = (uint64_t)iZ << (scale-62); } } return iZ; } luametatex-2.10.08/source/libraries/softposit/source/p8_add.c000066400000000000000000000047571442250314700241630ustar00rootroot00000000000000 /*============================================================================ This C source file is part of the SoftPosit Posit Arithmetic Package by S. H. Leong (Cerlane). Copyright 2017, 2018 A*STAR. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. =============================================================================*/ #include "platform.h" #include "internals.h" posit8_t p8_add( posit8_t a, posit8_t b ) { union ui8_p8 uA, uB; uint_fast8_t uiA, uiB; union ui8_p8 uZ; uA.p = a; uiA = uA.ui; uB.p = b; uiB = uB.ui; #ifdef SOFTPOSIT_EXACT uZ.ui.exact = (uiA.ui.exact & uiB.ui.exact); #endif //Zero or infinity if (uiA==0 || uiB==0){ // Not required but put here for speed #ifdef SOFTPOSIT_EXACT uZ.ui.v = uiA | uiB; uZ.ui.exact = (uiA.ui.exact & uiB.ui.exact); #else uZ.ui = uiA | uiB; #endif return uZ.p; } else if ( uiA==0x80 || uiB==0x80 ){ #ifdef SOFTPOSIT_EXACT uZ.ui.v = 0x80; uZ.ui.exact = 0; #else uZ.ui = 0x80; #endif return uZ.p; } //different signs if ((uiA^uiB)>>7) return softposit_subMagsP8(uiA, uiB); else return softposit_addMagsP8(uiA, uiB); } luametatex-2.10.08/source/libraries/softposit/source/p8_div.c000066400000000000000000000103011442250314700241730ustar00rootroot00000000000000 /*============================================================================ This C source file is part of the SoftPosit Posit Arithmetic Package by S. H. Leong (Cerlane). Copyright 2017, 2018 A*STAR. All rights reserved. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
=============================================================================*/ #include #include "platform.h" #include "internals.h" posit8_t p8_div( posit8_t pA, posit8_t pB ) { union ui8_p8 uA, uB, uZ; uint_fast8_t uiA, uiB, fracA, fracB, regA, regime, tmp; bool signA, signB, signZ, regSA, regSB, bitNPlusOne=0, bitsMore=0, rcarry; int_fast8_t kA=0; uint_fast16_t frac16A, frac16Z, rem; div_t divresult; uA.p = pA; uiA = uA.ui; uB.p = pB; uiB = uB.ui; //Zero or infinity if ( uiA==0x80 || uiB==0x80 || uiB==0){ #ifdef SOFTPOSIT_EXACT uZ.ui.v = 0x80; uZ.ui.exact = 0; #else uZ.ui = 0x80; #endif return uZ.p; } else if (uiA==0){ #ifdef SOFTPOSIT_EXACT uZ.ui.v = 0; if ( (uiA==0 && uiA.ui.exact) || (uiB==0 && uiB.ui.exact) ) uZ.ui.exact = 1; else uZ.ui.exact = 0; #else uZ.ui = 0; #endif return uZ.p; } signA = signP8UI( uiA ); signB = signP8UI( uiB ); signZ = signA ^ signB; if(signA) uiA = (-uiA & 0xFF); if(signB) uiB = (-uiB & 0xFF); regSA = signregP8UI(uiA); regSB = signregP8UI(uiB); tmp = (uiA<<2) & 0xFF; if (regSA){ while (tmp>>7){ kA++; tmp= (tmp<<1) & 0xFF; } } else{ kA=-1; while (!(tmp>>7)){ kA--; tmp= (tmp<<1) & 0xFF; } tmp&=0x7F; } fracA = (0x80 | tmp); frac16A = fracA<<7; //hidden bit 2nd bit tmp = (uiB<<2) & 0xFF; if (regSB){ while (tmp>>7){ kA--; tmp= (tmp<<1) & 0xFF; } fracB = (0x80 | tmp); } else{ kA++; while (!(tmp>>7)){ kA++; tmp= (tmp<<1) & 0xFF; } tmp&=0x7F; fracB = (0x80 | (0x7F & tmp)); } divresult = div (frac16A,fracB); frac16Z = divresult.quot; rem = divresult.rem; if (frac16Z!=0){ rcarry = frac16Z >> 7; // this is the hidden bit (7th bit) , extreme right bit is bit 0 if (!rcarry){ kA --; frac16Z<<=1; } } if(kA<0){ regA = (-kA & 0xFF); regSA = 0; regime = 0x40>>regA; } else{ regA = kA+1; regSA=1; regime = 0x7F-(0x7F>>regA); } if(regA>6){ //max or min pos. exp and frac does not matter. (regSA) ? 
(uZ.ui= 0x7F): (uZ.ui=0x1); } else{ //remove carry and rcarry bits and shift to correct position frac16Z &=0x7F; fracA = (uint_fast16_t)frac16Z >> (regA+1); bitNPlusOne = (0x1 & (frac16Z>> regA)) ; uZ.ui = packToP8UI(regime, fracA); //uZ.ui = (uint16_t) (regime) + ((uint16_t) (expA)<< (13-regA)) + ((uint16_t)(fracA)); if (bitNPlusOne){ (((1<>7){ kA++; tmp= (tmp<<1) & 0xFF; } } else{ kA=-1; while (!(tmp>>7)){ kA--; tmp= (tmp<<1) & 0xFF; } tmp&=0x7F; } fracA = (0x80 | tmp); tmp = (uiB<<2) & 0xFF; if (regSB){ while (tmp>>7){ kA++; tmp= (tmp<<1) & 0xFF; } } else{ kA--; while (!(tmp>>7)){ kA--; tmp= (tmp<<1) & 0xFF; } tmp&=0x7F; } frac16Z = (uint_fast16_t) fracA * (0x80 | tmp); rcarry = frac16Z>>15;//1st bit of frac32Z if (rcarry){ kA++; frac16Z>>=1; } if(kA<0){ regA = (-kA & 0xFF); regSA = 0; regime = 0x40>>regA; } else{ regA = kA+1; regSA=1; regime = 0x7F-(0x7F>>regA); } if(regA>6){ //max or min pos. exp and frac does not matter. (regSA) ? (uZ.ui= 0x7F): (uZ.ui=0x1); } else{ //remove carry and rcarry bits and shift to correct position frac16Z = (frac16Z&0x3FFF) >> regA; fracA = (uint_fast8_t) (frac16Z>>8); bitNPlusOne = (0x80 & frac16Z) ; uZ.ui = packToP8UI(regime, fracA); //n+1 frac bit is 1. Need to check if another bit is 1 too if not round to even if (bitNPlusOne){ if (0x7F & frac16Z) bitsMore=1; uZ.ui += (uZ.ui&1) | bitsMore; } } if (signZ) uZ.ui = -uZ.ui & 0xFF; return uZ.p; } luametatex-2.10.08/source/libraries/softposit/source/p8_mulAdd.c000066400000000000000000000041141442250314700246240ustar00rootroot00000000000000/*============================================================================ This C source file is part of the SoftPosit Posit Arithmetic Package by S. H. Leong (Cerlane). Copyright 2017, 2018 A*STAR. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. 
Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. =============================================================================*/ #include "platform.h" #include "internals.h" posit8_t p8_mulAdd( posit8_t a, posit8_t b, posit8_t c ) { union ui8_p8 uA; uint_fast8_t uiA; union ui8_p8 uB; uint_fast8_t uiB; union ui8_p8 uC; uint_fast8_t uiC; uA.p = a; uiA = uA.ui; uB.p = b; uiB = uB.ui; uC.p = c; uiC = uC.ui; return softposit_mulAddP8( uiA, uiB, uiC, 0 ); } luametatex-2.10.08/source/libraries/softposit/source/p8_roundToInt.c000066400000000000000000000060741442250314700255320ustar00rootroot00000000000000 /*============================================================================ This C source file is part of the SoftPosit Posit Arithmetic Package by S. H. 
Leong (Cerlane) and John Gustafson. Copyright 2017, 2018 A*STAR. All rights reserved. This C source file was based on SoftFloat IEEE Floating-Point Arithmetic Package, Release 3d, by John R. Hauser. Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the University of California. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
=============================================================================*/ #include "platform.h" #include "internals.h" posit8_t p8_roundToInt( posit8_t pA ) { union ui8_p8 uA; uint_fast8_t mask = 0x20, scale=0, tmp=0, uiA; bool bitLast, bitNPlusOne, sign; uA.p = pA; uiA = uA.ui; sign = (uiA > 0x80); // sign is True if pA > NaR. if (sign) uiA = -uiA & 0xFF; if (uiA <= 0x20) { // 0 <= |pA| <= 1/2 rounds to zero. uA.ui = 0; return uA.p; } else if (uiA < 0x50) { // 1/2 < x < 3/2 rounds to 1. uA.ui = 0x40; } else if (uiA <= 0x64) { // 3/2 <= x <= 5/2 rounds to 2. uA.ui = 0x60; } else if (uiA >= 0x78) { // If |A| is 8 or greater, leave it unchanged. return uA.p; // This also takes care of the NaR case, 0x80. } else { while (mask & uiA) { scale += 1; mask >>= 1; } mask >>= scale; bitLast = (uiA & mask); mask >>= 1; tmp = (uiA & mask); bitNPlusOne = tmp; uiA ^= tmp; tmp = uiA & (mask - 1); //bitsMore uiA ^= tmp; if (bitNPlusOne) { if (bitLast | tmp) uiA += (mask << 1); } uA.ui = uiA; } if (sign) uA.ui = -uA.ui & 0xFF; return uA.p; } luametatex-2.10.08/source/libraries/softposit/source/p8_sqrt.c000066400000000000000000000050661442250314700244160ustar00rootroot00000000000000 /*============================================================================ This C source file is part of the SoftPosit Posit Arithmetic Package by S. H. Leong (Cerlane). Copyright 2017 A*STAR. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. 
Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. =============================================================================*/ #include "platform.h" #include "internals.h" posit8_t p8_sqrt( posit8_t pA ) { union ui8_p8 uA; uint_fast8_t uiA; static const uint8_t p8Sqrt [] = {0, 8, 11, 14, 16, 18, 20, 21, 23, 24, 25, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 38, 39, 40, 41, 42, 42, 43, 44, 45, 45, 46, 47, 47, 48, 49, 49, 50, 51, 51, 52, 52, 53, 54, 54, 55, 55, 56, 57, 57, 58, 58, 59, 59, 60, 60, 61, 61, 62, 62, 63, 63, 64, 64, 65, 65, 66, 66, 67, 67, 68, 68, 69, 69, 70, 70, 70, 71, 71, 72, 72, 72, 73, 73, 74, 74, 74, 75, 75, 75, 76, 76, 77, 77, 77, 79, 80, 81, 83, 84, 85, 86, 87, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 100, 101, 102, 103, 105, 108, 110, 112, 114, 115, 120}; uA.p = pA; uiA = uA.ui; if (uiA>=0x80){ uA.ui = 0x80; return uA.p; } uA.ui = p8Sqrt[uiA]; return uA.p; } luametatex-2.10.08/source/libraries/softposit/source/p8_sub.c000066400000000000000000000047171442250314700242200ustar00rootroot00000000000000 /*============================================================================ 
This C source file is part of the SoftPosit Posit Arithmetic Package by S. H. Leong (Cerlane). Copyright 2017 A*STAR. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
=============================================================================*/ #include "platform.h" #include "internals.h" posit8_t p8_sub( posit8_t a, posit8_t b ){ union ui8_p8 uA, uB; uint_fast8_t uiA, uiB; union ui8_p8 uZ; uA.p = a; uiA = uA.ui; uB.p = b; uiB = uB.ui; #ifdef SOFTPOSIT_EXACT uZ.ui.exact = (uiA.ui.exact & uiB.ui.exact); #endif //infinity if ( uiA==0x80 || uiB==0x80 ){ #ifdef SOFTPOSIT_EXACT uZ.ui.v = 0x80; uZ.ui.exact = 0; #else uZ.ui = 0x80; #endif return uZ.p; } //Zero else if ( uiA==0 || uiB==0 ){ #ifdef SOFTPOSIT_EXACT uZ.ui.v = (uiA | -uiB); uZ.ui.exact = 0; #else uZ.ui = (uiA | -uiB); #endif return uZ.p; } //different signs if (signP8UI(uiA^uiB)) return softposit_addMagsP8(uiA, (-uiB & 0xFF)); else return softposit_subMagsP8(uiA, (-uiB & 0xFF)); } luametatex-2.10.08/source/libraries/softposit/source/p8_to_i32.c000066400000000000000000000076661442250314700245340ustar00rootroot00000000000000 /*============================================================================ This C source file is part of the SoftPosit Posit Arithmetic Package by S. H. Leong (Cerlane) and John Gustafson. Copyright 2017, 2018 A*STAR. All rights reserved. This C source file was based on SoftFloat IEEE Floating-Point Arithmetic Package, Release 3d, by John R. Hauser. Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the University of California. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. 
Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. =============================================================================*/ #include "platform.h" #include "internals.h" int_fast32_t p8_to_i32( posit8_t pA ){ union ui8_p8 uA; int_fast32_t mask, iZ, tmp; uint_fast8_t scale = 0, uiA; bool bitLast, bitNPlusOne, sign; uA.p = pA; uiA = uA.ui; // Copy of the input. //NaR if (uiA==0x80) return 0; sign = (uiA > 0x80); // sign is True if pA > NaR. if (sign) uiA = -uiA & 0xFF; // A is now |A|. if (uiA <= 0x20) { // 0 <= |pA| <= 1/2 rounds to zero. return 0; } else if (uiA < 0x50) { // 1/2 < x < 3/2 rounds to 1. iZ = 1; } else { // Decode the posit, left-justifying as we go. uiA -= 0x40; // Strip off first regime bit (which is a 1). while (0x20 & uiA) { // Increment scale one for each regime sign bit. scale ++; // Regime sign bit is always 1 in this range. uiA = (uiA - 0x20) << 1; // Remove the bit; line up the next regime bit. } uiA <<= 1; // Skip over termination bit, which is 0. 
iZ = ((uint32_t)uiA | 0x40) << 24; // Left-justify fraction in 32-bit result (one left bit padding) mask = 0x40000000 >> scale; // Point to the last bit of the integer part. bitLast = (iZ & mask); // Extract the bit, without shifting it. mask >>= 1; tmp = (iZ & mask); bitNPlusOne = tmp; // "True" if nonzero. iZ ^= tmp; // Erase the bit, if it was set. tmp = iZ & (mask - 1); // tmp has any remaining bits. // This is bitsMore iZ ^= tmp; // Erase those bits, if any were set. if (bitNPlusOne) { // logic for round to nearest, tie to even if (bitLast | tmp) iZ += (mask << 1); } iZ = (uint32_t)iZ >> (30 - scale); // Right-justify the integer. } if (sign) iZ = -iZ; // Apply the sign of the input. return iZ; } luametatex-2.10.08/source/libraries/softposit/source/p8_to_i64.c000066400000000000000000000075221442250314700245300ustar00rootroot00000000000000 /*============================================================================ This C source file is part of the SoftPosit Posit Arithmetic Package by S. H. Leong (Cerlane) and John Gustafson. Copyright 2017 2018 A*STAR. All rights reserved. This C source file was based on SoftFloat IEEE Floating-Point Arithmetic Package, Release 3d, by John R. Hauser. Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the University of California. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. =============================================================================*/ #include "platform.h" #include "internals.h" int_fast64_t p8_to_i64( posit8_t pA ) { union ui8_p8 uA; int_fast64_t mask, iZ, tmp; uint_fast8_t scale = 0, uiA; bool bitLast, bitNPlusOne, sign; uA.p = pA; uiA = uA.ui; // Copy of the input. //NaR if (uiA==0x80) return 0; sign = uiA>>7; if (sign) uiA = -uiA & 0xFF; if (uiA <= 0x20) { // 0 <= |pA| <= 1/2 rounds to zero. return 0; } else if (uiA < 0x50) { // 1/2 < x < 3/2 rounds to 1. iZ = 1; } else { // Decode the posit, left-justifying as we go. uiA -= 0x40; // Strip off first regime bit (which is a 1). while (0x20 & uiA) { // Increment scale by 1 for each regime sign bit. scale ++; // Regime sign bit is always 1 in this range. uiA = (uiA - 0x20) << 1; // Remove the bit; line up the next regime bit. } uiA <<= 1; // Skip over termination bit, which is 0. iZ = ((uint64_t)uiA | 0x40) << 55; // Left-justify fraction in 32-bit result (one left bit padding) mask = 0x2000000000000000 >> scale; // Point to the last bit of the integer part. bitLast = (iZ & mask); // Extract the bit, without shifting it. mask >>= 1; tmp = (iZ & mask); bitNPlusOne = tmp; // "True" if nonzero. iZ ^= tmp; // Erase the bit, if it was set. 
tmp = iZ & (mask - 1); // tmp has any remaining bits. // This is bitsMore iZ ^= tmp; // Erase those bits, if any were set. if (bitNPlusOne) { // logic for round to nearest, tie to even if (bitLast | tmp) iZ += (mask<<1) ; } iZ = (uint64_t)iZ >> (61 - scale); // Right-justify the integer. } if (sign) iZ = -iZ; // Apply the sign of the input. return iZ; } luametatex-2.10.08/source/libraries/softposit/source/p8_to_p16.c000066400000000000000000000057651442250314700245430ustar00rootroot00000000000000/*============================================================================ This C source file is part of the SoftPosit Posit Arithmetic Package by S. H. Leong (Cerlane). Copyright 2017 2018 A*STAR. All rights reserved. This C source file was based on SoftFloat IEEE Floating-Point Arithmetic Package, Release 3d, by John R. Hauser. Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the University of California. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. =============================================================================*/ #include "platform.h" #include "internals.h" posit16_t p8_to_p16( posit8_t pA ) { union ui8_p8 uA; union ui16_p16 uZ; uint_fast8_t uiA, tmp; uint_fast16_t exp_frac16A=0, regime; bool sign, regSA; int_fast8_t kA=0, regA; uA.p = pA; uiA = uA.ui; //NaR or zero if (uiA==0x80 || uiA==0 ){ uZ.ui = (uint16_t)uiA<<8; return uZ.p; } sign = signP8UI( uiA ); if (sign) uiA = -uiA & 0xFF; regSA = signregP8UI(uiA); tmp = (uiA<<2) & 0xFF; if (regSA){ while (tmp>>7){ kA++; tmp= (tmp<<1) & 0xFF; } } else{ kA=-1; while (!(tmp>>7)){ kA--; tmp= (tmp<<1) & 0xFF; } tmp&=0x7F; } exp_frac16A = tmp<<8; if(kA<0){ regA = -kA; if (regA&0x1) exp_frac16A |= 0x8000; regA = (regA+1)>>1; if (regA==0) regA=1; regSA = 0; regime = 0x4000>>regA; } else{ if (kA&0x1) exp_frac16A |= 0x8000; regA = (kA+2)>>1; if (regA==0) regA=1; regSA=1; regime = 0x7FFF - (0x7FFF>>regA); } exp_frac16A >>=(regA+2); //2 because of sign and regime terminating bit uZ.ui = regime + exp_frac16A; if (sign) uZ.ui = -uZ.ui & 0xFFFF; return uZ.p; } luametatex-2.10.08/source/libraries/softposit/source/p8_to_p32.c000066400000000000000000000061031442250314700245240ustar00rootroot00000000000000/*============================================================================ This C source file is part of the SoftPosit Posit Arithmetic Package by S. H. Leong (Cerlane). Copyright 2017 2018 A*STAR. All rights reserved. 
This C source file was based on SoftFloat IEEE Floating-Point Arithmetic Package, Release 3d, by John R. Hauser. Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the University of California. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
=============================================================================*/ #include "platform.h" #include "internals.h" posit32_t p8_to_p32( posit8_t pA ) { union ui8_p8 uA; union ui32_p32 uZ; uint_fast8_t uiA, tmp; uint_fast32_t exp_frac32A=0, regime; bool sign, regSA; int_fast8_t kA=0, regA; uA.p = pA; uiA = uA.ui; if (uiA==0x80 || uiA==0 ){ uZ.ui = (uint32_t)uiA<<24; return uZ.p; } sign = signP8UI( uiA ); if (sign) uiA = -uiA & 0xFF; regSA = signregP8UI(uiA); tmp = (uiA<<2) & 0xFF; if (regSA){ while (tmp>>7){ kA++; tmp= (tmp<<1) & 0xFF; } } else{ kA=-1; while (!(tmp>>7)){ kA--; tmp= (tmp<<1) & 0xFF; } tmp&=0x7F; } exp_frac32A = tmp<<22; if(kA<0){ regA = -kA; // Place exponent bits exp_frac32A |= ( ((regA&0x1)| ((regA+1)&0x2))<<29 ); regA = (regA+3)>>2; if (regA==0) regA=1; regSA = 0; regime = 0x40000000>>regA; } else{ exp_frac32A |= ( (kA&0x3) << 29 ); regA = (kA+4)>>2; if (regA==0) regA=1; regSA=1; regime = 0x7FFFFFFF - (0x7FFFFFFF>>regA); } exp_frac32A =((uint_fast32_t)exp_frac32A) >> (regA+1); //2 because of sign and regime terminating bit uZ.ui = regime + exp_frac32A; if (sign) uZ.ui = -uZ.ui & 0xFFFFFFFF; return uZ.p; } luametatex-2.10.08/source/libraries/softposit/source/p8_to_pX1.c000066400000000000000000000067651442250314700246060ustar00rootroot00000000000000/*============================================================================ This C source file is part of the SoftPosit Posit Arithmetic Package by S. H. Leong (Cerlane). Copyright 2017 2018 A*STAR. All rights reserved. This C source file was based on SoftFloat IEEE Floating-Point Arithmetic Package, Release 3d, by John R. Hauser. Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the University of California. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. 
Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
=============================================================================*/ #include "platform.h" #include "internals.h" posit_1_t p8_to_pX1( posit8_t pA, int x ) { union ui8_p8 uA; union ui32_pX1 uZ; uint_fast8_t uiA, tmp; uint_fast32_t exp_frac32A=0, regime; bool sign, regSA; int_fast8_t kA=0, regA; if (x<2 || x>32){ uZ.ui = 0x80000000; return uZ.p; } uA.p = pA; uiA = uA.ui; if (uiA==0x80 || uiA==0 ){ uZ.ui = (uint32_t)uiA<<24; return uZ.p; } sign = signP8UI( uiA ); if (sign) uiA = -uiA & 0xFF; if(x==2){ uZ.ui=(uiA>0)?(0x40000000):(0); } else{ regSA = signregP8UI(uiA); tmp = (uiA<<2) & 0xFF; if (regSA){ while (tmp>>7){ kA++; tmp= (tmp<<1) & 0xFF; } } else{ kA=-1; while (!(tmp>>7)){ kA--; tmp= (tmp<<1) & 0xFF; } tmp&=0x7F; } exp_frac32A = tmp<<24; if(kA<0){ regA = -kA; // Place exponent bits if (regA&0x1) exp_frac32A |= 0x80000000; regA = (regA+1)>>1; if (regA==0) regA=1; regSA = 0; regime = 0x40000000>>regA; } else{ if (kA&0x1) exp_frac32A |= 0x80000000; regA = (kA+2)>>1; if (regA==0) regA=1; regSA=1; regime = 0x7FFFFFFF - (0x7FFFFFFF>>regA); } exp_frac32A >>=(regA+2); //2 because of sign and regime terminating bit uZ.ui = regime + exp_frac32A; int shift = 32-x; if( (uZ.ui>>shift)!=(0x7FFFFFFF>>shift) ){ if( ((uint32_t)0x80000000>>x) & uZ.ui){ if ( ( ((uint32_t)0x80000000>>(x-1)) & uZ.ui) || (((uint32_t)0x7FFFFFFF>>x) & uZ.ui) ) uZ.ui += (0x1<>(x-1)); if (uZ.ui==0) uZ.ui = 0x1<32){ uZ.ui = 0x80000000; return uZ.p; } uA.p = pA; uiA = uA.ui; if (uiA==0x80 || uiA==0 ){ uZ.ui = (uint32_t)uiA<<24; return uZ.p; } sign = signP8UI( uiA ); if (sign) uiA = -uiA & 0xFF; if(x==2){ uZ.ui=(uiA>0)?(0x40000000):(0); } else{ regSA = signregP8UI(uiA); tmp = (uiA<<2) & 0xFF; if (regSA){ while (tmp>>7){ kA++; tmp= (tmp<<1) & 0xFF; } } else{ kA=-1; while (!(tmp>>7)){ kA--; tmp= (tmp<<1) & 0xFF; } tmp&=0x7F; } exp_frac32A = tmp<<22; if(kA<0){ regA = -kA; // Place exponent bits exp_frac32A |= ( ((regA&0x1)| ((regA+1)&0x2))<<29 ); regA = (regA+3)>>2; if (regA==0) regA=1; regSA = 0; 
regime = 0x40000000>>regA; } else{ exp_frac32A |= ( (kA&0x3) << 29 ); regA = (kA+4)>>2; if (regA==0) regA=1; regSA=1; regime = 0x7FFFFFFF - (0x7FFFFFFF>>regA); } exp_frac32A =((uint_fast32_t)exp_frac32A) >> (regA+1); //2 because of sign and regime terminating bit uZ.ui = regime + exp_frac32A; int shift = 32-x; if( (uZ.ui>>shift)!=(0x7FFFFFFF>>shift) ){ if( ((uint32_t)0x80000000>>x) & uZ.ui){ if ( ( ((uint32_t)0x80000000>>(x-1)) & uZ.ui) || (((uint32_t)0x7FFFFFFF>>x) & uZ.ui) ) uZ.ui += (0x1<>(x-1)); if (uZ.ui==0) uZ.ui = 0x1<=0x80) return 0; //negative if (uiA <= 0x20) { // 0 <= |pA| <= 1/2 rounds to zero. return 0; } else if (uiA < 0x50) { // 1/2 < x < 3/2 rounds to 1. iZ = 1; } else { // Decode the posit, left-justifying as we go. uiA -= 0x40; // Strip off first regime bit (which is a 1). while (0x20 & uiA) { // Increment scale by 1 for each regime sign bit. scale ++; // Regime sign bit is always 1 in this range. uiA = (uiA - 0x20) << 1; // Remove the bit; line up the next regime bit. } uiA <<= 1; // Skip over termination bit, which is 0. iZ = ((uint32_t)uiA | 0x40) << 24; // Left-justify fraction in 32-bit result (one left bit padding) mask = 0x40000000 >> scale; // Point to the last bit of the integer part. bitLast = (iZ & mask); // Extract the bit, without shifting it. mask >>= 1; tmp = (iZ & mask); bitNPlusOne = tmp; // "True" if nonzero. iZ ^= tmp; // Erase the bit, if it was set. tmp = iZ & (mask - 1); // tmp has any remaining bits. // This is bitsMore iZ ^= tmp; // Erase those bits, if any were set. if (bitNPlusOne) { // logic for round to nearest, tie to even if (bitLast | tmp) iZ += (mask<<1) ; } iZ = iZ >> (30 - scale); // Right-justify the integer. 
} return iZ; } luametatex-2.10.08/source/libraries/softposit/source/p8_to_ui64.c000066400000000000000000000073761442250314700247240ustar00rootroot00000000000000 /*============================================================================ This C source file is part of the SoftPosit Posit Arithmetic Package by S. H. Leong (Cerlane) and John Gustafson. Copyright 2017 2018 A*STAR. All rights reserved. This C source file was based on SoftFloat IEEE Floating-Point Arithmetic Package, Release 3d, by John R. Hauser. Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the University of California. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. =============================================================================*/ #include "platform.h" #include "internals.h" uint_fast64_t p8_to_ui64( posit8_t pA ) { union ui8_p8 uA; uint_fast64_t mask, iZ, tmp; uint_fast8_t scale = 0, uiA; bool bitLast, bitNPlusOne; uA.p = pA; uiA = uA.ui; // Copy of the input. //NaR //if (uiA==0x80) return 0; if (uiA>=0x80) return 0; //negative if (uiA <= 0x20) { // 0 <= |pA| <= 1/2 rounds to zero. return 0; } else if (uiA < 0x50) { // 1/2 < x < 3/2 rounds to 1. iZ = 1; } else { // Decode the posit, left-justifying as we go. uiA -= 0x40; // Strip off first regime bit (which is a 1). while (0x20 & uiA) { // Increment scale by 1 for each regime sign bit. scale ++; // Regime sign bit is always 1 in this range. uiA = (uiA - 0x20) << 1; // Remove the bit; line up the next regime bit. } uiA <<= 1; // Skip over termination bit, which is 0. iZ = ((uint64_t)uiA | 0x40) << 55; // Left-justify fraction in 32-bit result (one left bit padding) mask = 0x2000000000000000 >> scale; // Point to the last bit of the integer part. bitLast = (iZ & mask); // Extract the bit, without shifting it. mask >>= 1; tmp = (iZ & mask); bitNPlusOne = tmp; // "True" if nonzero. iZ ^= tmp; // Erase the bit, if it was set. tmp = iZ & (mask - 1); // tmp has any remaining bits. // This is bitsMore iZ ^= tmp; // Erase those bits, if any were set. 
if (bitNPlusOne) { // logic for round to nearest, tie to even if (bitLast | tmp) iZ += (mask<<1) ; } iZ = (uint64_t)iZ >> (61 - scale); // Right-justify the integer. } return iZ; } luametatex-2.10.08/source/libraries/softposit/source/pX1_add.c000066400000000000000000000050621442250314700242720ustar00rootroot00000000000000 /*============================================================================ This C source file is part of the SoftPosit Posit Arithmetic Package by S. H. Leong (Cerlane). Copyright 2017, 2018 A*STAR. All rights reserved. This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic Package, Release 3d, by John R. Hauser. Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of California. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. =============================================================================*/ #include "platform.h" #include "internals.h" posit_1_t pX1_add( posit_1_t a, posit_1_t b, int x ){ union ui32_pX1 uA, uB, uZ; uint_fast32_t uiA, uiB; if (x<2 || x>32){ uZ.ui = 0x80000000; return uZ.p; } uA.p = a; uiA = uA.ui; uB.p = b; uiB = uB.ui; //Zero or infinity if (uiA==0 || uiB==0){ // Not required but put here for speed uZ.ui = uiA | uiB; return uZ.p; } else if ( uiA==0x80000000 || uiB==0x80000000 ){ uZ.ui = 0x80000000; return uZ.p; } //different signs if ((uiA^uiB)>>31) return softposit_subMagsPX1(uiA, uiB, x); else return softposit_addMagsPX1(uiA, uiB, x); } luametatex-2.10.08/source/libraries/softposit/source/pX1_div.c000066400000000000000000000122661442250314700243300ustar00rootroot00000000000000/*============================================================================ This C source file is part of the SoftPosit Posit Arithmetic Package by S. H. Leong (Cerlane). Copyright 2017, 2018 A*STAR. All rights reserved. This C source file was based on SoftFloat IEEE Floating-Point Arithmetic Package, Release 3d, by John R. Hauser. Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the University of California. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. 
Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
=============================================================================*/ #include #include "platform.h" #include "internals.h" posit_1_t pX1_div( posit_1_t pA, posit_1_t pB, int x ) { union ui32_pX1 uA, uB, uZ; int regA; uint_fast32_t uiA, uiB, fracA, fracB, regime, tmp; bool signA, signB, signZ, regSA, regSB, bitNPlusOne=0, bitsMore=0, rcarry; int_fast8_t kA=0; int_fast32_t expA; uint_fast64_t frac64A, frac64Z, rem; lldiv_t divresult; if (x<2 || x>32){ uZ.ui = 0x80000000; return uZ.p; } uA.p = pA; uiA = uA.ui; uB.p = pB; uiB = uB.ui; //Zero or infinity if ( uiA==0x80000000 || uiB==0x80000000 || uiB==0){ #ifdef SOFTPOSIT_EXACT uZ.ui.v = 0x80000000; uZ.ui.exact = 0; #else uZ.ui = 0x80000000; #endif return uZ.p; } else if (uiA==0){ #ifdef SOFTPOSIT_EXACT uZ.ui.v = 0; if ( (uiA==0 && uiA.ui.exact) || (uiB==0 && uiB.ui.exact) ) uZ.ui.exact = 1; else uZ.ui.exact = 0; #else uZ.ui = 0; #endif return uZ.p; } signA = signP32UI( uiA ); signB = signP32UI( uiB ); signZ = signA ^ signB; if(signA) uiA = (-uiA & 0xFFFFFFFF); if(signB) uiB = (-uiB & 0xFFFFFFFF); regSA = signregP32UI(uiA); regSB = signregP32UI(uiB); if (x==2){ uZ.ui = 0x40000000; } else{ tmp = (uiA<<2)&0xFFFFFFFF; if (regSA){ while (tmp>>31){ kA++; tmp= (tmp<<1) & 0xFFFFFFFF; } } else{ kA=-1; while (!(tmp>>31)){ kA--; tmp= (tmp<<1) & 0xFFFFFFFF; } tmp&=0x7FFFFFFF; } expA = tmp>>30; //to get 1 bits fracA = (tmp | 0x40000000) & 0x7FFFFFFF; frac64A = (uint64_t) fracA << 30; tmp = (uiB<<2)&0xFFFFFFFF; if (regSB){ while (tmp>>31){ kA--; tmp= (tmp<<1) & 0xFFFFFFFF; } } else{ kA++; while (!(tmp>>31)){ kA++; tmp= (tmp<<1) & 0xFFFFFFFF; } tmp&=0x7FFFFFFF; } expA -= tmp>>30; fracB = (tmp | 0x40000000) & 0x7FFFFFFF; divresult = lldiv (frac64A,(uint_fast64_t)fracB); frac64Z = divresult.quot; rem = divresult.rem; if (expA<0){ expA=1; kA--; } if (frac64Z!=0){ rcarry = frac64Z >> 30; // this is the hidden bit (14th bit) , extreme right bit is bit 0 if (!rcarry){ if (expA==0) kA --; expA^=1; frac64Z<<=1; } } } if(kA<0){ 
regA = -kA; regSA = 0; regime = 0x40000000>>regA; } else{ regA = kA+1; regSA=1; regime = 0x7FFFFFFF - (0x7FFFFFFF>>regA); } if(regA>(x-2)){ //max or min pos. exp and frac does not matter. uZ.ui=(regSA) ? (0x7FFFFFFF & ((int32_t)0x80000000>>(x-1)) ): (0x1 << (32-x)); } else{ //remove carry and rcarry bits and shift to correct position frac64Z &= 0x3FFFFFFF; fracA = (uint_fast32_t)frac64Z >> (regA+1); //regime length is smaller than length of posit if (regA>(x-regA-1)) & frac64Z); bitsMore = ((0x7FFFFFFF>>(x-regA-1)) & frac64Z); fracA&=((int32_t)0x80000000>>(x-1)); } else if (frac64Z>0) { fracA=0; bitsMore=1; } if(regA==(x-2) && expA){ bitNPlusOne=1; expA=0; } if (rem) bitsMore =1; } else{ regime=(regSA) ? (regime & ((int32_t)0x80000000>>(x-1)) ): (regime << (32-x)); expA=0; fracA=0; } expA <<= (29-regA); uZ.ui = packToP32UI(regime, expA, fracA); if (bitNPlusOne) uZ.ui += (uint32_t)(((uZ.ui>>(32-x))&1) | bitsMore) << (32-x) ; } if (signZ) uZ.ui = -uZ.ui & 0xFFFFFFFF; return uZ.p; } luametatex-2.10.08/source/libraries/softposit/source/pX1_eq.c000066400000000000000000000042271442250314700241510ustar00rootroot00000000000000 /*============================================================================ This C source file is part of the SoftPosit Posit Arithmetic Package by S. H. Leong (Cerlane). Copyright 2021. All rights reserved. This C source file was based on SoftFloat IEEE Floating-Point Arithmetic Package, Release 3d, by John R. Hauser. Copyright 2011, 2012, 2013, 2014 The Regents of the University of California. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. 2. 
Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. =============================================================================*/ #include "platform.h" #include "internals.h" bool pX1_eq( posit_1_t a, posit_1_t b) { union ui32_pX1 uA, uB; int32_t uiA, uiB; uA.p = a; uiA = (int32_t) uA.ui; uB.p = b; uiB = (int32_t)uB.ui; if(uiA==uiB) return true; else return false; } luametatex-2.10.08/source/libraries/softposit/source/pX1_le.c000066400000000000000000000042331442250314700241410ustar00rootroot00000000000000 /*============================================================================ This C source file is part of the SoftPosit Posit Arithmetic Package by S. H. Leong (Cerlane). Copyright 2021 NGA. All rights reserved. This C source file was based on SoftFloat IEEE Floating-Point Arithmetic Package, Release 3d, by John R. Hauser. Copyright 2011, 2012, 2013, 2014 The Regents of the University of California. 
All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
=============================================================================*/ #include "platform.h" #include "internals.h" bool pX1_le( posit_1_t a, posit_1_t b ) { union ui32_pX1 uA, uB; int32_t uiA, uiB; uA.p = a; uiA = (int32_t) uA.ui; uB.p = b; uiB = (int32_t)uB.ui; if(uiA<=uiB) return true; else return false; } luametatex-2.10.08/source/libraries/softposit/source/pX1_lt.c000066400000000000000000000042321442250314700241570ustar00rootroot00000000000000 /*============================================================================ This C source file is part of the SoftPosit Posit Arithmetic Package by S. H. Leong (Cerlane). Copyright 2021 NGA. All rights reserved. This C source file was based on SoftFloat IEEE Floating-Point Arithmetic Package, Release 3d, by John R. Hauser. Copyright 2011, 2012, 2013, 2014 The Regents of the University of California. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. =============================================================================*/ #include "platform.h" #include "internals.h" bool pX1_lt( posit_1_t a, posit_1_t b ) { union ui32_pX1 uA, uB; int32_t uiA, uiB; uA.p = a; uiA = (int32_t) uA.ui; uB.p = b; uiB = (int32_t)uB.ui; if(uiA32){ uZ.ui = 0x80000000; return uZ.p; } uA.p = pA; uiA = uA.ui; uB.p = pB; uiB = uB.ui; #ifdef SOFTPOSIT_EXACT uZ.ui.exact = (uiA.ui.exact & uiB.ui.exact); #endif //NaR or Zero if ( uiA==0x80000000 || uiB==0x80000000 ){ #ifdef SOFTPOSIT_EXACT uZ.ui.v = 0x80000000; uZ.ui.exact = 0; #else uZ.ui = 0x80000000; #endif return uZ.p; } else if (uiA==0 || uiB==0){ #ifdef SOFTPOSIT_EXACT uZ.ui.v = 0; if ( (uiA==0 && uiA.ui.exact) || (uiB==0 && uiB.ui.exact) ) uZ.ui.exact = 1; else uZ.ui.exact = 0; #else uZ.ui = 0; #endif return uZ.p; } signA = signP32UI( uiA ); signB = signP32UI( uiB ); signZ = signA ^ signB; if(signA) uiA = (-uiA & 0xFFFFFFFF); if(signB) uiB = (-uiB & 0xFFFFFFFF); regSA = signregP32UI(uiA); regSB = signregP32UI(uiB); if (x==2){ uZ.ui = (regSA®SB) ? 
(0x40000000) : (0x0); } else{ tmp = (uiA<<2)&0xFFFFFFFF; if (regSA){ while (tmp>>31){ kA++; tmp= (tmp<<1) & 0xFFFFFFFF; } } else{ kA=-1; while (!(tmp>>31)){ kA--; tmp= (tmp<<1) & 0xFFFFFFFF; } tmp&=0x7FFFFFFF; } expA = tmp>>30; //to get 1 bits fracA = (tmp | 0x40000000) & 0x7FFFFFFF; tmp = (uiB<<2)&0xFFFFFFFF; if (regSB){ while (tmp>>31){ kA++; tmp= (tmp<<1) & 0xFFFFFFFF; } } else{ kA--; while (!(tmp>>31)){ kA--; tmp= (tmp<<1) & 0xFFFFFFFF; } tmp&=0x7FFFFFFF; } expA += tmp>>30; frac64Z = (uint_fast64_t) fracA * ((tmp | 0x40000000) & 0x7FFFFFFF); if (expA>1){ kA++; expA^=0x2; } rcarry = frac64Z>>61;//3rd bit of frac64Z if (rcarry){ if (expA) kA ++; expA^=1; frac64Z>>=1; } } if(kA<0){ regA = -kA; regSA = 0; regime = 0x40000000>>regA; } else{ regA = kA+1; regSA=1; regime = 0x7FFFFFFF - (0x7FFFFFFF>>regA); } if(regA>(x-2)){ //max or min pos. exp and frac does not matter. uZ.ui=(regSA) ? (0x7FFFFFFF & ((int32_t)0x80000000>>(x-1)) ): (0x1 << (32-x)); } else{ //remove carry and rcarry bits and shift to correct position (2 bits exp, so + 1 than 16 bits) frac64Z = (frac64Z &0xFFFFFFFFFFFFFFF)>> (regA-1); fracA = (uint_fast32_t) (frac64Z>>32); //regime length is smaller than length of posit if (regA>x) & frac64Z); bitsMore = ((0x7FFFFFFFFFFFFFFF>>x) & frac64Z); fracA&=((int32_t)0x80000000>>(x-1)); } else if (frac64Z>0){ fracA=0; bitsMore=1; } if(regA==(x-2) && expA){ bitNPlusOne=1; expA=0; } } else{ regime=(regSA) ? 
(regime & ((int32_t)0x80000000>>(x-1)) ): (regime << (32-x)); expA=0; fracA=0; } expA <<= (29-regA); uZ.ui = packToP32UI(regime, expA, fracA); if (bitNPlusOne){ uZ.ui += (uint32_t)(((uZ.ui>>(32-x))&1) | bitsMore) << (32-x) ; } } if (signZ) uZ.ui = -uZ.ui & 0xFFFFFFFF; return uZ.p; } luametatex-2.10.08/source/libraries/softposit/source/pX1_mulAdd.c000066400000000000000000000041541442250314700247510ustar00rootroot00000000000000/*============================================================================ This C source file is part of the SoftPosit Posit Arithmetic Package by S. H. Leong (Cerlane). Copyright 2017, 2018 A*STAR. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. =============================================================================*/ #include "platform.h" #include "internals.h" posit_1_t pX1_mulAdd( posit_1_t a, posit_1_t b, posit_1_t c, int x ) { //a*b + c union ui32_pX1 uA; uint_fast32_t uiA; union ui32_pX1 uB; uint_fast32_t uiB; union ui32_pX1 uC; uint_fast32_t uiC; uA.p = a; uiA = uA.ui; uB.p = b; uiB = uB.ui; uC.p = c; uiC = uC.ui; return softposit_mulAddPX1( uiA, uiB, uiC, 0, x); } luametatex-2.10.08/source/libraries/softposit/source/pX1_roundToInt.c000066400000000000000000000072621442250314700256530ustar00rootroot00000000000000 /*============================================================================ This C source file is part of the SoftPosit Posit Arithmetic Package by S. H. Leong (Cerlane) and John Gustafson. Copyright 2017 2018 A*STAR. All rights reserved. This C source file was based on SoftFloat IEEE Floating-Point Arithmetic Package, Release 3d, by John R. Hauser. Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the University of California. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. 2. 
Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. =============================================================================*/ #include "platform.h" #include "internals.h" posit_1_t pX1_roundToInt( posit_1_t pA, int x ){ union ui32_pX1 uA; uint_fast32_t mask = 0x20000000, scale=0, tmp=0, uiA, uiZ; bool bitLast, bitNPlusOne, sign; uA.p = pA; uiA = uA.ui; sign = uiA>>31; // sign is True if pA > NaR. if (sign) uiA = -uiA & 0xFFFFFFFF; // A is now |A|. if (uiA <= 0x30000000) { // 0 <= |pA| <= 1/2 rounds to zero. uA.ui = 0; return uA.p; } else if (uiA < 0x48000000) { // 1/2 < x < 3/2 rounds to 1. uA.ui = 0x40000000; } else if (uiA <= 0x54000000) { // 3/2 <= x <= 5/2 rounds to 2. uA.ui = 0x50000000; } else if (uiA >= 0x7FE80000) { // If |A| is 0x7FE800000 (4194304) (posit is pure integer value), leave it unchanged. if (x>8) return uA.p; // This also takes care of the NaR case, 0x80000000. 
else{ bitNPlusOne=((uint32_t)0x80000000>>x) & uiA; tmp = ((uint32_t)0x7FFFFFFF>>x)& uiA; //bitsMore bitLast = ((uint32_t)0x80000000>>(x-1)) & uiA; if (bitNPlusOne) if (bitLast | tmp) uiA += bitLast; uA.ui = uiA; } } else { // 34% of the cases, we have to decode the posit. while (mask & uiA) { scale += 2; mask >>= 1; } mask >>= 1; if (mask & uiA) scale++; mask >>= scale; //the rest of the bits bitLast = (uiA & mask); mask >>= 1; tmp = (uiA & mask); bitNPlusOne = tmp; uiA ^= tmp; // Erase the bit, if it was set. tmp = uiA & (mask - 1); // this is actually bitsMore uiA ^= tmp; if (bitNPlusOne) { if (bitLast | tmp) uiA += (mask << 1); } uA.ui = uiA; } if (sign) uA.ui = -uA.ui & 0xFFFFFFFF; return uA.p; } luametatex-2.10.08/source/libraries/softposit/source/pX1_sub.c000066400000000000000000000051771442250314700243420ustar00rootroot00000000000000 /*============================================================================ This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic Package, Release 3d, by John R. Hauser. Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of California. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. =============================================================================*/ #include "platform.h" #include "internals.h" posit_1_t pX1_sub( posit_1_t a, posit_1_t b, int x) { union ui32_pX1 uA, uB, uZ; uint_fast32_t uiA, uiB; if (x<2 || x>32){ uZ.ui = 0x80000000; return uZ.p; } uA.p = a; uiA = uA.ui; uB.p = b; uiB = uB.ui; #ifdef SOFTPOSIT_EXACT uZ.ui.exact = (uiA.ui.exact & uiB.ui.exact); #endif //infinity if ( uiA==0x80000000 || uiB==0x80000000 ){ #ifdef SOFTPOSIT_EXACT uZ.ui.v = 0x80000000; uZ.ui.exact = 0; #else uZ.ui = 0x80000000; #endif return uZ.p; } //Zero else if ( uiA==0 || uiB==0 ){ #ifdef SOFTPOSIT_EXACT uZ.ui.v = (uiA | -uiB); uZ.ui.exact = 0; #else uZ.ui = (uiA | -uiB); #endif return uZ.p; } //different signs if ((uiA^uiB)>>31) return softposit_addMagsPX1(uiA, (-uiB & 0xFFFFFFFF), x); else return softposit_subMagsPX1(uiA, (-uiB & 0xFFFFFFFF), x); } luametatex-2.10.08/source/libraries/softposit/source/pX1_to_i32.c000066400000000000000000000103541442250314700246410ustar00rootroot00000000000000 /*============================================================================ This C source file is part of the SoftPosit Posit Arithmetic Package by S. H. Leong (Cerlane) and John Gustafson. Copyright 2017 2018 A*STAR. 
All rights reserved. This C source file was based on SoftFloat IEEE Floating-Point Arithmetic Package, Release 3d, by John R. Hauser. Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the University of California. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. =============================================================================*/ #include "platform.h" #include "internals.h" int_fast32_t pX1_to_i32( posit_1_t pA ) { union ui32_pX1 uA; uint_fast64_t iZ64, mask, tmp; int_fast32_t iZ, scale = 0, uiA; bool sign=0, bitLast, bitNPlusOne; uA.p = pA; uiA = uA.ui; // Copy of the input. 
//NaR if (uiA==0x80000000) return 0; sign = (uiA > 0x80000000); // sign is True if pA > NaR. if (sign) uiA = -uiA & 0xFFFFFFFF; // A is now |A|. if (uiA <= 0x30000000) { // 0 <= |pA| <= 1/2 rounds to zero. return 0; } else if (uiA < 0x48000000) { // 1/2 < x < 3/2 rounds to 1. iZ = 1; } else if (uiA <= 0x54000000) { // 3/2 <= x <= 5/2 rounds to 2. iZ = 2; } else if (uiA>0x7FFF9FFF){ //2147418112 return (sign) ? (-2147483648) : (2147483647); } else { // Decode the posit, left-justifying as we go. uiA -= 0x40000000; // Strip off first regime bit (which is a 1). while (0x20000000 & uiA) { // Increment scale by 2 for each regime sign bit. scale += 2; // Regime sign bit is always 1 in this range. uiA = (uiA - 0x20000000) << 1; // Remove the bit; line up the next regime bit. } uiA <<= 1; // Skip over termination bit, which is 0. if (0x20000000 & uiA) scale++; // If exponent is 1, increment the scale. iZ64 = ((uint64_t)uiA | 0x20000000) << 33; // Left-justify fraction in 64-bit result (one left bit padding) mask = 0x4000000000000000 >> scale; // Point to the last bit of the integer part. bitLast = (iZ64 & mask); // Extract the bit, without shifting it. mask >>= 1; tmp = (iZ64 & mask); bitNPlusOne = tmp; // "True" if nonzero. iZ64 ^= tmp; // Erase the bit, if it was set. tmp = iZ64 & (mask - 1); // tmp has any remaining bits. // This is bitsMore iZ64 ^= tmp; // Erase those bits, if any were set. if (bitNPlusOne) { // logic for round to nearest, tie to even if (bitLast | tmp) iZ64 += (mask << 1); } iZ = (uint64_t)iZ64 >> (62 - scale); // Right-justify the integer. } if (sign) iZ = -iZ; return iZ; } luametatex-2.10.08/source/libraries/softposit/source/pX1_to_i64.c000066400000000000000000000101711442250314700246430ustar00rootroot00000000000000 /*============================================================================ This C source file is part of the SoftPosit Posit Arithmetic Package by S. H. Leong (Cerlane) and John Gustafson. Copyright 2017 2018 A*STAR. 
All rights reserved. This C source file was based on SoftFloat IEEE Floating-Point Arithmetic Package, Release 3d, by John R. Hauser. Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the University of California. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. =============================================================================*/ #include "platform.h" #include "internals.h" int_fast64_t pX1_to_i64( posit_1_t pA ) { union ui32_pX1 uA; int_fast64_t mask, tmp; uint_fast64_t iZ, scale = 0, uiA; bool sign=0, bitLast, bitNPlusOne; uA.p = pA; uiA = uA.ui; // Copy of the input. 
//NaR if (uiA==0x80000000) return 0; sign = (uiA > 0x80000000); // sign is True if pA > NaR. if (sign) uiA = -uiA & 0xFFFFFFFF; // A is now |A|. if (uiA <= 0x30000000) { // 0 <= |pA| <= 1/2 rounds to zero. return 0; } else if (uiA < 0x48000000) { // 1/2 < x < 3/2 rounds to 1. iZ = 1; } else if (uiA <= 0x54000000) { // 3/2 <= x <= 5/2 rounds to 2. iZ = 2; } else { // Decode the posit, left-justifying as we go. uiA -= 0x40000000; // Strip off first regime bit (which is a 1). while (0x20000000 & uiA) { // Increment scale by 2 for each regime sign bit. scale += 2; // Regime sign bit is always 1 in this range. uiA = (uiA - 0x20000000) << 1; // Remove the bit; line up the next regime bit. } uiA <<= 1; // Skip over termination bit, which is 0. if (0x20000000 & uiA) scale++; // If exponent is 1, increment the scale. iZ = ((uint64_t)uiA | 0x20000000) << 33; // Left-justify fraction in 64-bit result (one left bit padding) mask = 0x4000000000000000 >> scale; // Point to the last bit of the integer part. bitLast = (iZ & mask); // Extract the bit, without shifting it. mask >>= 1; tmp = (iZ & mask); bitNPlusOne = tmp; // "True" if nonzero. iZ ^= tmp; // Erase the bit, if it was set. tmp = iZ & (mask - 1); // tmp has any remaining bits. // This is bitsMore iZ ^= tmp; // Erase those bits, if any were set. if (bitNPlusOne) { // logic for round to nearest, tie to even if (bitLast | tmp) iZ += (mask << 1); } iZ = (uint64_t)iZ >> (62 - scale); // Right-justify the integer. } if (sign) iZ = -iZ; return iZ; } luametatex-2.10.08/source/libraries/softposit/source/pX1_to_p16.c000066400000000000000000000047311442250314700246540ustar00rootroot00000000000000 /*============================================================================ This C source file is part of the SoftPosit Posit Arithmetic Package by S. H. Leong (Cerlane). Copyright 2017 2018 A*STAR. All rights reserved. This C source file was based on SoftFloat IEEE Floating-Point Arithmetic Package, Release 3d, by John R. Hauser. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
=============================================================================*/ #include "platform.h" #include "internals.h" posit16_t pX1_to_p16( posit_1_t pA ){ union ui32_pX1 uA; union ui16_p16 uZ; uint_fast32_t uiA; bool sign; uA.p = pA; uiA = uA.ui; if (uiA==0x80000000 || uiA==0 ){ uZ.ui = uiA>>16; return uZ.p; } sign = signP32UI( uiA ); if (sign) uiA = -uiA & 0xFFFFFFFF; if ((uiA&0xFFFF)==0 ){ uZ.ui = uiA>>16; } else { if( (uiA>>16)!=0x7FFF ){ if( (uint32_t)0x8000 & uiA){ if ( ( ((uint32_t)0x10000) & uiA) || (((uint32_t)0x7FFF) & uiA) ) uiA += 0x10000; } } uZ.ui = uiA>>16; if (uZ.ui==0) uZ.ui = 0x1; } if (sign) uZ.ui = (-uZ.ui & 0xFFFFFFFF); return uZ.p; } luametatex-2.10.08/source/libraries/softposit/source/pX1_to_p32.c000066400000000000000000000061561442250314700246550ustar00rootroot00000000000000 /*============================================================================ This C source file is part of the SoftPosit Posit Arithmetic Package by S. H. Leong (Cerlane). Copyright 2017 2018 A*STAR. All rights reserved. This C source file was based on SoftFloat IEEE Floating-Point Arithmetic Package, Release 3d, by John R. Hauser. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. =============================================================================*/ #include "platform.h" #include "internals.h" posit32_t pX1_to_p32( posit_1_t pA ){ union ui32_pX1 uA; union ui32_p32 uZ; uint_fast32_t uiA, tmp, regime; uint_fast32_t exp_frac32A=0; bool sign, regSA, bitNPlusOne=0, bitsMore=0; int_fast8_t kA=0, regA; uA.p = pA; uiA = uA.ui; if (uiA==0x80000000 || uiA==0 ){ uZ.ui = uiA; return uZ.p; } sign = signP32UI( uiA ); if (sign) uiA = -uiA & 0xFFFFFFFF; regSA = signregP32UI(uiA); tmp = (uiA<<2)&0xFFFFFFFF; if (regSA){ while (tmp>>31){ kA++; tmp= (tmp<<1) & 0xFFFFFFFF; } } else{ kA=-1; while (!(tmp>>31)){ kA--; tmp= (tmp<<1) & 0xFFFFFFFF; } tmp&=0x7FFFFFFF; } //2nd bit exp exp_frac32A = tmp; if(kA<0){ regA = -kA; exp_frac32A |= ((uint32_t)(regA&0x1)<<31); regA = (regA+1)>>1; if (regA==0) regA=1; regSA = 0; regime = 0x40000000>>regA; } else{ exp_frac32A |= ((uint32_t)(kA&0x1)<<31); (kA==0) ? 
(regA=1) : (regA = (kA+2)>>1); regSA=1; regime = 0x7FFFFFFF - (0x7FFFFFFF>>regA); } bitNPlusOne = (exp_frac32A >>(regA+1))&0x1; bitsMore = exp_frac32A&(0x7FFFFFFF>>(31-regA)); exp_frac32A >>=(regA+2); //2 because of sign and regime terminating bit uZ.ui = regime + exp_frac32A; if (bitNPlusOne){ uZ.ui += (uZ.ui&1) | bitsMore; } if (sign) uZ.ui = (-uZ.ui & 0xFFFFFFFF); return uZ.p; } luametatex-2.10.08/source/libraries/softposit/source/pX1_to_p8.c000066400000000000000000000061501442250314700245720ustar00rootroot00000000000000 /*============================================================================ This C source file is part of the SoftPosit Posit Arithmetic Package by S. H. Leong (Cerlane). Copyright 2017 2018 A*STAR. All rights reserved. This C source file was based on SoftFloat IEEE Floating-Point Arithmetic Package, Release 3d, by John R. Hauser. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. =============================================================================*/ #include "platform.h" #include "internals.h" posit8_t pX1_to_p8( posit_1_t pA ){ union ui32_pX1 uA; union ui8_p8 uZ; uint_fast32_t uiA, tmp, regime; uint_fast32_t exp_frac32A=0; bool sign, regSA, bitsMore=0; int_fast8_t kA=0, regA; uA.p = pA; uiA = uA.ui; if (uiA==0x80000000 || uiA==0 ){ uZ.ui = (uiA>>24) & 0xFF; return uZ.p; } sign = signP32UI( uiA ); if (sign) uiA = -uiA & 0xFFFFFFFF; regSA = signregP32UI(uiA); tmp = (uiA<<2)&0xFFFFFFFF; if (regSA){ while (tmp>>31){ kA++; tmp= (tmp<<1) & 0xFFFFFFFF; } } else{ kA=-1; while (!(tmp>>31)){ kA--; tmp= (tmp<<1) & 0xFFFFFFFF; } tmp&=0x7FFFFFFF; } if (kA<-3 || kA>=3){ (kA<0) ? 
(uZ.ui=0x1):(uZ.ui= 0x7F); } else{ //2nd bit exp exp_frac32A = tmp; if(kA<0){ regA = ((-kA)<<1) - (exp_frac32A>>30); if (regA==0) regA=1; regSA = 0; regime = 0x40>>regA; } else{ (kA==0)?(regA=1 + (exp_frac32A>>30)): (regA = ((kA+1)<<1) + (exp_frac32A>>30) -1); regSA=1; regime = 0x7F - (0x7F>>regA); } if (regA>5){ uZ.ui = regime; } else{ uZ.ui = regime + ( ((exp_frac32A)&0x3FFFFFFF)>>(regA+24) ); } } if ( exp_frac32A & (0x800000<32){ uZ.ui = 0x80000000; return uZ.p; } uA.p = pA; uiA = uA.ui; if (uiA==0x80000000 || uiA==0 ){ uZ.ui = uiA; return uZ.p; } sign = signP32UI( uiA ); if (sign) uiA = -uiA & 0xFFFFFFFF; if (x==2){ uZ.ui=(uiA>0)?(0x40000000):(0); } else if (x==32 || (((uint32_t)0xFFFFFFFF>>x) & uiA)==0 ){ uZ.ui = uiA; } else { int shift = 32-x; if( (uiA>>shift)!=(0x7FFFFFFF>>shift) ){ if( ((uint32_t)0x80000000>>x) & uiA){ if ( ( ((uint32_t)0x80000000>>(x-1)) & uiA) || (((uint32_t)0x7FFFFFFF>>x) & uiA) ) uiA += (0x1<>(x-1)); if (uZ.ui==0) uZ.ui = 0x1<32){ uZ.ui = 0x80000000; return uZ.p; } uA.p = pA; uiA = uA.ui; if (uiA==0x80000000 || uiA==0 ){ uZ.ui = uiA; return uZ.p; } sign = signP32UI( uiA ); if (sign) uiA = -uiA & 0xFFFFFFFF; regSA = signregP32UI(uiA); if (x==2){ uZ.ui=(uiA>0)?(0x40000000):(0); } /* else if (x==32 || (((uint32_t)0xFFFFFFFF>>x) & uiA)==0 ){ uZ.ui = uiA; }*/ else { tmp = (uiA<<2)&0xFFFFFFFF; if (regSA){ while (tmp>>31){ kA++; tmp= (tmp<<1) & 0xFFFFFFFF; } } else{ kA=-1; while (!(tmp>>31)){ kA--; tmp= (tmp<<1) & 0xFFFFFFFF; } tmp&=0x7FFFFFFF; } //2nd bit exp exp_frac32A = tmp; if(kA<0){ regA = -kA; exp_frac32A |= ((uint32_t)(regA&0x1)<<31); regA = (regA+1)>>1; if (regA==0) regA=1; regSA = 0; regime = 0x40000000>>regA; } else{ exp_frac32A |= ((uint32_t)(kA&0x1)<<31); (kA==0) ? (regA=1) : (regA = (kA+2)>>1); regSA=1; regime = 0x7FFFFFFF - (0x7FFFFFFF>>regA); } if(regA>(x-2)){ //max or min pos. exp and frac does not matter. uZ.ui=(regSA) ? 
(0x7FFFFFFF & ((int32_t)0x80000000>>(x-1)) ): (0x1 << (32-x)); } else{ //printBinary(&exp_frac32A, 32); //uint32_t temp = (0x7FFFFFFF>>(x-regA-2)); //printBinary(&temp, 32); //printBinary(®ime, 32); bitNPlusOne = (exp_frac32A >>(regA+33-x))&0x1; bitsMore = exp_frac32A&(0x7FFFFFFF>>(x-regA-2)); //printf("bitNPlusOne: %d bitsMore: %d\n", bitNPlusOne, bitsMore); exp_frac32A >>= (regA+2); //2 because of sign and regime terminating bit uZ.ui = regime + (exp_frac32A & ((int32_t)0x80000000>>(x-1)) ); //printBinary(&uZ.ui, 32); //int shift = 32-x; /*if( (uiA>>shift)!=(0x7FFFFFFF>>shift) ){ if( ((uint32_t)0x80000000>>x) & uZ.ui){ if ( ( ((uint32_t)0x80000000>>(x-1)) & uZ.ui) || (((uint32_t)0x7FFFFFFF>>x) & uZ.ui) ) uZ.ui += (0x1<>(x-1));*/ if (uZ.ui==0) uZ.ui = (uint32_t)0x1<<(32-x); else if (bitNPlusOne){ uZ.ui += (uint32_t)(((uZ.ui>>(32-x))&1) | bitsMore) << (32-x) ; } } } if (sign) uZ.ui = (-uZ.ui & 0xFFFFFFFF); return uZ.p; } luametatex-2.10.08/source/libraries/softposit/source/pX1_to_ui32.c000066400000000000000000000101351442250314700250230ustar00rootroot00000000000000 /*============================================================================ This C source file is part of the SoftPosit Posit Arithmetic Package by S. H. Leong (Cerlane) and John Gustafson. Copyright 2017 2018 A*STAR. All rights reserved. This C source file was based on SoftFloat IEEE Floating-Point Arithmetic Package, Release 3d, by John R. Hauser. Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the University of California. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. 2. 
Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. =============================================================================*/ #include "platform.h" #include "internals.h" uint_fast32_t pX1_to_ui32( posit_1_t pA ) { union ui32_pX1 uA; uint_fast64_t iZ64, mask, tmp; uint_fast32_t iZ, scale = 0, uiA; bool bitLast, bitNPlusOne; uA.p = pA; uiA = uA.ui; // Copy of the input. //NaR //if (uiA==0x80000000) return 0; if (uiA>=0x80000000) return 0; //negative if (uiA <= 0x30000000) { // 0 <= |pA| <= 1/2 rounds to zero. return 0; } else if (uiA < 0x48000000) { // 1/2 < x < 3/2 rounds to 1. return 1; } else if (uiA <= 0x54000000) { // 3/2 <= x <= 5/2 rounds to 2. return 2; } else if (uiA>0x7FFFBFFF){ //4294836223 return 4294967295; } else { // Decode the posit, left-justifying as we go. uiA -= 0x40000000; // Strip off first regime bit (which is a 1). 
while (0x20000000 & uiA) { // Increment scale by 2 for each regime sign bit. scale += 2; // Regime sign bit is always 1 in this range. uiA = (uiA - 0x20000000) << 1; // Remove the bit; line up the next regime bit. } uiA <<= 1; // Skip over termination bit, which is 0. if (0x20000000 & uiA) scale++; // If exponent is 1, increment the scale. iZ64 = (uint64_t)(uiA | 0x20000000) << 33; // Left-justify fraction in 64-bit result (one left bit padding) mask = 0x4000000000000000 >> scale; // Point to the last bit of the integer part. bitLast = (iZ64 & mask); // Extract the bit, without shifting it. mask >>= 1; tmp = (iZ64 & mask); bitNPlusOne = tmp; // "True" if nonzero. iZ64 ^= tmp; // Erase the bit, if it was set. tmp = iZ64 & (mask - 1); // tmp has any remaining bits. // This is bitsMore iZ64 ^= tmp; // Erase those bits, if any were set. if (bitNPlusOne) { // logic for round to nearest, tie to even if (bitLast | tmp) iZ64 += (mask << 1); } iZ = (uint64_t)iZ64 >> (62 - scale); // Right-justify the integer. } return iZ; } luametatex-2.10.08/source/libraries/softposit/source/pX1_to_ui64.c000066400000000000000000000077751442250314700250500ustar00rootroot00000000000000 /*============================================================================ This C source file is part of the SoftPosit Posit Arithmetic Package by S. H. Leong (Cerlane) and John Gustafson. Copyright 2017 2018 A*STAR. All rights reserved. This C source file was based on SoftFloat IEEE Floating-Point Arithmetic Package, Release 3d, by John R. Hauser. Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the University of California. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. 2. 
Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. =============================================================================*/ #include "platform.h" #include "internals.h" uint_fast64_t pX1_to_ui64( posit_1_t pA ) { union ui32_pX1 uA; uint_fast64_t mask, tmp; uint_fast64_t iZ, scale = 0, uiA; bool bitLast, bitNPlusOne; uA.p = pA; uiA = uA.ui; // Copy of the input. //NaR //if (uiA==0x80000000) return 0; if (uiA>=0x80000000) return 0; //negative if (uiA <= 0x30000000) { // 0 <= |pA| <= 1/2 rounds to zero. return 0; } else if (uiA < 0x48000000) { // 1/2 < x < 3/2 rounds to 1. return 1; } else if (uiA <= 0x54000000) { // 3/2 <= x <= 5/2 rounds to 2. return 2; } else { // Decode the posit, left-justifying as we go. uiA -= 0x40000000; // Strip off first regime bit (which is a 1). while (0x20000000 & uiA) { // Increment scale by 2 for each regime sign bit. 
scale += 2; // Regime sign bit is always 1 in this range. uiA = (uiA - 0x20000000) << 1; // Remove the bit; line up the next regime bit. } uiA <<= 1; // Skip over termination bit, which is 0. if (0x20000000 & uiA) scale++; // If exponent is 1, increment the scale. iZ = (uiA | 0x20000000) << 33; // Left-justify fraction in 64-bit result (one left bit padding) mask = 0x4000000000000000 >> scale; // Point to the last bit of the integer part. bitLast = (iZ & mask); // Extract the bit, without shifting it. mask >>= 1; tmp = (iZ & mask); bitNPlusOne = tmp; // "True" if nonzero. iZ ^= tmp; // Erase the bit, if it was set. tmp = iZ & (mask - 1); // tmp has any remaining bits. // This is bitsMore iZ ^= tmp; // Erase those bits, if any were set. if (bitNPlusOne) { // logic for round to nearest, tie to even if (bitLast | tmp) iZ += (mask << 1); } iZ = (uint64_t)iZ >> (62 - scale); // Right-justify the integer. } return iZ; } luametatex-2.10.08/source/libraries/softposit/source/pX2_add.c000066400000000000000000000050621442250314700242730ustar00rootroot00000000000000 /*============================================================================ This C source file is part of the SoftPosit Posit Arithmetic Package by S. H. Leong (Cerlane). Copyright 2017, 2018 A*STAR. All rights reserved. This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic Package, Release 3d, by John R. Hauser. Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of California. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 
3. Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. =============================================================================*/ #include "platform.h" #include "internals.h" posit_2_t pX2_add( posit_2_t a, posit_2_t b, int x ){ union ui32_pX2 uA, uB, uZ; uint_fast32_t uiA, uiB; if (x<2 || x>32){ uZ.ui = 0x80000000; return uZ.p; } uA.p = a; uiA = uA.ui; uB.p = b; uiB = uB.ui; //Zero or infinity if (uiA==0 || uiB==0){ // Not required but put here for speed uZ.ui = uiA | uiB; return uZ.p; } else if ( uiA==0x80000000 || uiB==0x80000000 ){ uZ.ui = 0x80000000; return uZ.p; } //different signs if ((uiA^uiB)>>31) return softposit_subMagsPX2(uiA, uiB, x); else return softposit_addMagsPX2(uiA, uiB, x); } luametatex-2.10.08/source/libraries/softposit/source/pX2_div.c000066400000000000000000000125431442250314700243270ustar00rootroot00000000000000/*============================================================================ This C source file is part of the SoftPosit Posit Arithmetic Package by S. H. Leong (Cerlane). Copyright 2017, 2018 A*STAR. All rights reserved. 
This C source file was based on SoftFloat IEEE Floating-Point Arithmetic Package, Release 3d, by John R. Hauser. Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the University of California. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
=============================================================================*/ #include #include "platform.h" #include "internals.h" posit_2_t pX2_div( posit_2_t pA, posit_2_t pB, int x ) { union ui32_pX2 uA, uB, uZ; int regA, regB; uint_fast32_t uiA, uiB, fracA, fracB, regime, tmp; bool signA, signB, signZ, regSA, regSB, bitNPlusOne=0, bitsMore=0, rcarry; int_fast8_t kA=0; int_fast32_t expA; uint_fast64_t frac64A, frac64Z, rem; lldiv_t divresult; if (x<2 || x>32){ uZ.ui = 0x80000000; return uZ.p; } uA.p = pA; uiA = uA.ui; uB.p = pB; uiB = uB.ui; //Zero or infinity if ( uiA==0x80000000 || uiB==0x80000000 || uiB==0){ #ifdef SOFTPOSIT_EXACT uZ.ui.v = 0x80000000; uZ.ui.exact = 0; #else uZ.ui = 0x80000000; #endif return uZ.p; } else if (uiA==0){ #ifdef SOFTPOSIT_EXACT uZ.ui.v = 0; if ( (uiA==0 && uiA.ui.exact) || (uiB==0 && uiB.ui.exact) ) uZ.ui.exact = 1; else uZ.ui.exact = 0; #else uZ.ui = 0; #endif return uZ.p; } signA = signP32UI( uiA ); signB = signP32UI( uiB ); signZ = signA ^ signB; if(signA) uiA = (-uiA & 0xFFFFFFFF); if(signB) uiB = (-uiB & 0xFFFFFFFF); regSA = signregP32UI(uiA); regSB = signregP32UI(uiB); if (x==2){ uZ.ui = 0x40000000; } else{ tmp = (uiA<<2)&0xFFFFFFFF; if (regSA){ while (tmp>>31){ kA++; tmp= (tmp<<1) & 0xFFFFFFFF; } } else{ kA=-1; while (!(tmp>>31)){ kA--; tmp= (tmp<<1) & 0xFFFFFFFF; } tmp&=0x7FFFFFFF; } expA = tmp>>29; //to get 2 bits fracA = ((tmp<<1) | 0x40000000) & 0x7FFFFFFF; frac64A = (uint64_t) fracA << 30; tmp = (uiB<<2)&0xFFFFFFFF; if (regSB){ while (tmp>>31){ kA--; tmp= (tmp<<1) & 0xFFFFFFFF; } } else{ kA++; while (!(tmp>>31)){ kA++; tmp= (tmp<<1) & 0xFFFFFFFF; } tmp&=0x7FFFFFFF; } expA -= tmp>>29; fracB = ((tmp<<1) | 0x40000000) & 0x7FFFFFFF; divresult = lldiv (frac64A,(uint_fast64_t)fracB); frac64Z = divresult.quot; rem = divresult.rem; if (expA<0){ expA+=4; kA--; } if (frac64Z!=0){ rcarry = frac64Z >> 30; // this is the hidden bit (14th bit) , extreme right bit is bit 0 if (!rcarry){ if (expA==0){ kA--; expA=3; } else expA--; 
frac64Z<<=1; } } } if(kA<0){ regA = -kA; regSA = 0; regime = 0x40000000>>regA; } else{ regA = kA+1; regSA=1; regime = 0x7FFFFFFF - (0x7FFFFFFF>>regA); } if(regA>(x-2)){ //max or min pos. exp and frac does not matter. uZ.ui=(regSA) ? (0x7FFFFFFF & ((int32_t)0x80000000>>(x-1)) ): (0x1 << (32-x)); } else{ //remove carry and rcarry bits and shift to correct position frac64Z &= 0x3FFFFFFF; fracA = (uint_fast32_t)frac64Z >> (regA+2); //regime length is smaller than length of posit if (regA>(x-regA-2))& frac64Z; bitsMore = ((0x7FFFFFFF>>(x-regA-2)) & frac64Z); fracA&=((int32_t)0x80000000>>(x-1)); } else { if (regA==(x-2)){ bitNPlusOne = expA&0x2; bitsMore = (expA&0x1); expA = 0; } else if (regA==(x-3)){ bitNPlusOne = expA&0x1; expA &=0x2; } if (frac64Z>0){ fracA=0; bitsMore =1; } } if (rem) bitsMore =1; } else{ regime=(regSA) ? (regime & ((int32_t)0x80000000>>(x-1)) ): (regime << (32-x)); expA=0; fracA=0; } expA <<= (28-regA); uZ.ui = packToP32UI(regime, expA, fracA); if (bitNPlusOne) uZ.ui += (uint32_t)(((uZ.ui>>(32-x))&1) | bitsMore) << (32-x) ; } if (signZ) uZ.ui = -uZ.ui & 0xFFFFFFFF; return uZ.p; } luametatex-2.10.08/source/libraries/softposit/source/pX2_eq.c000066400000000000000000000042431442250314700241500ustar00rootroot00000000000000 /*============================================================================ This C source file is part of the SoftPosit Posit Arithmetic Package by S. H. Leong (Cerlane). Copyright 2017 2018 A*STAR. All rights reserved. This C source file was based on SoftFloat IEEE Floating-Point Arithmetic Package, Release 3d, by John R. Hauser. Copyright 2011, 2012, 2013, 2014 The Regents of the University of California. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. 2. 
Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. =============================================================================*/ #include "platform.h" #include "internals.h" bool pX2_eq( posit_2_t a, posit_2_t b) { union ui32_pX2 uA, uB; int32_t uiA, uiB; uA.p = a; uiA = (int32_t) uA.ui; uB.p = b; uiB = (int32_t)uB.ui; if(uiA==uiB) return true; else return false; } luametatex-2.10.08/source/libraries/softposit/source/pX2_le.c000066400000000000000000000042431442250314700241430ustar00rootroot00000000000000 /*============================================================================ This C source file is part of the SoftPosit Posit Arithmetic Package by S. H. Leong (Cerlane). Copyright 2017 2018 A*STAR. All rights reserved. This C source file was based on SoftFloat IEEE Floating-Point Arithmetic Package, Release 3d, by John R. Hauser. 
Copyright 2011, 2012, 2013, 2014 The Regents of the University of California. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
=============================================================================*/ #include "platform.h" #include "internals.h" bool pX2_le( posit_2_t a, posit_2_t b ) { union ui32_pX2 uA, uB; int32_t uiA, uiB; uA.p = a; uiA = (int32_t) uA.ui; uB.p = b; uiB = (int32_t)uB.ui; if(uiA<=uiB) return true; else return false; } luametatex-2.10.08/source/libraries/softposit/source/pX2_lt.c000066400000000000000000000042421442250314700241610ustar00rootroot00000000000000 /*============================================================================ This C source file is part of the SoftPosit Posit Arithmetic Package by S. H. Leong (Cerlane). Copyright 2017 2018 A*STAR. All rights reserved. This C source file was based on SoftFloat IEEE Floating-Point Arithmetic Package, Release 3d, by John R. Hauser. Copyright 2011, 2012, 2013, 2014 The Regents of the University of California. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. =============================================================================*/ #include "platform.h" #include "internals.h" bool pX2_lt( posit_2_t a, posit_2_t b ) { union ui32_pX2 uA, uB; int32_t uiA, uiB; uA.p = a; uiA = (int32_t) uA.ui; uB.p = b; uiB = (int32_t)uB.ui; if(uiA32){ uZ.ui = 0x80000000; return uZ.p; } uA.p = pA; uiA = uA.ui; uB.p = pB; uiB = uB.ui; #ifdef SOFTPOSIT_EXACT uZ.ui.exact = (uiA.ui.exact & uiB.ui.exact); #endif //NaR or Zero if ( uiA==0x80000000 || uiB==0x80000000 ){ #ifdef SOFTPOSIT_EXACT uZ.ui.v = 0x80000000; uZ.ui.exact = 0; #else uZ.ui = 0x80000000; #endif return uZ.p; } else if (uiA==0 || uiB==0){ #ifdef SOFTPOSIT_EXACT uZ.ui.v = 0; if ( (uiA==0 && uiA.ui.exact) || (uiB==0 && uiB.ui.exact) ) uZ.ui.exact = 1; else uZ.ui.exact = 0; #else uZ.ui = 0; #endif return uZ.p; } signA = signP32UI( uiA ); signB = signP32UI( uiB ); signZ = signA ^ signB; if(signA) uiA = (-uiA & 0xFFFFFFFF); if(signB) uiB = (-uiB & 0xFFFFFFFF); regSA = signregP32UI(uiA); regSB = signregP32UI(uiB); if (x==2){ uZ.ui = (regSA®SB) ? 
(0x40000000) : (0x0); } else{ tmp = (uiA<<2)&0xFFFFFFFF; if (regSA){ while (tmp>>31){ kA++; tmp= (tmp<<1) & 0xFFFFFFFF; } } else{ kA=-1; while (!(tmp>>31)){ kA--; tmp= (tmp<<1) & 0xFFFFFFFF; //tmpX--; } tmp&=0x7FFFFFFF; } expA = tmp>>29; //to get 2 bits fracA = ((tmp<<1) | 0x40000000) & 0x7FFFFFFF; tmp = (uiB<<2)&0xFFFFFFFF; if (regSB){ while (tmp>>31){ kA++; tmp= (tmp<<1) & 0xFFFFFFFF; } } else{ kA--; while (!(tmp>>31)){ kA--; tmp= (tmp<<1) & 0xFFFFFFFF; } tmp&=0x7FFFFFFF; } expA += tmp>>29; frac64Z = (uint_fast64_t) fracA * (((tmp<<1) | 0x40000000) & 0x7FFFFFFF); if (expA>3){ kA++; expA&=0x3; // -=4 } rcarry = frac64Z>>61;//3rd bit of frac64Z if (rcarry){ expA++; if (expA>3){ kA ++; expA&=0x3; } frac64Z>>=1; } } if(kA<0){ regA = -kA; regSA = 0; regime = 0x40000000>>regA; } else{ regA = kA+1; regSA=1; regime = 0x7FFFFFFF - (0x7FFFFFFF>>regA); } if(regA>(x-2)){ //max or min pos. exp and frac does not matter. uZ.ui=(regSA) ? (0x7FFFFFFF & ((int32_t)0x80000000>>(x-1)) ): (0x1 << (32-x)); } else{ //remove carry and rcarry bits and shift to correct position (2 bits exp, so + 1 than 16 bits) frac64Z = (frac64Z&0xFFFFFFFFFFFFFFF) >> regA; fracA = (uint_fast32_t) (frac64Z>>32); //regime length is smaller than length of posit if (regA>x) & frac64Z); bitsMore = ((0x7FFFFFFFFFFFFFFF>>x) & frac64Z); fracA&=((int32_t)0x80000000>>(x-1)); } else { if (regA==(x-2)){ bitNPlusOne = expA&0x2; bitsMore = (expA&0x1); expA = 0; } else if (regA==(x-3)){ bitNPlusOne = expA&0x1; //expA>>=1; //taken care of by the pack algo expA &=0x2; } if (frac64Z>0){ fracA=0; bitsMore =1; } } } else{ regime=(regSA) ? 
(regime & ((int32_t)0x80000000>>(x-1)) ): (regime << (32-x)); expA=0; fracA=0; } expA <<= (28-regA); uZ.ui = packToP32UI(regime, expA, fracA); if (bitNPlusOne){ uZ.ui += (uint32_t)(((uZ.ui>>(32-x))&1) | bitsMore) << (32-x) ; } } if (signZ) uZ.ui = -uZ.ui & 0xFFFFFFFF; return uZ.p; } luametatex-2.10.08/source/libraries/softposit/source/pX2_mulAdd.c000066400000000000000000000041541442250314700247520ustar00rootroot00000000000000/*============================================================================ This C source file is part of the SoftPosit Posit Arithmetic Package by S. H. Leong (Cerlane). Copyright 2017, 2018 A*STAR. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. =============================================================================*/ #include "platform.h" #include "internals.h" posit_2_t pX2_mulAdd( posit_2_t a, posit_2_t b, posit_2_t c, int x ) { //a*b + c union ui32_pX2 uA; uint_fast32_t uiA; union ui32_pX2 uB; uint_fast32_t uiB; union ui32_pX2 uC; uint_fast32_t uiC; uA.p = a; uiA = uA.ui; uB.p = b; uiB = uB.ui; uC.p = c; uiC = uC.ui; return softposit_mulAddPX2( uiA, uiB, uiC, 0, x); } luametatex-2.10.08/source/libraries/softposit/source/pX2_roundToInt.c000066400000000000000000000074171442250314700256560ustar00rootroot00000000000000 /*============================================================================ This C source file is part of the SoftPosit Posit Arithmetic Package by S. H. Leong (Cerlane) and John Gustafson. Copyright 2017 2018 A*STAR. All rights reserved. This C source file was based on SoftFloat IEEE Floating-Point Arithmetic Package, Release 3d, by John R. Hauser. Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the University of California. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. 2. 
Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. =============================================================================*/ #include "platform.h" #include "internals.h" posit_2_t pX2_roundToInt( posit_2_t pA, int x ){ union ui32_pX2 uA; uint_fast32_t mask = 0x20000000, scale=0, tmp=0, uiA, uiZ; bool bitLast, bitNPlusOne, sign; uA.p = pA; uiA = uA.ui; sign = uiA>>31; // sign is True if pA > NaR. if (sign) uiA = -uiA & 0xFFFFFFFF; // A is now |A|. if (uiA <= 0x38000000) { // 0 <= |pA| <= 1/2 rounds to zero. uA.ui = 0; return uA.p; } else if (uiA < 0x44000000) { // 1/2 < x < 3/2 rounds to 1. uA.ui = 0x40000000; } else if (uiA <= 0x4A000000) { // 3/2 <= x <= 5/2 rounds to 2. uA.ui = (x>4) ? ( 0x48000000 ) : (0x40000000); } else if (uiA >= 0x7E800000) { // If |A| is 0x7E800000 (4194304) (posit is pure integer value), leave it unchanged. 
if (x>8) return uA.p; // This also takes care of the NaR case, 0x80000000. else{ bitNPlusOne=((uint32_t)0x80000000>>x) & uiA; tmp = ((uint32_t)0x7FFFFFFF>>x)& uiA; //bitsMore bitLast = ((uint32_t)0x80000000>>(x-1)) & uiA; if (bitNPlusOne) if (bitLast | tmp) uiA += bitLast; uA.ui = uiA; } } else { // 34% of the cases, we have to decode the posit. while (mask & uiA) { scale += 4; mask >>= 1; } mask >>= 1; //Exponential (2 bits) if (mask & uiA) scale+=2; mask >>= 1; if (mask & uiA) scale++; mask >>= scale; //the rest of the bits bitLast = (uiA & mask); mask >>= 1; tmp = (uiA & mask); bitNPlusOne = tmp; uiA ^= tmp; // Erase the bit, if it was set. tmp = uiA & (mask - 1); // this is actually bitsMore uiA ^= tmp; if (bitNPlusOne) { if (bitLast | tmp) uiA += (mask << 1); } uA.ui = uiA; } if (sign) uA.ui = -uA.ui & 0xFFFFFFFF; return uA.p; } luametatex-2.10.08/source/libraries/softposit/source/pX2_sqrt.c000066400000000000000000000127241442250314700245370ustar00rootroot00000000000000 /*============================================================================ This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic Package, Release 3d, by John R. Hauser. Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of California. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. =============================================================================*/ #include "platform.h" #include "internals.h" extern const uint_fast16_t softposit_approxRecipSqrt0[]; extern const uint_fast16_t softposit_approxRecipSqrt1[]; posit_2_t pX2_sqrt( posit_2_t pA, int x ) { union ui32_pX2 uA; uint_fast32_t index, r0, shift, fracA, expZ, expA; uint_fast32_t mask, uiA, uiZ; uint_fast64_t eSqrR0, frac64Z, negRem, recipSqrt, shiftedFracZ, sigma0, sqrSigma0; int_fast32_t eps, shiftZ; if (x<2 || x>32){ uA.ui = 0x80000000; return uA.p; } uA.p = pA; uiA = uA.ui; // If NaR or a negative number, return NaR. if (uiA & 0x80000000) { uA.ui = 0x80000000; return uA.p; } // If the argument is zero, return zero. else if (!uiA) { return uA.p; } // Compute the square root; shiftZ is the power-of-2 scaling of the result. 
// Decode regime and exponent; scale the input to be in the range 1 to 4: if (uiA & 0x40000000) { shiftZ = -2; while (uiA & 0x40000000) { shiftZ += 2; uiA = (uiA << 1) & 0xFFFFFFFF; } } else { shiftZ = 0; while (!(uiA & 0x40000000)) { shiftZ -= 2; uiA = (uiA << 1) & 0xFFFFFFFF; } } uiA &= 0x3FFFFFFF; expA = (uiA >> 28); shiftZ += (expA >> 1); expA = (0x1 ^ (expA & 0x1)); uiA &= 0x0FFFFFFF; fracA = (uiA | 0x10000000); // Use table look-up of first 4 bits for piecewise linear approx. of 1/sqrt: index = ((fracA >> 24) & 0xE) + expA; eps = ((fracA >> 9) & 0xFFFF); r0 = softposit_approxRecipSqrt0[index] - (((uint_fast32_t) softposit_approxRecipSqrt1[index] * eps) >> 20); // Use Newton-Raphson refinement to get 33 bits of accuracy for 1/sqrt: eSqrR0 = (uint_fast64_t) r0 * r0; if (!expA) eSqrR0 <<= 1; sigma0 = 0xFFFFFFFF & (0xFFFFFFFF ^ ((eSqrR0 * (uint64_t)fracA) >> 20)); recipSqrt = ((uint_fast64_t) r0 << 20) + (((uint_fast64_t) r0 * sigma0) >> 21); sqrSigma0 = ((sigma0 * sigma0) >> 35); recipSqrt += ( (( recipSqrt + (recipSqrt >> 2) - ((uint_fast64_t)r0 << 19) ) * sqrSigma0) >> 46 ); frac64Z = (((uint_fast64_t) fracA) * recipSqrt) >> 31; if (expA) frac64Z = (frac64Z >> 1); // Find the exponent of Z and encode the regime bits. expZ = shiftZ & 0x3; if (shiftZ < 0) { shift = (-1 - shiftZ) >> 2; uiZ = 0x20000000 >> shift; } else { shift = shiftZ >> 2; uiZ = 0x7FFFFFFF - (0x3FFFFFFF >> shift); } // Trick for eliminating off-by-one cases that only uses one multiply: frac64Z++; if (!(frac64Z & 0xF)) { shiftedFracZ = frac64Z >> 1; negRem = (shiftedFracZ * shiftedFracZ) & 0x1FFFFFFFF; if (negRem & 0x100000000) { frac64Z |= 1; } else { if (negRem) frac64Z--; } } // Strip off the hidden bit and round-to-nearest using last shift+5 bits. frac64Z &= 0xFFFFFFFF; mask = (1 << (36 + shift - x)); if (mask & frac64Z) { if ( ((mask - 1) & frac64Z) | ((mask << 1) & frac64Z) ) frac64Z+=(mask << 1) ; // Assemble the result and return it. 
uA.ui = uiZ | (expZ << (27 - shift)) | (frac64Z >> (5 + shift)); } else{ // Assemble the result and return it. uA.ui = uiZ | (expZ << (27 - shift)) | (frac64Z >> (5 + shift)); //Check if rounding bits in regime or exp and clean off unwanted bits if( ((uint32_t)0x80000000>>x) & uA.ui){ if ( ( ((uint32_t)0x80000000>>(x-1)) & uA.ui) || (((uint32_t)0x7FFFFFFF>>x) & uA.ui) ) uA.ui = (uA.ui & ((int32_t)0x80000000>>(x-1))) + ((uint32_t)0x80000000>>(x-1)); } } uA.ui &=((int32_t)0x80000000>>(x-1)); return uA.p; } luametatex-2.10.08/source/libraries/softposit/source/pX2_sub.c000066400000000000000000000051761442250314700243420ustar00rootroot00000000000000 /*============================================================================ This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic Package, Release 3d, by John R. Hauser. Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of California. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. =============================================================================*/ #include "platform.h" #include "internals.h" posit_2_t pX2_sub( posit_2_t a, posit_2_t b, int x) { union ui32_pX2 uA, uB, uZ; uint_fast32_t uiA, uiB; if (x<2 || x>32){ uZ.ui = 0x80000000; return uZ.p; } uA.p = a; uiA = uA.ui; uB.p = b; uiB = uB.ui; #ifdef SOFTPOSIT_EXACT uZ.ui.exact = (uiA.ui.exact & uiB.ui.exact); #endif //infinity if ( uiA==0x80000000 || uiB==0x80000000 ){ #ifdef SOFTPOSIT_EXACT uZ.ui.v = 0x80000000; uZ.ui.exact = 0; #else uZ.ui = 0x80000000; #endif return uZ.p; } //Zero else if ( uiA==0 || uiB==0 ){ #ifdef SOFTPOSIT_EXACT uZ.ui.v = (uiA | -uiB); uZ.ui.exact = 0; #else uZ.ui = (uiA | -uiB); #endif return uZ.p; } //different signs if ((uiA^uiB)>>31) return softposit_addMagsPX2(uiA, (-uiB & 0xFFFFFFFF), x); else return softposit_subMagsPX2(uiA, (-uiB & 0xFFFFFFFF), x); } luametatex-2.10.08/source/libraries/softposit/source/pX2_to_pX1.c000066400000000000000000000071001442250314700247100ustar00rootroot00000000000000 /*============================================================================ This C source file is part of the SoftPosit Posit Arithmetic Package by S. H. Leong (Cerlane). Copyright 2017 2018 A*STAR. All rights reserved. This C source file was based on SoftFloat IEEE Floating-Point Arithmetic Package, Release 3d, by John R. Hauser. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
=============================================================================*/ #include "platform.h" #include "internals.h" posit_1_t pX2_to_pX1( posit_2_t pA, int x ){ union ui32_pX2 uA; union ui32_pX1 uZ; uint_fast32_t uiA, tmp, regime; uint_fast32_t exp_frac32A=0; bool sign, regSA, bitNPlusOne, bitsMore; int_fast8_t kA=0, regA; if (x<2 || x>32){ uZ.ui = 0x80000000; return uZ.p; } uA.p = pA; uiA = uA.ui; if (uiA==0x80000000 || uiA==0 ){ uZ.ui = uiA; return uZ.p; } sign = signP32UI( uiA ); if (sign) uiA = -uiA & 0xFFFFFFFF; regSA = signregP32UI(uiA); if (x==2){ uZ.ui=(uiA>0)?(0x40000000):(0); } else { //regime tmp = (uiA<<2)&0xFFFFFFFF; if (regSA){ while (tmp>>31){ kA++; tmp= (tmp<<1) & 0xFFFFFFFF; } } else{ kA=-1; while (!(tmp>>31)){ kA--; tmp= (tmp<<1) & 0xFFFFFFFF; } tmp&=0x7FFFFFFF; } //exp and frac exp_frac32A = tmp<<1; if(kA<0){ regA = (-kA)<<1; if (exp_frac32A&0x80000000) regA--; exp_frac32A = (exp_frac32A<<1) &0xFFFFFFFF; regSA = 0; regime = 0x40000000>>regA; } else{ regA = (kA<<1)+1; if (exp_frac32A&0x80000000) regA++; exp_frac32A = (exp_frac32A<<1) &0xFFFFFFFF; regSA=1; regime = 0x7FFFFFFF - (0x7FFFFFFF>>regA); } if(regA>(x-2)){ //max or min pos. exp and frac does not matter. uZ.ui=(regSA) ? (0x7FFFFFFF & ((int32_t)0x80000000>>(x-1)) ): (0x1 << (32-x)); } else{ bitNPlusOne = (exp_frac32A >>(regA+33-x))&0x1; bitsMore = exp_frac32A&(0x7FFFFFFF>>(x-regA-2)); if (regA<30) exp_frac32A >>=(2+regA); else exp_frac32A=0; uZ.ui = regime + (exp_frac32A & ((int32_t)0x80000000>>(x-1)) ); if (uZ.ui==0) uZ.ui = 0x1<<(32-x); else if (bitNPlusOne){ uZ.ui += (uint32_t)(((uZ.ui>>(32-x))&1) | bitsMore) << (32-x) ; } } } if (sign) uZ.ui = (-uZ.ui & 0xFFFFFFFF); return uZ.p; } luametatex-2.10.08/source/libraries/softposit/source/quire16_fdp_add.c000066400000000000000000000113471442250314700257520ustar00rootroot00000000000000/*============================================================================ This C source file is part of the SoftPosit Posit Arithmetic Package by S. 
H. Leong (Cerlane). Copyright 2017, 2018 A*STAR. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
=============================================================================*/ #include #include "platform.h" #include "internals.h" quire16_t q16_fdp_add( quire16_t q, posit16_t pA, posit16_t pB ){ union ui16_p16 uA, uB; union ui128_q16 uZ, uZ1, uZ2; uint_fast16_t uiA, uiB; uint_fast16_t fracA, tmp; bool signA, signB, signZ2, regSA, regSB, rcarry; int_fast8_t expA; int_fast16_t kA=0, shiftRight=0; uint_fast32_t frac32Z; //For add bool rcarryb, b1, b2, rcarryZ;//, rcarrySignZ; uZ1.q = q; uA.p = pA; uiA = uA.ui; uB.p = pB; uiB = uB.ui; //NaR if (isNaRQ16(q) || isNaRP16UI(uA.ui) || isNaRP16UI(uB.ui)){ uZ2.ui[0]=0x8000000000000000ULL; uZ2.ui[1] = 0; return uZ2.q; } else if (uiA==0 || uiB==0) return q; //max pos (sign plus and minus) signA = signP16UI( uiA ); signB = signP16UI( uiB ); signZ2 = signA ^ signB; if(signA) uiA = (-uiA & 0xFFFF); if(signB) uiB = (-uiB & 0xFFFF); regSA = signregP16UI(uiA); regSB = signregP16UI(uiB); tmp = (uiA<<2) & 0xFFFF; if (regSA){ while (tmp>>15){ kA++; tmp= (tmp<<1) & 0xFFFF; } } else{ kA=-1; while (!(tmp>>15)){ kA--; tmp= (tmp<<1) & 0xFFFF; } tmp&=0x7FFF; } expA = tmp>>14; fracA = (0x4000 | tmp); tmp = (uiB<<2) & 0xFFFF; if (regSB){ while (tmp>>15){ kA++; tmp= (tmp<<1) & 0xFFFF; } } else{ kA--; while (!(tmp>>15)){ kA--; tmp= (tmp<<1) & 0xFFFF; } tmp&=0x7FFF; } expA += tmp>>14; frac32Z = (uint_fast32_t) fracA * (0x4000 | tmp); if (expA>1){ kA++; expA ^=0x2; } rcarry = frac32Z>>29;//3rd bit (position 2) of frac32Z, hidden bit is 4th bit (position 3) if (rcarry){ if (expA) kA ++; expA^=1; frac32Z>>=1; } //default dot is between bit 71 and 72, extreme left bit is bit 0. Last right bit is bit 127. //Scale = 2^es * k + e => 2k + e int firstPos = 71 - (kA<<1) - expA; //No worries about hidden bit moving before position 4 because fraction is right aligned so //there are 16 spare bits if (firstPos>63){ //This means entire fraction is in right 64 bits uZ2.ui[0] = 0; shiftRight = firstPos-99;//99 = 63+ 4+ 32 uZ2.ui[1] = (shiftRight<0) ? 
((uint64_t)frac32Z << -shiftRight) : ((uint64_t) frac32Z >> shiftRight); } else{//frac32Z can be in both left64 and right64 shiftRight = firstPos - 35;// -35= -3-32 if (shiftRight<0) uZ2.ui[0] = ((uint64_t)frac32Z) << -shiftRight; else{ uZ2.ui[0] = (uint64_t)frac32Z >> shiftRight; uZ2.ui[1] = (uint64_t) frac32Z << (64 - shiftRight); } } if (signZ2){ if (uZ2.ui[1]>0){ uZ2.ui[1] = - uZ2.ui[1]; uZ2.ui[0] = ~uZ2.ui[0]; } else{ uZ2.ui[0] = -uZ2.ui[0]; } } //Addition b1 = uZ1.ui[1]&0x1; b2 = uZ2.ui[1]&0x1; rcarryb = b1 & b2; uZ.ui[1] = (uZ1.ui[1]>>1) + (uZ2.ui[1]>>1) + rcarryb; rcarryZ = uZ.ui[1]>>63; uZ.ui[1] = (uZ.ui[1]<<1 | (b1^b2) ); b1 = uZ1.ui[0]&0x1; b2 = uZ2.ui[0]&0x1; rcarryb = b1 & b2 ; int_fast8_t rcarryb3 = b1 + b2 + rcarryZ; uZ.ui[0] = (uZ1.ui[0]>>1) + (uZ2.ui[0]>>1) + ((rcarryb3>>1)& 0x1); //rcarrySignZ = uZ.ui[0]>>63; uZ.ui[0] = (uZ.ui[0]<<1 | (rcarryb3 & 0x1) ); //Exception handling for NaR if (isNaRQ16(uZ.q)) uZ.q.v[0] = 0; return uZ.q; } luametatex-2.10.08/source/libraries/softposit/source/quire16_fdp_sub.c000066400000000000000000000115051442250314700260070ustar00rootroot00000000000000/*============================================================================ This C source file is part of the SoftPosit Posit Arithmetic Package by S. H. Leong (Cerlane). Copyright 2017, 2018 A*STAR. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. 
Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. =============================================================================*/ #include #include "platform.h" #include "internals.h" quire16_t q16_fdp_sub( quire16_t q, posit16_t pA, posit16_t pB ){ union ui16_p16 uA, uB; union ui128_q16 uZ, uZ1, uZ2; uint_fast16_t uiA, uiB; uint_fast16_t fracA, tmp; bool signA, signB, signZ2, regSA, regSB, rcarry; int_fast8_t expA; int_fast16_t kA=0, shiftRight; uint_fast32_t frac32Z; //For add bool rcarryb, b1, b2, rcarryZ;//, rcarrySignZ; uZ1.q = q; uA.p = pA; uiA = uA.ui; uB.p = pB; uiB = uB.ui; //NaR if (isNaRQ16(q) || isNaRP16UI(uA.ui) || isNaRP16UI(uB.ui)){ uZ2.ui[0]=0x8000000000000000ULL; uZ2.ui[1] = 0; return uZ2.q; } else if (uiA==0 || uiB==0) return q; //max pos (sign plus and minus) signA = signP16UI( uiA ); signB = signP16UI( uiB ); signZ2 = signA ^ signB; if(signA) uiA = (-uiA & 0xFFFF); if(signB) uiB = (-uiB & 0xFFFF); regSA = signregP16UI(uiA); regSB = signregP16UI(uiB); tmp = (uiA<<2) & 0xFFFF; if (regSA){ while (tmp>>15){ kA++; tmp= (tmp<<1) & 0xFFFF; } } else{ kA=-1; while (!(tmp>>15)){ kA--; tmp= (tmp<<1) & 
0xFFFF; } tmp&=0x7FFF; } expA = tmp>>14; fracA = (0x4000 | tmp); tmp = (uiB<<2) & 0xFFFF; if (regSB){ while (tmp>>15){ kA++; tmp= (tmp<<1) & 0xFFFF; } } else{ kA--; while (!(tmp>>15)){ kA--; tmp= (tmp<<1) & 0xFFFF; } tmp&=0x7FFF; } expA += tmp>>14; frac32Z = (uint_fast32_t) fracA * (0x4000 | tmp); if (expA>1){ kA++; expA ^=0x2; } rcarry = frac32Z>>29;//3rd bit (position 2) of frac32Z, hidden bit is 4th bit (position 3) if (rcarry){ if (expA) kA ++; expA^=1; frac32Z>>=1; } //default dot is between bit 71 and 72, extreme left bit is bit 0. Last right bit is bit 127. //Scale = 2^es * k + e => 2k + e int firstPos = 71 - (kA<<1) - expA; //No worries about hidden bit moving before position 4 because fraction is right aligned so //there are 16 spare bits if (firstPos>63){ //This means entire fraction is in right 64 bits uZ2.ui[0] = 0; shiftRight = firstPos-99;//99 = 63+ 4+ 32 if (shiftRight<0)//shiftLeft uZ2.ui[1] = ((uint64_t)frac32Z) << -shiftRight; else uZ2.ui[1] = (uint64_t) frac32Z >> shiftRight; } else{//frac32Z can be in both left64 and right64 shiftRight = firstPos - 35;// -35= -3-32 if (shiftRight<0) uZ2.ui[0] = ((uint64_t)frac32Z) << -shiftRight; else{ uZ2.ui[0] = (uint64_t)frac32Z >> shiftRight; uZ2.ui[1] = (uint64_t) frac32Z << (64 - shiftRight); } } //This is the only difference from ADD (signZ2) and (!signZ2) if (!signZ2){ if (uZ2.ui[1]>0){ uZ2.ui[1] = - uZ2.ui[1]; uZ2.ui[0] = ~uZ2.ui[0]; } else{ uZ2.ui[0] = -uZ2.ui[0]; } } //Subtraction b1 = uZ1.ui[1]&0x1; b2 = uZ2.ui[1]&0x1; rcarryb = b1 & b2; uZ.ui[1] = (uZ1.ui[1]>>1) + (uZ2.ui[1]>>1) + rcarryb; rcarryZ = uZ.ui[1]>>63; uZ.ui[1] = (uZ.ui[1]<<1 | (b1^b2) ); b1 = uZ1.ui[0]&0x1; b2 = uZ2.ui[0]&0x1; rcarryb = b1 & b2 ; int_fast8_t rcarryb3 = b1 + b2 + rcarryZ; uZ.ui[0] = (uZ1.ui[0]>>1) + (uZ2.ui[0]>>1) + ((rcarryb3>>1)& 0x1); //rcarrySignZ = uZ.ui[0]>>63; uZ.ui[0] = (uZ.ui[0]<<1 | (rcarryb3 & 0x1) ); //Exception handling if (isNaRQ16(uZ.q)) uZ.q.v[0] = 0; return uZ.q; } 
luametatex-2.10.08/source/libraries/softposit/source/quire32_fdp_add.c000066400000000000000000000121171442250314700257440ustar00rootroot00000000000000/*============================================================================ This C source file is part of the SoftPosit Posit Arithmetic Package by S. H. Leong (Cerlane). Copyright 2017, 2018 A*STAR. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
=============================================================================*/ #include #include #include "platform.h" #include "internals.h" quire_2_t qX2_fdp_add( quire_2_t q, posit_2_t pA, posit_2_t pB ){ union ui512_q32 uQZ; union ui32_p32 uA, uB; memcpy(uQZ.ui, q.v, 8*sizeof(uint64_t)); uA.ui = pA.v; uB.ui = pB.v; uQZ.q = q32_fdp_add(uQZ.q, uA.p, uB.p); memcpy(q.v, uQZ.ui, 8*sizeof(uint64_t)); return q; } quire32_t q32_fdp_add( quire32_t q, posit32_t pA, posit32_t pB ){ union ui32_p32 uA, uB; union ui512_q32 uZ, uZ1, uZ2; uint_fast32_t uiA, uiB; uint_fast32_t fracA, tmp; bool signA, signB, signZ2, regSA, regSB, rcarry; int_fast32_t expA; int_fast16_t kA=0, shiftRight=0; uint_fast64_t frac64Z; //For add bool rcarryb, b1, b2, rcarryZ=0; uZ1.q = q; uA.p = pA; uiA = uA.ui; uB.p = pB; uiB = uB.ui; uZ2.q = q32Clr(uZ2.q); //set it to zero //NaR if (isNaRQ32(q) || isNaRP32UI(uA.ui) || isNaRP32UI(uB.ui)){ //set to all zeros uZ2.ui[0]=0x8000000000000000ULL; return uZ2.q; } else if (uiA==0 || uiB==0) return q; //max pos (sign plus and minus) signA = signP32UI( uiA ); signB = signP32UI( uiB ); signZ2 = signA ^ signB; if(signA) uiA = (-uiA & 0xFFFFFFFF); if(signB) uiB = (-uiB & 0xFFFFFFFF); regSA = signregP32UI(uiA); regSB = signregP32UI(uiB); tmp = (uiA<<2) & 0xFFFFFFFF; if (regSA){ while (tmp>>31){ kA++; tmp= (tmp<<1) & 0xFFFFFFFF; } } else{ kA=-1; while (!(tmp>>31)){ kA--; tmp= (tmp<<1) & 0xFFFFFFFF; } tmp&=0x7FFFFFFF; } expA = tmp>>29; //to get 2 bits fracA = ((tmp<<2) | 0x80000000) & 0xFFFFFFFF; tmp = (uiB<<2) & 0xFFFFFFFF; if (regSB){ while (tmp>>31){ kA++; tmp= (tmp<<1) & 0xFFFFFFFF; } } else{ kA--; while (!(tmp>>31)){ kA--; tmp= (tmp<<1) & 0xFFFFFFFF; } tmp&=0x7FFFFFFF; } expA += tmp>>29; frac64Z = (uint_fast64_t) fracA * (((tmp<<2) | 0x80000000) & 0xFFFFFFFF); if (expA>3){ kA++; expA&=0x3; // -=4 } //Will align frac64Z such that hidden bit is the first bit on the left. 
rcarry = frac64Z>>63;//1st bit of frac64Z if (rcarry){ expA++; if (expA>3){ kA ++; expA&=0x3; } } else frac64Z<<=1; //default dot is between bit 271 and 272, extreme left bit is bit 0. Last right bit is bit 512. //Minpos is 120 position to the right of binary point (dot) //Scale = 2^es * k + e => 2k + e int firstPos = 271 - (kA<<2) - expA; //Moving in chunk of 64. If it is in first chunk, a part might be in the chunk right to it. Simply have to handle that. int i; for (i=0; i<8; i++){ if (firstPos<(i+1)*64){ //Need to check how much of the fraction is in the next 64 bits shiftRight = firstPos - (i*64); uZ2.ui[i] = frac64Z >> shiftRight; if (i!=7 && shiftRight!=0) uZ2.ui[i+1] = frac64Z << (64 - shiftRight); break; } } if (signZ2){ for (i=7; i>=0; i--){ if (uZ2.ui[i]>0){ uZ2.ui[i] = - uZ2.ui[i]; i--; while(i>=0){ uZ2.ui[i] = ~uZ2.ui[i]; i--; } break; } } } //Addition for (i=7; i>=0; i--){ b1 = uZ1.ui[i] & 0x1; b2 = uZ2.ui[i] & 0x1; if (i==7){ rcarryb = b1 & b2; uZ.ui[i] = (uZ1.ui[i]>>1) + (uZ2.ui[i]>>1) + rcarryb; rcarryZ = uZ.ui[i]>>63; uZ.ui[i] = (uZ.ui[i]<<1 | (b1^b2) ); } else{ int_fast8_t rcarryb3 = b1 + b2 + rcarryZ; uZ.ui[i] = (uZ1.ui[i]>>1) + (uZ2.ui[i]>>1) + (rcarryb3>>1); rcarryZ = uZ.ui[i]>>63; uZ.ui[i] = (uZ.ui[i]<<1 | (rcarryb3 & 0x1) ); } } //Exception handling if (isNaRQ32(uZ.q) ) uZ.q.v[0]=0; return uZ.q; } luametatex-2.10.08/source/libraries/softposit/source/quire32_fdp_sub.c000066400000000000000000000122531442250314700260060ustar00rootroot00000000000000/*============================================================================ This C source file is part of the SoftPosit Posit Arithmetic Package by S. H. Leong (Cerlane). Copyright 2017, 2018 A*STAR. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. 2. 
Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
=============================================================================*/ #include #include #include "platform.h" #include "internals.h" //c-(a*b) quire_2_t qX2_fdp_sub( quire_2_t q, posit_2_t pA, posit_2_t pB ){ union ui512_q32 uQZ; union ui32_p32 uA, uB; memcpy(uQZ.ui, q.v, 8*sizeof(uint64_t)); uA.ui = pA.v; uB.ui = pB.v; uQZ.q = q32_fdp_sub(uQZ.q, uA.p, uB.p); memcpy(q.v, uQZ.ui, 8*sizeof(uint64_t)); return q; } quire32_t q32_fdp_sub( quire32_t q, posit32_t pA, posit32_t pB ){ union ui32_p32 uA, uB; union ui512_q32 uZ, uZ1, uZ2; uint_fast32_t uiA, uiB; uint_fast32_t fracA, tmp; bool signA, signB, signZ2, regSA, regSB, rcarry; int_fast32_t expA; int_fast16_t kA=0, shiftRight=0; uint_fast64_t frac64Z; //For sub bool rcarryb, b1, b2, rcarryZ; uZ1.q = q; uA.p = pA; uiA = uA.ui; uB.p = pB; uiB = uB.ui; uZ2.q = q32Clr(uZ2.q); //set it to zero //NaR if (isNaRQ32(q) || isNaRP32UI(uA.ui) || isNaRP32UI(uB.ui)){ //set to all zeros uZ2.ui[0]=0x8000000000000000ULL; return uZ2.q; } else if (uiA==0 || uiB==0) return q; //max pos (sign plus and minus) signA = signP32UI( uiA ); signB = signP32UI( uiB ); signZ2 = signA ^ signB; if(signA) uiA = (-uiA & 0xFFFFFFFF); if(signB) uiB = (-uiB & 0xFFFFFFFF); regSA = signregP32UI(uiA); regSB = signregP32UI(uiB); tmp = (uiA<<2) & 0xFFFFFFFF; if (regSA){ while (tmp>>31){ kA++; tmp= (tmp<<1) & 0xFFFFFFFF; } } else{ kA=-1; while (!(tmp>>31)){ kA--; tmp= (tmp<<1) & 0xFFFFFFFF; } tmp&=0x7FFFFFFF; } expA = tmp>>29; //to get 2 bits fracA = ((tmp<<2) | 0x80000000) & 0xFFFFFFFF; tmp = (uiB<<2) & 0xFFFFFFFF; if (regSB){ while (tmp>>31){ kA++; tmp= (tmp<<1) & 0xFFFFFFFF; } } else{ kA--; while (!(tmp>>31)){ kA--; tmp= (tmp<<1) & 0xFFFFFFFF; } tmp&=0x7FFFFFFF; } expA += tmp>>29; frac64Z = (uint_fast64_t) fracA * (((tmp<<2) | 0x80000000) & 0xFFFFFFFF); if (expA>3){ kA++; expA&=0x3; // -=4 } //Will align frac64Z such that hidden bit is the first bit on the left. 
rcarry = frac64Z>>63;//1st bit of frac64Z if (rcarry){ expA++; if (expA>3){ kA ++; expA&=0x3; } //frac64Z>>=1; } else frac64Z<<=1; //default dot is between bit 271 and 272, extreme left bit is bit 0. Last right bit is bit 512. //Minpos is 120 position to the right of binary point (dot) //Scale = 2^es * k + e => 2k + e int firstPos = 271 - (kA<<2) - expA; //Moving in chunk of 64. If it is in first chunk, a part might be in the chunk right to it. Simply have to handle that. int i; for (i=0; i<8; i++){ if (firstPos<(i+1)*64){ //Need to check how much of the fraction is in the next 64 bits shiftRight = firstPos - (i*64); uZ2.ui[i] = frac64Z >> shiftRight; if (i!=7 && shiftRight!=0) uZ2.ui[i+1] = frac64Z << (64 - shiftRight); break; } } //This is the only difference from ADD (signZ2) and (!signZ2) if (!signZ2){ for (i=7; i>=0; i--){ if (uZ2.ui[i]>0){ uZ2.ui[i] = - uZ2.ui[i]; i--; while(i>=0){ uZ2.ui[i] = ~uZ2.ui[i]; i--; } break; } } } //Subtraction for (i=7; i>=0; i--){ b1 = uZ1.ui[i] & 0x1; b2 = uZ2.ui[i] & 0x1; if (i==7){ rcarryb = b1 & b2; uZ.ui[i] = (uZ1.ui[i]>>1) + (uZ2.ui[i]>>1) + rcarryb; rcarryZ = uZ.ui[i]>>63; uZ.ui[i] = (uZ.ui[i]<<1 | (b1^b2) ); } else{ int_fast8_t rcarryb3 = b1 + b2 + rcarryZ; uZ.ui[i] = (uZ1.ui[i]>>1) + (uZ2.ui[i]>>1) + (rcarryb3>>1); rcarryZ = uZ.ui[i]>>63; uZ.ui[i] = (uZ.ui[i]<<1 | (rcarryb3 & 0x1) ); } } //Exception handling if (isNaRQ32(uZ.q) ) uZ.q.v[0]=0; return uZ.q; } luametatex-2.10.08/source/libraries/softposit/source/quire8_fdp_add.c000066400000000000000000000067741442250314700257030ustar00rootroot00000000000000/*============================================================================ This C source file is part of the SoftPosit Posit Arithmetic Package by S. H. Leong (Cerlane). Copyright 2017, 2018 A*STAR. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. 
Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
=============================================================================*/ #include #include "platform.h" #include "internals.h" quire8_t q8_fdp_add( quire8_t q, posit8_t pA, posit8_t pB ){ union ui8_p8 uA, uB; union ui32_q8 uqZ, uqZ1, uqZ2; uint_fast8_t uiA, uiB; uint_fast8_t fracA, tmp; bool signA, signB, signZ2, regSA, regSB, rcarry; int_fast8_t kA=0, shiftRight=0; uint_fast32_t frac32Z; uqZ1.q = q; uA.p = pA; uiA = uA.ui; uB.p = pB; uiB = uB.ui; //NaR if (isNaRQ8(q) || isNaRP8UI(uA.ui) || isNaRP8UI(uB.ui)){ uqZ2.ui=0x80000000; return uqZ2.q; } else if (uiA==0 || uiB==0) return q; //max pos (sign plus and minus) signA = signP8UI( uiA ); signB = signP8UI( uiB ); signZ2 = signA ^ signB; if(signA) uiA = (-uiA & 0xFF); if(signB) uiB = (-uiB & 0xFF); regSA = signregP8UI(uiA); regSB = signregP8UI(uiB); tmp = (uiA<<2) & 0xFF; if (regSA){ while (tmp>>7){ kA++; tmp= (tmp<<1) & 0xFF; } } else{ kA=-1; while (!(tmp>>7)){ kA--; tmp= (tmp<<1) & 0xFF; } tmp&=0x7F; } fracA = (0x80 | tmp); tmp = (uiB<<2) & 0xFF; if (regSB){ while (tmp>>7){ kA++; tmp= (tmp<<1) & 0xFF; } } else{ kA--; while (!(tmp>>7)){ kA--; tmp= (tmp<<1) & 0xFF; } tmp&=0x7F; } frac32Z = (uint_fast32_t)( fracA * (0x80 | tmp) ) <<16; rcarry = frac32Z>>31;//1st bit (position 2) of frac32Z, hidden bit is 4th bit (position 3) if (rcarry){ kA ++; frac32Z>>=1; } //default dot is between bit 19 and 20, extreme left bit is bit 0. Last right bit is bit 31. 
//Scale = 2^es * k + e => 2k + e // firstPost = 19-kA, shift = firstPos -1 (because frac32Z start from 2nd bit) //int firstPos = 19 - kA; shiftRight = 18-kA; uqZ2.ui = frac32Z>> shiftRight; if (signZ2) uqZ2.ui = -uqZ2.ui & 0xFFFFFFFF; //Addition uqZ.ui = uqZ2.ui + uqZ1.ui; //Exception handling if (isNaRQ8(uqZ.q) ) uqZ.q.v = 0; return uqZ.q; } luametatex-2.10.08/source/libraries/softposit/source/quire8_fdp_sub.c000066400000000000000000000071051442250314700257310ustar00rootroot00000000000000/*============================================================================ This C source file is part of the SoftPosit Posit Arithmetic Package by S. H. Leong (Cerlane). Copyright 2017, 2018 A*STAR. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. =============================================================================*/ #include #include "platform.h" #include "internals.h" //q - (pA*pB) quire8_t q8_fdp_sub( quire8_t q, posit8_t pA, posit8_t pB ){ union ui8_p8 uA, uB; union ui32_q8 uqZ, uqZ1, uqZ2; uint_fast8_t uiA, uiB; uint_fast8_t fracA, tmp; bool signA, signB, signZ2, regSA, regSB, rcarry; int_fast8_t kA=0, shiftRight=0; uint_fast32_t frac32Z; uqZ1.q = q; uA.p = pA; uiA = uA.ui; uB.p = pB; uiB = uB.ui; //NaR if (isNaRQ8(q) || isNaRP8UI(uA.ui) || isNaRP8UI(uB.ui)){ uqZ2.ui=0x80000000; return uqZ2.q; } else if (uiA==0 || uiB==0) return q; //max pos (sign plus and minus) signA = signP8UI( uiA ); signB = signP8UI( uiB ); signZ2 = signA ^ signB; if(signA) uiA = (-uiA & 0xFF); if(signB) uiB = (-uiB & 0xFF); regSA = signregP8UI(uiA); regSB = signregP8UI(uiB); tmp = (uiA<<2) & 0xFF; if (regSA){ while (tmp>>7){ kA++; tmp= (tmp<<1) & 0xFF; } } else{ kA=-1; while (!(tmp>>7)){ kA--; tmp= (tmp<<1) & 0xFF; } tmp&=0x7F; } fracA = (0x80 | tmp); tmp = (uiB<<2) & 0xFF; if (regSB){ while (tmp>>7){ kA++; tmp= (tmp<<1) & 0xFF; } } else{ kA--; while (!(tmp>>7)){ kA--; tmp= (tmp<<1) & 0xFF; } tmp&=0x7F; } frac32Z = (uint_fast32_t)( fracA * (0x80 | tmp) ) <<16; rcarry = frac32Z>>31;//1st bit (position 2) of frac32Z, hidden bit is 4th bit (position 3) if (rcarry){ kA ++; frac32Z>>=1; } //default dot is between bit 19 and 20, extreme left bit is bit 0. Last right bit is bit 31. 
//Scale = 2^es * k + e => 2k + e // firstPost = 19-kA, shift = firstPos -1 (because frac32Z start from 2nd bit) //int firstPos = 19 - kA; shiftRight = 18-kA; uqZ2.ui = frac32Z>> shiftRight; //This is the only difference from ADD (signZ2) and (!signZ2) if (!signZ2) uqZ2.ui = -uqZ2.ui & 0xFFFFFFFF; //Addition uqZ.ui = uqZ2.ui + uqZ1.ui; //Exception handling if (isNaRQ8(uqZ.q) ) uqZ.q.v=0; return uqZ.q; } luametatex-2.10.08/source/libraries/softposit/source/quire_helper.c000066400000000000000000000102241442250314700254720ustar00rootroot00000000000000/*============================================================================ This C source file is part of the SoftPosit Posit Arithmetic Package by S. H. Leong (Cerlane). Copyright 2017 A*STAR. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. =============================================================================*/ #include "platform.h" #include "internals.h" void printBinary(uint64_t * s, int size) { int i; uint64_t number = *s; int bitSize = size -1; for(i = 0; i < size; ++i) { if(i%8 == 0) putchar(' '); printf("%llu", (number >> (bitSize-i))&1); } printf("\n"); } void printBinaryQuire32(quire32_t * s){ int size = 512; int dotPos = 272; int bitSize = 63; int n = 0; uint64_t number = s->v[n]; for(int i = 0; i < size; ++i) { if (i!=0 && i%64==0){ printf("\n"); n++; number = s->v[n]; } if(i%8 == 0) putchar(' '); if (i==dotPos) putchar('.'); printf("%llu", (number >> (bitSize-i))&1); } printf("\n"); } void printBinaryQuire16(quire16_t * s){ int size = 128; int dotPos = 72; int bitSize = 63; int n = 0; uint64_t number = s->v[n]; for(int i = 0; i < size; ++i) { if (i!=0 && i%64==0){ printf("\n"); n++; number = s->v[n]; } if(i%8 == 0) putchar(' '); if (i==dotPos) putchar('.'); printf("%llu", (number >> (bitSize-i))&1); } printf("\n"); } void printBinaryQuire8(quire8_t * s){ int size = 32; uint32_t number = s->v; int dotPos = 20; int bitSize = size -1; for(int i = 0; i < size; ++i) { if(i%8 == 0) putchar(' '); if (i==dotPos) putchar('.'); printf("%u", (number >> (bitSize-i))&1); } printf("\n"); } void printBinaryPX(uint32_t * s, int size) { int i; uint32_t number = *s; number >>= (32-size); int bitSize = size -1; for(i = 0; i < size; ++i){ if(i%8 == 0) putchar(' '); printf("%u", (number >> (bitSize-i))&1); 
} printf("\n"); } void printHex64(uint64_t s) { printf("%016llx\n", s); } void printHex(uint64_t s) { printf("0x%llx\n", s); } void printHexPX(uint32_t s, int size) { s>>=(32-size); printf("0x%x\n", s); } quire16_t q16_TwosComplement(quire16_t q){ if (!isQ16Zero(q) && !isNaRQ16(q)){ if (q.v[1]==0){ q.v[0] = -q.v[0]; } else{ q.v[1] = - q.v[1]; q.v[0] = ~q.v[0]; } } return q; } quire32_t q32_TwosComplement(quire32_t q){ if (!isQ32Zero(q) && !isNaRQ32(q)){ int i=7; bool found = false; while(i){ if (found){ q.v[i] = ~q.v[i]; } else{ if (q.v[i]!=0){ q.v[i] = -q.v[i]; found = true; } } i--; } } return q; } quire_2_t qX2_TwosComplement(quire_2_t q){ if (!isQX2Zero(q) && !isNaRQX2(q)){ int i=7; bool found = false; while(i){ if (found){ q.v[i] = ~q.v[i]; } else{ if (q.v[i]!=0){ q.v[i] = -q.v[i]; found = true; } } i--; } } return q; } luametatex-2.10.08/source/libraries/softposit/source/s_addMagsP16.c000066400000000000000000000111271442250314700251620ustar00rootroot00000000000000 /*============================================================================ This C source file is part of the SoftPosit Posit Arithmetic Package by S. H. Leong (Cerlane). Copyright 2017, 2018 A*STAR. All rights reserved. This C source file was based on SoftFloat IEEE Floating-Point Arithmetic Package, Release 3d, by John R. Hauser. Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the University of California. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. 
Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. =============================================================================*/ #include "platform.h" #include "internals.h" #include "stdlib.h" #include #ifdef SOFTPOSIT_EXACT posit16_t softposit_addMagsP16( uint_fast16_t uiA, uint_fast16_t uiB, bool isExact){ #else posit16_t softposit_addMagsP16( uint_fast16_t uiA, uint_fast16_t uiB ){ #endif uint_fast16_t regA, uiX, uiY; uint_fast32_t frac32A, frac32B; uint_fast16_t fracA=0, regime, tmp; bool sign, regSA, regSB, rcarry=0, bitNPlusOne=0, bitsMore=0; int_fast8_t kA=0, expA; int_fast16_t shiftRight; union ui16_p16 uZ; sign = signP16UI( uiA ); //sign is always positive.. actually don't have to do this. 
if (sign){ uiA = -uiA & 0xFFFF; uiB = -uiB & 0xFFFF; } if ((int_fast16_t)uiA < (int_fast16_t)uiB){ uiX = uiA; uiY = uiB; uiA = uiY; uiB = uiX; } regSA = signregP16UI( uiA ); regSB = signregP16UI( uiB ); tmp = (uiA<<2) & 0xFFFF; if (regSA){ while (tmp>>15){ kA++; tmp= (tmp<<1) & 0xFFFF; } } else{ kA=-1; while (!(tmp>>15)){ kA--; tmp= (tmp<<1) & 0xFFFF; } tmp&=0x7FFF; } expA = tmp>>14; frac32A = (0x4000 | tmp) << 16; shiftRight = kA; tmp = (uiB<<2) & 0xFFFF; if (regSB){ while (tmp>>15){ shiftRight--; tmp= (tmp<<1) & 0xFFFF; } frac32B = (0x4000 | tmp) <<16; } else{ shiftRight++; while (!(tmp>>15)){ shiftRight++; tmp= (tmp<<1) & 0xFFFF; } tmp&=0x7FFF; frac32B = ( (0x4000 | tmp) <<16 ) & 0x7FFFFFFF; } //This is 2kZ + expZ; (where kZ=kA-kB and expZ=expA-expB) shiftRight = (shiftRight<<1) + expA - (tmp>>14); if (shiftRight==0){ frac32A += frac32B; //rcarry is one if (expA) kA ++; expA^=1; frac32A>>=1; } else{ //Manage CLANG (LLVM) compiler when shifting right more than number of bits (shiftRight>31) ? (frac32B=0): (frac32B >>= shiftRight); //frac32B >>= shiftRight frac32A += frac32B; rcarry = 0x80000000 & frac32A; //first left bit if(rcarry){ if (expA) kA ++; expA^=1; frac32A>>=1; } } if(kA<0){ regA = (-kA & 0xFFFF); regSA = 0; regime = 0x4000>>regA; } else{ regA = kA+1; regSA=1; regime = 0x7FFF - (0x7FFF>>regA); } if(regA>14){ //max or min pos. exp and frac does not matter. (regSA) ? (uZ.ui= 0x7FFF): (uZ.ui=0x1); } else{ //remove hidden bits frac32A = (frac32A & 0x3FFFFFFF) >>(regA + 1) ; fracA = frac32A>>16; if (regA!=14) bitNPlusOne = (frac32A>>15) & 0x1; else if (frac32A>0){ fracA=0; bitsMore =1; } if (regA==14 && expA) bitNPlusOne = 1; uZ.ui = packToP16UI(regime, regA, expA, fracA); if (bitNPlusOne){ if ( frac32A&0x7FFF ) bitsMore=1; //n+1 frac bit is 1. 
Need to check if another bit is 1 too if not round to even uZ.ui += (uZ.ui&1) | bitsMore; } } if (sign) uZ.ui = -uZ.ui & 0xFFFF; return uZ.p; } luametatex-2.10.08/source/libraries/softposit/source/s_addMagsP32.c000066400000000000000000000110371442250314700251600ustar00rootroot00000000000000 /*============================================================================ This C source file is part of the SoftPosit Posit Arithmetic Package by S. H. Leong (Cerlane). Copyright 2017, 2018 A*STAR. All rights reserved. This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic Package, Release 3d, by John R. Hauser. Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of California. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. =============================================================================*/ #include "platform.h" #include "internals.h" posit32_t softposit_addMagsP32( uint_fast32_t uiA, uint_fast32_t uiB ) { uint_fast16_t regA, regB; uint_fast64_t frac64A=0, frac64B=0; uint_fast32_t fracA=0, regime, tmp; bool sign, regSA, regSB, rcarry=0, bitNPlusOne=0, bitsMore=0; int_fast8_t kA=0; int_fast32_t expA; int_fast16_t shiftRight; union ui32_p32 uZ; sign = signP32UI( uiA ); if (sign){ uiA = -uiA & 0xFFFFFFFF; uiB = -uiB & 0xFFFFFFFF; } if ((int_fast32_t)uiA < (int_fast32_t)uiB){ uiA ^= uiB; uiB ^= uiA; uiA ^= uiB; } regSA = signregP32UI( uiA ); regSB = signregP32UI( uiB ); tmp = (uiA<<2)&0xFFFFFFFF; if (regSA){ while (tmp>>31){ kA++; tmp= (tmp<<1) & 0xFFFFFFFF; } } else{ kA=-1; while (!(tmp>>31)){ kA--; tmp= (tmp<<1) & 0xFFFFFFFF; } tmp&=0x7FFFFFFF; } expA = tmp>>29; //to get 2 bits frac64A = ((0x40000000ULL | tmp<<1) & 0x7FFFFFFFULL) <<32; shiftRight = kA; tmp = (uiB<<2) & 0xFFFFFFFF; if (regSB){ while (tmp>>31){ shiftRight--; tmp= (tmp<<1) & 0xFFFFFFFF; } } else{ shiftRight++; while (!(tmp>>31)){ shiftRight++; tmp= (tmp<<1) & 0xFFFFFFFF; } tmp&=0x7FFFFFFF; } frac64B = ((0x40000000ULL | tmp<<1) & 0x7FFFFFFFULL) <<32; //This is 4kZ + expZ; (where kZ=kA-kB and expZ=expA-expB) shiftRight = (shiftRight<<2) + expA - (tmp>>29); //Manage CLANG (LLVM) compiler when shifting right more than number of bits (shiftRight>63) ? 
(frac64B=0): (frac64B >>= shiftRight); //frac64B >>= shiftRight frac64A += frac64B; rcarry = 0x8000000000000000 & frac64A; //first left bit if (rcarry){ expA++; if (expA>3){ kA ++; expA&=0x3; } frac64A>>=1; } if(kA<0){ regA = -kA; regSA = 0; regime = 0x40000000>>regA; } else{ regA = kA+1; regSA=1; regime = 0x7FFFFFFF - (0x7FFFFFFF>>regA); } if(regA>30){ //max or min pos. exp and frac does not matter. (regSA) ? (uZ.ui= 0x7FFFFFFF): (uZ.ui=0x1); } else{ //remove hidden bits frac64A = (frac64A & 0x3FFFFFFFFFFFFFFF) >>(regA + 2) ; // 2 bits exp fracA = frac64A>>32; if (regA<=28){ bitNPlusOne |= (0x80000000 & frac64A) ; expA <<= (28-regA); } else { if (regA==30){ bitNPlusOne = expA&0x2; bitsMore = (expA&0x1); expA = 0; } else if (regA==29){ bitNPlusOne = expA&0x1; expA>>=1; } if (fracA>0){ fracA=0; bitsMore =1; } } uZ.ui = packToP32UI(regime, expA, fracA); //n+1 frac bit is 1. Need to check if another bit is 1 too if not round to even if (bitNPlusOne){ if (0x7FFFFFFF & frac64A) bitsMore=1; uZ.ui += (uZ.ui&1) | bitsMore; } } if (sign) uZ.ui = -uZ.ui & 0xFFFFFFFF; return uZ.p; } luametatex-2.10.08/source/libraries/softposit/source/s_addMagsP8.c000066400000000000000000000074241442250314700251100ustar00rootroot00000000000000 /*============================================================================ This C source file is part of the SoftPosit Posit Arithmetic Package by S. H. Leong (Cerlane). Copyright 2017, 2018 A*STAR. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. 
Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. =============================================================================*/ #include "platform.h" #include "internals.h" #ifdef SOFTPOSIT_EXACT posit8_t softposit_addMagsP8( uint_fast8_t uiA, uint_fast8_t uiB, bool isExact){ #else posit8_t softposit_addMagsP8( uint_fast8_t uiA, uint_fast8_t uiB ){ #endif uint_fast8_t regA; uint_fast16_t frac16A, frac16B; uint_fast8_t fracA=0, regime, tmp; bool sign, regSA, regSB, rcarry=0, bitNPlusOne=0, bitsMore=0; int_fast8_t kA=0; int_fast16_t shiftRight; union ui8_p8 uZ; sign = signP8UI( uiA ); //sign is always positive.. actually don't have to do this. 
if (sign){ uiA = -uiA & 0xFF; uiB = -uiB & 0xFF; } if ((int_fast8_t)uiA < (int_fast8_t)uiB){ uiA ^= uiB; uiB ^= uiA; uiA ^= uiB; } regSA = signregP8UI( uiA ); regSB = signregP8UI( uiB ); tmp = (uiA<<2) & 0xFF; if (regSA){ while (tmp>>7){ kA++; tmp= (tmp<<1) & 0xFF; } } else{ kA=-1; while (!(tmp>>7)){ kA--; tmp= (tmp<<1) & 0xFF; } tmp&=0x7F; } frac16A = (0x80 | tmp) << 7; shiftRight = kA; tmp = (uiB<<2) & 0xFF; if (regSB){ while (tmp>>7){ shiftRight--; tmp= (tmp<<1) & 0xFF; } } else{ shiftRight++; while (!(tmp>>7)){ shiftRight++; tmp= (tmp<<1) & 0xFF; } tmp&=0x7F; } frac16B = (0x80 | tmp) <<7 ; //Manage CLANG (LLVM) compiler when shifting right more than number of bits (shiftRight>7) ? (frac16B=0): (frac16B >>= shiftRight); //frac32B >>= shiftRight frac16A += frac16B; rcarry = 0x8000 & frac16A; //first left bit if (rcarry){ kA++; frac16A>>=1; } if(kA<0){ regA = (-kA & 0xFF); regSA = 0; regime = 0x40>>regA; } else{ regA = kA+1; regSA=1; regime = 0x7F-(0x7F>>regA); } if(regA>6){ //max or min pos. exp and frac does not matter. (regSA) ? (uZ.ui= 0x7F): (uZ.ui=0x1); } else{ frac16A = (frac16A&0x3FFF) >> regA; fracA = (uint_fast8_t) (frac16A>>8); bitNPlusOne = (0x80 & frac16A) ; uZ.ui = packToP8UI(regime, fracA); //n+1 frac bit is 1. Need to check if another bit is 1 too if not round to even if (bitNPlusOne){ if (0x7F & frac16A) bitsMore=1; uZ.ui += (uZ.ui&1) | bitsMore; } } if (sign) uZ.ui = -uZ.ui & 0xFF; return uZ.p; } luametatex-2.10.08/source/libraries/softposit/source/s_addMagsPX1.c000066400000000000000000000120371442250314700252250ustar00rootroot00000000000000 /*============================================================================ This C source file is part of the SoftPosit Posit Arithmetic Package by S. H. Leong (Cerlane). Copyright 2017, 2018 A*STAR. All rights reserved. This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic Package, Release 3d, by John R. Hauser. 
Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of California. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
=============================================================================*/ #include "platform.h" #include "internals.h" posit_1_t softposit_addMagsPX1( uint_fast32_t uiA, uint_fast32_t uiB, int x ) { int regA; uint_fast64_t frac64A=0, frac64B=0; uint_fast32_t fracA=0, regime, tmp; bool sign, regSA, regSB, rcarry=0, bitNPlusOne=0, bitsMore=0; int_fast8_t kA=0; int_fast32_t expA=0; int_fast16_t shiftRight; union ui32_pX1 uZ; sign = signP32UI( uiA ); if (sign){ uiA = -uiA & 0xFFFFFFFF; uiB = -uiB & 0xFFFFFFFF; } if ((int_fast32_t)uiA < (int_fast32_t)uiB){ uiA ^= uiB; uiB ^= uiA; uiA ^= uiB; } regSA = signregP32UI( uiA ); regSB = signregP32UI( uiB ); if (x==2){ uZ.ui = (regSA|regSB) ? (0x40000000) : (0x0); } else{ tmp = (uiA<<2)&0xFFFFFFFF; if (regSA){ while (tmp>>31){ kA++; tmp= (tmp<<1) & 0xFFFFFFFF; } } else{ kA=-1; while (!(tmp>>31)){ kA--; tmp= (tmp<<1) & 0xFFFFFFFF; } tmp&=0x7FFFFFFF; } expA = tmp>>30; //to get 1 bits frac64A = ((0x40000000ULL | tmp) & 0x7FFFFFFFULL) <<32; shiftRight = kA; tmp = (uiB<<2) & 0xFFFFFFFF; if (regSB){ while (tmp>>31){ shiftRight--; tmp= (tmp<<1) & 0xFFFFFFFF; } } else{ shiftRight++; while (!(tmp>>31)){ shiftRight++; tmp= (tmp<<1) & 0xFFFFFFFF; } tmp&=0x7FFFFFFF; } frac64B = ((0x40000000ULL | tmp) & 0x7FFFFFFFULL) <<32; //This is 2kZ + expZ; (where kZ=kA-kB and expZ=expA-expB) shiftRight = (shiftRight<<1) + expA - (tmp>>30); if (shiftRight==0){ frac64A += frac64B; //rcarry is one if (expA) kA ++; expA^=1; frac64A>>=1; } else{ //Manage CLANG (LLVM) compiler when shifting right more than number of bits (shiftRight>63) ? (frac64B=0): (frac64B >>= shiftRight); //frac64B >>= shiftRight frac64A += frac64B; rcarry = 0x8000000000000000 & frac64A; //first left bit if (rcarry){ if (expA) kA ++; expA^=1;; frac64A>>=1; } } if(kA<0){ regA = -kA; regSA = 0; regime = 0x40000000>>regA; } else{ regA = kA+1; regSA=1; regime = 0x7FFFFFFF - (0x7FFFFFFF>>regA); } if(regA>(x-2)){ //max or min pos. exp and frac does not matter. uZ.ui=(regSA) ? 
(0x7FFFFFFF & ((int32_t)0x80000000>>(x-1)) ): (0x1 << (32-x)); } else{ //remove hidden bits frac64A = (frac64A & 0x3FFFFFFFFFFFFFFF) >>(regA + 1) ; // 2 bits exp fracA = frac64A>>32; //regime length is smaller than length of posit if (regA>x) & frac64A); else if (frac64A>0){ fracA=0; bitsMore =1; } if (regA==(x-2) && expA){ bitNPlusOne = 1; expA=0; } } else{ regime=(regSA) ? (regime & ((int32_t)0x80000000>>(x-1)) ): (regime << (32-x)); expA=0; fracA=0; } fracA &=((int32_t)0x80000000>>(x-1)); expA <<= (29-regA); uZ.ui = packToP32UI(regime, expA, fracA); //n+1 frac bit is 1. Need to check if another bit is 1 too if not round to even if (bitNPlusOne){ if ((0x7FFFFFFFFFFFFFFF>>x) & frac64A) bitsMore=1; uZ.ui += (uint32_t)(((uZ.ui>>(32-x))&1) | bitsMore) << (32-x) ; } } } if (sign) uZ.ui = -uZ.ui & 0xFFFFFFFF; return uZ.p; } luametatex-2.10.08/source/libraries/softposit/source/s_addMagsPX2.c000066400000000000000000000122221442250314700252220ustar00rootroot00000000000000 /*============================================================================ This C source file is part of the SoftPosit Posit Arithmetic Package by S. H. Leong (Cerlane). Copyright 2017, 2018 A*STAR. All rights reserved. This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic Package, Release 3d, by John R. Hauser. Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of California. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. 
Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. =============================================================================*/ #include "platform.h" #include "internals.h" posit_2_t softposit_addMagsPX2( uint_fast32_t uiA, uint_fast32_t uiB, int x ) { int regA; uint_fast64_t frac64A=0, frac64B=0; uint_fast32_t fracA=0, regime, tmp; bool sign, regSA, regSB, rcarry=0, bitNPlusOne=0, bitsMore=0; int_fast8_t kA=0; int_fast32_t expA=0; int_fast16_t shiftRight; union ui32_pX2 uZ; sign = signP32UI( uiA ); if (sign){ uiA = -uiA & 0xFFFFFFFF; uiB = -uiB & 0xFFFFFFFF; } if ((int_fast32_t)uiA < (int_fast32_t)uiB){ uiA ^= uiB; uiB ^= uiA; uiA ^= uiB; } regSA = signregP32UI( uiA ); regSB = signregP32UI( uiB ); if (x==2){ uZ.ui = (regSA|regSB) ? 
(0x40000000) : (0x0); } else{ //int tmpX = x-2; tmp = (uiA<<2)&0xFFFFFFFF; if (regSA){ while (tmp>>31){ kA++; tmp= (tmp<<1) & 0xFFFFFFFF; } } else{ kA=-1; while (!(tmp>>31)){ kA--; tmp= (tmp<<1) & 0xFFFFFFFF; } tmp&=0x7FFFFFFF; } expA = tmp>>29; //to get 2 bits frac64A = ((0x40000000ULL | tmp<<1) & 0x7FFFFFFFULL) <<32; shiftRight = kA; tmp = (uiB<<2) & 0xFFFFFFFF; if (regSB){ while (tmp>>31){ shiftRight--; tmp= (tmp<<1) & 0xFFFFFFFF; } } else{ shiftRight++; while (!(tmp>>31)){ shiftRight++; tmp= (tmp<<1) & 0xFFFFFFFF; } tmp&=0x7FFFFFFF; } frac64B = ((0x40000000ULL | tmp<<1) & 0x7FFFFFFFULL) <<32; //This is 4kZ + expZ; (where kZ=kA-kB and expZ=expA-expB) shiftRight = (shiftRight<<2) + expA - (tmp>>29); //Manage CLANG (LLVM) compiler when shifting right more than number of bits (shiftRight>63) ? (frac64B=0): (frac64B >>= shiftRight); //frac64B >>= shiftRight frac64A += frac64B; rcarry = 0x8000000000000000 & frac64A; //first left bit if (rcarry){ expA++; if (expA>3){ kA ++; expA&=0x3; } frac64A>>=1; } if(kA<0){ regA = -kA; regSA = 0; regime = 0x40000000>>regA; } else{ regA = kA+1; regSA=1; regime = 0x7FFFFFFF - (0x7FFFFFFF>>regA); } if(regA>(x-2)){ //max or min pos. exp and frac does not matter. uZ.ui=(regSA) ? (0x7FFFFFFF & ((int32_t)0x80000000>>(x-1)) ): (0x1 << (32-x)); } else{ //remove hidden bits frac64A = (frac64A & 0x3FFFFFFFFFFFFFFF) >>(regA + 2) ; // 2 bits exp fracA = frac64A>>32; //regime length is smaller than length of posit if (regA>=1; expA &=0x2; } if (fracA>0){ fracA=0; bitsMore =1; } } } else{ regime=(regSA) ? (regime & ((int32_t)0x80000000>>(x-1)) ): (regime << (32-x)); expA=0; fracA=0; } fracA &=((int32_t)0x80000000>>(x-1)); expA <<= (28-regA); uZ.ui = packToP32UI(regime, expA, fracA); //n+1 frac bit is 1. 
Need to check if another bit is 1 too if not round to even if (bitNPlusOne){ if (((uint64_t)0xFFFFFFFFFFFFFFFF>>(x+1)) & frac64A) bitsMore=1; uZ.ui += (uint32_t)(((uZ.ui>>(32-x))&1) | bitsMore) << (32-x) ; } } } if (sign) uZ.ui = -uZ.ui & 0xFFFFFFFF; return uZ.p; } luametatex-2.10.08/source/libraries/softposit/source/s_approxRecipSqrt_1Ks.c000066400000000000000000000045611442250314700272230ustar00rootroot00000000000000/*============================================================================ This C source file is part of the SoftPosit Posit Arithmetic Package by S. H. Leong (Cerlane) and John Gustafson. Copyright 2017, 2018 A*STAR. All rights reserved. This C source file was based on SoftFloat IEEE Floating-Point Arithmetic Package, Release 3d, by John R. Hauser. Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of California. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. =============================================================================*/ #include #include "platform.h" const uint_fast16_t softposit_approxRecipSqrt0[16] = { 0xb4c9, 0xffab, 0xaa7d, 0xf11c, 0xa1c5, 0xe4c7, 0x9a43, 0xda29, 0x93b5, 0xd0e5, 0x8ded, 0xc8b7, 0x88c6, 0xc16d, 0x8424, 0xbae1 }; const uint_fast16_t softposit_approxRecipSqrt1[16] = { 0xa5a5, 0xea42, 0x8c21, 0xc62d, 0x788f, 0xaa7f, 0x6928, 0x94b6, 0x5cc7, 0x8335, 0x52a6, 0x74e2, 0x4a3e, 0x68fe, 0x432b, 0x5efd }; luametatex-2.10.08/source/libraries/softposit/source/s_mulAddP16.c000066400000000000000000000147241442250314700250360ustar00rootroot00000000000000 /*============================================================================ This C source file is part of the SoftPosit Posit Arithmetic Package by S. H. Leong (Cerlane). Copyright 2017, 2018 A*STAR. All rights reserved. This C source file was based on SoftFloat IEEE Floating-Point Arithmetic Package, Release 3d, by John R. Hauser. Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the University of California. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. 2. 
Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
=============================================================================*/ #include "platform.h" #include "internals.h" //softposit_mulAdd_subC => (uiA*uiB)-uiC //softposit_mulAdd_subProd => uiC - (uiA*uiB) //Default is always op==0 posit16_t softposit_mulAddP16( uint_fast16_t uiA, uint_fast16_t uiB, uint_fast16_t uiC, uint_fast16_t op ){ union ui16_p16 uZ; uint_fast16_t regZ, fracA, fracZ, regime, tmp; bool signA, signB, signC, signZ, regSA, regSB, regSC, regSZ, bitNPlusOne=0, bitsMore=0, rcarry; int_fast8_t expA, expC, expZ; int_fast16_t kA=0, kC=0, kZ=0, shiftRight; uint_fast32_t frac32C=0, frac32Z=0; //NaR if ( uiA==0x8000 || uiB==0x8000 || uiC==0x8000 ){ uZ.ui = 0x8000; return uZ.p; } else if (uiA==0 || uiB==0){ if (op == softposit_mulAdd_subC) uZ.ui = -uiC; else uZ.ui = uiC; return uZ.p; } signA = signP16UI( uiA ); signB = signP16UI( uiB ); signC = signP16UI( uiC );//^ (op == softposit_mulAdd_subC); signZ = signA ^ signB;// ^ (op == softposit_mulAdd_subProd); if(signA) uiA = (-uiA & 0xFFFF); if(signB) uiB = (-uiB & 0xFFFF); if(signC) uiC = (-uiC & 0xFFFF); regSA = signregP16UI(uiA); regSB = signregP16UI(uiB); regSC = signregP16UI(uiC); tmp = (uiA<<2) & 0xFFFF; if (regSA){ while (tmp>>15){ kA++; tmp= (tmp<<1) & 0xFFFF; } } else{ kA=-1; while (!(tmp>>15)){ kA--; tmp= (tmp<<1) & 0xFFFF; } tmp&=0x7FFF; } expA = tmp>>14; fracA = (0x8000 | (tmp<<1)); //use first bit here for hidden bit to get more bits tmp = (uiB<<2) & 0xFFFF; if (regSB){ while (tmp>>15){ kA++; tmp= (tmp<<1) & 0xFFFF; } } else{ kA--; while (!(tmp>>15)){ kA--; tmp= (tmp<<1) & 0xFFFF; } tmp&=0x7FFF; } expA += tmp>>14; frac32Z = (uint_fast32_t) fracA * (0x8000 | (tmp <<1)); // first bit hidden bit if (expA>1){ kA++; expA ^=0x2; } rcarry = frac32Z>>31;//1st bit of frac32Z if (rcarry){ if (expA) kA ++; expA^=1; frac32Z>>=1; } //Add if (uiC!=0){ tmp = (uiC<<2) & 0xFFFF; if (regSC){ while (tmp>>15){ kC++; tmp= (tmp<<1) & 0xFFFF; } } else{ kC=-1; while (!(tmp>>15)){ kC--; tmp= (tmp<<1) & 0xFFFF; } 
tmp&=0x7FFF; } expC = tmp>>14; frac32C = (0x4000 | tmp) << 16; shiftRight = ((kA-kC)<<1) + (expA-expC); //actually this is the scale if (shiftRight<0){ // |uiC| > |Prod Z| if (shiftRight<=-31){ bitsMore = 1; frac32Z = 0; } else if (((frac32Z<<(32+shiftRight))&0xFFFFFFFF)!=0) bitsMore = 1; if (signZ==signC) frac32Z = frac32C + (frac32Z>>-shiftRight); else {//different signs frac32Z = frac32C - (frac32Z>>-shiftRight) ; signZ=signC; if (bitsMore) frac32Z-=1; } kZ = kC; expZ = expC; } else if (shiftRight>0){// |uiC| < |Prod| //if (frac32C&((1<=31){ bitsMore = 1; frac32C = 0; } else if (((frac32C<<(32-shiftRight))&0xFFFFFFFF)!=0) bitsMore = 1; if (signZ==signC) frac32Z = frac32Z + (frac32C>>shiftRight); else{ frac32Z = frac32Z - (frac32C>>shiftRight); if (bitsMore) frac32Z-=1; } kZ = kA; expZ = expA; } else{ if(frac32C==frac32Z && signZ!=signC ){ //check if same number uZ.ui = 0; return uZ.p; } else{ if (signZ==signC) frac32Z += frac32C; else{ if (frac32Z>1)&0x7FFFFFFF; } else { //for subtract cases if (frac32Z!=0){ while((frac32Z>>29)==0){ kZ--; frac32Z<<=2; } } bool ecarry = (0x40000000 & frac32Z)>>30; if(!ecarry){ if (expZ==0) kZ--; expZ^=1; frac32Z<<=1; } } } else{ kZ = kA; expZ=expA; } if(kZ<0){ regZ = (-kZ & 0xFFFF); regSZ = 0; regime = 0x4000>>regZ; } else{ regZ = kZ+1; regSZ=1; regime = 0x7FFF - (0x7FFF>>regZ); } if(regZ>14){ //max or min pos. exp and frac does not matter. (regSZ) ? 
(uZ.ui= 0x7FFF): (uZ.ui=0x1); } else{ //remove hidden bits frac32Z &= 0x3FFFFFFF; fracZ = frac32Z >> (regZ + 17); if (regZ!=14) bitNPlusOne = (frac32Z>>regZ) & 0x10000; else if (frac32Z>0){ fracZ=0; bitsMore =1; } if (regZ==14 && expZ) bitNPlusOne = 1; uZ.ui = packToP16UI(regime, regZ, expZ, fracZ); if (bitNPlusOne){ if ( (frac32Z<<(16-regZ)) &0xFFFFFFFF ) bitsMore =1; uZ.ui += (uZ.ui&1) | bitsMore; } } if (signZ) uZ.ui = -uZ.ui & 0xFFFF; return uZ.p; } luametatex-2.10.08/source/libraries/softposit/source/s_mulAddP32.c000066400000000000000000000152261442250314700250320ustar00rootroot00000000000000 /*============================================================================ This C source file is part of the SoftPosit Posit Arithmetic Package by S. H. Leong (Cerlane). Copyright 2017, 2018 A*STAR. All rights reserved. This C source file was based on SoftFloat IEEE Floating-Point Arithmetic Package, Release 3d, by John R. Hauser. Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the University of California. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. =============================================================================*/ #include "platform.h" #include "internals.h" posit32_t softposit_mulAddP32( uint_fast32_t uiA, uint_fast32_t uiB, uint_fast32_t uiC, uint_fast32_t op ){ union ui32_p32 uZ; uint_fast32_t regA, regZ, fracA, fracZ, regime, tmp; bool signA, signB, signC, signZ, regSA, regSB, regSC, regSZ, bitNPlusOne=0, bitsMore=0, rcarry; int_fast32_t expA, expC, expZ; int_fast16_t kA=0, kC=0, kZ=0, shiftRight; uint_fast64_t frac64C, frac64Z; //NaR if ( uiA==0x80000000 || uiB==0x80000000 || uiC==0x80000000 ){ uZ.ui = 0x80000000; return uZ.p; } else if (uiA==0 || uiB==0){ if (op == softposit_mulAdd_subC) uZ.ui = -uiC; else uZ.ui = uiC; return uZ.p; } signA = signP32UI( uiA ); signB = signP32UI( uiB ); signC = signP32UI( uiC );//^ (op == softposit_mulAdd_subC); signZ = signA ^ signB;// ^ (op == softposit_mulAdd_subProd); if(signA) uiA = (-uiA & 0xFFFFFFFF); if(signB) uiB = (-uiB & 0xFFFFFFFF); if(signC) uiC = (-uiC & 0xFFFFFFFF); regSA = signregP32UI(uiA); regSB = signregP32UI(uiB); regSC = signregP32UI(uiC); tmp = (uiA<<2)&0xFFFFFFFF; if (regSA){ while (tmp>>31){ kA++; tmp= (tmp<<1) & 0xFFFFFFFF; } } else{ kA=-1; while (!(tmp>>31)){ kA--; tmp= (tmp<<1) & 0xFFFFFFFF; } tmp&=0x7FFFFFFF; } expA = tmp>>29; //to get 2 bits fracA = ((tmp<<2) | 0x80000000) & 0xFFFFFFFF; tmp = (uiB<<2)&0xFFFFFFFF; if (regSB){ while (tmp>>31){ kA++; tmp= (tmp<<1) & 0xFFFFFFFF; } } else{ kA--; while (!(tmp>>31)){ kA--; tmp= (tmp<<1) & 
0xFFFFFFFF; } tmp&=0x7FFFFFFF; } expA += tmp>>29; frac64Z = (uint_fast64_t) fracA * (((tmp<<2) | 0x80000000) & 0xFFFFFFFF); if (expA>3){ kA++; expA&=0x3; // -=4 } rcarry = frac64Z>>63;//1st bit of frac64Z if (rcarry){ expA++; if (expA>3){ kA ++; expA&=0x3; } frac64Z>>=1; } if (uiC!=0){ tmp = (uiC<<2)&0xFFFFFFFF; if (regSC){ while (tmp>>31){ kC++; tmp= (tmp<<1) & 0xFFFFFFFF; } } else{ kC=-1; while (!(tmp>>31)){ kC--; tmp= (tmp<<1) & 0xFFFFFFFF; } tmp&=0x7FFFFFFF; } expC = tmp>>29; //to get 2 bits frac64C = (((tmp<<1) | 0x40000000ULL) & 0x7FFFFFFFULL)<<32; shiftRight = ((kA-kC)<<2) + (expA-expC); if (shiftRight<0){ // |uiC| > |Prod| if (shiftRight<=-63){ bitsMore = 1; frac64Z = 0; //set bitsMore to one? } else if ((frac64Z<<(64+shiftRight))!=0) bitsMore = 1; if (signZ==signC) frac64Z = frac64C + (frac64Z>>-shiftRight); else {//different signs frac64Z = frac64C - (frac64Z>>-shiftRight) ; signZ=signC; if (bitsMore) frac64Z-=1; } kZ = kC; expZ = expC; } else if (shiftRight>0){// |uiC| < |Prod| //if (frac32C&((1<=63) { bitsMore = 1; frac64C = 0; } else if ((frac64C<<(64-shiftRight))!=0) bitsMore = 1; if (signZ==signC) frac64Z = frac64Z + (frac64C>>shiftRight); else{ frac64Z = frac64Z - (frac64C>>shiftRight); if (bitsMore) frac64Z-=1; } kZ = kA; expZ = expA; } else{ if(frac64C==frac64Z && signZ!=signC ){ //check if same number uZ.ui = 0; return uZ.p; } else{ if (signZ==signC) frac64Z += frac64C; else{ if (frac64Z>63; //first left bit if(rcarry){ expZ++; if (expZ>3){ kZ++; expZ&=0x3; } frac64Z=(frac64Z>>1)&0x7FFFFFFFFFFFFFFF; } else { //for subtract cases if (frac64Z!=0){ while((frac64Z>>59)==0){ kZ--; frac64Z<<=4; } while((frac64Z>>62)==0){ expZ--; frac64Z<<=1; if (expZ<0){ kZ--; expZ=3; } } } } } else{ kZ = kA; expZ=expA; } if(kZ<0){ regZ = -kZ; regSZ = 0; regime = 0x40000000>>regZ; } else{ regZ = kZ+1; regSZ=1; regime = 0x7FFFFFFF - (0x7FFFFFFF>>regZ); } if(regZ>30){ //max or min pos. exp and frac does not matter. (regSZ) ? 
(uZ.ui= 0x7FFFFFFF): (uZ.ui=0x1); } else{ if (regZ<=28){ //remove hidden bits frac64Z &= 0x3FFFFFFFFFFFFFFF; fracZ = frac64Z >> (regZ + 34);//frac32Z>>16; bitNPlusOne |= (0x200000000 & (frac64Z >>regZ ) ) ; expZ <<= (28-regZ); } else { if (regZ==30){ bitNPlusOne = expZ&0x2; bitsMore = (expZ&0x1); expZ = 0; } else if (regZ==29){ bitNPlusOne = expZ&0x1; expZ>>=1; } if (fracZ>0){ fracZ=0; bitsMore =1; } } uZ.ui = packToP32UI(regime, expZ, fracZ); if (bitNPlusOne){ if ( (frac64Z<<(32-regZ)) &0xFFFFFFFFFFFFFFFF ) bitsMore =1; uZ.ui += (uZ.ui&1) | bitsMore; } } if (signZ) uZ.ui = -uZ.ui & 0xFFFFFFFFFFFFFFFF; return uZ.p; } luametatex-2.10.08/source/libraries/softposit/source/s_mulAddP8.c000066400000000000000000000127301442250314700247520ustar00rootroot00000000000000 /*============================================================================ This C source file is part of the SoftPosit Posit Arithmetic Package by S. H. Leong (Cerlane). Copyright 2017, 2018 A*STAR. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. =============================================================================*/ #include "platform.h" #include "internals.h" //softposit_mulAdd_subC => (uiA*uiB)-uiC //softposit_mulAdd_subProd => uiC - (uiA*uiB) //Default is always op==0 posit8_t softposit_mulAddP8( uint_fast8_t uiA, uint_fast8_t uiB, uint_fast8_t uiC, uint_fast8_t op ){ union ui8_p8 uZ; uint_fast8_t regZ, fracA, fracZ, regime, tmp; bool signA, signB, signC, signZ, regSA, regSB, regSC, regSZ, bitNPlusOne=0, bitsMore=0, rcarry; int_fast8_t kA=0, kC=0, kZ=0, shiftRight; uint_fast16_t frac16C, frac16Z; //NaR if ( uiA==0x80 || uiB==0x80 || uiC==0x80 ){ uZ.ui = 0x80; return uZ.p; } else if (uiA==0 || uiB==0){ if (op == softposit_mulAdd_subC) uZ.ui = -uiC; else uZ.ui = uiC; return uZ.p; } signA = signP8UI( uiA ); signB = signP8UI( uiB ); signC = signP8UI( uiC );//^ (op == softposit_mulAdd_subC); signZ = signA ^ signB;// ^ (op == softposit_mulAdd_subProd); if(signA) uiA = (-uiA & 0xFF); if(signB) uiB = (-uiB & 0xFF); if(signC) uiC = (-uiC & 0xFF); regSA = signregP8UI(uiA); regSB = signregP8UI(uiB); regSC = signregP8UI(uiC); tmp = (uiA<<2) & 0xFF; if (regSA){ while (tmp>>7){ kA++; tmp= (tmp<<1) & 0xFF; } } else{ kA=-1; while (!(tmp>>7)){ kA--; tmp= (tmp<<1) & 0xFF; } tmp&=0x7F; } fracA = (0x80 | tmp); //use first bit here for hidden bit to get more bits tmp = (uiB<<2) & 0xFF; if (regSB){ while (tmp>>7){ kA++; tmp= (tmp<<1) & 0xFF; } } else{ kA--; while (!(tmp>>7)){ kA--; tmp= (tmp<<1) & 0xFF; } tmp&=0x7F; } 
frac16Z = (uint_fast16_t) fracA * (0x80 | tmp); rcarry = frac16Z>>15;//1st bit of frac16Z if (rcarry){ kA++; frac16Z>>=1; } if (uiC!=0){ tmp = (uiC<<2) & 0xFF; if (regSC){ while (tmp>>7){ kC++; tmp= (tmp<<1) & 0xFF; } } else{ kC=-1; while (!(tmp>>7)){ kC--; tmp= (tmp<<1) & 0xFF; } tmp&=0x7F; } frac16C = (0x80 | tmp) <<7 ; shiftRight = (kA-kC); if (shiftRight<0){ // |uiC| > |Prod| if (shiftRight<=-15) { bitsMore = 1; frac16Z=0; } else if (((frac16Z<<(16+shiftRight))&0xFFFF)!=0) bitsMore = 1; if (signZ==signC) frac16Z = frac16C + (frac16Z>>-shiftRight); else {//different signs frac16Z = frac16C - (frac16Z>>-shiftRight) ; signZ=signC; if (bitsMore) frac16Z-=1; } kZ = kC; } else if (shiftRight>0){// |uiC| < |Prod| if(shiftRight>=15){ bitsMore = 1; frac16C = 0; } else if (((frac16C<<(16-shiftRight))&0xFFFF)!=0) bitsMore = 1; if (signZ==signC) frac16Z += (frac16C>>shiftRight); else{ frac16Z -= (frac16C>>shiftRight); if (bitsMore) frac16Z-=1; } kZ = kA; } else{ if(frac16C==frac16Z && signZ!=signC ){ //check if same number uZ.ui = 0; return uZ.p; } else{ if (signZ==signC) frac16Z += frac16C; else{ if (frac16Z>1)&0x7FFF; } else { //for subtract cases if (frac16Z!=0){ while((frac16Z>>14)==0){ kZ--; frac16Z<<=1; } } } } else{ kZ = kA; } if(kZ<0){ regZ = (-kZ & 0xFF); regSZ = 0; regime = 0x40>>regZ; } else{ regZ = kZ+1; regSZ=1; regime = 0x7F - (0x7F>>regZ); } if(regZ>6){ //max or min pos. exp and frac does not matter. (regSZ) ? 
(uZ.ui= 0x7F): (uZ.ui=0x1); } else{ //remove hidden bits frac16Z &= 0x3FFF; fracZ = (frac16Z >> regZ) >> 8; bitNPlusOne = ((frac16Z>>regZ) & 0x80); uZ.ui = packToP8UI(regime, fracZ); if (bitNPlusOne){ if ( (frac16Z<<(9-regZ)) &0xFFFF ) bitsMore =1; uZ.ui += (uZ.ui&1) | bitsMore; } } if (signZ) uZ.ui = -uZ.ui & 0xFF; return uZ.p; } luametatex-2.10.08/source/libraries/softposit/source/s_mulAddPX1.c000066400000000000000000000206211442250314700250710ustar00rootroot00000000000000 /*============================================================================ This C source file is part of the SoftPosit Posit Arithmetic Package by S. H. Leong (Cerlane). Copyright 2017, 2018 A*STAR. All rights reserved. This C source file was based on SoftFloat IEEE Floating-Point Arithmetic Package, Release 3d, by John R. Hauser. Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the University of California. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. =============================================================================*/ #include "platform.h" #include "internals.h" //a*b+c posit_1_t softposit_mulAddPX1( uint_fast32_t uiA, uint_fast32_t uiB, uint_fast32_t uiC, uint_fast32_t op, int x ){ union ui32_pX1 uZ; int regZ; uint_fast32_t fracA, fracZ, regime, tmp; bool signA, signB, signC, signZ, regSA, regSB, regSC, regSZ, bitNPlusOne=0, bitsMore=0, rcarry; int_fast32_t expA, expC, expZ; int_fast16_t kA=0, kC=0, kZ=0, shiftRight; uint_fast64_t frac64C, frac64Z; if (x<2 || x>32){ uZ.ui = 0x80000000; return uZ.p; } //NaR if ( uiA==0x80000000 || uiB==0x80000000 || uiC==0x80000000 ){ uZ.ui = 0x80000000; return uZ.p; } else if (uiA==0 || uiB==0){ if (op == softposit_mulAdd_subC) uZ.ui = -uiC; else uZ.ui = uiC; return uZ.p; } signA = signP32UI( uiA ); signB = signP32UI( uiB ); signC = signP32UI( uiC );//^ (op == softposit_mulAdd_subC); signZ = signA ^ signB;// ^ (op == softposit_mulAdd_subProd); if(signA) uiA = (-uiA & 0xFFFFFFFF); if(signB) uiB = (-uiB & 0xFFFFFFFF); if(signC) uiC = (-uiC & 0xFFFFFFFF); regSA = signregP32UI(uiA); regSB = signregP32UI(uiB); regSC = signregP32UI(uiC); if (x==2){ uZ.ui = (regSA®SB) ? (0x40000000) : (0x0); if (signZ){// i.e. 
negative prod if (signC){ uZ.ui |= uiC; uZ.ui = -uZ.ui & 0xFFFFFFFF; } else{//prod is negative if (uiC==uZ.ui) uZ.ui = 0; else uZ.ui =(uZ.ui>0)?( 0xC0000000):(0x40000000); } } else{ //prod : same sign signZ=0 if (signC){ if (uiC==uZ.ui) uZ.ui = 0; else uZ.ui = (uZ.ui>0) ? (0x40000000) : (0xC0000000); } else{//C is positive uZ.ui |= uiC; } } return uZ.p; } else{ tmp = (uiA<<2)&0xFFFFFFFF; if (regSA){ while (tmp>>31){ kA++; tmp= (tmp<<1) & 0xFFFFFFFF; } } else{ kA=-1; while (!(tmp>>31)){ kA--; tmp= (tmp<<1) & 0xFFFFFFFF; } tmp&=0x7FFFFFFF; } expA = tmp>>30; //to get 2 bits fracA = ((tmp<<1) | 0x80000000) & 0xFFFFFFFF; tmp = (uiB<<2)&0xFFFFFFFF; if (regSB){ while (tmp>>31){ kA++; tmp= (tmp<<1) & 0xFFFFFFFF; } } else{ kA--; while (!(tmp>>31)){ kA--; tmp= (tmp<<1) & 0xFFFFFFFF; } tmp&=0x7FFFFFFF; } expA += tmp>>30; frac64Z = (uint_fast64_t) fracA * (((tmp<<1) | 0x80000000) & 0xFFFFFFFF); if (expA>1){ kA++; expA^=0x2; } rcarry = frac64Z>>63;//1st bit of frac64Z if (rcarry){ if (expA) kA ++; expA^=1; frac64Z>>=1; } if (uiC!=0){ tmp = (uiC<<2)&0xFFFFFFFF; if (regSC){ while (tmp>>31){ kC++; tmp= (tmp<<1) & 0xFFFFFFFF; } } else{ kC=-1; while (!(tmp>>31)){ kC--; tmp= (tmp<<1) & 0xFFFFFFFF; } tmp&=0x7FFFFFFF; } //printBinary(&expC, 32); expC = tmp>>30; //to get 1 bits frac64C = ((tmp | 0x40000000ULL) & 0x7FFFFFFFULL)<<32; shiftRight = ((kA-kC)<<1) + (expA-expC); //printf("shiftRight: %d kA: %d kC: %d\n", shiftRight, kA, kC); //printBinary(&frac64Z, 64); //printBinary(&frac64C, 64); if (shiftRight<0){ // |uiC| > |Prod| if (shiftRight<=-63){ bitsMore = 1; frac64Z = 0; //set bitsMore to one? 
} else if ((frac64Z<<(64+shiftRight))!=0) bitsMore = 1; //printf("bitsMore: %d\n", bitsMore); if (signZ==signC) frac64Z = frac64C + (frac64Z>>-shiftRight); else {//different signs frac64Z = frac64C - (frac64Z>>-shiftRight) ; signZ=signC; if (bitsMore) frac64Z-=1; } kZ = kC; expZ = expC; //printf("kZ: %d expZ: %d\n", kZ, expZ); //printBinary(&frac64Z, 64); } else if (shiftRight>0){// |uiC| < |Prod| //if (frac32C&((1<=63) { bitsMore = 1; frac64C = 0; } else if ((frac64C<<(64-shiftRight))!=0) bitsMore = 1; if (signZ==signC) frac64Z = frac64Z + (frac64C>>shiftRight); else{ frac64Z = frac64Z - (frac64C>>shiftRight); if (bitsMore) frac64Z-=1; } kZ = kA; expZ = expA; } else{ if(frac64C==frac64Z && signZ!=signC ){ //check if same number uZ.ui = 0; return uZ.p; } else{ if (signZ==signC) frac64Z += frac64C; else{ if (frac64Z>63; //first left bit if(rcarry){ if (expZ) kZ ++; expZ^=1; if (frac64Z&0x1) bitsMore = 1; frac64Z=(frac64Z>>1)&0x7FFFFFFFFFFFFFFF; } else { //for subtract cases if (frac64Z!=0){ while((frac64Z>>61)==0){ kZ--; frac64Z<<=2; } } bool ecarry = (0x4000000000000000 & frac64Z)>>62; if(!ecarry){ if (expZ==0) kZ--; expZ^=1; frac64Z<<=1; } //printf("kZ: %d expZ: %d\n", kZ, expZ); //printf("frac64Z:\n"); //printBinary(&frac64Z,64); } } else{ kZ = kA; expZ=expA; } if(kZ<0){ regZ = -kZ; regSZ = 0; regime = 0x40000000>>regZ; } else{ regZ = kZ+1; regSZ=1; regime = 0x7FFFFFFF - (0x7FFFFFFF>>regZ); } //printf("regZ: %d regSZ: %d kZ: %d expZ: %d\n", regZ, regSZ, kZ, expZ); //printBinary(&frac64Z,64); if(regZ>(x-2)){ //max or min pos. exp and frac does not matter. uZ.ui=(regSZ) ? (0x7FFFFFFF & ((int32_t)0x80000000>>(x-1)) ): (0x1 << (32-x)); } else{ if (regZ