pax_global_header00006660000000000000000000000064135704343400014515gustar00rootroot0000000000000052 comment=571ab2d0fffc19feafedb65fca0e42cf70463b86
py-lz4framed-0.14.0/000077500000000000000000000000001357043434000141155ustar00rootroot00000000000000
py-lz4framed-0.14.0/.gitignore000066400000000000000000000001261357043434000161040ustar00rootroot00000000000000
*.py[cod]
*.so
build/
dist/
__pycache__/
MANIFEST
*.egg
.eggs/
py_lz4framed.egg-info/
py-lz4framed-0.14.0/CHANGELOG000066400000000000000000000021731357043434000153320ustar00rootroot00000000000000
0.14.0
- Updated lz4 to v1.9.2 (includes fix for CVE-2019-17543)

0.13.0
- Updated lz4 to v1.8.3 (data corruption fix for v1.8.2)

0.12.0
- Updated lz4 to v1.8.2
- Confirmed compatibility with Python 3.7

0.11.0
- Updated lz4 to v1.8.1 (faster/stronger ultra modes, levels 10+)
- Minor documentation clarification about compression levels.

0.10.0
- Updated lz4 to v1.8.0
- Support for fast acceleration via negative compression levels
- Support for block checksums

0.9.7
- Allow for any bytes-like objects as input (e.g. memoryview)

0.9.6
- Windows build compatibility

0.9.5
- Potential reference leak fix (on LZ4FError being raised)

0.9.4
- Updated lz4 to v1.7.5 (Note: Available compression levels have changed)
- Use lz4 constants for available compression levels
- Use LZ4_VERSION_STRING to propagate underlying lz4 version
- Update tests to reflect new compression levels

0.9.3
- Allow for lack of stdin/stdout/stderr buffer access (#3, https://github.com/windreamer)

0.9.2
- Fix index in argv (Jan Špaček)
- License information update

0.9.1
- Fixup Python 2/3 compat layer for nose (Mathias Laurin)

0.9.0
- Initial public release
py-lz4framed-0.14.0/CONTRIBUTING.md000066400000000000000000000013341357043434000163470ustar00rootroot00000000000000
## How to Contribute

We welcome contributions from the wider community. Thanks in advance for your help. We have a few guidelines:

#### Guidelines

- Use the branch `dev-contrib` to make your change.
- Add test(s) to the unit tests, if applicable.
- Before you check a change in, make sure it passes all the static tests (pylint and flake8) and the unit tests in both Python 2 and 3.
- We reserve the right to alter your code before integrating your change.
- Changes will be integrated into a release on a schedule at our discretion, at which point the pip release will be updated to include them.
- Your contribution will be mentioned in the CHANGELOG, unless you specify otherwise.
- Iotic Labs has copyright over contributions (under Apache v2).
py-lz4framed-0.14.0/LICENSE000066400000000000000000000261441357043434000151270ustar00rootroot00000000000000
Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. 
If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. 
You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. Copyright 2016 Iotic Labs Ltd Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at https://github.com/Iotic-Labs/py-lz4framed/blob/master/LICENSE Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. py-lz4framed-0.14.0/MANIFEST.in000066400000000000000000000001271357043434000156530ustar00rootroot00000000000000include README.md CHANGELOG test.py global-include NOTICE LICENSE NEWS include lz4/*.h py-lz4framed-0.14.0/NOTICE000066400000000000000000000003101357043434000150130ustar00rootroot00000000000000py-lz4framed Copyright (c) 2016 Iotic Labs Ltd LZ4 Library Copyright (c) 2011-2016 Yann Collet six.py Copyright (c) 2010-2015 Benjamin Peterson ez_setup.py Copyright (c) Python Packaging Authority py-lz4framed-0.14.0/README.md000066400000000000000000000053001357043434000153720ustar00rootroot00000000000000# Overview This is an [LZ4](http://lz4.org)-frame compression library for Python v3.2+ (and 2.7+), bound to Yann Collet's [LZ4 C implementation](https://github.com/lz4/lz4). 
# Installing / packaging

```shell
# To get from PyPI
pip3 install py-lz4framed

# To only build extension modules inline (e.g. in repository)
python3 setup.py build_ext -i

# To build & install globally
python3 setup.py install
```

**Notes**

- The above, as well as all other python3-based commands, should also run under Python v2.7+
- This module is also available via [Anaconda (conda-forge)](https://anaconda.org/conda-forge/py-lz4framed) (with binaries for Linux, OSX and Windows)
- PyPI releases are signed with the [Iotic Labs Software release signing key](https://developer.iotic-labs.com/iotic-labs.com.asc)

# Usage

Single-function operation:

```python
import lz4framed

compressed = lz4framed.compress(b'binary data')
uncompressed = lz4framed.decompress(compressed)
```

To iteratively compress (to a file or e.g. a BytesIO instance):

```python
from lz4framed import Compressor, Lz4FramedNoDataError

with open('myFile', 'wb') as f:
    # Context automatically finalises frame on completion, unless an exception occurs
    with Compressor(f) as c:
        try:
            while (...):
                c.update(moreData)
        except Lz4FramedNoDataError:
            pass
```

To decompress from a file-like object:

```python
from lz4framed import Decompressor, Lz4FramedNoDataError

decoded = []
with open('myFile', 'rb') as f:
    try:
        for chunk in Decompressor(f):
            decoded.append(chunk)
    except Lz4FramedNoDataError:
        # Compressed frame data incomplete - error case
        ...
```

See also [lz4framed/\_\_main\_\_.py](lz4framed/__main__.py) for example usage.

# Documentation

```python
import lz4framed

print(lz4framed.__version__, lz4framed.LZ4_VERSION, lz4framed.LZ4F_VERSION)
help(lz4framed)
```

# Command-line utility

```shell
python3 -mlz4framed
USAGE: lz4framed (compress|decompress) (INFILE|-) [OUTFILE]

(De)compresses an lz4 frame. Input is read from INFILE unless set to '-', in
which case stdin is used. If OUTFILE is not specified, output goes to stdout.
```

# Tests

## Static

This library has been checked using [flake8](https://pypi.python.org/pypi/flake8) and [pylint](http://www.pylint.org), using a modified configuration - see _pylint.rc_ and _flake8.cfg_.

## Unit

```shell
python3 -m unittest discover -v .
```

# Why?

The only existing lz4-frame interoperable implementation I was aware of at the time of writing ([lz4tools](https://github.com/darkdragn/lz4tools)) had the following limitations:

- Incomplete implementation in terms of e.g. reference & memory leaks on failure
- Lack of unit tests
- Not thread safe
- Does not release the GIL during low-level (de)compression operations
- Did not address the requirements for an external project
py-lz4framed-0.14.0/ez_setup.py000066400000000000000000000274331357043434000163360ustar00rootroot00000000000000
#!/usr/bin/env python """ Setuptools bootstrapping installer. Run this script to install or upgrade setuptools. """ import os import shutil import sys import tempfile import zipfile import optparse import subprocess import platform import textwrap import contextlib import json import codecs from distutils import log try: from urllib.request import urlopen except ImportError: from urllib2 import urlopen try: from site import USER_SITE except ImportError: USER_SITE = None LATEST = object() DEFAULT_VERSION = LATEST DEFAULT_URL = "https://pypi.python.org/packages/source/s/setuptools/" DEFAULT_SAVE_DIR = os.curdir def _python_cmd(*args): """ Execute a command. Return True if the command succeeded.
""" args = (sys.executable,) + args return subprocess.call(args) == 0 def _install(archive_filename, install_args=()): """Install Setuptools.""" with archive_context(archive_filename): # installing log.warn('Installing Setuptools') if not _python_cmd('setup.py', 'install', *install_args): log.warn('Something went wrong during the installation.') log.warn('See the error message above.') # exitcode will be 2 return 2 def _build_egg(egg, archive_filename, to_dir): """Build Setuptools egg.""" with archive_context(archive_filename): # building an egg log.warn('Building a Setuptools egg in %s', to_dir) _python_cmd('setup.py', '-q', 'bdist_egg', '--dist-dir', to_dir) # returning the result log.warn(egg) if not os.path.exists(egg): raise IOError('Could not build the egg.') class ContextualZipFile(zipfile.ZipFile): """Supplement ZipFile class to support context manager for Python 2.6.""" def __enter__(self): return self def __exit__(self, type, value, traceback): self.close() def __new__(cls, *args, **kwargs): """Construct a ZipFile or ContextualZipFile as appropriate.""" if hasattr(zipfile.ZipFile, '__exit__'): return zipfile.ZipFile(*args, **kwargs) return super(ContextualZipFile, cls).__new__(cls) @contextlib.contextmanager def archive_context(filename): """ Unzip filename to a temporary directory, set to the cwd. The unzipped target is cleaned up after. """ tmpdir = tempfile.mkdtemp() log.warn('Extracting in %s', tmpdir) old_wd = os.getcwd() try: os.chdir(tmpdir) with ContextualZipFile(filename) as archive: archive.extractall() # going in the directory subdir = os.path.join(tmpdir, os.listdir(tmpdir)[0]) os.chdir(subdir) log.warn('Now working in %s', subdir) yield finally: os.chdir(old_wd) shutil.rmtree(tmpdir) def _do_download(version, download_base, to_dir, download_delay): """Download Setuptools.""" egg = os.path.join(to_dir, 'setuptools-%s-py%d.%d.egg' % (version, sys.version_info[0], sys.version_info[1])) if not os.path.exists(egg): archive = download_setuptools(version, download_base, to_dir, download_delay) _build_egg(egg, archive, to_dir) sys.path.insert(0, egg) # Remove previously-imported pkg_resources if present (see # https://bitbucket.org/pypa/setuptools/pull-request/7/ for details). if 'pkg_resources' in sys.modules: _unload_pkg_resources() import setuptools setuptools.bootstrap_install_from = egg def use_setuptools( version=DEFAULT_VERSION, download_base=DEFAULT_URL, to_dir=DEFAULT_SAVE_DIR, download_delay=15): """ Ensure that a setuptools version is installed. Return None. Raise SystemExit if the requested version or later cannot be installed. """ version = _resolve_version(version) to_dir = os.path.abspath(to_dir) # prior to importing, capture the module state for # representative modules. rep_modules = 'pkg_resources', 'setuptools' imported = set(sys.modules).intersection(rep_modules) try: import pkg_resources pkg_resources.require("setuptools>=" + version) # a suitable version is already installed return except ImportError: # pkg_resources not available; setuptools is not installed; download pass except pkg_resources.DistributionNotFound: # no version of setuptools was found; allow download pass except pkg_resources.VersionConflict as VC_err: if imported: _conflict_bail(VC_err, version) # otherwise, unload pkg_resources to allow the downloaded version to # take precedence. 
del pkg_resources _unload_pkg_resources() return _do_download(version, download_base, to_dir, download_delay) def _conflict_bail(VC_err, version): """ Setuptools was imported prior to invocation, so it is unsafe to unload it. Bail out. """ conflict_tmpl = textwrap.dedent(""" The required version of setuptools (>={version}) is not available, and can't be installed while this script is running. Please install a more recent version first, using 'easy_install -U setuptools'. (Currently using {VC_err.args[0]!r}) """) msg = conflict_tmpl.format(**locals()) sys.stderr.write(msg) sys.exit(2) def _unload_pkg_resources(): del_modules = [ name for name in sys.modules if name.startswith('pkg_resources') ] for mod_name in del_modules: del sys.modules[mod_name] def _clean_check(cmd, target): """ Run the command to download target. If the command fails, clean up before re-raising the error. """ try: subprocess.check_call(cmd) except subprocess.CalledProcessError: if os.access(target, os.F_OK): os.unlink(target) raise def download_file_powershell(url, target): """ Download the file at url to target using Powershell. Powershell will validate trust. Raise an exception if the command cannot complete. """ target = os.path.abspath(target) ps_cmd = ( "[System.Net.WebRequest]::DefaultWebProxy.Credentials = " "[System.Net.CredentialCache]::DefaultCredentials; " '(new-object System.Net.WebClient).DownloadFile("%(url)s", "%(target)s")' % locals() ) cmd = [ 'powershell', '-Command', ps_cmd, ] _clean_check(cmd, target) def has_powershell(): """Determine if Powershell is available.""" if platform.system() != 'Windows': return False cmd = ['powershell', '-Command', 'echo test'] with open(os.path.devnull, 'wb') as devnull: try: subprocess.check_call(cmd, stdout=devnull, stderr=devnull) except Exception: return False return True download_file_powershell.viable = has_powershell def download_file_curl(url, target): cmd = ['curl', url, '--silent', '--output', target] _clean_check(cmd, target) def has_curl(): cmd = ['curl', '--version'] with open(os.path.devnull, 'wb') as devnull: try: subprocess.check_call(cmd, stdout=devnull, stderr=devnull) except Exception: return False return True download_file_curl.viable = has_curl def download_file_wget(url, target): cmd = ['wget', url, '--quiet', '--output-document', target] _clean_check(cmd, target) def has_wget(): cmd = ['wget', '--version'] with open(os.path.devnull, 'wb') as devnull: try: subprocess.check_call(cmd, stdout=devnull, stderr=devnull) except Exception: return False return True download_file_wget.viable = has_wget def download_file_insecure(url, target): """Use Python to download the file, without connection authentication.""" src = urlopen(url) try: # Read all the data in one block. data = src.read() finally: src.close() # Write all the data in one block to avoid creating a partial file. with open(target, "wb") as dst: dst.write(data) download_file_insecure.viable = lambda: True def get_best_downloader(): downloaders = ( download_file_powershell, download_file_curl, download_file_wget, download_file_insecure, ) viable_downloaders = (dl for dl in downloaders if dl.viable()) return next(viable_downloaders, None) def download_setuptools( version=DEFAULT_VERSION, download_base=DEFAULT_URL, to_dir=DEFAULT_SAVE_DIR, delay=15, downloader_factory=get_best_downloader): """ Download setuptools from a specified location and return its filename. 
`version` should be a valid setuptools version number that is available as an sdist for download under the `download_base` URL (which should end with a '/'). `to_dir` is the directory where the egg will be downloaded. `delay` is the number of seconds to pause before an actual download attempt. ``downloader_factory`` should be a function taking no arguments and returning a function for downloading a URL to a target. """ version = _resolve_version(version) # making sure we use the absolute path to_dir = os.path.abspath(to_dir) zip_name = "setuptools-%s.zip" % version url = download_base + zip_name saveto = os.path.join(to_dir, zip_name) if not os.path.exists(saveto): # Avoid repeated downloads log.warn("Downloading %s", url) downloader = downloader_factory() downloader(url, saveto) return os.path.realpath(saveto) def _resolve_version(version): """ Resolve LATEST version """ if version is not LATEST: return version resp = urlopen('https://pypi.python.org/pypi/setuptools/json') with contextlib.closing(resp): try: charset = resp.info().get_content_charset() except Exception: # Python 2 compat; assume UTF-8 charset = 'UTF-8' reader = codecs.getreader(charset) doc = json.load(reader(resp)) return str(doc['info']['version']) def _build_install_args(options): """ Build the arguments to 'python setup.py install' on the setuptools package. Returns list of command line arguments. """ return ['--user'] if options.user_install else [] def _parse_args(): """Parse the command line for options.""" parser = optparse.OptionParser() parser.add_option( '--user', dest='user_install', action='store_true', default=False, help='install in user site package (requires Python 2.6 or later)') parser.add_option( '--download-base', dest='download_base', metavar="URL", default=DEFAULT_URL, help='alternative URL from where to download the setuptools package') parser.add_option( '--insecure', dest='downloader_factory', action='store_const', const=lambda: download_file_insecure, default=get_best_downloader, help='Use internal, non-validating downloader' ) parser.add_option( '--version', help="Specify which version to download", default=DEFAULT_VERSION, ) parser.add_option( '--to-dir', help="Directory to save (and re-use) package", default=DEFAULT_SAVE_DIR, ) options, args = parser.parse_args() # positional arguments are ignored return options def _download_args(options): """Return args for download_setuptools function from cmdline args.""" return dict( version=options.version, download_base=options.download_base, downloader_factory=options.downloader_factory, to_dir=options.to_dir, ) def main(): """Install or upgrade setuptools and EasyInstall.""" options = _parse_args() archive = download_setuptools(**_download_args(options)) return _install(archive, _build_install_args(options)) if __name__ == '__main__': sys.exit(main()) py-lz4framed-0.14.0/flake8.cfg000066400000000000000000000006701357043434000157530ustar00rootroot00000000000000[flake8] # comma-separated filename and glob patterns default: .svn,CVS,.bzr,.hg,.git,__pycache #exclude = # comma-separated filename and glob patterns default: *.py #filename = # select errors and warnings to enable which are off by default # select = # skip errors or warnings #ignore = # set maximum allowed line length default: 79 max-line-length = 120 # set the error format #format = # McCabe complexity threshold max-complexity = 10 
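As a companion to the usage snippets in README.md above, here is a minimal end-to-end round trip of the lz4framed API. This is a sketch only: it assumes the names the README itself uses (compress, decompress, Compressor, Decompressor, Lz4FramedNoDataError) and substitutes an in-memory BytesIO for a real file.

```python
# Round-trip sketch based on the README.md examples; Compressor, Decompressor
# and Lz4FramedNoDataError are the lz4framed exports shown there.
from io import BytesIO

import lz4framed
from lz4framed import Compressor, Decompressor, Lz4FramedNoDataError

payload = b'fairly repetitive binary data ' * 1024

# One-shot operation
assert lz4framed.decompress(lz4framed.compress(payload)) == payload

# Iterative compression into an in-memory stream
stream = BytesIO()
with Compressor(stream) as compressor:  # context finalises the frame on exit
    for offset in range(0, len(payload), 4096):
        compressor.update(payload[offset:offset + 4096])

# Iterative decompression from the same stream
stream.seek(0)
decoded = []
try:
    for chunk in Decompressor(stream):
        decoded.append(chunk)
except Lz4FramedNoDataError:
    pass  # would be raised if the compressed frame were incomplete

assert b''.join(decoded) == payload
```

Per the README, any file or file-like object (e.g. an open file handle) can take the place of the BytesIO stream used here.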
py-lz4framed-0.14.0/lz4/000077500000000000000000000000001357043434000146265ustar00rootroot00000000000000py-lz4framed-0.14.0/lz4/LICENSE000066400000000000000000000010651357043434000156350ustar00rootroot00000000000000This repository uses 2 different licenses : - all files in the `lib` directory use a BSD 2-Clause license - all other files use a GPLv2 license, unless explicitly stated otherwise Relevant license is reminded at the top of each source file, and with presence of COPYING or LICENSE file in associated directories. This model is selected to emphasize that files in the `lib` directory are designed to be included into 3rd party applications, while all other files, in `programs`, `tests` or `examples`, receive more limited attention and support for such scenario. py-lz4framed-0.14.0/lz4/NEWS000066400000000000000000000326771357043434000153440ustar00rootroot00000000000000v1.9.1 fix : decompression functions were reading a few bytes beyond input size (introduced in v1.9.0, reported by @ppodolsky and @danlark1) api : fix : lz4frame initializers compatibility with c++, reported by @degski cli : added command --list, based on a patch by @gabrielstedman build: improved Windows build, by @JPeterMugaas build: AIX, by Norman Green v1.9.0 perf: large decompression speed improvement on x86/x64 (up to +20%) by @djwatson api : changed : _destSize() compression variants are promoted to stable API api : new : LZ4_initStream(HC), replacing LZ4_resetStream(HC) api : changed : LZ4_resetStream(HC) as recommended reset function, for better performance on small data cli : support custom block sizes, by @blezsan build: source code can be amalgamated, by Bing Xu build: added meson build, by @lzutao build: new build macros : LZ4_DISTANCE_MAX, LZ4_FAST_DEC_LOOP install: MidnightBSD, by @laffer1 install: msys2 on Windows 10, by @vtorri v1.8.3 perf: minor decompression speed improvement (~+2%) with gcc fix : corruption in v1.8.2 at level 9 for files > 64KB under rare conditions (#560) cli : new command --fast, by @jennifermliu cli : fixed elapsed time, and added cpu load indicator (on -vv) (#555) api : LZ4_decompress_safe_partial() now decodes exactly the nb of bytes requested (feature request #566) build : added Haiku target, by @fbrosson, and MidnightBSD, by @laffer1 doc : updated documentation regarding dictionary compression v1.8.2 perf: *much* faster dictionary compression on small files, by @felixhandte perf: improved decompression speed and binary size, by Alexey Tourbin (@svpv) perf: slightly faster HC compression and decompression speed perf: very small compression ratio improvement fix : compression compatible with low memory addresses (< 0xFFFF) fix : decompression segfault when provided with NULL input, by @terrelln cli : new command --favor-decSpeed cli : benchmark mode more accurate for small inputs fullbench : can bench _destSize() variants, by @felixhandte doc : clarified block format parsing restrictions, by Alexey Tourbin (@svpv) v1.8.1 perf : faster and stronger ultra modes (levels 10+) perf : slightly faster compression and decompression speed perf : fix bad degenerative case, reported by @c-morgenstern fix : decompression failed when using a combination of extDict + low memory address (#397), reported and fixed by Julian Scheid (@jscheid) cli : support for dictionary compression (`-D`), by Felix Handte @felixhandte cli : fix : `lz4 -d --rm` preserves timestamp (#441) cli : fix : do not modify /dev/null permission as root, by @aliceatlas api : `_destSize()` variant supported for all 
compression levels build : `make` and `make test` compatible with `-jX`, reported by @mwgamera build : can control LZ4LIB_VISIBILITY macro, by @mikir install: fix man page directory (#387), reported by Stuart Cardall (@itoffshore) v1.8.0 cli : fix : do not modify /dev/null permissions, reported by @Maokaman1 cli : added GNU separator -- specifying that all following arguments are files API : added LZ4_compress_HC_destSize(), by Oleg (@remittor) API : added LZ4F_resetDecompressionContext() API : lz4frame : negative compression levels trigger fast acceleration, request by Lawrence Chan API : lz4frame : can control block checksum and dictionary ID API : fix : expose obsolete decoding functions, reported by Chen Yufei API : experimental : lz4frame_static : new dictionary compression API build : fix : static lib installation, by Ido Rosen build : dragonFlyBSD, OpenBSD, NetBSD supported build : LZ4_MEMORY_USAGE can be modified at compile time, through external define doc : Updated LZ4 Frame format to v1.6.0, restoring Dictionary-ID field doc : lz4 api manual, by Przemyslaw Skibinski v1.7.5 lz4hc : new high compression mode : levels 10-12 compress more and slower, by Przemyslaw Skibinski lz4cat : fix : works with relative path (#284) and stdin (#285) (reported by @beiDei8z) cli : fix minor notification when using -r recursive mode API : lz4frame : LZ4F_frameBound(0) gives upper bound of *flush() and *End() operations (#290, #280) doc : markdown version of man page, by Takayuki Matsuoka (#279) build : Makefile : fix make -jX lib+exe concurrency (#277) build : cmake : improvements by Michał Górny (#296) v1.7.4.2 fix : Makefile : release build compatible with PIE and customized compilation directives provided through environment variables (#274, reported by Antoine Martin) v1.7.4 Improved : much better speed in -mx32 mode cli : fix : Large file support in 32-bits mode on Mac OS-X fix : compilation on gcc 4.4 (#272), reported by Antoine Martin v1.7.3 Changed : moved to versioning; package, cli and library have same version number Improved: Small decompression speed boost Improved: Small compression speed improvement on 64-bits systems Improved: Small compression ratio and speed improvement on small files Improved: Significant speed boost on ARMv6 and ARMv7 Fix : better ratio on 64-bits big-endian targets Improved cmake build script, by Evan Nemerson New liblz4-dll project, by Przemyslaw Skibinki Makefile: Generates object files (*.o) for faster (re)compilation on low power systems cli : new : --rm and --help commands cli : new : preserved file attributes, by Przemyslaw Skibinki cli : fix : crash on some invalid inputs cli : fix : -t correctly validates lz4-compressed files, by Nick Terrell cli : fix : detects and reports fread() errors, thanks to Hiroshi Fujishima report #243 cli : bench : new : -r recursive mode lz4cat : can cat multiple files in a single command line (#184) Added : doc/lz4_manual.html, by Przemyslaw Skibinski Added : dictionary compression and frame decompression examples, by Nick Terrell Added : Debianization, by Evgeniy Polyakov r131 New : Dos/DJGPP target, thanks to Louis Santillan (#114) Added : Example using lz4frame library, by Zbigniew Jędrzejewski-Szmek (#118) Changed: xxhash symbols are modified (namespace emulation) within liblz4 r130: Fixed : incompatibility sparse mode vs console, reported by Yongwoon Cho (#105) Fixed : LZ4IO exits too early when frame crc not present, reported by Yongwoon Cho (#106) Fixed : incompatibility sparse mode vs append mode, reported by 
Takayuki Matsuoka (#110) Performance fix : big compression speed boost for clang (+30%) New : cross-version test, by Takayuki Matsuoka r129: Added : LZ4_compress_fast(), LZ4_compress_fast_continue() Added : LZ4_compress_destSize() Changed: New lz4 and lz4hc compression API. Previous function prototypes still supported. Changed: Sparse file support enabled by default New : LZ4 CLI improved performance compressing/decompressing multiple files (#86, kind contribution from Kyle J. Harper & Takayuki Matsuoka) Fixed : GCC 4.9+ optimization bug - Reported by Markus Trippelsdorf, Greg Slazinski & Evan Nemerson Changed: Enums converted to LZ4F_ namespace convention - by Takayuki Matsuoka Added : AppVeyor CI environment, for Visual tests - Suggested by Takayuki Matsuoka Modified:Obsolete functions generate warnings - Suggested by Evan Nemerson, contributed by Takayuki Matsuoka Fixed : Bug #75 (unfinished stream), reported by Yongwoon Cho Updated: Documentation converted to MarkDown format r128: New : lz4cli sparse file support (Requested by Neil Wilson, and contributed by Takayuki Matsuoka) New : command -m, to compress multiple files in a single command (suggested by Kyle J. Harper) Fixed : Restored lz4hc compression ratio (slightly lower since r124) New : lz4 cli supports long commands (suggested by Takayuki Matsuoka) New : lz4frame & lz4cli frame content size support New : lz4frame supports skippable frames, as requested by Sergey Cherepanov Changed: Default "make install" directory is /usr/local, as notified by Ron Johnson New : lz4 cli supports "pass-through" mode, requested by Neil Wilson New : datagen can generate sparse files New : scan-build tests, thanks to kind help by Takayuki Matsuoka New : g++ compatibility tests New : arm cross-compilation test, thanks to kind help by Takayuki Matsuoka Fixed : Fuzzer + frametest compatibility with NetBSD (issue #48, reported by Thomas Klausner) Added : Visual project directory Updated: Man page & Specification r127: N/A : added a file on SVN r126: New : lz4frame API is now integrated into liblz4 Fixed : GCC 4.9 bug on highest performance settings, reported by Greg Slazinski Fixed : bug within LZ4 HC streaming mode, reported by James Boyle Fixed : older compiler don't like nameless unions, reported by Cheyi Lin Changed : lz4 is C90 compatible Changed : added -pedantic option, fixed a few mminor warnings r125: Changed : endian and alignment code Changed : directory structure : new "lib" directory Updated : lz4io, now uses lz4frame Improved: slightly improved decoding speed Fixed : LZ4_compress_limitedOutput(); Special thanks to Christopher Speller ! 
Fixed : some alignment warnings under clang Fixed : deprecated function LZ4_slideInputBufferHC() r124: New : LZ4 HC streaming mode Fixed : LZ4F_compressBound() using null preferencesPtr Updated : xxHash to r38 Updated library number, to 1.4.0 r123: Added : experimental lz4frame API, thanks to Takayuki Matsuoka and Christopher Jackson for testings Fix : s390x support, thanks to Nobuhiro Iwamatsu Fix : test mode (-t) no longer requires confirmation, thanks to Thary Nguyen r122: Fix : AIX & AIX64 support (SamG) Fix : mips 64-bits support (lew van) Added : Examples directory, using code examples from Takayuki Matsuoka Updated : Framing specification, to v1.4.1 Updated : xxHash, to r36 r121: Added : Makefile : install for kFreeBSD and Hurd (Nobuhiro Iwamatsu) Fix : Makefile : install for OS-X and BSD, thanks to Takayuki Matsuoka r120: Modified : Streaming API, using strong types Added : LZ4_versionNumber(), thanks to Takayuki Matsuoka Fix : OS-X : library install name, thanks to Clemens Lang Updated : Makefile : synchronize library version number with lz4.h, thanks to Takayuki Matsuoka Updated : Makefile : stricter compilation flags Added : pkg-config, thanks to Zbigniew Jędrzejewski-Szmek (issue 135) Makefile : lz4-test only test native binaries, as suggested by Michał Górny (issue 136) Updated : xxHash to r35 r119: Fix : Issue 134 : extended malicious address space overflow in 32-bits mode for some specific configurations r118: New : LZ4 Streaming API (Fast version), special thanks to Takayuki Matsuoka New : datagen : parametrable synthetic data generator for tests Improved : fuzzer, support more test cases, more parameters, ability to jump to specific test fix : support ppc64le platform (issue 131) fix : Issue 52 (malicious address space overflow in 32-bits mode when using large custom format) fix : Makefile : minor issue 130 : header files permissions r117: Added : man pages for lz4c and lz4cat Added : automated tests on Travis, thanks to Takayuki Matsuoka ! fix : block-dependency command line (issue 127) fix : lz4fullbench (issue 128) r116: hotfix (issue 124 & 125) r115: Added : lz4cat utility, installed on POSX systems (issue 118) OS-X compatible compilation of dynamic library (issue 115) r114: Makefile : library correctly compiled with -O3 switch (issue 114) Makefile : library compilation compatible with clang Makefile : library is versioned and linked (issue 119) lz4.h : no more static inline prototypes (issue 116) man : improved header/footer (issue 111) Makefile : Use system default $(CC) & $(MAKE) variables (issue 112) xxhash : updated to r34 r113: Large decompression speed improvement for GCC 32-bits. Thanks to Valery Croizier ! LZ4HC : Compression Level is now a programmable parameter (CLI from 4 to 9) Separated IO routines from command line (lz4io.c) Version number into lz4.h (suggested by Francesc Alted) r112: quickfix r111 : Makefile : added capability to install libraries Modified Directory tree, to better separate libraries from programs. 
r110 : lz4 & lz4hc : added capability to allocate state & stream state with custom allocator (issue 99) fuzzer & fullbench : updated to test new functions man : documented -l command (Legacy format, for Linux kernel compression) (issue 102) cmake : improved version by Mika Attila, building programs and libraries (issue 100) xxHash : updated to r33 Makefile : clean also delete local package .tar.gz r109 : lz4.c : corrected issue 98 (LZ4_compress_limitedOutput()) Makefile : can specify version number from makefile r108 : lz4.c : corrected compression efficiency issue 97 in 64-bits chained mode (-BD) for streams > 4 GB (thanks Roman Strashkin for reporting) r107 : Makefile : support DESTDIR for staged installs. Thanks Jorge Aparicio. Makefile : make install installs both lz4 and lz4c (Jorge Aparicio) Makefile : removed -Wno-implicit-declaration compilation switch lz4cli.c : include for isatty() (Luca Barbato) lz4.h : introduced LZ4_MAX_INPUT_SIZE constant (Shay Green) lz4.h : LZ4_compressBound() : unified macro and inline definitions (Shay Green) lz4.h : LZ4_decompressSafe_partial() : clarify comments (Shay Green) lz4.c : LZ4_compress() verify input size condition (Shay Green) bench.c : corrected a bug in free memory size evaluation cmake : install into bin/ directory (Richard Yao) cmake : check for just C compiler (Elan Ruusamae) r106 : Makefile : make dist modify text files in the package to respect Unix EoL convention lz4cli.c : corrected small display bug in HC mode r105 : Makefile : New install script and man page, contributed by Prasad Pandit lz4cli.c : Minor modifications, for easier extensibility COPYING : added license file LZ4_Streaming_Format.odt : modified file name to remove white space characters Makefile : .exe suffix now properly added only for Windows target
py-lz4framed-0.14.0/lz4/README.md000066400000000000000000000126361357043434000161130ustar00rootroot00000000000000
LZ4 - Extremely fast compression
================================

LZ4 is a lossless compression algorithm, providing compression speed > 500 MB/s per core, scalable with multi-core CPUs. It features an extremely fast decoder, with speed in multiple GB/s per core, typically reaching RAM speed limits on multi-core systems.

Speed can be tuned dynamically, selecting an "acceleration" factor which trades compression ratio for faster speed. On the other end, a high-compression derivative, LZ4_HC, is also provided, trading CPU time for an improved compression ratio. All versions feature the same decompression speed.

LZ4 is also compatible with [dictionary compression](https://github.com/facebook/zstd#the-case-for-small-data-compression), both at [API](https://github.com/lz4/lz4/blob/v1.8.3/lib/lz4frame.h#L481) and [CLI](https://github.com/lz4/lz4/blob/v1.8.3/programs/lz4.1.md#operation-modifiers) levels. It can ingest any input file as a dictionary, though only the final 64KB are used. This capability can be combined with the [Zstandard Dictionary Builder](https://github.com/facebook/zstd/blob/v1.3.5/programs/zstd.1.md#dictionary-builder), in order to drastically improve compression performance on small files.

The LZ4 library is provided as open-source software under a BSD 2-Clause license.
|Branch      |Status   |
|------------|---------|
|master      | [![Build Status][travisMasterBadge]][travisLink] [![Build status][AppveyorMasterBadge]][AppveyorLink] [![coverity][coverBadge]][coverlink] |
|dev         | [![Build Status][travisDevBadge]][travisLink] [![Build status][AppveyorDevBadge]][AppveyorLink] |

[travisMasterBadge]: https://travis-ci.org/lz4/lz4.svg?branch=master "Continuous Integration test suite"
[travisDevBadge]: https://travis-ci.org/lz4/lz4.svg?branch=dev "Continuous Integration test suite"
[travisLink]: https://travis-ci.org/lz4/lz4
[AppveyorMasterBadge]: https://ci.appveyor.com/api/projects/status/github/lz4/lz4?branch=master&svg=true "Windows test suite"
[AppveyorDevBadge]: https://ci.appveyor.com/api/projects/status/github/lz4/lz4?branch=dev&svg=true "Windows test suite"
[AppveyorLink]: https://ci.appveyor.com/project/YannCollet/lz4-1lndh
[coverBadge]: https://scan.coverity.com/projects/4735/badge.svg "Static code analysis of Master branch"
[coverlink]: https://scan.coverity.com/projects/4735

> **Branch Policy:**
> - The "master" branch is considered stable, at all times.
> - The "dev" branch is the one where all contributions must be merged before being promoted to master.
>   + If you plan to propose a patch, please commit into the "dev" branch, or its own feature branch. Direct commits to "master" are not permitted.

Benchmarks
-------------------------

The benchmark uses [lzbench], from @inikep, compiled with GCC v8.2.0 on Linux 64-bits (Ubuntu 4.18.0-17). The reference system uses a Core i7-9700K CPU @ 4.9GHz (w/ turbo boost). The benchmark evaluates the compression of the reference [Silesia Corpus] in single-thread mode.

[lzbench]: https://github.com/inikep/lzbench
[Silesia Corpus]: http://sun.aei.polsl.pl/~sdeor/index.php?page=silesia

| Compressor              | Ratio   | Compression  | Decompression |
| ----------              | -----   | -----------  | ------------- |
| memcpy                  | 1.000   | 13700 MB/s   | 13700 MB/s    |
|**LZ4 default (v1.9.0)** |**2.101**| **780 MB/s** | **4970 MB/s** |
| LZO 2.09                | 2.108   | 670 MB/s     | 860 MB/s      |
| QuickLZ 1.5.0           | 2.238   | 575 MB/s     | 780 MB/s      |
| Snappy 1.1.4            | 2.091   | 565 MB/s     | 1950 MB/s     |
| [Zstandard] 1.4.0 -1    | 2.883   | 515 MB/s     | 1380 MB/s     |
| LZF v3.6                | 2.073   | 415 MB/s     | 910 MB/s      |
| [zlib] deflate 1.2.11 -1| 2.730   | 100 MB/s     | 415 MB/s      |
|**LZ4 HC -9 (v1.9.0)**   |**2.721**| 41 MB/s      | **4900 MB/s** |
| [zlib] deflate 1.2.11 -6| 3.099   | 36 MB/s      | 445 MB/s      |

[zlib]: http://www.zlib.net/
[Zstandard]: http://www.zstd.net/

LZ4 is also compatible with and optimized for x32 mode, for which it provides additional speed performance.

Installation
-------------------------

```
make
make install     # this command may require root permissions
```

LZ4's `Makefile` supports standard [Makefile conventions], including [staged installs], [redirection], or [command redefinition]. It is compatible with parallel builds (`-j#`).

[Makefile conventions]: https://www.gnu.org/prep/standards/html_node/Makefile-Conventions.html
[staged installs]: https://www.gnu.org/prep/standards/html_node/DESTDIR.html
[redirection]: https://www.gnu.org/prep/standards/html_node/Directory-Variables.html
[command redefinition]: https://www.gnu.org/prep/standards/html_node/Utilities-in-Makefiles.html

Documentation
-------------------------

The raw LZ4 block compression format is detailed within [lz4_Block_format]. Arbitrarily long files or data streams are compressed using multiple blocks, for streaming requirements. These blocks are organized into a frame, defined in [lz4_Frame_format].
Interoperable versions of LZ4 must also respect the frame format. [lz4_Block_format]: doc/lz4_Block_format.md [lz4_Frame_format]: doc/lz4_Frame_format.md Other source versions ------------------------- Beyond the C reference source, many contributors have created versions of lz4 in multiple languages (Java, C#, Python, Perl, Ruby, etc.). A list of known source ports is maintained on the [LZ4 Homepage]. [LZ4 Homepage]: http://www.lz4.org py-lz4framed-0.14.0/lz4/lz4.c000066400000000000000000003056441357043434000155170ustar00rootroot00000000000000/* LZ4 - Fast LZ compression algorithm Copyright (C) 2011-present, Yann Collet. BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. You can contact the author at : - LZ4 homepage : http://www.lz4.org - LZ4 source repository : https://github.com/lz4/lz4 */ /*-************************************ * Tuning parameters **************************************/ /* * LZ4_HEAPMODE : * Select how default compression functions will allocate memory for their hash table, * in memory stack (0:default, fastest), or in memory heap (1:requires malloc()). */ #ifndef LZ4_HEAPMODE # define LZ4_HEAPMODE 0 #endif /* * ACCELERATION_DEFAULT : * Select "acceleration" for LZ4_compress_fast() when parameter value <= 0 */ #define ACCELERATION_DEFAULT 1 /*-************************************ * CPU Feature Detection **************************************/ /* LZ4_FORCE_MEMORY_ACCESS * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable. * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal. * The below switch allow to select different access method for improved performance. * Method 0 (default) : use `memcpy()`. Safe and portable. * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable). * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`. * Method 2 : direct access. This method is portable but violate C standard. * It can generate buggy code on targets which assembly generation depends on alignment. * But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6) * See https://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details. 
* Prefer these methods in priority order (0 > 1 > 2) */ #ifndef LZ4_FORCE_MEMORY_ACCESS /* can be defined externally */ # if defined(__GNUC__) && \ ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) \ || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) ) # define LZ4_FORCE_MEMORY_ACCESS 2 # elif (defined(__INTEL_COMPILER) && !defined(_WIN32)) || defined(__GNUC__) # define LZ4_FORCE_MEMORY_ACCESS 1 # endif #endif /* * LZ4_FORCE_SW_BITCOUNT * Define this parameter if your target system or compiler does not support hardware bit count */ #if defined(_MSC_VER) && defined(_WIN32_WCE) /* Visual Studio for WinCE doesn't support Hardware bit count */ # define LZ4_FORCE_SW_BITCOUNT #endif /*-************************************ * Dependency **************************************/ /* * LZ4_SRC_INCLUDED: * Amalgamation flag, whether lz4.c is included */ #ifndef LZ4_SRC_INCLUDED # define LZ4_SRC_INCLUDED 1 #endif #ifndef LZ4_STATIC_LINKING_ONLY #define LZ4_STATIC_LINKING_ONLY #endif #ifndef LZ4_DISABLE_DEPRECATE_WARNINGS #define LZ4_DISABLE_DEPRECATE_WARNINGS /* due to LZ4_decompress_safe_withPrefix64k */ #endif #define LZ4_STATIC_LINKING_ONLY /* LZ4_DISTANCE_MAX */ #include "lz4.h" /* see also "memory routines" below */ /*-************************************ * Compiler Options **************************************/ #ifdef _MSC_VER /* Visual Studio */ # include # pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ # pragma warning(disable : 4293) /* disable: C4293: too large shift (32-bits) */ #endif /* _MSC_VER */ #ifndef LZ4_FORCE_INLINE # ifdef _MSC_VER /* Visual Studio */ # define LZ4_FORCE_INLINE static __forceinline # else # if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ # ifdef __GNUC__ # define LZ4_FORCE_INLINE static inline __attribute__((always_inline)) # else # define LZ4_FORCE_INLINE static inline # endif # else # define LZ4_FORCE_INLINE static # endif /* __STDC_VERSION__ */ # endif /* _MSC_VER */ #endif /* LZ4_FORCE_INLINE */ /* LZ4_FORCE_O2_GCC_PPC64LE and LZ4_FORCE_O2_INLINE_GCC_PPC64LE * gcc on ppc64le generates an unrolled SIMDized loop for LZ4_wildCopy8, * together with a simple 8-byte copy loop as a fall-back path. * However, this optimization hurts the decompression speed by >30%, * because the execution does not go to the optimized loop * for typical compressible data, and all of the preamble checks * before going to the fall-back path become useless overhead. * This optimization happens only with the -O3 flag, and -O2 generates * a simple 8-byte copy loop. * With gcc on ppc64le, all of the LZ4_decompress_* and LZ4_wildCopy8 * functions are annotated with __attribute__((optimize("O2"))), * and also LZ4_wildCopy8 is forcibly inlined, so that the O2 attribute * of LZ4_wildCopy8 does not affect the compression speed. 
*/ #if defined(__PPC64__) && defined(__LITTLE_ENDIAN__) && defined(__GNUC__) && !defined(__clang__) # define LZ4_FORCE_O2_GCC_PPC64LE __attribute__((optimize("O2"))) # define LZ4_FORCE_O2_INLINE_GCC_PPC64LE __attribute__((optimize("O2"))) LZ4_FORCE_INLINE #else # define LZ4_FORCE_O2_GCC_PPC64LE # define LZ4_FORCE_O2_INLINE_GCC_PPC64LE static #endif #if (defined(__GNUC__) && (__GNUC__ >= 3)) || (defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 800)) || defined(__clang__) # define expect(expr,value) (__builtin_expect ((expr),(value)) ) #else # define expect(expr,value) (expr) #endif #ifndef likely #define likely(expr) expect((expr) != 0, 1) #endif #ifndef unlikely #define unlikely(expr) expect((expr) != 0, 0) #endif /*-************************************ * Memory routines **************************************/ #include /* malloc, calloc, free */ #define ALLOC(s) malloc(s) #define ALLOC_AND_ZERO(s) calloc(1,s) #define FREEMEM(p) free(p) #include /* memset, memcpy */ #define MEM_INIT(p,v,s) memset((p),(v),(s)) /*-************************************ * Common Constants **************************************/ #define MINMATCH 4 #define WILDCOPYLENGTH 8 #define LASTLITERALS 5 /* see ../doc/lz4_Block_format.md#parsing-restrictions */ #define MFLIMIT 12 /* see ../doc/lz4_Block_format.md#parsing-restrictions */ #define MATCH_SAFEGUARD_DISTANCE ((2*WILDCOPYLENGTH) - MINMATCH) /* ensure it's possible to write 2 x wildcopyLength without overflowing output buffer */ #define FASTLOOP_SAFE_DISTANCE 64 static const int LZ4_minLength = (MFLIMIT+1); #define KB *(1 <<10) #define MB *(1 <<20) #define GB *(1U<<30) #define LZ4_DISTANCE_ABSOLUTE_MAX 65535 #if (LZ4_DISTANCE_MAX > LZ4_DISTANCE_ABSOLUTE_MAX) /* max supported by LZ4 format */ # error "LZ4_DISTANCE_MAX is too big : must be <= 65535" #endif #define ML_BITS 4 #define ML_MASK ((1U<=1) # include #else # ifndef assert # define assert(condition) ((void)0) # endif #endif #define LZ4_STATIC_ASSERT(c) { enum { LZ4_static_assert = 1/(int)(!!(c)) }; } /* use after variable declarations */ #if defined(LZ4_DEBUG) && (LZ4_DEBUG>=2) # include static int g_debuglog_enable = 1; # define DEBUGLOG(l, ...) { \ if ((g_debuglog_enable) && (l<=LZ4_DEBUG)) { \ fprintf(stderr, __FILE__ ": "); \ fprintf(stderr, __VA_ARGS__); \ fprintf(stderr, " \n"); \ } } #else # define DEBUGLOG(l, ...) 
{} /* disabled */ #endif /*-************************************ * Types **************************************/ #if defined(__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) # include typedef uint8_t BYTE; typedef uint16_t U16; typedef uint32_t U32; typedef int32_t S32; typedef uint64_t U64; typedef uintptr_t uptrval; #else typedef unsigned char BYTE; typedef unsigned short U16; typedef unsigned int U32; typedef signed int S32; typedef unsigned long long U64; typedef size_t uptrval; /* generally true, except OpenVMS-64 */ #endif #if defined(__x86_64__) typedef U64 reg_t; /* 64-bits in x32 mode */ #else typedef size_t reg_t; /* 32-bits in x32 mode */ #endif typedef enum { notLimited = 0, limitedOutput = 1, fillOutput = 2 } limitedOutput_directive; /*-************************************ * Reading and writing into memory **************************************/ static unsigned LZ4_isLittleEndian(void) { const union { U32 u; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */ return one.c[0]; } #if defined(LZ4_FORCE_MEMORY_ACCESS) && (LZ4_FORCE_MEMORY_ACCESS==2) /* lie to the compiler about data alignment; use with caution */ static U16 LZ4_read16(const void* memPtr) { return *(const U16*) memPtr; } static U32 LZ4_read32(const void* memPtr) { return *(const U32*) memPtr; } static reg_t LZ4_read_ARCH(const void* memPtr) { return *(const reg_t*) memPtr; } static void LZ4_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; } static void LZ4_write32(void* memPtr, U32 value) { *(U32*)memPtr = value; } #elif defined(LZ4_FORCE_MEMORY_ACCESS) && (LZ4_FORCE_MEMORY_ACCESS==1) /* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ /* currently only defined for gcc and icc */ typedef union { U16 u16; U32 u32; reg_t uArch; } __attribute__((packed)) unalign; static U16 LZ4_read16(const void* ptr) { return ((const unalign*)ptr)->u16; } static U32 LZ4_read32(const void* ptr) { return ((const unalign*)ptr)->u32; } static reg_t LZ4_read_ARCH(const void* ptr) { return ((const unalign*)ptr)->uArch; } static void LZ4_write16(void* memPtr, U16 value) { ((unalign*)memPtr)->u16 = value; } static void LZ4_write32(void* memPtr, U32 value) { ((unalign*)memPtr)->u32 = value; } #else /* safe and portable access using memcpy() */ static U16 LZ4_read16(const void* memPtr) { U16 val; memcpy(&val, memPtr, sizeof(val)); return val; } static U32 LZ4_read32(const void* memPtr) { U32 val; memcpy(&val, memPtr, sizeof(val)); return val; } static reg_t LZ4_read_ARCH(const void* memPtr) { reg_t val; memcpy(&val, memPtr, sizeof(val)); return val; } static void LZ4_write16(void* memPtr, U16 value) { memcpy(memPtr, &value, sizeof(value)); } static void LZ4_write32(void* memPtr, U32 value) { memcpy(memPtr, &value, sizeof(value)); } #endif /* LZ4_FORCE_MEMORY_ACCESS */ static U16 LZ4_readLE16(const void* memPtr) { if (LZ4_isLittleEndian()) { return LZ4_read16(memPtr); } else { const BYTE* p = (const BYTE*)memPtr; return (U16)((U16)p[0] + (p[1]<<8)); } } static void LZ4_writeLE16(void* memPtr, U16 value) { if (LZ4_isLittleEndian()) { LZ4_write16(memPtr, value); } else { BYTE* p = (BYTE*)memPtr; p[0] = (BYTE) value; p[1] = (BYTE)(value>>8); } } /* customized variant of memcpy, which can overwrite up to 8 bytes beyond dstEnd */ LZ4_FORCE_O2_INLINE_GCC_PPC64LE void LZ4_wildCopy8(void* dstPtr, const void* srcPtr, void* dstEnd) { BYTE* d = (BYTE*)dstPtr; const BYTE* s = (const BYTE*)srcPtr; BYTE* const e = (BYTE*)dstEnd; do 
    { memcpy(d,s,8); d+=8; s+=8; } while (d<e);
}

static const unsigned inc32table[8] = {0, 1, 2,  1,  0,  4, 4, 4};
static const int      dec64table[8] = {0, 0, 0, -1, -4,  1, 2, 3};


#ifndef LZ4_FAST_DEC_LOOP
#  if defined(__i386__) || defined(__x86_64__)
#    define LZ4_FAST_DEC_LOOP 1
#  elif defined(__aarch64__) && !defined(__clang__)
     /* On aarch64, we disable this optimization for clang because on certain
      * mobile chipsets, performance is reduced with clang. For information
      * refer to https://github.com/lz4/lz4/pull/707 */
#    define LZ4_FAST_DEC_LOOP 1
#  else
#    define LZ4_FAST_DEC_LOOP 0
#  endif
#endif

#if LZ4_FAST_DEC_LOOP

LZ4_FORCE_O2_INLINE_GCC_PPC64LE void
LZ4_memcpy_using_offset_base(BYTE* dstPtr, const BYTE* srcPtr, BYTE* dstEnd, const size_t offset)
{
    if (offset < 8) {
        dstPtr[0] = srcPtr[0];
        dstPtr[1] = srcPtr[1];
        dstPtr[2] = srcPtr[2];
        dstPtr[3] = srcPtr[3];
        srcPtr += inc32table[offset];
        memcpy(dstPtr+4, srcPtr, 4);
        srcPtr -= dec64table[offset];
        dstPtr += 8;
    } else {
        memcpy(dstPtr, srcPtr, 8);
        dstPtr += 8;
        srcPtr += 8;
    }

    while (dstPtr < dstEnd) { memcpy(dstPtr, srcPtr, 8); dstPtr += 8; srcPtr += 8; }
}

/* customized variant of memcpy, which can overwrite up to 32 bytes beyond dstEnd
 * this version copies two times 16 bytes (instead of one time 32 bytes)
 * because it must be compatible with offsets >= 16. */
LZ4_FORCE_O2_INLINE_GCC_PPC64LE void
LZ4_wildCopy32(void* dstPtr, const void* srcPtr, void* dstEnd)
{
    BYTE* d = (BYTE*)dstPtr;
    const BYTE* s = (const BYTE*)srcPtr;
    BYTE* const e = (BYTE*)dstEnd;

    do { memcpy(d,s,16); memcpy(d+16,s+16,16); d+=32; s+=32; } while (d<e);
}

/* LZ4_memcpy_using_offset()  presumes :
 * - dstEnd >= dstPtr + MINMATCH
 * - there is at least 8 bytes available to write after dstEnd */
LZ4_FORCE_O2_INLINE_GCC_PPC64LE void
LZ4_memcpy_using_offset(BYTE* dstPtr, const BYTE* srcPtr, BYTE* dstEnd, const size_t offset)
{
    BYTE v[8];

    assert(dstEnd >= dstPtr + MINMATCH);
    LZ4_write32(dstPtr, 0);   /* silence an msan warning when offset==0 */

    switch(offset) {
    case 1:
        memset(v, *srcPtr, 8);
        break;
    case 2:
        memcpy(v, srcPtr, 2);
        memcpy(&v[2], srcPtr, 2);
        memcpy(&v[4], &v[0], 4);
        break;
    case 4:
        memcpy(v, srcPtr, 4);
        memcpy(&v[4], srcPtr, 4);
        break;
    default:
        LZ4_memcpy_using_offset_base(dstPtr, srcPtr, dstEnd, offset);
        return;
    }

    memcpy(dstPtr, v, 8);
    dstPtr += 8;
    while (dstPtr < dstEnd) {
        memcpy(dstPtr, v, 8);
        dstPtr += 8;
    }
}
#endif


/*-************************************
*  Common functions
**************************************/
static unsigned LZ4_NbCommonBytes (reg_t val)
{
    if (LZ4_isLittleEndian()) {
        if (sizeof(val)==8) {
#       if defined(_MSC_VER) && defined(_WIN64) && !defined(LZ4_FORCE_SW_BITCOUNT)
            unsigned long r = 0;
            _BitScanForward64( &r, (U64)val );
            return (int)(r>>3);
#       elif (defined(__clang__) || (defined(__GNUC__) && (__GNUC__>=3))) && !defined(LZ4_FORCE_SW_BITCOUNT)
            return (unsigned)__builtin_ctzll((U64)val) >> 3;
#       else
            static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2,
                                                     0, 3, 1, 3, 1, 4, 2, 7,
                                                     0, 2, 3, 6, 1, 5, 3, 5,
                                                     1, 3, 4, 4, 2, 5, 6, 7,
                                                     7, 0, 1, 2, 3, 3, 4, 6,
                                                     2, 6, 5, 5, 3, 4, 5, 6,
                                                     7, 1, 2, 4, 6, 4, 4, 5,
                                                     7, 2, 6, 5, 7, 6, 7, 7 };
            return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
#       endif
        } else /* 32 bits */ {
#       if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
            unsigned long r;
            _BitScanForward( &r, (U32)val );
            return (int)(r>>3);
#       elif (defined(__clang__) || (defined(__GNUC__) && (__GNUC__>=3))) && !defined(LZ4_FORCE_SW_BITCOUNT)
            return (unsigned)__builtin_ctz((U32)val) >> 3;
#       else
            static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0,
                                                     3, 2, 2, 1, 3, 2, 0, 1,
                                                     3, 3, 1, 2, 2, 2, 2, 0,
                                                     3, 1, 2, 0, 1, 0, 1, 1 };
            return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
#       endif
        }
    } else   /* Big Endian CPU */ {
        if (sizeof(val)==8) {   /* 64-bits */
#       if defined(_MSC_VER) && defined(_WIN64) && !defined(LZ4_FORCE_SW_BITCOUNT)
            unsigned long r = 0;
            _BitScanReverse64( &r, val );
            return (unsigned)(r>>3);
#       elif (defined(__clang__) || (defined(__GNUC__) && (__GNUC__>=3))) && !defined(LZ4_FORCE_SW_BITCOUNT)
            return (unsigned)__builtin_clzll((U64)val) >> 3;
#       else
            static const U32 by32 = sizeof(val)*4;  /* 32 on 64 bits (goal), 16 on 32 bits.
                Just to avoid some static analyzer complaining about shift by 32 on 32-bits target.
                Note that this code path is never triggered in 32-bits mode.
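                Worked example (illustrative): on a little-endian 64-bit target, if two
                positions agree on exactly their first 5 bytes, the XOR of the two 8-byte
                loads has its low 40 bits clear, so ctz(diff) lies in [40,47] and
                LZ4_NbCommonBytes() returns ctz(diff) >> 3 == 5, the number of matching bytes.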
                */
            unsigned r;
            if (!(val>>by32)) { r=4; } else { r=0; val>>=by32; }
            if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
            r += (!val);
            return r;
#       endif
        } else /* 32 bits */ {
#       if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
            unsigned long r = 0;
            _BitScanReverse( &r, (unsigned long)val );
            return (unsigned)(r>>3);
#       elif (defined(__clang__) || (defined(__GNUC__) && (__GNUC__>=3))) && !defined(LZ4_FORCE_SW_BITCOUNT)
            return (unsigned)__builtin_clz((U32)val) >> 3;
#       else
            unsigned r;
            if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
            r += (!val);
            return r;
#       endif
        }
    }
}


#define STEPSIZE sizeof(reg_t)

LZ4_FORCE_INLINE
unsigned LZ4_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* pInLimit)
{
    const BYTE* const pStart = pIn;

    if (likely(pIn < pInLimit-(STEPSIZE-1))) {
        reg_t const diff = LZ4_read_ARCH(pMatch) ^ LZ4_read_ARCH(pIn);
        if (!diff) {
            pIn+=STEPSIZE; pMatch+=STEPSIZE;
        } else {
            return LZ4_NbCommonBytes(diff);
        }
    }

    while (likely(pIn < pInLimit-(STEPSIZE-1))) {
        reg_t const diff = LZ4_read_ARCH(pMatch) ^ LZ4_read_ARCH(pIn);
        if (!diff) { pIn+=STEPSIZE; pMatch+=STEPSIZE; continue; }
        pIn += LZ4_NbCommonBytes(diff);
        return (unsigned)(pIn - pStart);
    }

    if ((STEPSIZE==8) && (pIn<(pInLimit-3)) && (LZ4_read32(pMatch) == LZ4_read32(pIn))) { pIn+=4; pMatch+=4; }
    if ((pIn<(pInLimit-1)) && (LZ4_read16(pMatch) == LZ4_read16(pIn))) { pIn+=2; pMatch+=2; }
    if ((pIn<pInLimit) && (*pMatch == *pIn)) pIn++;
    return (unsigned)(pIn - pStart);
}


#ifndef LZ4_COMMONDEFS_ONLY
/*-************************************
*  Local Constants
**************************************/
static const int LZ4_64Klimit = ((64 KB) + (MFLIMIT-1));
static const U32 LZ4_skipTrigger = 6;  /* Increase this value ==> compression run slower on incompressible data */


/*-************************************
*  Local Structures and types
**************************************/
typedef enum { clearedTable = 0, byPtr, byU32, byU16 } tableType_t;

/**
 * This enum distinguishes several different modes of accessing previous
 * content in the stream.
 *
 * - noDict        : There is no preceding content.
 * - withPrefix64k : Table entries up to ctx->dictSize before the current blob
 *                   being compressed are valid and refer to the preceding
 *                   content (of length ctx->dictSize), which is available
 *                   contiguously preceding in memory the content currently
 *                   being compressed.
 * - usingExtDict  : Like withPrefix64k, but the preceding content is somewhere
 *                   else in memory, starting at ctx->dictionary with length
 *                   ctx->dictSize.
 * - usingDictCtx  : Like usingExtDict, but everything concerning the preceding
 *                   content is in a separate context, pointed to by
 *                   ctx->dictCtx. ctx->dictionary, ctx->dictSize, and table
 *                   entries in the current context that refer to positions
 *                   preceding the beginning of the current compression are
 *                   ignored. Instead, ctx->dictCtx->dictionary and ctx->dictCtx
 *                   ->dictSize describe the location and size of the preceding
 *                   content, and matches are found by looking in the ctx
 *                   ->dictCtx->hashTable.
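 *
 * A rough mapping to the public API (illustrative, simplified):
 *
 *   - noDict        : LZ4_compress_default(src, dst, srcSize, dstCapacity);
 *   - withPrefix64k : LZ4_compress_fast_continue() when each new block is
 *                     written directly after the previous one in memory;
 *   - usingExtDict  : LZ4_compress_fast_continue() when blocks live in
 *                     separate buffers, so the history is non-contiguous;
 *   - usingDictCtx  : LZ4_attach_dictionary() on the working stream before
 *                     LZ4_compress_fast_continue(), sharing one loaded
 *                     dictionary across many streams.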
*/ typedef enum { noDict = 0, withPrefix64k, usingExtDict, usingDictCtx } dict_directive; typedef enum { noDictIssue = 0, dictSmall } dictIssue_directive; /*-************************************ * Local Utils **************************************/ int LZ4_versionNumber (void) { return LZ4_VERSION_NUMBER; } const char* LZ4_versionString(void) { return LZ4_VERSION_STRING; } int LZ4_compressBound(int isize) { return LZ4_COMPRESSBOUND(isize); } int LZ4_sizeofState() { return LZ4_STREAMSIZE; } /*-************************************ * Internal Definitions used in Tests **************************************/ #if defined (__cplusplus) extern "C" { #endif int LZ4_compress_forceExtDict (LZ4_stream_t* LZ4_dict, const char* source, char* dest, int srcSize); int LZ4_decompress_safe_forceExtDict(const char* source, char* dest, int compressedSize, int maxOutputSize, const void* dictStart, size_t dictSize); #if defined (__cplusplus) } #endif /*-****************************** * Compression functions ********************************/ static U32 LZ4_hash4(U32 sequence, tableType_t const tableType) { if (tableType == byU16) return ((sequence * 2654435761U) >> ((MINMATCH*8)-(LZ4_HASHLOG+1))); else return ((sequence * 2654435761U) >> ((MINMATCH*8)-LZ4_HASHLOG)); } static U32 LZ4_hash5(U64 sequence, tableType_t const tableType) { const U32 hashLog = (tableType == byU16) ? LZ4_HASHLOG+1 : LZ4_HASHLOG; if (LZ4_isLittleEndian()) { const U64 prime5bytes = 889523592379ULL; return (U32)(((sequence << 24) * prime5bytes) >> (64 - hashLog)); } else { const U64 prime8bytes = 11400714785074694791ULL; return (U32)(((sequence >> 24) * prime8bytes) >> (64 - hashLog)); } } LZ4_FORCE_INLINE U32 LZ4_hashPosition(const void* const p, tableType_t const tableType) { if ((sizeof(reg_t)==8) && (tableType != byU16)) return LZ4_hash5(LZ4_read_ARCH(p), tableType); return LZ4_hash4(LZ4_read32(p), tableType); } static void LZ4_clearHash(U32 h, void* tableBase, tableType_t const tableType) { switch (tableType) { default: /* fallthrough */ case clearedTable: { /* illegal! */ assert(0); return; } case byPtr: { const BYTE** hashTable = (const BYTE**)tableBase; hashTable[h] = NULL; return; } case byU32: { U32* hashTable = (U32*) tableBase; hashTable[h] = 0; return; } case byU16: { U16* hashTable = (U16*) tableBase; hashTable[h] = 0; return; } } } static void LZ4_putIndexOnHash(U32 idx, U32 h, void* tableBase, tableType_t const tableType) { switch (tableType) { default: /* fallthrough */ case clearedTable: /* fallthrough */ case byPtr: { /* illegal! */ assert(0); return; } case byU32: { U32* hashTable = (U32*) tableBase; hashTable[h] = idx; return; } case byU16: { U16* hashTable = (U16*) tableBase; assert(idx < 65536); hashTable[h] = (U16)idx; return; } } } static void LZ4_putPositionOnHash(const BYTE* p, U32 h, void* tableBase, tableType_t const tableType, const BYTE* srcBase) { switch (tableType) { case clearedTable: { /* illegal! */ assert(0); return; } case byPtr: { const BYTE** hashTable = (const BYTE**)tableBase; hashTable[h] = p; return; } case byU32: { U32* hashTable = (U32*) tableBase; hashTable[h] = (U32)(p-srcBase); return; } case byU16: { U16* hashTable = (U16*) tableBase; hashTable[h] = (U16)(p-srcBase); return; } } } LZ4_FORCE_INLINE void LZ4_putPosition(const BYTE* p, void* tableBase, tableType_t tableType, const BYTE* srcBase) { U32 const h = LZ4_hashPosition(p, tableType); LZ4_putPositionOnHash(p, h, tableBase, tableType, srcBase); } /* LZ4_getIndexOnHash() : * Index of match position registered in hash table. 
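 *                  (e.g. with tableType==byU32, a stored value of 12345 denotes the
 *                  sequence that started at byte base+12345 of the current segment)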
* hash position must be calculated by using base+index, or dictBase+index. * Assumption 1 : only valid if tableType == byU32 or byU16. * Assumption 2 : h is presumed valid (within limits of hash table) */ static U32 LZ4_getIndexOnHash(U32 h, const void* tableBase, tableType_t tableType) { LZ4_STATIC_ASSERT(LZ4_MEMORY_USAGE > 2); if (tableType == byU32) { const U32* const hashTable = (const U32*) tableBase; assert(h < (1U << (LZ4_MEMORY_USAGE-2))); return hashTable[h]; } if (tableType == byU16) { const U16* const hashTable = (const U16*) tableBase; assert(h < (1U << (LZ4_MEMORY_USAGE-1))); return hashTable[h]; } assert(0); return 0; /* forbidden case */ } static const BYTE* LZ4_getPositionOnHash(U32 h, const void* tableBase, tableType_t tableType, const BYTE* srcBase) { if (tableType == byPtr) { const BYTE* const* hashTable = (const BYTE* const*) tableBase; return hashTable[h]; } if (tableType == byU32) { const U32* const hashTable = (const U32*) tableBase; return hashTable[h] + srcBase; } { const U16* const hashTable = (const U16*) tableBase; return hashTable[h] + srcBase; } /* default, to ensure a return */ } LZ4_FORCE_INLINE const BYTE* LZ4_getPosition(const BYTE* p, const void* tableBase, tableType_t tableType, const BYTE* srcBase) { U32 const h = LZ4_hashPosition(p, tableType); return LZ4_getPositionOnHash(h, tableBase, tableType, srcBase); } LZ4_FORCE_INLINE void LZ4_prepareTable(LZ4_stream_t_internal* const cctx, const int inputSize, const tableType_t tableType) { /* If compression failed during the previous step, then the context * is marked as dirty, therefore, it has to be fully reset. */ if (cctx->dirty) { DEBUGLOG(5, "LZ4_prepareTable: Full reset for %p", cctx); MEM_INIT(cctx, 0, sizeof(LZ4_stream_t_internal)); return; } /* If the table hasn't been used, it's guaranteed to be zeroed out, and is * therefore safe to use no matter what mode we're in. Otherwise, we figure * out if it's safe to leave as is or whether it needs to be reset. */ if (cctx->tableType != clearedTable) { assert(inputSize >= 0); if (cctx->tableType != tableType || ((tableType == byU16) && cctx->currentOffset + (unsigned)inputSize >= 0xFFFFU) || ((tableType == byU32) && cctx->currentOffset > 1 GB) || tableType == byPtr || inputSize >= 4 KB) { DEBUGLOG(4, "LZ4_prepareTable: Resetting table in %p", cctx); MEM_INIT(cctx->hashTable, 0, LZ4_HASHTABLESIZE); cctx->currentOffset = 0; cctx->tableType = clearedTable; } else { DEBUGLOG(4, "LZ4_prepareTable: Re-use hash table (no reset)"); } } /* Adding a gap, so all previous entries are > LZ4_DISTANCE_MAX back, is faster * than compressing without a gap. However, compressing with * currentOffset == 0 is faster still, so we preserve that case. 
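     * (Concretely: the 64 KB bump leaves every pre-existing table index more than
     * LZ4_DISTANCE_MAX bytes behind any new position, so a stale entry can never
     * pass the distance test and be reused as a match.)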
*/ if (cctx->currentOffset != 0 && tableType == byU32) { DEBUGLOG(5, "LZ4_prepareTable: adding 64KB to currentOffset"); cctx->currentOffset += 64 KB; } /* Finally, clear history */ cctx->dictCtx = NULL; cctx->dictionary = NULL; cctx->dictSize = 0; } /** LZ4_compress_generic() : inlined, to ensure branches are decided at compilation time */ LZ4_FORCE_INLINE int LZ4_compress_generic( LZ4_stream_t_internal* const cctx, const char* const source, char* const dest, const int inputSize, int *inputConsumed, /* only written when outputDirective == fillOutput */ const int maxOutputSize, const limitedOutput_directive outputDirective, const tableType_t tableType, const dict_directive dictDirective, const dictIssue_directive dictIssue, const int acceleration) { int result; const BYTE* ip = (const BYTE*) source; U32 const startIndex = cctx->currentOffset; const BYTE* base = (const BYTE*) source - startIndex; const BYTE* lowLimit; const LZ4_stream_t_internal* dictCtx = (const LZ4_stream_t_internal*) cctx->dictCtx; const BYTE* const dictionary = dictDirective == usingDictCtx ? dictCtx->dictionary : cctx->dictionary; const U32 dictSize = dictDirective == usingDictCtx ? dictCtx->dictSize : cctx->dictSize; const U32 dictDelta = (dictDirective == usingDictCtx) ? startIndex - dictCtx->currentOffset : 0; /* make indexes in dictCtx comparable with index in current context */ int const maybe_extMem = (dictDirective == usingExtDict) || (dictDirective == usingDictCtx); U32 const prefixIdxLimit = startIndex - dictSize; /* used when dictDirective == dictSmall */ const BYTE* const dictEnd = dictionary + dictSize; const BYTE* anchor = (const BYTE*) source; const BYTE* const iend = ip + inputSize; const BYTE* const mflimitPlusOne = iend - MFLIMIT + 1; const BYTE* const matchlimit = iend - LASTLITERALS; /* the dictCtx currentOffset is indexed on the start of the dictionary, * while a dictionary in the current context precedes the currentOffset */ const BYTE* dictBase = (dictDirective == usingDictCtx) ? dictionary + dictSize - dictCtx->currentOffset : dictionary + dictSize - startIndex; BYTE* op = (BYTE*) dest; BYTE* const olimit = op + maxOutputSize; U32 offset = 0; U32 forwardH; DEBUGLOG(5, "LZ4_compress_generic: srcSize=%i, tableType=%u", inputSize, tableType); /* If init conditions are not met, we don't have to mark stream * as having dirty context, since no action was taken yet */ if (outputDirective == fillOutput && maxOutputSize < 1) { return 0; } /* Impossible to store anything */ if ((U32)inputSize > (U32)LZ4_MAX_INPUT_SIZE) { return 0; } /* Unsupported inputSize, too large (or negative) */ if ((tableType == byU16) && (inputSize>=LZ4_64Klimit)) { return 0; } /* Size too large (not within 64K limit) */ if (tableType==byPtr) assert(dictDirective==noDict); /* only supported use case with byPtr */ assert(acceleration >= 1); lowLimit = (const BYTE*)source - (dictDirective == withPrefix64k ? dictSize : 0); /* Update context state */ if (dictDirective == usingDictCtx) { /* Subsequent linked blocks can't use the dictionary. */ /* Instead, they use the block we just compressed. 
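       (So in a linked-blocks stream the effective history is always the most recent
       input: block N is matched against block N-1 rather than against the attached
       dictionary context.)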
        */
        cctx->dictCtx = NULL;
        cctx->dictSize = (U32)inputSize;
    } else {
        cctx->dictSize += (U32)inputSize;
    }
    cctx->currentOffset += (U32)inputSize;
    cctx->tableType = (U16)tableType;

    if (inputSize<LZ4_minLength) goto _last_literals;        /* Input too small, no compression (all literals) */

    /* First Byte */
    LZ4_putPosition(ip, cctx->hashTable, tableType, base);
    ip++; forwardH = LZ4_hashPosition(ip, tableType);

    /* Main Loop */
    for ( ; ; ) {
        const BYTE* match;
        BYTE* token;
        const BYTE* filledIp;

        /* Find a match */
        if (tableType == byPtr) {
            const BYTE* forwardIp = ip;
            int step = 1;
            int searchMatchNb = acceleration << LZ4_skipTrigger;
            do {
                U32 const h = forwardH;
                ip = forwardIp;
                forwardIp += step;
                step = (searchMatchNb++ >> LZ4_skipTrigger);

                if (unlikely(forwardIp > mflimitPlusOne)) goto _last_literals;
                assert(ip < mflimitPlusOne);

                match = LZ4_getPositionOnHash(h, cctx->hashTable, tableType, base);
                forwardH = LZ4_hashPosition(forwardIp, tableType);
                LZ4_putPositionOnHash(ip, h, cctx->hashTable, tableType, base);

            } while ( (match+LZ4_DISTANCE_MAX < ip)
                   || (LZ4_read32(match) != LZ4_read32(ip)) );

        } else {   /* byU32, byU16 */

            const BYTE* forwardIp = ip;
            int step = 1;
            int searchMatchNb = acceleration << LZ4_skipTrigger;
            do {
                U32 const h = forwardH;
                U32 const current = (U32)(forwardIp - base);
                U32 matchIndex = LZ4_getIndexOnHash(h, cctx->hashTable, tableType);
                assert(matchIndex <= current);
                assert(forwardIp - base < (ptrdiff_t)(2 GB - 1));
                ip = forwardIp;
                forwardIp += step;
                step = (searchMatchNb++ >> LZ4_skipTrigger);

                if (unlikely(forwardIp > mflimitPlusOne)) goto _last_literals;
                assert(ip < mflimitPlusOne);

                if (dictDirective == usingDictCtx) {
                    if (matchIndex < startIndex) {
                        /* there was no match, try the dictionary */
                        assert(tableType == byU32);
                        matchIndex = LZ4_getIndexOnHash(h, dictCtx->hashTable, byU32);
                        match = dictBase + matchIndex;
                        matchIndex += dictDelta;   /* make dictCtx index comparable with current context */
                        lowLimit = dictionary;
                    } else {
                        match = base + matchIndex;
                        lowLimit = (const BYTE*)source;
                    }
                } else if (dictDirective==usingExtDict) {
                    if (matchIndex < startIndex) {
                        DEBUGLOG(7, "extDict candidate: matchIndex=%5u < startIndex=%5u", matchIndex, startIndex);
                        assert(startIndex - matchIndex >= MINMATCH);
                        match = dictBase + matchIndex;
                        lowLimit = dictionary;
                    } else {
                        match = base + matchIndex;
                        lowLimit = (const BYTE*)source;
                    }
                } else {   /* single continuous memory segment */
                    match = base + matchIndex;
                }
                forwardH = LZ4_hashPosition(forwardIp, tableType);
                LZ4_putIndexOnHash(current, h, cctx->hashTable, tableType);

                DEBUGLOG(7, "candidate at pos=%u  (offset=%u \n", matchIndex, current - matchIndex);
                if ((dictIssue == dictSmall) && (matchIndex < prefixIdxLimit)) { continue; }    /* match outside of valid area */
                assert(matchIndex < current);
                if ( ((tableType != byU16) || (LZ4_DISTANCE_MAX < LZ4_DISTANCE_ABSOLUTE_MAX))
                  && (matchIndex+LZ4_DISTANCE_MAX < current)) {
                    continue;
                }   /* too far */
                assert((current - matchIndex) <= LZ4_DISTANCE_MAX);  /* match now expected within distance */

                if (LZ4_read32(match) == LZ4_read32(ip)) {
                    if (maybe_extMem) offset = current - matchIndex;
                    break;   /* match found */
                }

            } while(1);
        }

        /* Catch up */
        filledIp = ip;
        while (((ip>anchor) & (match > lowLimit)) && (unlikely(ip[-1]==match[-1]))) { ip--; match--; }

        /* Encode Literals */
        {   unsigned const litLength = (unsigned)(ip - anchor);
            token = op++;
            if ((outputDirective == limitedOutput) &&  /* Check output buffer overflow */
                (unlikely(op + litLength + (2 + 1 + LASTLITERALS) + (litLength/255) > olimit)) ) {
                return 0;   /* cannot compress within `dst` budget.
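                             (The bound is conservative: litLength literal bytes, up to
                             litLength/255 extra length bytes, the 1-byte token, a 2-byte
                             offset and LASTLITERALS trailing literals must all still fit.)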
                             Stored indexes in hash table are nonetheless fine */
            }
            if ((outputDirective == fillOutput) &&
                (unlikely(op + (litLength+240)/255 /* litlen */ + litLength /* literals */ + 2 /* offset */ + 1 /* token */ + MFLIMIT - MINMATCH /* min last literals so last match is <= end - MFLIMIT */ > olimit))) {
                op--;
                goto _last_literals;
            }
            if (litLength >= RUN_MASK) {
                int len = (int)(litLength - RUN_MASK);
                *token = (RUN_MASK<<ML_BITS);
                for(; len >= 255 ; len-=255) *op++ = 255;
                *op++ = (BYTE)len;
            }
            else *token = (BYTE)(litLength<<ML_BITS);

            /* Copy Literals */
            LZ4_wildCopy8(op, anchor, op+litLength);
            op+=litLength;
            DEBUGLOG(6, "seq.start:%i, literals=%u, match.start:%i",
                        (int)(anchor-(const BYTE*)source), litLength, (int)(ip-(const BYTE*)source));
        }

_next_match:
        /* at this stage, the following variables must be correctly set :
         * - ip : at start of LZ sequence
         * - match : at start of previous pattern occurence; can be within current prefix, or within extDict
         * - offset : if maybe_ext_memSegment==1 (constant)
         * - lowLimit : must be == dictionary to mean "match is within extDict"; must be == source otherwise
         * - token and *token : position to write 4-bits for match length; higher 4-bits for literal length supposed already written
         */

        if ((outputDirective == fillOutput) &&
            (op + 2 /* offset */ + 1 /* token */ + MFLIMIT - MINMATCH /* min last literals so last match is <= end - MFLIMIT */ > olimit)) {
            /* the match was too close to the end, rewind and go to last literals */
            op = token;
            goto _last_literals;
        }

        /* Encode Offset */
        if (maybe_extMem) {   /* static test */
            DEBUGLOG(6, "             with offset=%u  (ext if > %i)", offset, (int)(ip - (const BYTE*)source));
            assert(offset <= LZ4_DISTANCE_MAX && offset > 0);
            LZ4_writeLE16(op, (U16)offset); op+=2;
        } else {
            DEBUGLOG(6, "             with offset=%u  (same segment)", (U32)(ip - match));
            assert(ip-match <= LZ4_DISTANCE_MAX);
            LZ4_writeLE16(op, (U16)(ip - match)); op+=2;
        }

        /* Encode MatchLength */
        {   unsigned matchCode;

            if ( (dictDirective==usingExtDict || dictDirective==usingDictCtx)
              && (lowLimit==dictionary) /* match within extDict */ ) {
                const BYTE* limit = ip + (dictEnd-match);
                assert(dictEnd > match);
                if (limit > matchlimit) limit = matchlimit;
                matchCode = LZ4_count(ip+MINMATCH, match+MINMATCH, limit);
                ip += (size_t)matchCode + MINMATCH;
                if (ip==limit) {
                    unsigned const more = LZ4_count(limit, (const BYTE*)source, matchlimit);
                    matchCode += more;
                    ip += more;
                }
                DEBUGLOG(6, "             with matchLength=%u starting in extDict", matchCode+MINMATCH);
            } else {
                matchCode = LZ4_count(ip+MINMATCH, match+MINMATCH, matchlimit);
                ip += (size_t)matchCode + MINMATCH;
                DEBUGLOG(6, "             with matchLength=%u", matchCode+MINMATCH);
            }

            if ((outputDirective) &&    /* Check output buffer overflow */
                (unlikely(op + (1 + LASTLITERALS) + (matchCode+240)/255 > olimit)) ) {
                if (outputDirective == fillOutput) {
                    /* Match description too long : reduce it */
                    U32 newMatchCode = 15 /* in token */ - 1 /* to avoid needing a zero byte */ + ((U32)(olimit - op) - 1 - LASTLITERALS) * 255;
                    ip -= matchCode - newMatchCode;
                    assert(newMatchCode < matchCode);
                    matchCode = newMatchCode;
                    if (unlikely(ip <= filledIp)) {
                        /* We have already filled up to filledIp so if ip ends up less than filledIp
                         * we have positions in the hash table beyond the current position. This is
                         * a problem if we reuse the hash table. So we have to remove these positions
                         * from the hash table.
                         */
                        const BYTE* ptr;
                        DEBUGLOG(5, "Clearing %u positions", (U32)(filledIp - ip));
                        for (ptr = ip; ptr <= filledIp; ++ptr) {
                            U32 const h = LZ4_hashPosition(ptr, tableType);
                            LZ4_clearHash(h, cctx->hashTable, tableType);
                        }
                    }
                } else {
                    assert(outputDirective == limitedOutput);
                    return 0;   /* cannot compress within `dst` budget. Stored indexes in hash table are nonetheless fine */
                }
            }
            if (matchCode >= ML_MASK) {
                *token += ML_MASK;
                matchCode -= ML_MASK;
                LZ4_write32(op, 0xFFFFFFFF);
                while (matchCode >= 4*255) {
                    op+=4;
                    LZ4_write32(op, 0xFFFFFFFF);
                    matchCode -= 4*255;
                }
                op += matchCode / 255;
                *op++ = (BYTE)(matchCode % 255);
            } else
                *token += (BYTE)(matchCode);
        }

        /* Ensure we have enough space for the last literals.
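           (Parsing restriction of the block format: the final LASTLITERALS == 5 bytes of a
           block are always literals, and the last match must start at least MFLIMIT == 12
           bytes before the end; the wild-copy routines rely on this slack.)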
         */
        assert(!(outputDirective == fillOutput && op + 1 + LASTLITERALS > olimit));

        anchor = ip;

        /* Test end of chunk */
        if (ip >= mflimitPlusOne) break;

        /* Fill table */
        LZ4_putPosition(ip-2, cctx->hashTable, tableType, base);

        /* Test next position */
        if (tableType == byPtr) {

            match = LZ4_getPosition(ip, cctx->hashTable, tableType, base);
            LZ4_putPosition(ip, cctx->hashTable, tableType, base);
            if ( (match+LZ4_DISTANCE_MAX >= ip)
              && (LZ4_read32(match) == LZ4_read32(ip)) )
            { token=op++; *token=0; goto _next_match; }

        } else {   /* byU32, byU16 */

            U32 const h = LZ4_hashPosition(ip, tableType);
            U32 const current = (U32)(ip-base);
            U32 matchIndex = LZ4_getIndexOnHash(h, cctx->hashTable, tableType);
            assert(matchIndex < current);
            if (dictDirective == usingDictCtx) {
                if (matchIndex < startIndex) {
                    /* there was no match, try the dictionary */
                    matchIndex = LZ4_getIndexOnHash(h, dictCtx->hashTable, byU32);
                    match = dictBase + matchIndex;
                    lowLimit = dictionary;   /* required for match length counter */
                    matchIndex += dictDelta;
                } else {
                    match = base + matchIndex;
                    lowLimit = (const BYTE*)source;   /* required for match length counter */
                }
            } else if (dictDirective==usingExtDict) {
                if (matchIndex < startIndex) {
                    match = dictBase + matchIndex;
                    lowLimit = dictionary;   /* required for match length counter */
                } else {
                    match = base + matchIndex;
                    lowLimit = (const BYTE*)source;   /* required for match length counter */
                }
            } else {   /* single memory segment */
                match = base + matchIndex;
            }
            LZ4_putIndexOnHash(current, h, cctx->hashTable, tableType);
            assert(matchIndex < current);
            if ( ((dictIssue==dictSmall) ? (matchIndex >= prefixIdxLimit) : 1)
              && (((tableType==byU16) && (LZ4_DISTANCE_MAX == LZ4_DISTANCE_ABSOLUTE_MAX)) ? 1 : (matchIndex+LZ4_DISTANCE_MAX >= current))
              && (LZ4_read32(match) == LZ4_read32(ip)) ) {
                token=op++;
                *token=0;
                if (maybe_extMem) offset = current - matchIndex;
                DEBUGLOG(6, "seq.start:%i, literals=%u, match.start:%i",
                            (int)(anchor-(const BYTE*)source), 0, (int)(ip-(const BYTE*)source));
                goto _next_match;
            }
        }

        /* Prepare next loop */
        forwardH = LZ4_hashPosition(++ip, tableType);

    }

_last_literals:
    /* Encode Last Literals */
    {   size_t lastRun = (size_t)(iend - anchor);
        if ( (outputDirective) &&  /* Check output buffer overflow */
            (op + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > olimit)) {
            if (outputDirective == fillOutput) {
                /* adapt lastRun to fill 'dst' */
                assert(olimit >= op);
                lastRun  = (size_t)(olimit-op) - 1;
                lastRun -= (lastRun+240)/255;
            } else {
                assert(outputDirective == limitedOutput);
                return 0;   /* cannot compress within `dst` budget. Stored indexes in hash table are nonetheless fine */
            }
        }
        if (lastRun >= RUN_MASK) {
            size_t accumulator = lastRun - RUN_MASK;
            *op++ = RUN_MASK << ML_BITS;
            for(; accumulator >= 255 ; accumulator-=255) *op++ = 255;
            *op++ = (BYTE) accumulator;
        } else {
            *op++ = (BYTE)(lastRun<<ML_BITS);
        }
        memcpy(op, anchor, lastRun);
        ip = anchor + lastRun;
        op += lastRun;
    }

    if (outputDirective == fillOutput) {
        *inputConsumed = (int) (((const char*)ip)-source);
    }
    DEBUGLOG(5, "LZ4_compress_generic: compressed %i bytes into %i bytes", inputSize, (int)(((char*)op) - dest));
    result = (int)(((char*)op) - dest);
    assert(result > 0);
    return result;
}


int LZ4_compress_fast_extState(void* state, const char* source, char* dest, int inputSize, int maxOutputSize, int acceleration)
{
    LZ4_stream_t_internal* const ctx = & LZ4_initStream(state, sizeof(LZ4_stream_t)) -> internal_donotuse;
    assert(ctx != NULL);
    if (acceleration < 1) acceleration = ACCELERATION_DEFAULT;
    if (maxOutputSize >= LZ4_compressBound(inputSize)) {
        if (inputSize < LZ4_64Klimit) {
            return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, 0, notLimited, byU16, noDict, noDictIssue, acceleration);
        } else {
            const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)source > LZ4_DISTANCE_MAX)) ?
byPtr : byU32; return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, 0, notLimited, tableType, noDict, noDictIssue, acceleration); } } else { if (inputSize < LZ4_64Klimit) { return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, byU16, noDict, noDictIssue, acceleration); } else { const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)source > LZ4_DISTANCE_MAX)) ? byPtr : byU32; return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, noDict, noDictIssue, acceleration); } } } /** * LZ4_compress_fast_extState_fastReset() : * A variant of LZ4_compress_fast_extState(). * * Using this variant avoids an expensive initialization step. It is only safe * to call if the state buffer is known to be correctly initialized already * (see comment in lz4.h on LZ4_resetStream_fast() for a definition of * "correctly initialized"). */ int LZ4_compress_fast_extState_fastReset(void* state, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration) { LZ4_stream_t_internal* ctx = &((LZ4_stream_t*)state)->internal_donotuse; if (acceleration < 1) acceleration = ACCELERATION_DEFAULT; if (dstCapacity >= LZ4_compressBound(srcSize)) { if (srcSize < LZ4_64Klimit) { const tableType_t tableType = byU16; LZ4_prepareTable(ctx, srcSize, tableType); if (ctx->currentOffset) { return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, 0, notLimited, tableType, noDict, dictSmall, acceleration); } else { return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, 0, notLimited, tableType, noDict, noDictIssue, acceleration); } } else { const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)src > LZ4_DISTANCE_MAX)) ? byPtr : byU32; LZ4_prepareTable(ctx, srcSize, tableType); return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, 0, notLimited, tableType, noDict, noDictIssue, acceleration); } } else { if (srcSize < LZ4_64Klimit) { const tableType_t tableType = byU16; LZ4_prepareTable(ctx, srcSize, tableType); if (ctx->currentOffset) { return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, dstCapacity, limitedOutput, tableType, noDict, dictSmall, acceleration); } else { return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, dstCapacity, limitedOutput, tableType, noDict, noDictIssue, acceleration); } } else { const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)src > LZ4_DISTANCE_MAX)) ? 
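                /* 32-bit builds whose source buffer sits above address LZ4_DISTANCE_MAX use
                   the pointer-based table (byPtr); everything else uses 32-bit indexes (byU32) */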
byPtr : byU32; LZ4_prepareTable(ctx, srcSize, tableType); return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, dstCapacity, limitedOutput, tableType, noDict, noDictIssue, acceleration); } } } int LZ4_compress_fast(const char* source, char* dest, int inputSize, int maxOutputSize, int acceleration) { int result; #if (LZ4_HEAPMODE) LZ4_stream_t* ctxPtr = ALLOC(sizeof(LZ4_stream_t)); /* malloc-calloc always properly aligned */ if (ctxPtr == NULL) return 0; #else LZ4_stream_t ctx; LZ4_stream_t* const ctxPtr = &ctx; #endif result = LZ4_compress_fast_extState(ctxPtr, source, dest, inputSize, maxOutputSize, acceleration); #if (LZ4_HEAPMODE) FREEMEM(ctxPtr); #endif return result; } int LZ4_compress_default(const char* src, char* dst, int srcSize, int maxOutputSize) { return LZ4_compress_fast(src, dst, srcSize, maxOutputSize, 1); } /* hidden debug function */ /* strangely enough, gcc generates faster code when this function is uncommented, even if unused */ int LZ4_compress_fast_force(const char* src, char* dst, int srcSize, int dstCapacity, int acceleration) { LZ4_stream_t ctx; LZ4_initStream(&ctx, sizeof(ctx)); if (srcSize < LZ4_64Klimit) { return LZ4_compress_generic(&ctx.internal_donotuse, src, dst, srcSize, NULL, dstCapacity, limitedOutput, byU16, noDict, noDictIssue, acceleration); } else { tableType_t const addrMode = (sizeof(void*) > 4) ? byU32 : byPtr; return LZ4_compress_generic(&ctx.internal_donotuse, src, dst, srcSize, NULL, dstCapacity, limitedOutput, addrMode, noDict, noDictIssue, acceleration); } } /* Note!: This function leaves the stream in an unclean/broken state! * It is not safe to subsequently use the same state with a _fastReset() or * _continue() call without resetting it. */ static int LZ4_compress_destSize_extState (LZ4_stream_t* state, const char* src, char* dst, int* srcSizePtr, int targetDstSize) { void* const s = LZ4_initStream(state, sizeof (*state)); assert(s != NULL); (void)s; if (targetDstSize >= LZ4_compressBound(*srcSizePtr)) { /* compression success is guaranteed */ return LZ4_compress_fast_extState(state, src, dst, *srcSizePtr, targetDstSize, 1); } else { if (*srcSizePtr < LZ4_64Klimit) { return LZ4_compress_generic(&state->internal_donotuse, src, dst, *srcSizePtr, srcSizePtr, targetDstSize, fillOutput, byU16, noDict, noDictIssue, 1); } else { tableType_t const addrMode = ((sizeof(void*)==4) && ((uptrval)src > LZ4_DISTANCE_MAX)) ? 
byPtr : byU32;
            return LZ4_compress_generic(&state->internal_donotuse, src, dst, *srcSizePtr, srcSizePtr, targetDstSize, fillOutput, addrMode, noDict, noDictIssue, 1);
        }
    }
}


int LZ4_compress_destSize(const char* src, char* dst, int* srcSizePtr, int targetDstSize)
{
#if (LZ4_HEAPMODE)
    LZ4_stream_t* ctx = (LZ4_stream_t*)ALLOC(sizeof(LZ4_stream_t));   /* malloc-calloc always properly aligned */
    if (ctx == NULL) return 0;
#else
    LZ4_stream_t ctxBody;
    LZ4_stream_t* ctx = &ctxBody;
#endif

    int result = LZ4_compress_destSize_extState(ctx, src, dst, srcSizePtr, targetDstSize);

#if (LZ4_HEAPMODE)
    FREEMEM(ctx);
#endif
    return result;
}



/*-******************************
*  Streaming functions
********************************/

LZ4_stream_t* LZ4_createStream(void)
{
    LZ4_stream_t* const lz4s = (LZ4_stream_t*)ALLOC(sizeof(LZ4_stream_t));
    LZ4_STATIC_ASSERT(LZ4_STREAMSIZE >= sizeof(LZ4_stream_t_internal));    /* A compilation error here means LZ4_STREAMSIZE is not large enough */
    DEBUGLOG(4, "LZ4_createStream %p", lz4s);
    if (lz4s == NULL) return NULL;
    LZ4_initStream(lz4s, sizeof(*lz4s));
    return lz4s;
}

#ifndef _MSC_VER  /* for some reason, Visual fails the alignment test on 32-bit x86 :
                     it reports an alignment of 8 bytes,
                     while actually aligning LZ4_stream_t on 4 bytes. */
static size_t LZ4_stream_t_alignment(void)
{
    struct { char c; LZ4_stream_t t; } t_a;
    return sizeof(t_a) - sizeof(t_a.t);
}
#endif

LZ4_stream_t* LZ4_initStream (void* buffer, size_t size)
{
    DEBUGLOG(5, "LZ4_initStream");
    if (buffer == NULL) { return NULL; }
    if (size < sizeof(LZ4_stream_t)) { return NULL; }
#ifndef _MSC_VER  /* for some reason, Visual fails the alignment test on 32-bit x86 :
                     it reports an alignment of 8 bytes,
                     while actually aligning LZ4_stream_t on 4 bytes. */
    if (((size_t)buffer) & (LZ4_stream_t_alignment() - 1)) { return NULL; }  /* alignment check */
#endif
    MEM_INIT(buffer, 0, sizeof(LZ4_stream_t));
    return (LZ4_stream_t*)buffer;
}

/* resetStream is now deprecated,
 * prefer initStream() which is more general */
void LZ4_resetStream (LZ4_stream_t* LZ4_stream)
{
    DEBUGLOG(5, "LZ4_resetStream (ctx:%p)", LZ4_stream);
    MEM_INIT(LZ4_stream, 0, sizeof(LZ4_stream_t));
}

void LZ4_resetStream_fast(LZ4_stream_t* ctx) {
    LZ4_prepareTable(&(ctx->internal_donotuse), 0, byU32);
}

int LZ4_freeStream (LZ4_stream_t* LZ4_stream)
{
    if (!LZ4_stream) return 0;   /* support free on NULL */
    DEBUGLOG(5, "LZ4_freeStream %p", LZ4_stream);
    FREEMEM(LZ4_stream);
    return (0);
}


#define HASH_UNIT sizeof(reg_t)
int LZ4_loadDict (LZ4_stream_t* LZ4_dict, const char* dictionary, int dictSize)
{
    LZ4_stream_t_internal* dict = &LZ4_dict->internal_donotuse;
    const tableType_t tableType = byU32;
    const BYTE* p = (const BYTE*)dictionary;
    const BYTE* const dictEnd = p + dictSize;
    const BYTE* base;

    DEBUGLOG(4, "LZ4_loadDict (%i bytes from %p into %p)", dictSize, dictionary, LZ4_dict);

    /* It's necessary to reset the context,
     * and not just continue it with prepareTable()
     * to avoid any risk of generating overflowing matchIndex
     * when compressing using this dictionary */
    LZ4_resetStream(LZ4_dict);

    /* We always increment the offset by 64 KB, since, if the dict is longer,
     * we truncate it to the last 64k, and if it's shorter, we still want to
     * advance by a whole window length so we can provide the guarantee that
     * there are only valid offsets in the window, which allows an optimization
     * in LZ4_compress_fast_continue() where it uses noDictIssue even when the
     * dictionary isn't a full 64k.
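     *
     * Typical usage (an illustrative sketch, not upstream API docs; dictBuf/dictLen,
     * src/dst and their sizes are assumed to be supplied by the caller) :
     *
     *   LZ4_stream_t ctx;
     *   LZ4_initStream(&ctx, sizeof(ctx));
     *   LZ4_loadDict(&ctx, dictBuf, dictLen);    // prime the hash table
     *   int n = LZ4_compress_fast_continue(&ctx, src, dst, srcLen, dstCap, 1);
     *   // dictBuf must remain valid and unmodified while compressing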
     */
    dict->currentOffset += 64 KB;

    if (dictSize < (int)HASH_UNIT) {
        return 0;
    }

    if ((dictEnd - p) > 64 KB) p = dictEnd - 64 KB;
    base = dictEnd - dict->currentOffset;
    dict->dictionary = p;
    dict->dictSize = (U32)(dictEnd - p);
    dict->tableType = tableType;

    while (p <= dictEnd-HASH_UNIT) {
        LZ4_putPosition(p, dict->hashTable, tableType, base);
        p+=3;
    }

    return (int)dict->dictSize;
}

void LZ4_attach_dictionary(LZ4_stream_t* workingStream, const LZ4_stream_t* dictionaryStream) {
    const LZ4_stream_t_internal* dictCtx = dictionaryStream == NULL ? NULL :
        &(dictionaryStream->internal_donotuse);

    DEBUGLOG(4, "LZ4_attach_dictionary (%p, %p, size %u)",
             workingStream, dictionaryStream,
             dictCtx != NULL ? dictCtx->dictSize : 0);

    /* Calling LZ4_resetStream_fast() here makes sure that changes will not be
     * erased by subsequent calls to LZ4_resetStream_fast() in case stream was
     * marked as having dirty context, e.g. requiring full reset.
     */
    LZ4_resetStream_fast(workingStream);

    if (dictCtx != NULL) {
        /* If the current offset is zero, we will never look in the
         * external dictionary context, since there is no value a table
         * entry can take that indicates a miss. In that case, we need
         * to bump the offset to something non-zero.
         */
        if (workingStream->internal_donotuse.currentOffset == 0) {
            workingStream->internal_donotuse.currentOffset = 64 KB;
        }

        /* Don't actually attach an empty dictionary.
         */
        if (dictCtx->dictSize == 0) {
            dictCtx = NULL;
        }
    }
    workingStream->internal_donotuse.dictCtx = dictCtx;
}


static void LZ4_renormDictT(LZ4_stream_t_internal* LZ4_dict, int nextSize)
{
    assert(nextSize >= 0);
    if (LZ4_dict->currentOffset + (unsigned)nextSize > 0x80000000) {   /* potential ptrdiff_t overflow (32-bits mode) */
        /* rescale hash table */
        U32 const delta = LZ4_dict->currentOffset - 64 KB;
        const BYTE* dictEnd = LZ4_dict->dictionary + LZ4_dict->dictSize;
        int i;
        DEBUGLOG(4, "LZ4_renormDictT");
        for (i=0; i<LZ4_HASH_SIZE_U32; i++) {
            if (LZ4_dict->hashTable[i] < delta) LZ4_dict->hashTable[i]=0;
            else LZ4_dict->hashTable[i] -= delta;
        }
        LZ4_dict->currentOffset = 64 KB;
        if (LZ4_dict->dictSize > 64 KB) LZ4_dict->dictSize = 64 KB;
        LZ4_dict->dictionary = dictEnd - LZ4_dict->dictSize;
    }
}


int LZ4_compress_fast_continue (LZ4_stream_t* LZ4_stream,
                                const char* source, char* dest,
                                int inputSize, int maxOutputSize,
                                int acceleration)
{
    const tableType_t tableType = byU32;
    LZ4_stream_t_internal* streamPtr = &LZ4_stream->internal_donotuse;
    const BYTE* dictEnd = streamPtr->dictionary + streamPtr->dictSize;

    DEBUGLOG(5, "LZ4_compress_fast_continue (inputSize=%i)", inputSize);

    if (streamPtr->dirty) { return 0; }   /* Uninitialized structure detected */
    LZ4_renormDictT(streamPtr, inputSize);   /* avoid index overflow */
    if (acceleration < 1) acceleration = ACCELERATION_DEFAULT;

    /* invalidate tiny dictionaries */
    if ( (streamPtr->dictSize-1 < 4-1)   /* intentional underflow */
      && (dictEnd != (const BYTE*)source) ) {
        DEBUGLOG(5, "LZ4_compress_fast_continue: dictSize(%u) at addr:%p is too small", streamPtr->dictSize, streamPtr->dictionary);
        streamPtr->dictSize = 0;
        streamPtr->dictionary = (const BYTE*)source;
        dictEnd = (const BYTE*)source;
    }

    /* Check overlapping input/dictionary space */
    {   const BYTE* sourceEnd = (const BYTE*) source + inputSize;
        if ((sourceEnd > streamPtr->dictionary) && (sourceEnd < dictEnd)) {
            streamPtr->dictSize = (U32)(dictEnd - sourceEnd);
            if (streamPtr->dictSize > 64 KB) streamPtr->dictSize = 64 KB;
            if (streamPtr->dictSize < 4) streamPtr->dictSize = 0;
            streamPtr->dictionary = dictEnd - streamPtr->dictSize;
        }
    }

    /* prefix mode : source data follows dictionary */
    if (dictEnd ==
(const BYTE*)source) { if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset)) return LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, withPrefix64k, dictSmall, acceleration); else return LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, withPrefix64k, noDictIssue, acceleration); } /* external dictionary mode */ { int result; if (streamPtr->dictCtx) { /* We depend here on the fact that dictCtx'es (produced by * LZ4_loadDict) guarantee that their tables contain no references * to offsets between dictCtx->currentOffset - 64 KB and * dictCtx->currentOffset - dictCtx->dictSize. This makes it safe * to use noDictIssue even when the dict isn't a full 64 KB. */ if (inputSize > 4 KB) { /* For compressing large blobs, it is faster to pay the setup * cost to copy the dictionary's tables into the active context, * so that the compression loop is only looking into one table. */ memcpy(streamPtr, streamPtr->dictCtx, sizeof(LZ4_stream_t)); result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingExtDict, noDictIssue, acceleration); } else { result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingDictCtx, noDictIssue, acceleration); } } else { if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset)) { result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingExtDict, dictSmall, acceleration); } else { result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingExtDict, noDictIssue, acceleration); } } streamPtr->dictionary = (const BYTE*)source; streamPtr->dictSize = (U32)inputSize; return result; } } /* Hidden debug function, to force-test external dictionary mode */ int LZ4_compress_forceExtDict (LZ4_stream_t* LZ4_dict, const char* source, char* dest, int srcSize) { LZ4_stream_t_internal* streamPtr = &LZ4_dict->internal_donotuse; int result; LZ4_renormDictT(streamPtr, srcSize); if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset)) { result = LZ4_compress_generic(streamPtr, source, dest, srcSize, NULL, 0, notLimited, byU32, usingExtDict, dictSmall, 1); } else { result = LZ4_compress_generic(streamPtr, source, dest, srcSize, NULL, 0, notLimited, byU32, usingExtDict, noDictIssue, 1); } streamPtr->dictionary = (const BYTE*)source; streamPtr->dictSize = (U32)srcSize; return result; } /*! LZ4_saveDict() : * If previously compressed data block is not guaranteed to remain available at its memory location, * save it into a safer place (char* safeBuffer). * Note : you don't need to call LZ4_loadDict() afterwards, * dictionary is immediately usable, you can therefore call LZ4_compress_fast_continue(). * Return : saved dictionary size in bytes (necessarily <= dictSize), or 0 if error. 
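 * Usage sketch (illustrative only; `work`, `dst` and their sizes are assumed
 * caller-provided, error handling elided) :
 *   char safe[64 * 1024];
 *   int n = LZ4_compress_fast_continue(&ctx, work, dst, blockLen, dstCap, 1);
 *   int saved = LZ4_saveDict(&ctx, safe, (int)sizeof(safe));
 *   // `work` may now be reused or overwritten; the history now lives in `safe`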
*/ int LZ4_saveDict (LZ4_stream_t* LZ4_dict, char* safeBuffer, int dictSize) { LZ4_stream_t_internal* const dict = &LZ4_dict->internal_donotuse; const BYTE* const previousDictEnd = dict->dictionary + dict->dictSize; if ((U32)dictSize > 64 KB) { dictSize = 64 KB; } /* useless to define a dictionary > 64 KB */ if ((U32)dictSize > dict->dictSize) { dictSize = (int)dict->dictSize; } memmove(safeBuffer, previousDictEnd - dictSize, dictSize); dict->dictionary = (const BYTE*)safeBuffer; dict->dictSize = (U32)dictSize; return dictSize; } /*-******************************* * Decompression functions ********************************/ typedef enum { endOnOutputSize = 0, endOnInputSize = 1 } endCondition_directive; typedef enum { decode_full_block = 0, partial_decode = 1 } earlyEnd_directive; #undef MIN #define MIN(a,b) ( (a) < (b) ? (a) : (b) ) /* Read the variable-length literal or match length. * * ip - pointer to use as input. * lencheck - end ip. Return an error if ip advances >= lencheck. * loop_check - check ip >= lencheck in body of loop. Returns loop_error if so. * initial_check - check ip >= lencheck before start of loop. Returns initial_error if so. * error (output) - error code. Should be set to 0 before call. */ typedef enum { loop_error = -2, initial_error = -1, ok = 0 } variable_length_error; LZ4_FORCE_INLINE unsigned read_variable_length(const BYTE**ip, const BYTE* lencheck, int loop_check, int initial_check, variable_length_error* error) { unsigned length = 0; unsigned s; if (initial_check && unlikely((*ip) >= lencheck)) { /* overflow detection */ *error = initial_error; return length; } do { s = **ip; (*ip)++; length += s; if (loop_check && unlikely((*ip) >= lencheck)) { /* overflow detection */ *error = loop_error; return length; } } while (s==255); return length; } /*! LZ4_decompress_generic() : * This generic decompression function covers all use cases. * It shall be instantiated several times, using different sets of directives. * Note that it is important for performance that this function really get inlined, * in order to remove useless branches during compilation optimization. */ LZ4_FORCE_INLINE int LZ4_decompress_generic( const char* const src, char* const dst, int srcSize, int outputSize, /* If endOnInput==endOnInputSize, this value is `dstCapacity` */ endCondition_directive endOnInput, /* endOnOutputSize, endOnInputSize */ earlyEnd_directive partialDecoding, /* full, partial */ dict_directive dict, /* noDict, withPrefix64k, usingExtDict */ const BYTE* const lowPrefix, /* always <= dst, == dst when no prefix */ const BYTE* const dictStart, /* only if dict==usingExtDict */ const size_t dictSize /* note : = 0 if noDict */ ) { if (src == NULL) { return -1; } { const BYTE* ip = (const BYTE*) src; const BYTE* const iend = ip + srcSize; BYTE* op = (BYTE*) dst; BYTE* const oend = op + outputSize; BYTE* cpy; const BYTE* const dictEnd = (dictStart == NULL) ? NULL : dictStart + dictSize; const int safeDecode = (endOnInput==endOnInputSize); const int checkOffset = ((safeDecode) && (dictSize < (int)(64 KB))); /* Set up the "end" pointers for the shortcut. */ const BYTE* const shortiend = iend - (endOnInput ? 14 : 8) /*maxLL*/ - 2 /*offset*/; const BYTE* const shortoend = oend - (endOnInput ? 
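        /* 14 = largest literal run still encodable in the token itself (RUN_MASK-1);
           18 = largest shortcut match copy, (ML_MASK-1) + MINMATCH */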
14 : 8) /*maxLL*/ - 18 /*maxML*/; const BYTE* match; size_t offset; unsigned token; size_t length; DEBUGLOG(5, "LZ4_decompress_generic (srcSize:%i, dstSize:%i)", srcSize, outputSize); /* Special cases */ assert(lowPrefix <= op); if ((endOnInput) && (unlikely(outputSize==0))) { /* Empty output buffer */ if (partialDecoding) return 0; return ((srcSize==1) && (*ip==0)) ? 0 : -1; } if ((!endOnInput) && (unlikely(outputSize==0))) { return (*ip==0 ? 1 : -1); } if ((endOnInput) && unlikely(srcSize==0)) { return -1; } /* Currently the fast loop shows a regression on qualcomm arm chips. */ #if LZ4_FAST_DEC_LOOP if ((oend - op) < FASTLOOP_SAFE_DISTANCE) { DEBUGLOG(6, "skip fast decode loop"); goto safe_decode; } /* Fast loop : decode sequences as long as output < iend-FASTLOOP_SAFE_DISTANCE */ while (1) { /* Main fastloop assertion: We can always wildcopy FASTLOOP_SAFE_DISTANCE */ assert(oend - op >= FASTLOOP_SAFE_DISTANCE); if (endOnInput) { assert(ip < iend); } token = *ip++; length = token >> ML_BITS; /* literal length */ assert(!endOnInput || ip <= iend); /* ip < iend before the increment */ /* decode literal length */ if (length == RUN_MASK) { variable_length_error error = ok; length += read_variable_length(&ip, iend-RUN_MASK, endOnInput, endOnInput, &error); if (error == initial_error) { goto _output_error; } if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)(op))) { goto _output_error; } /* overflow detection */ if ((safeDecode) && unlikely((uptrval)(ip)+length<(uptrval)(ip))) { goto _output_error; } /* overflow detection */ /* copy literals */ cpy = op+length; LZ4_STATIC_ASSERT(MFLIMIT >= WILDCOPYLENGTH); if (endOnInput) { /* LZ4_decompress_safe() */ if ((cpy>oend-32) || (ip+length>iend-32)) { goto safe_literal_copy; } LZ4_wildCopy32(op, ip, cpy); } else { /* LZ4_decompress_fast() */ if (cpy>oend-8) { goto safe_literal_copy; } LZ4_wildCopy8(op, ip, cpy); /* LZ4_decompress_fast() cannot copy more than 8 bytes at a time : * it doesn't know input length, and only relies on end-of-block properties */ } ip += length; op = cpy; } else { cpy = op+length; if (endOnInput) { /* LZ4_decompress_safe() */ DEBUGLOG(7, "copy %u bytes in a 16-bytes stripe", (unsigned)length); /* We don't need to check oend, since we check it once for each loop below */ if (ip > iend-(16 + 1/*max lit + offset + nextToken*/)) { goto safe_literal_copy; } /* Literals can only be 14, but hope compilers optimize if we copy by a register size */ memcpy(op, ip, 16); } else { /* LZ4_decompress_fast() */ /* LZ4_decompress_fast() cannot copy more than 8 bytes at a time : * it doesn't know input length, and relies on end-of-block properties */ memcpy(op, ip, 8); if (length > 8) { memcpy(op+8, ip+8, 8); } } ip += length; op = cpy; } /* get offset */ offset = LZ4_readLE16(ip); ip+=2; match = op - offset; assert(match <= op); /* get matchlength */ length = token & ML_MASK; if (length == ML_MASK) { variable_length_error error = ok; if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) { goto _output_error; } /* Error : offset outside buffers */ length += read_variable_length(&ip, iend - LASTLITERALS + 1, endOnInput, 0, &error); if (error != ok) { goto _output_error; } if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)op)) { goto _output_error; } /* overflow detection */ length += MINMATCH; if (op + length >= oend - FASTLOOP_SAFE_DISTANCE) { goto safe_match_copy; } } else { length += MINMATCH; if (op + length >= oend - FASTLOOP_SAFE_DISTANCE) { goto safe_match_copy; } /* Fastpath check: Avoids a branch in 
LZ4_wildCopy32 if true */ if ((dict == withPrefix64k) || (match >= lowPrefix)) { if (offset >= 8) { assert(match >= lowPrefix); assert(match <= op); assert(op + 18 <= oend); memcpy(op, match, 8); memcpy(op+8, match+8, 8); memcpy(op+16, match+16, 2); op += length; continue; } } } if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) { goto _output_error; } /* Error : offset outside buffers */ /* match starting within external dictionary */ if ((dict==usingExtDict) && (match < lowPrefix)) { if (unlikely(op+length > oend-LASTLITERALS)) { if (partialDecoding) { length = MIN(length, (size_t)(oend-op)); /* reach end of buffer */ } else { goto _output_error; /* end-of-block condition violated */ } } if (length <= (size_t)(lowPrefix-match)) { /* match fits entirely within external dictionary : just copy */ memmove(op, dictEnd - (lowPrefix-match), length); op += length; } else { /* match stretches into both external dictionary and current block */ size_t const copySize = (size_t)(lowPrefix - match); size_t const restSize = length - copySize; memcpy(op, dictEnd - copySize, copySize); op += copySize; if (restSize > (size_t)(op - lowPrefix)) { /* overlap copy */ BYTE* const endOfMatch = op + restSize; const BYTE* copyFrom = lowPrefix; while (op < endOfMatch) { *op++ = *copyFrom++; } } else { memcpy(op, lowPrefix, restSize); op += restSize; } } continue; } /* copy match within block */ cpy = op + length; assert((op <= oend) && (oend-op >= 32)); if (unlikely(offset<16)) { LZ4_memcpy_using_offset(op, match, cpy, offset); } else { LZ4_wildCopy32(op, match, cpy); } op = cpy; /* wildcopy correction */ } safe_decode: #endif /* Main Loop : decode remaining sequences where output < FASTLOOP_SAFE_DISTANCE */ while (1) { token = *ip++; length = token >> ML_BITS; /* literal length */ assert(!endOnInput || ip <= iend); /* ip < iend before the increment */ /* A two-stage shortcut for the most common case: * 1) If the literal length is 0..14, and there is enough space, * enter the shortcut and copy 16 bytes on behalf of the literals * (in the fast mode, only 8 bytes can be safely copied this way). * 2) Further if the match length is 4..18, copy 18 bytes in a similar * manner; but we ensure that there's enough space in the output for * those 18 bytes earlier, upon entering the shortcut (in other words, * there is a combined check for both stages). */ if ( (endOnInput ? length != RUN_MASK : length <= 8) /* strictly "less than" on input, to re-enter the loop with at least one byte */ && likely((endOnInput ? ip < shortiend : 1) & (op <= shortoend)) ) { /* Copy the literals */ memcpy(op, ip, endOnInput ? 16 : 8); op += length; ip += length; /* The second stage: prepare for match copying, decode full info. * If it doesn't work out, the info won't be wasted. */ length = token & ML_MASK; /* match length */ offset = LZ4_readLE16(ip); ip += 2; match = op - offset; assert(match <= op); /* check overflow */ /* Do not deal with overlapping matches. */ if ( (length != ML_MASK) && (offset >= 8) && (dict==withPrefix64k || match >= lowPrefix) ) { /* Copy the match. */ memcpy(op + 0, match + 0, 8); memcpy(op + 8, match + 8, 8); memcpy(op +16, match +16, 2); op += length + MINMATCH; /* Both stages worked, load the next token. */ continue; } /* The second stage didn't work out, but the info is ready. * Propel it right to the point of match copying. 
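             * Worked example (illustrative): token 0x26 means 2 literals (high nibble)
             * and a match field of 6 (low nibble): copy 2 literal bytes, read the 2-byte
             * little-endian offset, then copy 6+MINMATCH = 10 match bytes. Both nibbles
             * are below their RUN_MASK/ML_MASK escape values, so the shortcut applies.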
*/ goto _copy_match; } /* decode literal length */ if (length == RUN_MASK) { variable_length_error error = ok; length += read_variable_length(&ip, iend-RUN_MASK, endOnInput, endOnInput, &error); if (error == initial_error) { goto _output_error; } if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)(op))) { goto _output_error; } /* overflow detection */ if ((safeDecode) && unlikely((uptrval)(ip)+length<(uptrval)(ip))) { goto _output_error; } /* overflow detection */ } /* copy literals */ cpy = op+length; #if LZ4_FAST_DEC_LOOP safe_literal_copy: #endif LZ4_STATIC_ASSERT(MFLIMIT >= WILDCOPYLENGTH); if ( ((endOnInput) && ((cpy>oend-MFLIMIT) || (ip+length>iend-(2+1+LASTLITERALS))) ) || ((!endOnInput) && (cpy>oend-WILDCOPYLENGTH)) ) { /* We've either hit the input parsing restriction or the output parsing restriction. * If we've hit the input parsing condition then this must be the last sequence. * If we've hit the output parsing condition then we are either using partialDecoding * or we've hit the output parsing condition. */ if (partialDecoding) { /* Since we are partial decoding we may be in this block because of the output parsing * restriction, which is not valid since the output buffer is allowed to be undersized. */ assert(endOnInput); /* If we're in this block because of the input parsing condition, then we must be on the * last sequence (or invalid), so we must check that we exactly consume the input. */ if ((ip+length>iend-(2+1+LASTLITERALS)) && (ip+length != iend)) { goto _output_error; } assert(ip+length <= iend); /* We are finishing in the middle of a literals segment. * Break after the copy. */ if (cpy > oend) { cpy = oend; assert(op<=oend); length = (size_t)(oend-op); } assert(ip+length <= iend); } else { /* We must be on the last sequence because of the parsing limitations so check * that we exactly regenerate the original size (must be exact when !endOnInput). */ if ((!endOnInput) && (cpy != oend)) { goto _output_error; } /* We must be on the last sequence (or invalid) because of the parsing limitations * so check that we exactly consume the input and don't overrun the output buffer. */ if ((endOnInput) && ((ip+length != iend) || (cpy > oend))) { goto _output_error; } } memmove(op, ip, length); /* supports overlapping memory regions, which only matters for in-place decompression scenarios */ ip += length; op += length; /* Necessarily EOF when !partialDecoding. When partialDecoding * it is EOF if we've either filled the output buffer or hit * the input parsing restriction. 
*/ if (!partialDecoding || (cpy == oend) || (ip == iend)) { break; } } else { LZ4_wildCopy8(op, ip, cpy); /* may overwrite up to WILDCOPYLENGTH beyond cpy */ ip += length; op = cpy; } /* get offset */ offset = LZ4_readLE16(ip); ip+=2; match = op - offset; /* get matchlength */ length = token & ML_MASK; _copy_match: if (length == ML_MASK) { variable_length_error error = ok; length += read_variable_length(&ip, iend - LASTLITERALS + 1, endOnInput, 0, &error); if (error != ok) goto _output_error; if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)op)) goto _output_error; /* overflow detection */ } length += MINMATCH; #if LZ4_FAST_DEC_LOOP safe_match_copy: #endif if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) goto _output_error; /* Error : offset outside buffers */ /* match starting within external dictionary */ if ((dict==usingExtDict) && (match < lowPrefix)) { if (unlikely(op+length > oend-LASTLITERALS)) { if (partialDecoding) length = MIN(length, (size_t)(oend-op)); else goto _output_error; /* doesn't respect parsing restriction */ } if (length <= (size_t)(lowPrefix-match)) { /* match fits entirely within external dictionary : just copy */ memmove(op, dictEnd - (lowPrefix-match), length); op += length; } else { /* match stretches into both external dictionary and current block */ size_t const copySize = (size_t)(lowPrefix - match); size_t const restSize = length - copySize; memcpy(op, dictEnd - copySize, copySize); op += copySize; if (restSize > (size_t)(op - lowPrefix)) { /* overlap copy */ BYTE* const endOfMatch = op + restSize; const BYTE* copyFrom = lowPrefix; while (op < endOfMatch) *op++ = *copyFrom++; } else { memcpy(op, lowPrefix, restSize); op += restSize; } } continue; } assert(match >= lowPrefix); /* copy match within block */ cpy = op + length; /* partialDecoding : may end anywhere within the block */ assert(op<=oend); if (partialDecoding && (cpy > oend-MATCH_SAFEGUARD_DISTANCE)) { size_t const mlen = MIN(length, (size_t)(oend-op)); const BYTE* const matchEnd = match + mlen; BYTE* const copyEnd = op + mlen; if (matchEnd > op) { /* overlap copy */ while (op < copyEnd) { *op++ = *match++; } } else { memcpy(op, match, mlen); } op = copyEnd; if (op == oend) { break; } continue; } if (unlikely(offset<8)) { LZ4_write32(op, 0); /* silence msan warning when offset==0 */ op[0] = match[0]; op[1] = match[1]; op[2] = match[2]; op[3] = match[3]; match += inc32table[offset]; memcpy(op+4, match, 4); match -= dec64table[offset]; } else { memcpy(op, match, 8); match += 8; } op += 8; if (unlikely(cpy > oend-MATCH_SAFEGUARD_DISTANCE)) { BYTE* const oCopyLimit = oend - (WILDCOPYLENGTH-1); if (cpy > oend-LASTLITERALS) { goto _output_error; } /* Error : last LASTLITERALS bytes must be literals (uncompressed) */ if (op < oCopyLimit) { LZ4_wildCopy8(op, match, oCopyLimit); match += oCopyLimit - op; op = oCopyLimit; } while (op < cpy) { *op++ = *match++; } } else { memcpy(op, match, 8); if (length > 16) { LZ4_wildCopy8(op+8, match+8, cpy); } } op = cpy; /* wildcopy correction */ } /* end of decoding */ if (endOnInput) { return (int) (((char*)op)-dst); /* Nb of output bytes decoded */ } else { return (int) (((const char*)ip)-src); /* Nb of input bytes read */ } /* Overflow error detected */ _output_error: return (int) (-(((const char*)ip)-src))-1; } } /*===== Instantiate the API decoding functions. 
=====*/ LZ4_FORCE_O2_GCC_PPC64LE int LZ4_decompress_safe(const char* source, char* dest, int compressedSize, int maxDecompressedSize) { return LZ4_decompress_generic(source, dest, compressedSize, maxDecompressedSize, endOnInputSize, decode_full_block, noDict, (BYTE*)dest, NULL, 0); } LZ4_FORCE_O2_GCC_PPC64LE int LZ4_decompress_safe_partial(const char* src, char* dst, int compressedSize, int targetOutputSize, int dstCapacity) { dstCapacity = MIN(targetOutputSize, dstCapacity); return LZ4_decompress_generic(src, dst, compressedSize, dstCapacity, endOnInputSize, partial_decode, noDict, (BYTE*)dst, NULL, 0); } LZ4_FORCE_O2_GCC_PPC64LE int LZ4_decompress_fast(const char* source, char* dest, int originalSize) { return LZ4_decompress_generic(source, dest, 0, originalSize, endOnOutputSize, decode_full_block, withPrefix64k, (BYTE*)dest - 64 KB, NULL, 0); } /*===== Instantiate a few more decoding cases, used more than once. =====*/ LZ4_FORCE_O2_GCC_PPC64LE /* Exported, an obsolete API function. */ int LZ4_decompress_safe_withPrefix64k(const char* source, char* dest, int compressedSize, int maxOutputSize) { return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, endOnInputSize, decode_full_block, withPrefix64k, (BYTE*)dest - 64 KB, NULL, 0); } /* Another obsolete API function, paired with the previous one. */ int LZ4_decompress_fast_withPrefix64k(const char* source, char* dest, int originalSize) { /* LZ4_decompress_fast doesn't validate match offsets, * and thus serves well with any prefixed dictionary. */ return LZ4_decompress_fast(source, dest, originalSize); } LZ4_FORCE_O2_GCC_PPC64LE static int LZ4_decompress_safe_withSmallPrefix(const char* source, char* dest, int compressedSize, int maxOutputSize, size_t prefixSize) { return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, endOnInputSize, decode_full_block, noDict, (BYTE*)dest-prefixSize, NULL, 0); } LZ4_FORCE_O2_GCC_PPC64LE int LZ4_decompress_safe_forceExtDict(const char* source, char* dest, int compressedSize, int maxOutputSize, const void* dictStart, size_t dictSize) { return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, endOnInputSize, decode_full_block, usingExtDict, (BYTE*)dest, (const BYTE*)dictStart, dictSize); } LZ4_FORCE_O2_GCC_PPC64LE static int LZ4_decompress_fast_extDict(const char* source, char* dest, int originalSize, const void* dictStart, size_t dictSize) { return LZ4_decompress_generic(source, dest, 0, originalSize, endOnOutputSize, decode_full_block, usingExtDict, (BYTE*)dest, (const BYTE*)dictStart, dictSize); } /* The "double dictionary" mode, for use with e.g. ring buffers: the first part * of the dictionary is passed as prefix, and the second via dictStart + dictSize. * These routines are used only once, in LZ4_decompress_*_continue(). 
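 *
 * Ring-buffer decode sketch (illustrative only; MAX_BLOCK and the compressed
 * input feed are assumptions of the example, error handling elided) :
 *   LZ4_streamDecode_t* sd = LZ4_createStreamDecode();
 *   static char ring[LZ4_DECODER_RING_BUFFER_SIZE(MAX_BLOCK)];
 *   char* wp = ring;
 *   // per block : n = LZ4_decompress_safe_continue(sd, cmp, wp, cmpLen, MAX_BLOCK);
 *   //             wp += n; wrap wp back to ring when the remaining space drops below
 *   //             MAX_BLOCK (prior output stays addressable as the dictionary)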
*/ LZ4_FORCE_INLINE int LZ4_decompress_safe_doubleDict(const char* source, char* dest, int compressedSize, int maxOutputSize, size_t prefixSize, const void* dictStart, size_t dictSize) { return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, endOnInputSize, decode_full_block, usingExtDict, (BYTE*)dest-prefixSize, (const BYTE*)dictStart, dictSize); } LZ4_FORCE_INLINE int LZ4_decompress_fast_doubleDict(const char* source, char* dest, int originalSize, size_t prefixSize, const void* dictStart, size_t dictSize) { return LZ4_decompress_generic(source, dest, 0, originalSize, endOnOutputSize, decode_full_block, usingExtDict, (BYTE*)dest-prefixSize, (const BYTE*)dictStart, dictSize); } /*===== streaming decompression functions =====*/ LZ4_streamDecode_t* LZ4_createStreamDecode(void) { LZ4_streamDecode_t* lz4s = (LZ4_streamDecode_t*) ALLOC_AND_ZERO(sizeof(LZ4_streamDecode_t)); LZ4_STATIC_ASSERT(LZ4_STREAMDECODESIZE >= sizeof(LZ4_streamDecode_t_internal)); /* A compilation error here means LZ4_STREAMDECODESIZE is not large enough */ return lz4s; } int LZ4_freeStreamDecode (LZ4_streamDecode_t* LZ4_stream) { if (LZ4_stream == NULL) { return 0; } /* support free on NULL */ FREEMEM(LZ4_stream); return 0; } /*! LZ4_setStreamDecode() : * Use this function to instruct where to find the dictionary. * This function is not necessary if previous data is still available where it was decoded. * Loading a size of 0 is allowed (same effect as no dictionary). * @return : 1 if OK, 0 if error */ int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dictionary, int dictSize) { LZ4_streamDecode_t_internal* lz4sd = &LZ4_streamDecode->internal_donotuse; lz4sd->prefixSize = (size_t) dictSize; lz4sd->prefixEnd = (const BYTE*) dictionary + dictSize; lz4sd->externalDict = NULL; lz4sd->extDictSize = 0; return 1; } /*! LZ4_decoderRingBufferSize() : * when setting a ring buffer for streaming decompression (optional scenario), * provides the minimum size of this ring buffer * to be compatible with any source respecting maxBlockSize condition. * Note : in a ring buffer scenario, * blocks are presumed decompressed next to each other. * When not enough space remains for next block (remainingSize < maxBlockSize), * decoding resumes from beginning of ring buffer. * @return : minimum ring buffer size, * or 0 if there is an error (invalid maxBlockSize). */ int LZ4_decoderRingBufferSize(int maxBlockSize) { if (maxBlockSize < 0) return 0; if (maxBlockSize > LZ4_MAX_INPUT_SIZE) return 0; if (maxBlockSize < 16) maxBlockSize = 16; return LZ4_DECODER_RING_BUFFER_SIZE(maxBlockSize); } /* *_continue() : These decoding functions allow decompression of multiple blocks in "streaming" mode. Previously decoded blocks must still be available at the memory position where they were decoded. If it's not possible, save the relevant part of decoded data into a safe buffer, and indicate where it stands using LZ4_setStreamDecode() */ LZ4_FORCE_O2_GCC_PPC64LE int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int compressedSize, int maxOutputSize) { LZ4_streamDecode_t_internal* lz4sd = &LZ4_streamDecode->internal_donotuse; int result; if (lz4sd->prefixSize == 0) { /* The first call, no dictionary yet. 
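Decode with the plain decoder, then record the decoded region as the prefix for subsequent blocks.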
*/ assert(lz4sd->extDictSize == 0); result = LZ4_decompress_safe(source, dest, compressedSize, maxOutputSize); if (result <= 0) return result; lz4sd->prefixSize = (size_t)result; lz4sd->prefixEnd = (BYTE*)dest + result; } else if (lz4sd->prefixEnd == (BYTE*)dest) { /* They're rolling the current segment. */ if (lz4sd->prefixSize >= 64 KB - 1) result = LZ4_decompress_safe_withPrefix64k(source, dest, compressedSize, maxOutputSize); else if (lz4sd->extDictSize == 0) result = LZ4_decompress_safe_withSmallPrefix(source, dest, compressedSize, maxOutputSize, lz4sd->prefixSize); else result = LZ4_decompress_safe_doubleDict(source, dest, compressedSize, maxOutputSize, lz4sd->prefixSize, lz4sd->externalDict, lz4sd->extDictSize); if (result <= 0) return result; lz4sd->prefixSize += (size_t)result; lz4sd->prefixEnd += result; } else { /* The buffer wraps around, or they're switching to another buffer. */ lz4sd->extDictSize = lz4sd->prefixSize; lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize; result = LZ4_decompress_safe_forceExtDict(source, dest, compressedSize, maxOutputSize, lz4sd->externalDict, lz4sd->extDictSize); if (result <= 0) return result; lz4sd->prefixSize = (size_t)result; lz4sd->prefixEnd = (BYTE*)dest + result; } return result; } LZ4_FORCE_O2_GCC_PPC64LE int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int originalSize) { LZ4_streamDecode_t_internal* lz4sd = &LZ4_streamDecode->internal_donotuse; int result; assert(originalSize >= 0); if (lz4sd->prefixSize == 0) { assert(lz4sd->extDictSize == 0); result = LZ4_decompress_fast(source, dest, originalSize); if (result <= 0) return result; lz4sd->prefixSize = (size_t)originalSize; lz4sd->prefixEnd = (BYTE*)dest + originalSize; } else if (lz4sd->prefixEnd == (BYTE*)dest) { if (lz4sd->prefixSize >= 64 KB - 1 || lz4sd->extDictSize == 0) result = LZ4_decompress_fast(source, dest, originalSize); else result = LZ4_decompress_fast_doubleDict(source, dest, originalSize, lz4sd->prefixSize, lz4sd->externalDict, lz4sd->extDictSize); if (result <= 0) return result; lz4sd->prefixSize += (size_t)originalSize; lz4sd->prefixEnd += originalSize; } else { lz4sd->extDictSize = lz4sd->prefixSize; lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize; result = LZ4_decompress_fast_extDict(source, dest, originalSize, lz4sd->externalDict, lz4sd->extDictSize); if (result <= 0) return result; lz4sd->prefixSize = (size_t)originalSize; lz4sd->prefixEnd = (BYTE*)dest + originalSize; } return result; } /* Advanced decoding functions : *_usingDict() : These decoding functions work the same as "_continue" ones, the dictionary must be explicitly provided within parameters */ int LZ4_decompress_safe_usingDict(const char* source, char* dest, int compressedSize, int maxOutputSize, const char* dictStart, int dictSize) { if (dictSize==0) return LZ4_decompress_safe(source, dest, compressedSize, maxOutputSize); if (dictStart+dictSize == dest) { if (dictSize >= 64 KB - 1) { return LZ4_decompress_safe_withPrefix64k(source, dest, compressedSize, maxOutputSize); } assert(dictSize >= 0); return LZ4_decompress_safe_withSmallPrefix(source, dest, compressedSize, maxOutputSize, (size_t)dictSize); } assert(dictSize >= 0); return LZ4_decompress_safe_forceExtDict(source, dest, compressedSize, maxOutputSize, dictStart, (size_t)dictSize); } int LZ4_decompress_fast_usingDict(const char* source, char* dest, int originalSize, const char* dictStart, int dictSize) { if (dictSize==0 || dictStart+dictSize == dest) return 
LZ4_decompress_fast(source, dest, originalSize); assert(dictSize >= 0); return LZ4_decompress_fast_extDict(source, dest, originalSize, dictStart, (size_t)dictSize); } /*=************************************************* * Obsolete Functions ***************************************************/ /* obsolete compression functions */ int LZ4_compress_limitedOutput(const char* source, char* dest, int inputSize, int maxOutputSize) { return LZ4_compress_default(source, dest, inputSize, maxOutputSize); } int LZ4_compress(const char* src, char* dest, int srcSize) { return LZ4_compress_default(src, dest, srcSize, LZ4_compressBound(srcSize)); } int LZ4_compress_limitedOutput_withState (void* state, const char* src, char* dst, int srcSize, int dstSize) { return LZ4_compress_fast_extState(state, src, dst, srcSize, dstSize, 1); } int LZ4_compress_withState (void* state, const char* src, char* dst, int srcSize) { return LZ4_compress_fast_extState(state, src, dst, srcSize, LZ4_compressBound(srcSize), 1); } int LZ4_compress_limitedOutput_continue (LZ4_stream_t* LZ4_stream, const char* src, char* dst, int srcSize, int dstCapacity) { return LZ4_compress_fast_continue(LZ4_stream, src, dst, srcSize, dstCapacity, 1); } int LZ4_compress_continue (LZ4_stream_t* LZ4_stream, const char* source, char* dest, int inputSize) { return LZ4_compress_fast_continue(LZ4_stream, source, dest, inputSize, LZ4_compressBound(inputSize), 1); } /* These decompression functions are deprecated and should no longer be used. They are only provided here for compatibility with older user programs. - LZ4_uncompress is totally equivalent to LZ4_decompress_fast - LZ4_uncompress_unknownOutputSize is totally equivalent to LZ4_decompress_safe */ int LZ4_uncompress (const char* source, char* dest, int outputSize) { return LZ4_decompress_fast(source, dest, outputSize); } int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize) { return LZ4_decompress_safe(source, dest, isize, maxOutputSize); } /* Obsolete Streaming functions */ int LZ4_sizeofStreamState() { return LZ4_STREAMSIZE; } int LZ4_resetStreamState(void* state, char* inputBuffer) { (void)inputBuffer; LZ4_resetStream((LZ4_stream_t*)state); return 0; } void* LZ4_create (char* inputBuffer) { (void)inputBuffer; return LZ4_createStream(); } char* LZ4_slideInputBuffer (void* state) { /* avoid const char * -> char * conversion warning */ return (char *)(uptrval)((LZ4_stream_t*)state)->internal_donotuse.dictionary; } #endif /* LZ4_COMMONDEFS_ONLY */ py-lz4framed-0.14.0/lz4/lz4.h000066400000000000000000001160071357043434000155150ustar00rootroot00000000000000/* * LZ4 - Fast LZ compression algorithm * Header File * Copyright (C) 2011-present, Yann Collet. BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. You can contact the author at : - LZ4 homepage : http://www.lz4.org - LZ4 source repository : https://github.com/lz4/lz4 */ #if defined (__cplusplus) extern "C" { #endif #ifndef LZ4_H_2983827168210 #define LZ4_H_2983827168210 /* --- Dependency --- */ #include <stddef.h> /* size_t */ /** Introduction LZ4 is a lossless compression algorithm, providing compression speed >500 MB/s per core, scalable with multi-core CPUs. It features an extremely fast decoder, with speed in multiple GB/s per core, typically reaching RAM speed limits on multi-core systems. The LZ4 compression library provides in-memory compression and decompression functions. It gives full buffer control to the user. Compression can be done in: - a single step (described as Simple Functions) - a single step, reusing a context (described in Advanced Functions) - unbounded multiple steps (described as Streaming compression) lz4.h generates and decodes LZ4-compressed blocks (doc/lz4_Block_format.md). Decompressing such a compressed block requires additional metadata. Exact metadata depends on the exact decompression function. For the typical case of LZ4_decompress_safe(), metadata includes the block's compressed size, and a maximum bound of the decompressed size. Each application is free to encode and pass such metadata in whichever way it wants. lz4.h only handles blocks; it cannot generate Frames. Blocks are different from Frames (doc/lz4_Frame_format.md). Frames bundle both blocks and metadata in a specified manner. Embedding metadata is required for compressed data to be self-contained and portable. Frame format is delivered through a companion API, declared in lz4frame.h. The `lz4` CLI can only manage frames. */ /*^*************************************************************** * Export parameters *****************************************************************/ /* * LZ4_DLL_EXPORT : * Enable exporting of functions when building a Windows DLL * LZ4LIB_VISIBILITY : * Control library symbols visibility.
*/ #ifndef LZ4LIB_VISIBILITY # if defined(__GNUC__) && (__GNUC__ >= 4) # define LZ4LIB_VISIBILITY __attribute__ ((visibility ("default"))) # else # define LZ4LIB_VISIBILITY # endif #endif #if defined(LZ4_DLL_EXPORT) && (LZ4_DLL_EXPORT==1) # define LZ4LIB_API __declspec(dllexport) LZ4LIB_VISIBILITY #elif defined(LZ4_DLL_IMPORT) && (LZ4_DLL_IMPORT==1) # define LZ4LIB_API __declspec(dllimport) LZ4LIB_VISIBILITY /* It isn't required but allows generating better code, saving a function pointer load from the IAT and an indirect jump.*/ #else # define LZ4LIB_API LZ4LIB_VISIBILITY #endif /*------ Version ------*/ #define LZ4_VERSION_MAJOR 1 /* for breaking interface changes */ #define LZ4_VERSION_MINOR 9 /* for new (non-breaking) interface capabilities */ #define LZ4_VERSION_RELEASE 2 /* for tweaks, bug-fixes, or development */ #define LZ4_VERSION_NUMBER (LZ4_VERSION_MAJOR *100*100 + LZ4_VERSION_MINOR *100 + LZ4_VERSION_RELEASE) #define LZ4_LIB_VERSION LZ4_VERSION_MAJOR.LZ4_VERSION_MINOR.LZ4_VERSION_RELEASE #define LZ4_QUOTE(str) #str #define LZ4_EXPAND_AND_QUOTE(str) LZ4_QUOTE(str) #define LZ4_VERSION_STRING LZ4_EXPAND_AND_QUOTE(LZ4_LIB_VERSION) LZ4LIB_API int LZ4_versionNumber (void); /**< library version number; useful to check dll version */ LZ4LIB_API const char* LZ4_versionString (void); /**< library version string; useful to check dll version */ /*-************************************ * Tuning parameter **************************************/ /*! * LZ4_MEMORY_USAGE : * Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.) * Increasing memory usage improves compression ratio. * Reduced memory usage may improve speed, thanks to better cache locality. * Default value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */ #ifndef LZ4_MEMORY_USAGE # define LZ4_MEMORY_USAGE 14 #endif /*-************************************ * Simple Functions **************************************/ /*! LZ4_compress_default() : * Compresses 'srcSize' bytes from buffer 'src' * into already allocated 'dst' buffer of size 'dstCapacity'. * Compression is guaranteed to succeed if 'dstCapacity' >= LZ4_compressBound(srcSize). * It also runs faster, so it's a recommended setting. * If the function cannot compress 'src' into a more limited 'dst' budget, * compression stops *immediately*, and the function result is zero. * In which case, 'dst' content is undefined (invalid). * srcSize : max supported value is LZ4_MAX_INPUT_SIZE. * dstCapacity : size of buffer 'dst' (which must be already allocated) * @return : the number of bytes written into buffer 'dst' (necessarily <= dstCapacity) * or 0 if compression fails * Note : This function is protected against buffer overflow scenarios (it never writes outside the 'dst' buffer, nor reads outside the 'source' buffer). */ LZ4LIB_API int LZ4_compress_default(const char* src, char* dst, int srcSize, int dstCapacity); /*! LZ4_decompress_safe() : * compressedSize : is the exact complete size of the compressed block. * dstCapacity : is the size of destination buffer (which must be already allocated), presumed an upper bound of decompressed size. * @return : the number of bytes decompressed into destination buffer (necessarily <= dstCapacity) * If the destination buffer is not large enough, decoding stops and outputs an error code (negative value). * If the source stream is detected malformed, the function will stop decoding and return a negative result.
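* A minimal round-trip sketch (illustrative only; 'src64' is a hypothetical 64-byte input buffer) : * char cmp[LZ4_COMPRESSBOUND(64)]; char out[64]; * int const cSize = LZ4_compress_default(src64, cmp, 64, (int)sizeof(cmp)); * int const dSize = (cSize > 0) ? LZ4_decompress_safe(cmp, out, cSize, (int)sizeof(out)) : -1; * On success dSize == 64; a negative dSize signals a malformed block.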
* Note 1 : This function is protected against malicious data packets : * it never writes outside the 'dst' buffer, nor reads outside the 'source' buffer, * even if the compressed block is maliciously modified to order the decoder to do these actions. * In such a case, the decoder stops immediately, and considers the compressed block malformed. * Note 2 : compressedSize and dstCapacity must be provided to the function; the compressed block does not contain them. * The implementation is free to send / store / derive this information in whichever way is most beneficial. * If there is a need for a different format which bundles together both compressed data and its metadata, consider looking at lz4frame.h instead. */ LZ4LIB_API int LZ4_decompress_safe (const char* src, char* dst, int compressedSize, int dstCapacity); /*-************************************ * Advanced Functions **************************************/ #define LZ4_MAX_INPUT_SIZE 0x7E000000 /* 2 113 929 216 bytes */ #define LZ4_COMPRESSBOUND(isize) ((unsigned)(isize) > (unsigned)LZ4_MAX_INPUT_SIZE ? 0 : (isize) + ((isize)/255) + 16) /*! LZ4_compressBound() : Provides the maximum size that LZ4 compression may output in a "worst case" scenario (input data not compressible) This function is primarily useful for memory allocation purposes (destination buffer size). Macro LZ4_COMPRESSBOUND() is also provided for compilation-time evaluation (stack memory allocation for example). Note that LZ4_compress_default() compresses faster when dstCapacity is >= LZ4_compressBound(srcSize) inputSize : max supported value is LZ4_MAX_INPUT_SIZE return : maximum output size in a "worst case" scenario or 0, if input size is incorrect (too large or negative) */ LZ4LIB_API int LZ4_compressBound(int inputSize); /*! LZ4_compress_fast() : Same as LZ4_compress_default(), but allows selection of "acceleration" factor. The larger the acceleration value, the faster the algorithm, but also the lesser the compression. It's a trade-off. It can be fine-tuned, with each successive value providing roughly +~3% to speed. An acceleration value of "1" is the same as regular LZ4_compress_default() Values <= 0 will be replaced by ACCELERATION_DEFAULT (currently == 1, see lz4.c). */ LZ4LIB_API int LZ4_compress_fast (const char* src, char* dst, int srcSize, int dstCapacity, int acceleration); /*! LZ4_compress_fast_extState() : * Same as LZ4_compress_fast(), using an externally allocated memory space for its state. * Use LZ4_sizeofState() to know how much memory must be allocated, * and allocate it on 8-byte boundaries (using `malloc()` typically). * Then, provide this buffer as `void* state` to compression function. */ LZ4LIB_API int LZ4_sizeofState(void); LZ4LIB_API int LZ4_compress_fast_extState (void* state, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration); /*! LZ4_compress_destSize() : * Reverse the logic : compresses as much data as possible from 'src' buffer * into already allocated buffer 'dst', of size >= 'targetDestSize'. * This function either compresses the entire 'src' content into 'dst' if it's large enough, * or fills the 'dst' buffer completely with as much data as possible from 'src'. * note: acceleration parameter is fixed to "default". * * *srcSizePtr : will be modified to indicate how many bytes were read from 'src' to fill 'dst'. * New value is necessarily <= input value. * @return : Nb bytes written into 'dst' (necessarily <= targetDestSize) * or 0 if compression fails.
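* An illustrative sketch (hypothetical 'src'/'inputSize') : fill a fixed 512-byte packet with as much input as fits : * char packet[512]; int srcSize = inputSize; * int const cSize = LZ4_compress_destSize(src, packet, &srcSize, (int)sizeof(packet)); * Afterwards, srcSize holds the number of input bytes actually consumed.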
*/ LZ4LIB_API int LZ4_compress_destSize (const char* src, char* dst, int* srcSizePtr, int targetDstSize); /*! LZ4_decompress_safe_partial() : * Decompress an LZ4 compressed block, of size 'srcSize' at position 'src', * into destination buffer 'dst' of size 'dstCapacity'. * Up to 'targetOutputSize' bytes will be decoded. * The function stops decoding on reaching this objective, * which can boost performance when only the beginning of a block is required. * * @return : the number of bytes decoded in `dst` (necessarily <= dstCapacity) * If the source stream is detected malformed, the function returns a negative result. * * Note : @return can be < targetOutputSize, if the compressed block contains less data. * * Note 2 : this function features 2 parameters, targetOutputSize and dstCapacity, * and expects targetOutputSize <= dstCapacity. * It effectively stops decoding on reaching targetOutputSize, * so dstCapacity is kind of redundant. * This is because in a previous version of this function, * decoding operation would not "break" a sequence in the middle. * As a consequence, there was no guarantee that decoding would stop at exactly targetOutputSize, * it could write more bytes, though only up to dstCapacity. * Some "margin" used to be required for this operation to work properly. * This is no longer necessary. * The function nonetheless keeps its signature, in an effort to not break the API. */ LZ4LIB_API int LZ4_decompress_safe_partial (const char* src, char* dst, int srcSize, int targetOutputSize, int dstCapacity); /*-********************************************* * Streaming Compression Functions ***********************************************/ typedef union LZ4_stream_u LZ4_stream_t; /* incomplete type (defined later) */ LZ4LIB_API LZ4_stream_t* LZ4_createStream(void); LZ4LIB_API int LZ4_freeStream (LZ4_stream_t* streamPtr); /*! LZ4_resetStream_fast() : v1.9.0+ * Use this to prepare an LZ4_stream_t for a new chain of dependent blocks * (e.g., LZ4_compress_fast_continue()). * * An LZ4_stream_t must be initialized once before usage. * This is automatically done when created by LZ4_createStream(). * However, should the LZ4_stream_t be simply declared on stack (for example), * it's necessary to initialize it first, using LZ4_initStream(). * * After init, start any new stream with LZ4_resetStream_fast(). * The same LZ4_stream_t can be re-used multiple times consecutively * and compress multiple streams, * provided that it starts each new stream with LZ4_resetStream_fast(). * * LZ4_resetStream_fast() is much faster than LZ4_initStream(), * but is not compatible with memory regions containing garbage data. * * Note: it's only useful to call LZ4_resetStream_fast() * in the context of streaming compression. * The *extState* functions perform their own resets. * Invoking LZ4_resetStream_fast() before is redundant, and even counterproductive. */ LZ4LIB_API void LZ4_resetStream_fast (LZ4_stream_t* streamPtr); /*! LZ4_loadDict() : * Use this function to reference a static dictionary into LZ4_stream_t. * The dictionary must remain available during compression. * LZ4_loadDict() triggers a reset, so any previous data will be forgotten. * The same dictionary will have to be loaded on decompression side for successful decoding. * Dictionaries are useful for better compression of small data (KB range). * While LZ4 accepts any input as a dictionary, * results are generally better when using Zstandard's Dictionary Builder. * Loading a size of 0 is allowed, and is the same as reset.
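* An illustrative sequence (hypothetical 'dictBuf'/'dictLen' describing the dictionary) : * LZ4_stream_t* const stream = LZ4_createStream(); * int const loaded = LZ4_loadDict(stream, dictBuf, dictLen); * int const cSize = LZ4_compress_fast_continue(stream, src, dst, srcSize, dstCapacity, 1);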
* @return : loaded dictionary size, in bytes (necessarily <= 64 KB) */ LZ4LIB_API int LZ4_loadDict (LZ4_stream_t* streamPtr, const char* dictionary, int dictSize); /*! LZ4_compress_fast_continue() : * Compress 'src' content using data from previously compressed blocks, for better compression ratio. * 'dst' buffer must be already allocated. * If dstCapacity >= LZ4_compressBound(srcSize), compression is guaranteed to succeed, and runs faster. * * @return : size of compressed block * or 0 if there is an error (typically, cannot fit into 'dst'). * * Note 1 : Each invocation of LZ4_compress_fast_continue() generates a new block. * Each block has precise boundaries. * Each block must be decompressed separately, calling LZ4_decompress_*() with relevant metadata. * It's not possible to append blocks together and expect a single invocation of LZ4_decompress_*() to decompress them together. * * Note 2 : The previous 64KB of source data is __assumed__ to remain present, unmodified, at the same address in memory ! * * Note 3 : When input is structured as a double-buffer, each buffer can have any size, including < 64 KB. * Make sure that buffers are separated by at least one byte. * This construction ensures that each block only depends on the previous block. * * Note 4 : If the input buffer is a ring-buffer, it can have any size, including < 64 KB. * * Note 5 : After an error, the stream status is undefined (invalid); it can only be reset or freed. */ LZ4LIB_API int LZ4_compress_fast_continue (LZ4_stream_t* streamPtr, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration); /*! LZ4_saveDict() : * If the last 64KB of data cannot be guaranteed to remain available at its current memory location, * save it into a safer place (char* safeBuffer). * This is schematically equivalent to a memcpy() followed by LZ4_loadDict(), * but is much faster, because LZ4_saveDict() doesn't need to rebuild tables. * @return : saved dictionary size in bytes (necessarily <= maxDictSize), or 0 if error. */ LZ4LIB_API int LZ4_saveDict (LZ4_stream_t* streamPtr, char* safeBuffer, int maxDictSize); /*-********************************************** * Streaming Decompression Functions * Bufferless synchronous API ************************************************/ typedef union LZ4_streamDecode_u LZ4_streamDecode_t; /* tracking context */ /*! LZ4_createStreamDecode() and LZ4_freeStreamDecode() : * creation / destruction of streaming decompression tracking context. * A tracking context can be re-used multiple times. */ LZ4LIB_API LZ4_streamDecode_t* LZ4_createStreamDecode(void); LZ4LIB_API int LZ4_freeStreamDecode (LZ4_streamDecode_t* LZ4_stream); /*! LZ4_setStreamDecode() : * An LZ4_streamDecode_t context can be allocated once and re-used multiple times. * Use this function to start decompression of a new stream of blocks. * A dictionary can optionally be set. Use NULL or size 0 for a reset order. * Dictionary is presumed stable : it must remain accessible and unmodified during next decompression. * @return : 1 if OK, 0 if error */ LZ4LIB_API int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dictionary, int dictSize); /*! LZ4_decoderRingBufferSize() : v1.8.2+ * Note : in a ring buffer scenario (optional), * blocks are presumed decompressed next to each other * up to the moment there is not enough remaining space for next block (remainingSize < maxBlockSize), * at which stage it resumes from the beginning of the ring buffer.
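* (For example, with maxBlockSize == 64 KB the minimum works out to 65536 + 14 + 65536 = 131086 bytes, per the LZ4_DECODER_RING_BUFFER_SIZE() macro defined below.)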
* When setting such a ring buffer for streaming decompression, * provides the minimum size of this ring buffer * to be compatible with any source respecting maxBlockSize condition. * @return : minimum ring buffer size, * or 0 if there is an error (invalid maxBlockSize). */ LZ4LIB_API int LZ4_decoderRingBufferSize(int maxBlockSize); #define LZ4_DECODER_RING_BUFFER_SIZE(maxBlockSize) (65536 + 14 + (maxBlockSize)) /* for static allocation; maxBlockSize presumed valid */ /*! LZ4_decompress_*_continue() : * These decoding functions allow decompression of consecutive blocks in "streaming" mode. * A block is an unsplittable entity; it must be presented entirely to a decompression function. * Decompression functions only accept one block at a time. * The last 64KB of previously decoded data *must* remain available and unmodified at the memory position where they were decoded. * If less than 64KB of data has been decoded, all the data must be present. * * Special : if the decompression side sets a ring buffer, it must respect one of the following conditions : * - Decompression buffer size is _at least_ LZ4_decoderRingBufferSize(maxBlockSize). * maxBlockSize is the maximum size of any single block. It can have any value > 16 bytes. * In which case, encoding and decoding buffers do not need to be synchronized. * Actually, data can be produced by any source compliant with LZ4 format specification, and respecting maxBlockSize. * - Synchronized mode : * Decompression buffer size is _exactly_ the same as compression buffer size, * and follows exactly same update rule (block boundaries at same positions), * and decoding function is provided with exact decompressed size of each block (exception for last block of the stream), * _then_ decoding & encoding ring buffer can have any size, including small ones ( < 64 KB). * - Decompression buffer is larger than encoding buffer, by a minimum of maxBlockSize more bytes. * In which case, encoding and decoding buffers do not need to be synchronized, * and encoding ring buffer can have any size, including small ones ( < 64 KB). * * Whenever these conditions cannot be met, * save the last 64KB of decoded data into a safe buffer where it can't be modified during decompression, * then indicate where this data is saved using LZ4_setStreamDecode(), before decompressing next block. */ LZ4LIB_API int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* src, char* dst, int srcSize, int dstCapacity); /*! LZ4_decompress_*_usingDict() : * These decoding functions work the same as * a combination of LZ4_setStreamDecode() followed by LZ4_decompress_*_continue() * They are stand-alone, and don't need an LZ4_streamDecode_t structure. * Dictionary is presumed stable : it must remain accessible and unmodified during decompression. * Performance tip : Decompression speed can be substantially increased * when dst == dictStart + dictSize. */ LZ4LIB_API int LZ4_decompress_safe_usingDict (const char* src, char* dst, int srcSize, int dstCapacity, const char* dictStart, int dictSize); #endif /* LZ4_H_2983827168210 */ /*^************************************* * !!!!!! STATIC LINKING ONLY !!!!!! ***************************************/ /*-**************************************************************************** * Experimental section * * Symbols declared in this section must be considered unstable. Their * signatures or semantics may change, or they may be removed altogether in the * future.
They are therefore only safe to depend on when the caller is * statically linked against the library. * * To protect against unsafe usage, not only are the declarations guarded, * the definitions are hidden by default * when building LZ4 as a shared/dynamic library. * * In order to access these declarations, * define LZ4_STATIC_LINKING_ONLY in your application * before including LZ4's headers. * * In order to make their implementations accessible dynamically, you must * define LZ4_PUBLISH_STATIC_FUNCTIONS when building the LZ4 library. ******************************************************************************/ #ifdef LZ4_STATIC_LINKING_ONLY #ifndef LZ4_STATIC_3504398509 #define LZ4_STATIC_3504398509 #ifdef LZ4_PUBLISH_STATIC_FUNCTIONS #define LZ4LIB_STATIC_API LZ4LIB_API #else #define LZ4LIB_STATIC_API #endif /*! LZ4_compress_fast_extState_fastReset() : * A variant of LZ4_compress_fast_extState(). * * Using this variant avoids an expensive initialization step. * It is only safe to call if the state buffer is known to be correctly initialized already * (see above comment on LZ4_resetStream_fast() for a definition of "correctly initialized"). * From a high level, the difference is that * this function initializes the provided state with a call to something like LZ4_resetStream_fast() * while LZ4_compress_fast_extState() starts with a call to LZ4_resetStream(). */ LZ4LIB_STATIC_API int LZ4_compress_fast_extState_fastReset (void* state, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration); /*! LZ4_attach_dictionary() : * This is an experimental API that allows * efficient use of a static dictionary many times. * * Rather than re-loading the dictionary buffer into a working context before * each compression, or copying a pre-loaded dictionary's LZ4_stream_t into a * working LZ4_stream_t, this function introduces a no-copy setup mechanism, * in which the working stream references the dictionary stream in-place. * * Several assumptions are made about the state of the dictionary stream. * Currently, only streams which have been prepared by LZ4_loadDict() should * be expected to work. * * Alternatively, the provided dictionaryStream may be NULL, * in which case any existing dictionary stream is unset. * * If a dictionary is provided, it replaces any pre-existing stream history. * The dictionary contents are the only history that can be referenced and * logically immediately precede the data compressed in the first subsequent * compression call. * * The dictionary will only remain attached to the working stream through the * first compression call, at the end of which it is cleared. The dictionary * stream (and source buffer) must remain in-place / accessible / unchanged * through the completion of the first compression call on the stream. */ LZ4LIB_STATIC_API void LZ4_attach_dictionary(LZ4_stream_t* workingStream, const LZ4_stream_t* dictionaryStream); /*! In-place compression and decompression * * It's possible to have input and output sharing the same buffer, * for highly constrained memory environments. * In both cases, it requires input to lie at the end of the buffer, * and decompression to start at the beginning of the buffer. * Buffer size must feature some margin, hence be larger than final size.
* * |<------------------------buffer--------------------------------->| * |<-----------compressed data--------->| * |<-----------decompressed size------------------>| * |<----margin---->| * * This technique is more useful for decompression, * since decompressed size is typically larger, * and margin is short. * * In-place decompression will work inside any buffer * which size is >= LZ4_DECOMPRESS_INPLACE_BUFFER_SIZE(decompressedSize). * This presumes that decompressedSize > compressedSize. * Otherwise, it means compression actually expanded data, * and it would be more efficient to store such data with a flag indicating it's not compressed. * This can happen when data is not compressible (already compressed, or encrypted). * * For in-place compression, margin is larger, as it must be able to cope with both * history preservation, requiring input data to remain unmodified up to LZ4_DISTANCE_MAX, * and data expansion, which can happen when input is not compressible. * As a consequence, buffer size requirements are much higher, * and memory savings offered by in-place compression are more limited. * * There are ways to limit this cost for compression : * - Reduce history size, by modifying LZ4_DISTANCE_MAX. * Note that it is a compile-time constant, so all compressions will apply this limit. * Lower values will reduce compression ratio, except when input_size < LZ4_DISTANCE_MAX, * so it's a reasonable trick when inputs are known to be small. * - Require the compressor to deliver a "maximum compressed size". * This is the `dstCapacity` parameter in `LZ4_compress*()`. * When this size is < LZ4_COMPRESSBOUND(inputSize), then compression can fail, * in which case, the return code will be 0 (zero). * The caller must be ready for these cases to happen, * and typically design a backup scheme to send data uncompressed. * The combination of both techniques can significantly reduce * the amount of margin required for in-place compression. * * In-place compression can work in any buffer * which size is >= (maxCompressedSize) * with maxCompressedSize == LZ4_COMPRESSBOUND(srcSize) for guaranteed compression success. * LZ4_COMPRESS_INPLACE_BUFFER_SIZE() depends on both maxCompressedSize and LZ4_DISTANCE_MAX, * so it's possible to reduce memory requirements by playing with them. */ #define LZ4_DECOMPRESS_INPLACE_MARGIN(compressedSize) (((compressedSize) >> 8) + 32) #define LZ4_DECOMPRESS_INPLACE_BUFFER_SIZE(decompressedSize) ((decompressedSize) + LZ4_DECOMPRESS_INPLACE_MARGIN(decompressedSize)) /**< note: presumes that compressedSize < decompressedSize. 
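(e.g., for decompressedSize == 1048576 the margin is (1048576 >> 8) + 32 == 4128, so a 1052704-byte buffer suffices)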
note2: margin is overestimated a bit, since it could use compressedSize instead */ #ifndef LZ4_DISTANCE_MAX /* history window size; can be user-defined at compile time */ # define LZ4_DISTANCE_MAX 65535 /* set to maximum value by default */ #endif #define LZ4_COMPRESS_INPLACE_MARGIN (LZ4_DISTANCE_MAX + 32) /* LZ4_DISTANCE_MAX can be safely replaced by srcSize when it's smaller */ #define LZ4_COMPRESS_INPLACE_BUFFER_SIZE(maxCompressedSize) ((maxCompressedSize) + LZ4_COMPRESS_INPLACE_MARGIN) /**< maxCompressedSize is generally LZ4_COMPRESSBOUND(inputSize), but can be set to any lower value, with the risk that compression can fail (return code 0(zero)) */ #endif /* LZ4_STATIC_3504398509 */ #endif /* LZ4_STATIC_LINKING_ONLY */ #ifndef LZ4_H_98237428734687 #define LZ4_H_98237428734687 /*-************************************************************ * PRIVATE DEFINITIONS ************************************************************** * Do not use these definitions directly. * They are only exposed to allow static allocation of `LZ4_stream_t` and `LZ4_streamDecode_t`. * Accessing members will expose code to API and/or ABI break in future versions of the library. **************************************************************/ #define LZ4_HASHLOG (LZ4_MEMORY_USAGE-2) #define LZ4_HASHTABLESIZE (1 << LZ4_MEMORY_USAGE) #define LZ4_HASH_SIZE_U32 (1 << LZ4_HASHLOG) /* required as macro for static allocation */ #if defined(__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) #include <stdint.h> typedef struct LZ4_stream_t_internal LZ4_stream_t_internal; struct LZ4_stream_t_internal { uint32_t hashTable[LZ4_HASH_SIZE_U32]; uint32_t currentOffset; uint16_t dirty; uint16_t tableType; const uint8_t* dictionary; const LZ4_stream_t_internal* dictCtx; uint32_t dictSize; }; typedef struct { const uint8_t* externalDict; size_t extDictSize; const uint8_t* prefixEnd; size_t prefixSize; } LZ4_streamDecode_t_internal; #else typedef struct LZ4_stream_t_internal LZ4_stream_t_internal; struct LZ4_stream_t_internal { unsigned int hashTable[LZ4_HASH_SIZE_U32]; unsigned int currentOffset; unsigned short dirty; unsigned short tableType; const unsigned char* dictionary; const LZ4_stream_t_internal* dictCtx; unsigned int dictSize; }; typedef struct { const unsigned char* externalDict; const unsigned char* prefixEnd; size_t extDictSize; size_t prefixSize; } LZ4_streamDecode_t_internal; #endif /*! LZ4_stream_t : * information structure to track an LZ4 stream. * LZ4_stream_t can also be created using LZ4_createStream(), which is recommended. * The structure definition can be convenient for static allocation * (on stack, or as part of larger structure). * Init this structure with LZ4_initStream() before first use. * note : only use this definition in association with static linking ! * this definition is not API/ABI safe, and may change in a future version. */ #define LZ4_STREAMSIZE_U64 ((1 << (LZ4_MEMORY_USAGE-3)) + 4 + ((sizeof(void*)==16) ? 4 : 0) /*AS-400*/ ) #define LZ4_STREAMSIZE (LZ4_STREAMSIZE_U64 * sizeof(unsigned long long)) union LZ4_stream_u { unsigned long long table[LZ4_STREAMSIZE_U64]; LZ4_stream_t_internal internal_donotuse; } ; /* previously typedef'd to LZ4_stream_t */ /*! LZ4_initStream() : v1.9.0+ * An LZ4_stream_t structure must be initialized at least once. * This is automatically done when invoking LZ4_createStream(), * but it's not when the structure is simply declared on stack (for example). * * Use LZ4_initStream() to properly initialize a newly declared LZ4_stream_t.
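* For example (stack allocation sketch) : * LZ4_stream_t ctx; * LZ4_stream_t* const s = LZ4_initStream(&ctx, sizeof(ctx)); * (s == &ctx on success; see the notes below for the failure case).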
* It can also initialize any arbitrary buffer of sufficient size, * and will @return a pointer of proper type upon initialization. * * Note : initialization fails if size and alignment conditions are not respected. * In which case, the function will @return NULL. * Note2: An LZ4_stream_t structure guarantees correct alignment and size. * Note3: Before v1.9.0, use LZ4_resetStream() instead */ LZ4LIB_API LZ4_stream_t* LZ4_initStream (void* buffer, size_t size); /*! LZ4_streamDecode_t : * information structure to track an LZ4 stream during decompression. * init this structure using LZ4_setStreamDecode() before first use. * note : only use in association with static linking ! * this definition is not API/ABI safe, * and may change in a future version ! */ #define LZ4_STREAMDECODESIZE_U64 (4 + ((sizeof(void*)==16) ? 2 : 0) /*AS-400*/ ) #define LZ4_STREAMDECODESIZE (LZ4_STREAMDECODESIZE_U64 * sizeof(unsigned long long)) union LZ4_streamDecode_u { unsigned long long table[LZ4_STREAMDECODESIZE_U64]; LZ4_streamDecode_t_internal internal_donotuse; } ; /* previously typedef'd to LZ4_streamDecode_t */ /*-************************************ * Obsolete Functions **************************************/ /*! Deprecation warnings * * Deprecated functions make the compiler generate a warning when invoked. * This is meant to invite users to update their source code. * Should deprecation warnings be a problem, it is generally possible to disable them, * typically with -Wno-deprecated-declarations for gcc * or _CRT_SECURE_NO_WARNINGS in Visual. * * Another method is to define LZ4_DISABLE_DEPRECATE_WARNINGS * before including the header file. */ #ifdef LZ4_DISABLE_DEPRECATE_WARNINGS # define LZ4_DEPRECATED(message) /* disable deprecation warnings */ #else # define LZ4_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) # if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */ # define LZ4_DEPRECATED(message) [[deprecated(message)]] # elif (LZ4_GCC_VERSION >= 405) || defined(__clang__) # define LZ4_DEPRECATED(message) __attribute__((deprecated(message))) # elif (LZ4_GCC_VERSION >= 301) # define LZ4_DEPRECATED(message) __attribute__((deprecated)) # elif defined(_MSC_VER) # define LZ4_DEPRECATED(message) __declspec(deprecated(message)) # else # pragma message("WARNING: You need to implement LZ4_DEPRECATED for this compiler") # define LZ4_DEPRECATED(message) # endif #endif /* LZ4_DISABLE_DEPRECATE_WARNINGS */ /* Obsolete compression functions */ LZ4_DEPRECATED("use LZ4_compress_default() instead") LZ4LIB_API int LZ4_compress (const char* src, char* dest, int srcSize); LZ4_DEPRECATED("use LZ4_compress_default() instead") LZ4LIB_API int LZ4_compress_limitedOutput (const char* src, char* dest, int srcSize, int maxOutputSize); LZ4_DEPRECATED("use LZ4_compress_fast_extState() instead") LZ4LIB_API int LZ4_compress_withState (void* state, const char* source, char* dest, int inputSize); LZ4_DEPRECATED("use LZ4_compress_fast_extState() instead") LZ4LIB_API int LZ4_compress_limitedOutput_withState (void* state, const char* source, char* dest, int inputSize, int maxOutputSize); LZ4_DEPRECATED("use LZ4_compress_fast_continue() instead") LZ4LIB_API int LZ4_compress_continue (LZ4_stream_t* LZ4_streamPtr, const char* source, char* dest, int inputSize); LZ4_DEPRECATED("use LZ4_compress_fast_continue() instead") LZ4LIB_API int LZ4_compress_limitedOutput_continue (LZ4_stream_t* LZ4_streamPtr, const char* source, char* dest, int inputSize, int maxOutputSize); /* Obsolete decompression functions */ LZ4_DEPRECATED("use 
LZ4_decompress_fast() instead") LZ4LIB_API int LZ4_uncompress (const char* source, char* dest, int outputSize); LZ4_DEPRECATED("use LZ4_decompress_safe() instead") LZ4LIB_API int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize); /* Obsolete streaming functions; degraded functionality; do not use! * * In order to perform streaming compression, these functions depended on data * that is no longer tracked in the state. They have been preserved as well as * possible: using them will still produce a correct output. However, they don't * actually retain any history between compression calls. The compression ratio * achieved will therefore be no better than compressing each chunk * independently. */ LZ4_DEPRECATED("Use LZ4_createStream() instead") LZ4LIB_API void* LZ4_create (char* inputBuffer); LZ4_DEPRECATED("Use LZ4_createStream() instead") LZ4LIB_API int LZ4_sizeofStreamState(void); LZ4_DEPRECATED("Use LZ4_resetStream() instead") LZ4LIB_API int LZ4_resetStreamState(void* state, char* inputBuffer); LZ4_DEPRECATED("Use LZ4_saveDict() instead") LZ4LIB_API char* LZ4_slideInputBuffer (void* state); /* Obsolete streaming decoding functions */ LZ4_DEPRECATED("use LZ4_decompress_safe_usingDict() instead") LZ4LIB_API int LZ4_decompress_safe_withPrefix64k (const char* src, char* dst, int compressedSize, int maxDstSize); LZ4_DEPRECATED("use LZ4_decompress_fast_usingDict() instead") LZ4LIB_API int LZ4_decompress_fast_withPrefix64k (const char* src, char* dst, int originalSize); /*! LZ4_decompress_fast() : **unsafe!** * These functions used to be faster than LZ4_decompress_safe(), * but it has changed, and they are now slower than LZ4_decompress_safe(). * This is because LZ4_decompress_fast() doesn't know the input size, * and therefore must progress more cautiously in the input buffer to not read beyond the end of block. * On top of that `LZ4_decompress_fast()` is not protected vs malformed or malicious inputs, making it a security liability. * As a consequence, LZ4_decompress_fast() is strongly discouraged, and deprecated. * * The last remaining LZ4_decompress_fast() specificity is that * it can decompress a block without knowing its compressed size. * Such functionality could be achieved in a more secure manner, * by also providing the maximum size of input buffer, * but it would require new prototypes, and adaptation of the implementation to this new use case. * * Parameters: * originalSize : is the uncompressed size to regenerate. * `dst` must be already allocated, its size must be >= 'originalSize' bytes. * @return : number of bytes read from source buffer (== compressed size). * The function expects to finish at block's end exactly. * If the source stream is detected malformed, the function stops decoding and returns a negative result. * note : LZ4_decompress_fast*() requires originalSize. Thanks to this information, it never writes past the output buffer. * However, since it doesn't know its 'src' size, it may read an unknown amount of input, past input buffer bounds. * Also, since match offsets are not validated, match reads from 'src' may underflow too. * These issues never happen if input (compressed) data is correct. * But they may happen if input data is invalid (error or intentional tampering). * As a consequence, use these functions in trusted environments with trusted data **only**. */ LZ4_DEPRECATED("This function is deprecated and unsafe. 
Consider using LZ4_decompress_safe() instead") LZ4LIB_API int LZ4_decompress_fast (const char* src, char* dst, int originalSize); LZ4_DEPRECATED("This function is deprecated and unsafe. Consider using LZ4_decompress_safe_continue() instead") LZ4LIB_API int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* src, char* dst, int originalSize); LZ4_DEPRECATED("This function is deprecated and unsafe. Consider using LZ4_decompress_safe_usingDict() instead") LZ4LIB_API int LZ4_decompress_fast_usingDict (const char* src, char* dst, int originalSize, const char* dictStart, int dictSize); /*! LZ4_resetStream() : * An LZ4_stream_t structure must be initialized at least once. * This is done with LZ4_initStream(), or LZ4_resetStream(). * Consider switching to LZ4_initStream(); * invoking LZ4_resetStream() will trigger deprecation warnings in the future. */ LZ4LIB_API void LZ4_resetStream (LZ4_stream_t* streamPtr); #endif /* LZ4_H_98237428734687 */ #if defined (__cplusplus) } #endif py-lz4framed-0.14.0/lz4/lz4frame.c000066400000000000000000002304731357043434000165270ustar00rootroot00000000000000/* * LZ4 auto-framing library * Copyright (C) 2011-2016, Yann Collet. * * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: * * - Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following disclaimer * in the documentation and/or other materials provided with the * distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * You can contact the author at : * - LZ4 homepage : http://www.lz4.org * - LZ4 source repository : https://github.com/lz4/lz4 */ /* LZ4F is a stand-alone API to create LZ4-compressed Frames * in full conformance with specification v1.6.1. * This library relies upon memory management capabilities (malloc, free) * provided either by <stdlib.h>, * or redirected towards another library of the user's choice * (see Memory Routines below). */ /*-************************************ * Compiler Options **************************************/ #ifdef _MSC_VER /* Visual Studio */ # pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ #endif /*-************************************ * Tuning parameters **************************************/ /* * LZ4F_HEAPMODE : * Select how default compression functions will allocate memory for their hash table, * in memory stack (0:default, fastest), or in memory heap (1:requires malloc()).
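* Since the definition below is wrapped in #ifndef, the default can be overridden at build time, e.g. by compiling with -DLZ4F_HEAPMODE=1.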
*/ #ifndef LZ4F_HEAPMODE # define LZ4F_HEAPMODE 0 #endif /*-************************************ * Memory routines **************************************/ /* * User may redirect invocations of * malloc(), calloc() and free() * towards another library or solution of their choice * by modifying the section below. */ #include <stdlib.h> /* malloc, calloc, free */ #ifndef LZ4_SRC_INCLUDED /* avoid redefinition when sources are coalesced */ # define ALLOC(s) malloc(s) # define ALLOC_AND_ZERO(s) calloc(1,(s)) # define FREEMEM(p) free(p) #endif #include <string.h> /* memset, memcpy, memmove */ #ifndef LZ4_SRC_INCLUDED /* avoid redefinition when sources are coalesced */ # define MEM_INIT(p,v,s) memset((p),(v),(s)) #endif /*-************************************ * Library declarations **************************************/ #define LZ4F_STATIC_LINKING_ONLY #include "lz4frame.h" #define LZ4_STATIC_LINKING_ONLY #include "lz4.h" #define LZ4_HC_STATIC_LINKING_ONLY #include "lz4hc.h" #define XXH_STATIC_LINKING_ONLY #include "xxhash.h" /*-************************************ * Debug **************************************/ #if defined(LZ4_DEBUG) && (LZ4_DEBUG>=1) # include <assert.h> #else # ifndef assert # define assert(condition) ((void)0) # endif #endif #define LZ4F_STATIC_ASSERT(c) { enum { LZ4F_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */ #if defined(LZ4_DEBUG) && (LZ4_DEBUG>=2) && !defined(DEBUGLOG) # include <stdio.h> static int g_debuglog_enable = 1; # define DEBUGLOG(l, ...) { \ if ((g_debuglog_enable) && (l<=LZ4_DEBUG)) { \ fprintf(stderr, __FILE__ ": "); \ fprintf(stderr, __VA_ARGS__); \ fprintf(stderr, " \n"); \ } } #else # define DEBUGLOG(l, ...) {} /* disabled */ #endif /*-************************************ * Basic Types **************************************/ #if !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) ) # include <stdint.h> typedef uint8_t BYTE; typedef uint16_t U16; typedef uint32_t U32; typedef int32_t S32; typedef uint64_t U64; #else typedef unsigned char BYTE; typedef unsigned short U16; typedef unsigned int U32; typedef signed int S32; typedef unsigned long long U64; #endif /* unoptimized version; solves endianness & alignment issues */ static U32 LZ4F_readLE32 (const void* src) { const BYTE* const srcPtr = (const BYTE*)src; U32 value32 = srcPtr[0]; value32 += ((U32)srcPtr[1])<< 8; value32 += ((U32)srcPtr[2])<<16; value32 += ((U32)srcPtr[3])<<24; return value32; } static void LZ4F_writeLE32 (void* dst, U32 value32) { BYTE* const dstPtr = (BYTE*)dst; dstPtr[0] = (BYTE)value32; dstPtr[1] = (BYTE)(value32 >> 8); dstPtr[2] = (BYTE)(value32 >> 16); dstPtr[3] = (BYTE)(value32 >> 24); } static U64 LZ4F_readLE64 (const void* src) { const BYTE* const srcPtr = (const BYTE*)src; U64 value64 = srcPtr[0]; value64 += ((U64)srcPtr[1]<<8); value64 += ((U64)srcPtr[2]<<16); value64 += ((U64)srcPtr[3]<<24); value64 += ((U64)srcPtr[4]<<32); value64 += ((U64)srcPtr[5]<<40); value64 += ((U64)srcPtr[6]<<48); value64 += ((U64)srcPtr[7]<<56); return value64; } static void LZ4F_writeLE64 (void* dst, U64 value64) { BYTE* const dstPtr = (BYTE*)dst; dstPtr[0] = (BYTE)value64; dstPtr[1] = (BYTE)(value64 >> 8); dstPtr[2] = (BYTE)(value64 >> 16); dstPtr[3] = (BYTE)(value64 >> 24); dstPtr[4] = (BYTE)(value64 >> 32); dstPtr[5] = (BYTE)(value64 >> 40); dstPtr[6] = (BYTE)(value64 >> 48); dstPtr[7] = (BYTE)(value64 >> 56); } /*-************************************ * Constants **************************************/ #ifndef LZ4_SRC_INCLUDED /* avoid double definition
*/ # define KB *(1<<10) # define MB *(1<<20) # define GB *(1<<30) #endif #define _1BIT 0x01 #define _2BITS 0x03 #define _3BITS 0x07 #define _4BITS 0x0F #define _8BITS 0xFF #define LZ4F_MAGIC_SKIPPABLE_START 0x184D2A50U #define LZ4F_MAGICNUMBER 0x184D2204U #define LZ4F_BLOCKUNCOMPRESSED_FLAG 0x80000000U #define LZ4F_BLOCKSIZEID_DEFAULT LZ4F_max64KB static const size_t minFHSize = LZ4F_HEADER_SIZE_MIN; /* 7 */ static const size_t maxFHSize = LZ4F_HEADER_SIZE_MAX; /* 19 */ static const size_t BHSize = LZ4F_BLOCK_HEADER_SIZE; /* block header : size, and compress flag */ static const size_t BFSize = LZ4F_BLOCK_CHECKSUM_SIZE; /* block footer : checksum (optional) */ /*-************************************ * Structures and local types **************************************/ typedef struct LZ4F_cctx_s { LZ4F_preferences_t prefs; U32 version; U32 cStage; const LZ4F_CDict* cdict; size_t maxBlockSize; size_t maxBufferSize; BYTE* tmpBuff; BYTE* tmpIn; size_t tmpInSize; U64 totalInSize; XXH32_state_t xxh; void* lz4CtxPtr; U16 lz4CtxAlloc; /* sized for: 0 = none, 1 = lz4 ctx, 2 = lz4hc ctx */ U16 lz4CtxState; /* in use as: 0 = none, 1 = lz4 ctx, 2 = lz4hc ctx */ } LZ4F_cctx_t; /*-************************************ * Error management **************************************/ #define LZ4F_GENERATE_STRING(STRING) #STRING, static const char* LZ4F_errorStrings[] = { LZ4F_LIST_ERRORS(LZ4F_GENERATE_STRING) }; unsigned LZ4F_isError(LZ4F_errorCode_t code) { return (code > (LZ4F_errorCode_t)(-LZ4F_ERROR_maxCode)); } const char* LZ4F_getErrorName(LZ4F_errorCode_t code) { static const char* codeError = "Unspecified error code"; if (LZ4F_isError(code)) return LZ4F_errorStrings[-(int)(code)]; return codeError; } LZ4F_errorCodes LZ4F_getErrorCode(size_t functionResult) { if (!LZ4F_isError(functionResult)) return LZ4F_OK_NoError; return (LZ4F_errorCodes)(-(ptrdiff_t)functionResult); } static LZ4F_errorCode_t err0r(LZ4F_errorCodes code) { /* A compilation error here means sizeof(ptrdiff_t) is not large enough */ LZ4F_STATIC_ASSERT(sizeof(ptrdiff_t) >= sizeof(size_t)); return (LZ4F_errorCode_t)-(ptrdiff_t)code; } unsigned LZ4F_getVersion(void) { return LZ4F_VERSION; } int LZ4F_compressionLevel_max(void) { return LZ4HC_CLEVEL_MAX; } size_t LZ4F_getBlockSize(unsigned blockSizeID) { static const size_t blockSizes[4] = { 64 KB, 256 KB, 1 MB, 4 MB }; if (blockSizeID == 0) blockSizeID = LZ4F_BLOCKSIZEID_DEFAULT; if (blockSizeID < LZ4F_max64KB || blockSizeID > LZ4F_max4MB) return err0r(LZ4F_ERROR_maxBlockSize_invalid); blockSizeID -= LZ4F_max64KB; return blockSizes[blockSizeID]; } /*-************************************ * Private functions **************************************/ #define MIN(a,b) ( (a) < (b) ? (a) : (b) ) static BYTE LZ4F_headerChecksum (const void* header, size_t length) { U32 const xxh = XXH32(header, length, 0); return (BYTE)(xxh >> 8); } /*-************************************ * Simple-pass compression functions **************************************/ static LZ4F_blockSizeID_t LZ4F_optimalBSID(const LZ4F_blockSizeID_t requestedBSID, const size_t srcSize) { LZ4F_blockSizeID_t proposedBSID = LZ4F_max64KB; size_t maxBlockSize = 64 KB; while (requestedBSID > proposedBSID) { if (srcSize <= maxBlockSize) return proposedBSID; proposedBSID = (LZ4F_blockSizeID_t)((int)proposedBSID + 1); maxBlockSize <<= 2; } return requestedBSID; } /*! LZ4F_compressBound_internal() : * Provides dstCapacity given a srcSize to guarantee operation success in worst case situations. 
* prefsPtr is optional : if NULL is provided, preferences will be set to cover worst case scenario. * @return is always the same for a srcSize and prefsPtr, so it can be relied upon to size reusable buffers. * When srcSize==0, LZ4F_compressBound() provides an upper bound for LZ4F_flush() and LZ4F_compressEnd() operations. */ static size_t LZ4F_compressBound_internal(size_t srcSize, const LZ4F_preferences_t* preferencesPtr, size_t alreadyBuffered) { LZ4F_preferences_t prefsNull = LZ4F_INIT_PREFERENCES; prefsNull.frameInfo.contentChecksumFlag = LZ4F_contentChecksumEnabled; /* worst case */ prefsNull.frameInfo.blockChecksumFlag = LZ4F_blockChecksumEnabled; /* worst case */ { const LZ4F_preferences_t* const prefsPtr = (preferencesPtr==NULL) ? &prefsNull : preferencesPtr; U32 const flush = prefsPtr->autoFlush | (srcSize==0); LZ4F_blockSizeID_t const blockID = prefsPtr->frameInfo.blockSizeID; size_t const blockSize = LZ4F_getBlockSize(blockID); size_t const maxBuffered = blockSize - 1; size_t const bufferedSize = MIN(alreadyBuffered, maxBuffered); size_t const maxSrcSize = srcSize + bufferedSize; unsigned const nbFullBlocks = (unsigned)(maxSrcSize / blockSize); size_t const partialBlockSize = maxSrcSize & (blockSize-1); size_t const lastBlockSize = flush ? partialBlockSize : 0; unsigned const nbBlocks = nbFullBlocks + (lastBlockSize>0); size_t const blockCRCSize = BFSize * prefsPtr->frameInfo.blockChecksumFlag; size_t const frameEnd = BHSize + (prefsPtr->frameInfo.contentChecksumFlag*BFSize); return ((BHSize + blockCRCSize) * nbBlocks) + (blockSize * nbFullBlocks) + lastBlockSize + frameEnd; } } size_t LZ4F_compressFrameBound(size_t srcSize, const LZ4F_preferences_t* preferencesPtr) { LZ4F_preferences_t prefs; size_t const headerSize = maxFHSize; /* max header size, including optional fields */ if (preferencesPtr!=NULL) prefs = *preferencesPtr; else MEM_INIT(&prefs, 0, sizeof(prefs)); prefs.autoFlush = 1; return headerSize + LZ4F_compressBound_internal(srcSize, &prefs, 0); } /*! LZ4F_compressFrame_usingCDict() : * Compress srcBuffer using a dictionary, in a single step. * cdict can be NULL, in which case, no dictionary is used. * dstBuffer MUST be >= LZ4F_compressFrameBound(srcSize, preferencesPtr). * The LZ4F_preferences_t structure is optional : you may provide NULL as argument, * however, it's the only way to provide a dictID, so passing NULL is not recommended when a dictID is needed.
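 * Illustrative usage (a sketch, not library code; `cctx`, `cdict`, `src` and
 * `srcSize` are assumed to have been prepared by the caller) :
 *     size_t const bound = LZ4F_compressFrameBound(srcSize, NULL);
 *     void* const dst = malloc(bound);
 *     size_t const cSize = LZ4F_compressFrame_usingCDict(cctx, dst, bound,
 *                                                        src, srcSize, cdict, NULL);
 *     if (LZ4F_isError(cSize)) puts(LZ4F_getErrorName(cSize));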
* @return : number of bytes written into dstBuffer, * or an error code if it fails (can be tested using LZ4F_isError()) */ size_t LZ4F_compressFrame_usingCDict(LZ4F_cctx* cctx, void* dstBuffer, size_t dstCapacity, const void* srcBuffer, size_t srcSize, const LZ4F_CDict* cdict, const LZ4F_preferences_t* preferencesPtr) { LZ4F_preferences_t prefs; LZ4F_compressOptions_t options; BYTE* const dstStart = (BYTE*) dstBuffer; BYTE* dstPtr = dstStart; BYTE* const dstEnd = dstStart + dstCapacity; if (preferencesPtr!=NULL) prefs = *preferencesPtr; else MEM_INIT(&prefs, 0, sizeof(prefs)); if (prefs.frameInfo.contentSize != 0) prefs.frameInfo.contentSize = (U64)srcSize; /* auto-correct content size if selected (!=0) */ prefs.frameInfo.blockSizeID = LZ4F_optimalBSID(prefs.frameInfo.blockSizeID, srcSize); prefs.autoFlush = 1; if (srcSize <= LZ4F_getBlockSize(prefs.frameInfo.blockSizeID)) prefs.frameInfo.blockMode = LZ4F_blockIndependent; /* only one block => no need for inter-block link */ MEM_INIT(&options, 0, sizeof(options)); options.stableSrc = 1; if (dstCapacity < LZ4F_compressFrameBound(srcSize, &prefs)) /* condition to guarantee success */ return err0r(LZ4F_ERROR_dstMaxSize_tooSmall); { size_t const headerSize = LZ4F_compressBegin_usingCDict(cctx, dstBuffer, dstCapacity, cdict, &prefs); /* write header */ if (LZ4F_isError(headerSize)) return headerSize; dstPtr += headerSize; /* header size */ } assert(dstEnd >= dstPtr); { size_t const cSize = LZ4F_compressUpdate(cctx, dstPtr, (size_t)(dstEnd-dstPtr), srcBuffer, srcSize, &options); if (LZ4F_isError(cSize)) return cSize; dstPtr += cSize; } assert(dstEnd >= dstPtr); { size_t const tailSize = LZ4F_compressEnd(cctx, dstPtr, (size_t)(dstEnd-dstPtr), &options); /* flush last block, and generate suffix */ if (LZ4F_isError(tailSize)) return tailSize; dstPtr += tailSize; } assert(dstEnd >= dstStart); return (size_t)(dstPtr - dstStart); } /*! LZ4F_compressFrame() : * Compress an entire srcBuffer into a valid LZ4 frame, in a single step. * dstBuffer MUST be >= LZ4F_compressFrameBound(srcSize, preferencesPtr). * The LZ4F_preferences_t structure is optional : you can provide NULL as argument. All preferences will be set to default. * @return : number of bytes written into dstBuffer. 
* or an error code if it fails (can be tested using LZ4F_isError()) */ size_t LZ4F_compressFrame(void* dstBuffer, size_t dstCapacity, const void* srcBuffer, size_t srcSize, const LZ4F_preferences_t* preferencesPtr) { size_t result; #if (LZ4F_HEAPMODE) LZ4F_cctx_t *cctxPtr; result = LZ4F_createCompressionContext(&cctxPtr, LZ4F_VERSION); if (LZ4F_isError(result)) return result; #else LZ4F_cctx_t cctx; LZ4_stream_t lz4ctx; LZ4F_cctx_t *cctxPtr = &cctx; DEBUGLOG(4, "LZ4F_compressFrame"); MEM_INIT(&cctx, 0, sizeof(cctx)); cctx.version = LZ4F_VERSION; cctx.maxBufferSize = 5 MB; /* mess with real buffer size to prevent dynamic allocation; works only because autoflush==1 & stableSrc==1 */ if (preferencesPtr == NULL || preferencesPtr->compressionLevel < LZ4HC_CLEVEL_MIN) { LZ4_initStream(&lz4ctx, sizeof(lz4ctx)); cctxPtr->lz4CtxPtr = &lz4ctx; cctxPtr->lz4CtxAlloc = 1; cctxPtr->lz4CtxState = 1; } #endif result = LZ4F_compressFrame_usingCDict(cctxPtr, dstBuffer, dstCapacity, srcBuffer, srcSize, NULL, preferencesPtr); #if (LZ4F_HEAPMODE) LZ4F_freeCompressionContext(cctxPtr); #else if (preferencesPtr != NULL && preferencesPtr->compressionLevel >= LZ4HC_CLEVEL_MIN) { FREEMEM(cctxPtr->lz4CtxPtr); } #endif return result; } /*-*************************************************** * Dictionary compression *****************************************************/ struct LZ4F_CDict_s { void* dictContent; LZ4_stream_t* fastCtx; LZ4_streamHC_t* HCCtx; }; /* typedef'd to LZ4F_CDict within lz4frame_static.h */ /*! LZ4F_createCDict() : * When compressing multiple messages / blocks with the same dictionary, it's recommended to load it just once. * LZ4F_createCDict() will create a digested dictionary, ready to start future compression operations without startup delay. * LZ4F_CDict can be created once and shared by multiple threads concurrently, since its usage is read-only. * `dictBuffer` can be released after LZ4F_CDict creation, since its content is copied within CDict. * @return : digested dictionary for compression, or NULL if failed */ LZ4F_CDict* LZ4F_createCDict(const void* dictBuffer, size_t dictSize) { const char* dictStart = (const char*)dictBuffer; LZ4F_CDict* cdict = (LZ4F_CDict*) ALLOC(sizeof(*cdict)); DEBUGLOG(4, "LZ4F_createCDict"); if (!cdict) return NULL; if (dictSize > 64 KB) { dictStart += dictSize - 64 KB; dictSize = 64 KB; } cdict->dictContent = ALLOC(dictSize); cdict->fastCtx = LZ4_createStream(); cdict->HCCtx = LZ4_createStreamHC(); if (!cdict->dictContent || !cdict->fastCtx || !cdict->HCCtx) { LZ4F_freeCDict(cdict); return NULL; } memcpy(cdict->dictContent, dictStart, dictSize); LZ4_loadDict (cdict->fastCtx, (const char*)cdict->dictContent, (int)dictSize); LZ4_setCompressionLevel(cdict->HCCtx, LZ4HC_CLEVEL_DEFAULT); LZ4_loadDictHC(cdict->HCCtx, (const char*)cdict->dictContent, (int)dictSize); return cdict; } void LZ4F_freeCDict(LZ4F_CDict* cdict) { if (cdict==NULL) return; /* support free on NULL */ FREEMEM(cdict->dictContent); LZ4_freeStream(cdict->fastCtx); LZ4_freeStreamHC(cdict->HCCtx); FREEMEM(cdict); } /*-********************************* * Advanced compression functions ***********************************/ /*! LZ4F_createCompressionContext() : * The first thing to do is to create a compressionContext object, which will be used in all compression operations. * This is achieved using LZ4F_createCompressionContext(), which takes as argument a version. * The version provided MUST be LZ4F_VERSION.
It is intended to track potential incompatible differences between different binaries. * The function will provide a pointer to an allocated LZ4F_compressionContext_t object. * If the result LZ4F_errorCode_t is not OK_NoError, there was an error during context creation. * Object can release its memory using LZ4F_freeCompressionContext(); */ LZ4F_errorCode_t LZ4F_createCompressionContext(LZ4F_compressionContext_t* LZ4F_compressionContextPtr, unsigned version) { LZ4F_cctx_t* const cctxPtr = (LZ4F_cctx_t*)ALLOC_AND_ZERO(sizeof(LZ4F_cctx_t)); if (cctxPtr==NULL) return err0r(LZ4F_ERROR_allocation_failed); cctxPtr->version = version; cctxPtr->cStage = 0; /* Next stage : init stream */ *LZ4F_compressionContextPtr = (LZ4F_compressionContext_t)cctxPtr; return LZ4F_OK_NoError; } LZ4F_errorCode_t LZ4F_freeCompressionContext(LZ4F_compressionContext_t LZ4F_compressionContext) { LZ4F_cctx_t* const cctxPtr = (LZ4F_cctx_t*)LZ4F_compressionContext; if (cctxPtr != NULL) { /* support free on NULL */ FREEMEM(cctxPtr->lz4CtxPtr); /* works because LZ4_streamHC_t and LZ4_stream_t are simple POD types */ FREEMEM(cctxPtr->tmpBuff); FREEMEM(LZ4F_compressionContext); } return LZ4F_OK_NoError; } /** * This function prepares the internal LZ4(HC) stream for a new compression, * resetting the context and attaching the dictionary, if there is one. * * It needs to be called at the beginning of each independent compression * stream (i.e., at the beginning of a frame in blockLinked mode, or at the * beginning of each block in blockIndependent mode). */ static void LZ4F_initStream(void* ctx, const LZ4F_CDict* cdict, int level, LZ4F_blockMode_t blockMode) { if (level < LZ4HC_CLEVEL_MIN) { if (cdict != NULL || blockMode == LZ4F_blockLinked) { /* In these cases, we will call LZ4_compress_fast_continue(), * which needs an already reset context. Otherwise, we'll call a * one-shot API. The non-continued APIs internally perform their own * resets at the beginning of their calls, where they know what * tableType they need the context to be in. So in that case this * would be misguided / wasted work. */ LZ4_resetStream_fast((LZ4_stream_t*)ctx); } LZ4_attach_dictionary((LZ4_stream_t *)ctx, cdict ? cdict->fastCtx : NULL); } else { LZ4_resetStreamHC_fast((LZ4_streamHC_t*)ctx, level); LZ4_attach_HC_dictionary((LZ4_streamHC_t *)ctx, cdict ? cdict->HCCtx : NULL); } } /*! LZ4F_compressBegin_usingCDict() : * init streaming compression and writes frame header into dstBuffer. * dstBuffer must be >= LZ4F_HEADER_SIZE_MAX bytes. * @return : number of bytes written into dstBuffer for the header * or an error code (can be tested using LZ4F_isError()) */ size_t LZ4F_compressBegin_usingCDict(LZ4F_cctx* cctxPtr, void* dstBuffer, size_t dstCapacity, const LZ4F_CDict* cdict, const LZ4F_preferences_t* preferencesPtr) { LZ4F_preferences_t prefNull; BYTE* const dstStart = (BYTE*)dstBuffer; BYTE* dstPtr = dstStart; BYTE* headerStart; if (dstCapacity < maxFHSize) return err0r(LZ4F_ERROR_dstMaxSize_tooSmall); MEM_INIT(&prefNull, 0, sizeof(prefNull)); if (preferencesPtr == NULL) preferencesPtr = &prefNull; cctxPtr->prefs = *preferencesPtr; /* Ctx Management */ { U16 const ctxTypeID = (cctxPtr->prefs.compressionLevel < LZ4HC_CLEVEL_MIN) ? 
1 : 2; if (cctxPtr->lz4CtxAlloc < ctxTypeID) { FREEMEM(cctxPtr->lz4CtxPtr); if (cctxPtr->prefs.compressionLevel < LZ4HC_CLEVEL_MIN) { cctxPtr->lz4CtxPtr = LZ4_createStream(); } else { cctxPtr->lz4CtxPtr = LZ4_createStreamHC(); } if (cctxPtr->lz4CtxPtr == NULL) return err0r(LZ4F_ERROR_allocation_failed); cctxPtr->lz4CtxAlloc = ctxTypeID; cctxPtr->lz4CtxState = ctxTypeID; } else if (cctxPtr->lz4CtxState != ctxTypeID) { /* otherwise, a sufficient buffer is allocated, but we need to * reset it to the correct context type */ if (cctxPtr->prefs.compressionLevel < LZ4HC_CLEVEL_MIN) { LZ4_initStream((LZ4_stream_t *) cctxPtr->lz4CtxPtr, sizeof (LZ4_stream_t)); } else { LZ4_initStreamHC((LZ4_streamHC_t *) cctxPtr->lz4CtxPtr, sizeof(LZ4_streamHC_t)); LZ4_setCompressionLevel((LZ4_streamHC_t *) cctxPtr->lz4CtxPtr, cctxPtr->prefs.compressionLevel); } cctxPtr->lz4CtxState = ctxTypeID; } } /* Buffer Management */ if (cctxPtr->prefs.frameInfo.blockSizeID == 0) cctxPtr->prefs.frameInfo.blockSizeID = LZ4F_BLOCKSIZEID_DEFAULT; cctxPtr->maxBlockSize = LZ4F_getBlockSize(cctxPtr->prefs.frameInfo.blockSizeID); { size_t const requiredBuffSize = preferencesPtr->autoFlush ? ((cctxPtr->prefs.frameInfo.blockMode == LZ4F_blockLinked) ? 64 KB : 0) : /* only needs past data up to window size */ cctxPtr->maxBlockSize + ((cctxPtr->prefs.frameInfo.blockMode == LZ4F_blockLinked) ? 128 KB : 0); if (cctxPtr->maxBufferSize < requiredBuffSize) { cctxPtr->maxBufferSize = 0; FREEMEM(cctxPtr->tmpBuff); cctxPtr->tmpBuff = (BYTE*)ALLOC_AND_ZERO(requiredBuffSize); if (cctxPtr->tmpBuff == NULL) return err0r(LZ4F_ERROR_allocation_failed); cctxPtr->maxBufferSize = requiredBuffSize; } } cctxPtr->tmpIn = cctxPtr->tmpBuff; cctxPtr->tmpInSize = 0; (void)XXH32_reset(&(cctxPtr->xxh), 0); /* context init */ cctxPtr->cdict = cdict; if (cctxPtr->prefs.frameInfo.blockMode == LZ4F_blockLinked) { /* frame init only for blockLinked : blockIndependent will be init at each block */ LZ4F_initStream(cctxPtr->lz4CtxPtr, cdict, cctxPtr->prefs.compressionLevel, LZ4F_blockLinked); } if (preferencesPtr->compressionLevel >= LZ4HC_CLEVEL_MIN) { LZ4_favorDecompressionSpeed((LZ4_streamHC_t*)cctxPtr->lz4CtxPtr, (int)preferencesPtr->favorDecSpeed); } /* Magic Number */ LZ4F_writeLE32(dstPtr, LZ4F_MAGICNUMBER); dstPtr += 4; headerStart = dstPtr; /* FLG Byte */ *dstPtr++ = (BYTE)(((1 & _2BITS) << 6) /* Version('01') */ + ((cctxPtr->prefs.frameInfo.blockMode & _1BIT ) << 5) + ((cctxPtr->prefs.frameInfo.blockChecksumFlag & _1BIT ) << 4) + ((unsigned)(cctxPtr->prefs.frameInfo.contentSize > 0) << 3) + ((cctxPtr->prefs.frameInfo.contentChecksumFlag & _1BIT ) << 2) + (cctxPtr->prefs.frameInfo.dictID > 0) ); /* BD Byte */ *dstPtr++ = (BYTE)((cctxPtr->prefs.frameInfo.blockSizeID & _3BITS) << 4); /* Optional Frame content size field */ if (cctxPtr->prefs.frameInfo.contentSize) { LZ4F_writeLE64(dstPtr, cctxPtr->prefs.frameInfo.contentSize); dstPtr += 8; cctxPtr->totalInSize = 0; } /* Optional dictionary ID field */ if (cctxPtr->prefs.frameInfo.dictID) { LZ4F_writeLE32(dstPtr, cctxPtr->prefs.frameInfo.dictID); dstPtr += 4; } /* Header CRC Byte */ *dstPtr = LZ4F_headerChecksum(headerStart, (size_t)(dstPtr - headerStart)); dstPtr++; cctxPtr->cStage = 1; /* header written, now request input data block */ return (size_t)(dstPtr - dstStart); } /*! LZ4F_compressBegin() : * init streaming compression and writes frame header into dstBuffer. * dstBuffer must be >= LZ4F_HEADER_SIZE_MAX bytes. * preferencesPtr can be NULL, in which case default parameters are selected. 
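 * Illustrative sketch (error handling elided; `dst` is an assumption of this
 * example and must offer at least LZ4F_HEADER_SIZE_MAX bytes) :
 *     LZ4F_cctx* cctx;
 *     (void)LZ4F_createCompressionContext(&cctx, LZ4F_VERSION);
 *     size_t const headerSize = LZ4F_compressBegin(cctx, dst, LZ4F_HEADER_SIZE_MAX, NULL);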
* @return : number of bytes written into dstBuffer for the header * or an error code (can be tested using LZ4F_isError()) */ size_t LZ4F_compressBegin(LZ4F_cctx* cctxPtr, void* dstBuffer, size_t dstCapacity, const LZ4F_preferences_t* preferencesPtr) { return LZ4F_compressBegin_usingCDict(cctxPtr, dstBuffer, dstCapacity, NULL, preferencesPtr); } /* LZ4F_compressBound() : * @return minimum capacity of dstBuffer for a given srcSize to handle worst case scenario. * LZ4F_preferences_t structure is optional : if NULL, preferences will be set to cover worst case scenario. * This function cannot fail. */ size_t LZ4F_compressBound(size_t srcSize, const LZ4F_preferences_t* preferencesPtr) { return LZ4F_compressBound_internal(srcSize, preferencesPtr, (size_t)-1); } typedef int (*compressFunc_t)(void* ctx, const char* src, char* dst, int srcSize, int dstSize, int level, const LZ4F_CDict* cdict); /*! LZ4F_makeBlock(): * compress a single block, add header and optional checksum. * assumption : dst buffer capacity is >= BHSize + srcSize + crcSize */ static size_t LZ4F_makeBlock(void* dst, const void* src, size_t srcSize, compressFunc_t compress, void* lz4ctx, int level, const LZ4F_CDict* cdict, LZ4F_blockChecksum_t crcFlag) { BYTE* const cSizePtr = (BYTE*)dst; U32 cSize = (U32)compress(lz4ctx, (const char*)src, (char*)(cSizePtr+BHSize), (int)(srcSize), (int)(srcSize-1), level, cdict); if (cSize == 0) { /* compression failed */ cSize = (U32)srcSize; LZ4F_writeLE32(cSizePtr, cSize | LZ4F_BLOCKUNCOMPRESSED_FLAG); memcpy(cSizePtr+BHSize, src, srcSize); } else { LZ4F_writeLE32(cSizePtr, cSize); } if (crcFlag) { U32 const crc32 = XXH32(cSizePtr+BHSize, cSize, 0); /* checksum of compressed data */ LZ4F_writeLE32(cSizePtr+BHSize+cSize, crc32); } return BHSize + cSize + ((U32)crcFlag)*BFSize; } static int LZ4F_compressBlock(void* ctx, const char* src, char* dst, int srcSize, int dstCapacity, int level, const LZ4F_CDict* cdict) { int const acceleration = (level < 0) ? -level + 1 : 1; LZ4F_initStream(ctx, cdict, level, LZ4F_blockIndependent); if (cdict) { return LZ4_compress_fast_continue((LZ4_stream_t*)ctx, src, dst, srcSize, dstCapacity, acceleration); } else { return LZ4_compress_fast_extState_fastReset(ctx, src, dst, srcSize, dstCapacity, acceleration); } } static int LZ4F_compressBlock_continue(void* ctx, const char* src, char* dst, int srcSize, int dstCapacity, int level, const LZ4F_CDict* cdict) { int const acceleration = (level < 0) ? 
-level + 1 : 1; (void)cdict; /* init once at beginning of frame */ return LZ4_compress_fast_continue((LZ4_stream_t*)ctx, src, dst, srcSize, dstCapacity, acceleration); } static int LZ4F_compressBlockHC(void* ctx, const char* src, char* dst, int srcSize, int dstCapacity, int level, const LZ4F_CDict* cdict) { LZ4F_initStream(ctx, cdict, level, LZ4F_blockIndependent); if (cdict) { return LZ4_compress_HC_continue((LZ4_streamHC_t*)ctx, src, dst, srcSize, dstCapacity); } return LZ4_compress_HC_extStateHC_fastReset(ctx, src, dst, srcSize, dstCapacity, level); } static int LZ4F_compressBlockHC_continue(void* ctx, const char* src, char* dst, int srcSize, int dstCapacity, int level, const LZ4F_CDict* cdict) { (void)level; (void)cdict; /* init once at beginning of frame */ return LZ4_compress_HC_continue((LZ4_streamHC_t*)ctx, src, dst, srcSize, dstCapacity); } static compressFunc_t LZ4F_selectCompression(LZ4F_blockMode_t blockMode, int level) { if (level < LZ4HC_CLEVEL_MIN) { if (blockMode == LZ4F_blockIndependent) return LZ4F_compressBlock; return LZ4F_compressBlock_continue; } if (blockMode == LZ4F_blockIndependent) return LZ4F_compressBlockHC; return LZ4F_compressBlockHC_continue; } static int LZ4F_localSaveDict(LZ4F_cctx_t* cctxPtr) { if (cctxPtr->prefs.compressionLevel < LZ4HC_CLEVEL_MIN) return LZ4_saveDict ((LZ4_stream_t*)(cctxPtr->lz4CtxPtr), (char*)(cctxPtr->tmpBuff), 64 KB); return LZ4_saveDictHC ((LZ4_streamHC_t*)(cctxPtr->lz4CtxPtr), (char*)(cctxPtr->tmpBuff), 64 KB); } typedef enum { notDone, fromTmpBuffer, fromSrcBuffer } LZ4F_lastBlockStatus; /*! LZ4F_compressUpdate() : * LZ4F_compressUpdate() can be called repetitively to compress as much data as necessary. * dstBuffer MUST be >= LZ4F_compressBound(srcSize, preferencesPtr). * LZ4F_compressOptions_t structure is optional : you can provide NULL as argument. * @return : the number of bytes written into dstBuffer. It can be zero, meaning input data was just buffered. 
* or an error code if it fails (which can be tested using LZ4F_isError()) */ size_t LZ4F_compressUpdate(LZ4F_cctx* cctxPtr, void* dstBuffer, size_t dstCapacity, const void* srcBuffer, size_t srcSize, const LZ4F_compressOptions_t* compressOptionsPtr) { LZ4F_compressOptions_t cOptionsNull; size_t const blockSize = cctxPtr->maxBlockSize; const BYTE* srcPtr = (const BYTE*)srcBuffer; const BYTE* const srcEnd = srcPtr + srcSize; BYTE* const dstStart = (BYTE*)dstBuffer; BYTE* dstPtr = dstStart; LZ4F_lastBlockStatus lastBlockCompressed = notDone; compressFunc_t const compress = LZ4F_selectCompression(cctxPtr->prefs.frameInfo.blockMode, cctxPtr->prefs.compressionLevel); DEBUGLOG(4, "LZ4F_compressUpdate (srcSize=%zu)", srcSize); if (cctxPtr->cStage != 1) return err0r(LZ4F_ERROR_GENERIC); if (dstCapacity < LZ4F_compressBound_internal(srcSize, &(cctxPtr->prefs), cctxPtr->tmpInSize)) return err0r(LZ4F_ERROR_dstMaxSize_tooSmall); MEM_INIT(&cOptionsNull, 0, sizeof(cOptionsNull)); if (compressOptionsPtr == NULL) compressOptionsPtr = &cOptionsNull; /* complete tmp buffer */ if (cctxPtr->tmpInSize > 0) { /* some data already within tmp buffer */ size_t const sizeToCopy = blockSize - cctxPtr->tmpInSize; if (sizeToCopy > srcSize) { /* add src to tmpIn buffer */ memcpy(cctxPtr->tmpIn + cctxPtr->tmpInSize, srcBuffer, srcSize); srcPtr = srcEnd; cctxPtr->tmpInSize += srcSize; /* still needs some CRC */ } else { /* complete tmpIn block and then compress it */ lastBlockCompressed = fromTmpBuffer; memcpy(cctxPtr->tmpIn + cctxPtr->tmpInSize, srcBuffer, sizeToCopy); srcPtr += sizeToCopy; dstPtr += LZ4F_makeBlock(dstPtr, cctxPtr->tmpIn, blockSize, compress, cctxPtr->lz4CtxPtr, cctxPtr->prefs.compressionLevel, cctxPtr->cdict, cctxPtr->prefs.frameInfo.blockChecksumFlag); if (cctxPtr->prefs.frameInfo.blockMode==LZ4F_blockLinked) cctxPtr->tmpIn += blockSize; cctxPtr->tmpInSize = 0; } } while ((size_t)(srcEnd - srcPtr) >= blockSize) { /* compress full blocks */ lastBlockCompressed = fromSrcBuffer; dstPtr += LZ4F_makeBlock(dstPtr, srcPtr, blockSize, compress, cctxPtr->lz4CtxPtr, cctxPtr->prefs.compressionLevel, cctxPtr->cdict, cctxPtr->prefs.frameInfo.blockChecksumFlag); srcPtr += blockSize; } if ((cctxPtr->prefs.autoFlush) && (srcPtr < srcEnd)) { /* compress remaining input < blockSize */ lastBlockCompressed = fromSrcBuffer; dstPtr += LZ4F_makeBlock(dstPtr, srcPtr, (size_t)(srcEnd - srcPtr), compress, cctxPtr->lz4CtxPtr, cctxPtr->prefs.compressionLevel, cctxPtr->cdict, cctxPtr->prefs.frameInfo.blockChecksumFlag); srcPtr = srcEnd; } /* preserve dictionary if necessary */ if ((cctxPtr->prefs.frameInfo.blockMode==LZ4F_blockLinked) && (lastBlockCompressed==fromSrcBuffer)) { if (compressOptionsPtr->stableSrc) { cctxPtr->tmpIn = cctxPtr->tmpBuff; } else { int const realDictSize = LZ4F_localSaveDict(cctxPtr); if (realDictSize==0) return err0r(LZ4F_ERROR_GENERIC); cctxPtr->tmpIn = cctxPtr->tmpBuff + realDictSize; } } /* keep tmpIn within limits */ if ((cctxPtr->tmpIn + blockSize) > (cctxPtr->tmpBuff + cctxPtr->maxBufferSize) /* necessarily LZ4F_blockLinked && lastBlockCompressed==fromTmpBuffer */ && !(cctxPtr->prefs.autoFlush)) { int const realDictSize = LZ4F_localSaveDict(cctxPtr); cctxPtr->tmpIn = cctxPtr->tmpBuff + realDictSize; } /* some input data left, necessarily < blockSize */ if (srcPtr < srcEnd) { /* fill tmp buffer */ size_t const sizeToCopy = (size_t)(srcEnd - srcPtr); memcpy(cctxPtr->tmpIn, srcPtr, sizeToCopy); cctxPtr->tmpInSize = sizeToCopy; } if (cctxPtr->prefs.frameInfo.contentChecksumFlag == 
LZ4F_contentChecksumEnabled) (void)XXH32_update(&(cctxPtr->xxh), srcBuffer, srcSize); cctxPtr->totalInSize += srcSize; return (size_t)(dstPtr - dstStart); } /*! LZ4F_flush() : * When compressed data must be sent immediately, without waiting for a block to be filled, * invoke LZ4F_flush(), which will immediately compress any remaining data stored within LZ4F_cctx. * The result of the function is the number of bytes written into dstBuffer. * It can be zero, which means there was no data left within LZ4F_cctx. * The function outputs an error code if it fails (can be tested using LZ4F_isError()) * LZ4F_compressOptions_t* is optional. NULL is a valid argument. */ size_t LZ4F_flush(LZ4F_cctx* cctxPtr, void* dstBuffer, size_t dstCapacity, const LZ4F_compressOptions_t* compressOptionsPtr) { BYTE* const dstStart = (BYTE*)dstBuffer; BYTE* dstPtr = dstStart; compressFunc_t compress; if (cctxPtr->tmpInSize == 0) return 0; /* nothing to flush */ if (cctxPtr->cStage != 1) return err0r(LZ4F_ERROR_GENERIC); if (dstCapacity < (cctxPtr->tmpInSize + BHSize + BFSize)) return err0r(LZ4F_ERROR_dstMaxSize_tooSmall); (void)compressOptionsPtr; /* not yet useful */ /* select compression function */ compress = LZ4F_selectCompression(cctxPtr->prefs.frameInfo.blockMode, cctxPtr->prefs.compressionLevel); /* compress tmp buffer */ dstPtr += LZ4F_makeBlock(dstPtr, cctxPtr->tmpIn, cctxPtr->tmpInSize, compress, cctxPtr->lz4CtxPtr, cctxPtr->prefs.compressionLevel, cctxPtr->cdict, cctxPtr->prefs.frameInfo.blockChecksumFlag); assert(((void)"flush overflows dstBuffer!", (size_t)(dstPtr - dstStart) <= dstCapacity)); if (cctxPtr->prefs.frameInfo.blockMode == LZ4F_blockLinked) cctxPtr->tmpIn += cctxPtr->tmpInSize; cctxPtr->tmpInSize = 0; /* keep tmpIn within limits */ if ((cctxPtr->tmpIn + cctxPtr->maxBlockSize) > (cctxPtr->tmpBuff + cctxPtr->maxBufferSize)) { /* necessarily LZ4F_blockLinked */ int const realDictSize = LZ4F_localSaveDict(cctxPtr); cctxPtr->tmpIn = cctxPtr->tmpBuff + realDictSize; } return (size_t)(dstPtr - dstStart); } /*! LZ4F_compressEnd() : * When you want to properly finish the compressed frame, just call LZ4F_compressEnd(). * It will flush whatever data remained within compressionContext (like LZ4F_flush()) * but also properly finalize the frame, with an endMark and an (optional) checksum. * LZ4F_compressOptions_t structure is optional : you can provide NULL as argument. * @return: the number of bytes written into dstBuffer (necessarily >= 4 (endMark size)) * or an error code if it fails (can be tested using LZ4F_isError()) * The context can then be used again to compress a new frame, starting with LZ4F_compressBegin().
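 * A minimal streaming sketch (illustrative only; error checks elided, and
 * `cctx`, `in`, `inSize`, `out`, `outCap` are assumptions of this example, with
 * `outCap` >= LZ4F_HEADER_SIZE_MAX + LZ4F_compressBound(inSize, NULL)) :
 *     size_t off = LZ4F_compressBegin(cctx, out, outCap, NULL);
 *     off += LZ4F_compressUpdate(cctx, out+off, outCap-off, in, inSize, NULL);
 *     off += LZ4F_compressEnd(cctx, out+off, outCap-off, NULL);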
*/ size_t LZ4F_compressEnd(LZ4F_cctx* cctxPtr, void* dstBuffer, size_t dstCapacity, const LZ4F_compressOptions_t* compressOptionsPtr) { BYTE* const dstStart = (BYTE*)dstBuffer; BYTE* dstPtr = dstStart; size_t const flushSize = LZ4F_flush(cctxPtr, dstBuffer, dstCapacity, compressOptionsPtr); if (LZ4F_isError(flushSize)) return flushSize; dstPtr += flushSize; assert(flushSize <= dstCapacity); dstCapacity -= flushSize; if (dstCapacity < 4) return err0r(LZ4F_ERROR_dstMaxSize_tooSmall); LZ4F_writeLE32(dstPtr, 0); dstPtr += 4; /* endMark */ if (cctxPtr->prefs.frameInfo.contentChecksumFlag == LZ4F_contentChecksumEnabled) { U32 const xxh = XXH32_digest(&(cctxPtr->xxh)); if (dstCapacity < 8) return err0r(LZ4F_ERROR_dstMaxSize_tooSmall); LZ4F_writeLE32(dstPtr, xxh); dstPtr+=4; /* content Checksum */ } cctxPtr->cStage = 0; /* state is now re-usable (with identical preferences) */ cctxPtr->maxBufferSize = 0; /* reuse HC context */ if (cctxPtr->prefs.frameInfo.contentSize) { if (cctxPtr->prefs.frameInfo.contentSize != cctxPtr->totalInSize) return err0r(LZ4F_ERROR_frameSize_wrong); } return (size_t)(dstPtr - dstStart); } /*-*************************************************** * Frame Decompression *****************************************************/ typedef enum { dstage_getFrameHeader=0, dstage_storeFrameHeader, dstage_init, dstage_getBlockHeader, dstage_storeBlockHeader, dstage_copyDirect, dstage_getBlockChecksum, dstage_getCBlock, dstage_storeCBlock, dstage_flushOut, dstage_getSuffix, dstage_storeSuffix, dstage_getSFrameSize, dstage_storeSFrameSize, dstage_skipSkippable } dStage_t; struct LZ4F_dctx_s { LZ4F_frameInfo_t frameInfo; U32 version; dStage_t dStage; U64 frameRemainingSize; size_t maxBlockSize; size_t maxBufferSize; BYTE* tmpIn; size_t tmpInSize; size_t tmpInTarget; BYTE* tmpOutBuffer; const BYTE* dict; size_t dictSize; BYTE* tmpOut; size_t tmpOutSize; size_t tmpOutStart; XXH32_state_t xxh; XXH32_state_t blockChecksum; BYTE header[LZ4F_HEADER_SIZE_MAX]; }; /* typedef'd to LZ4F_dctx in lz4frame.h */ /*! LZ4F_createDecompressionContext() : * Create a decompressionContext object, which will track all decompression operations. * Provides a pointer to a fully allocated and initialized LZ4F_decompressionContext object. * Object can later be released using LZ4F_freeDecompressionContext(). * @return : if != 0, there was an error during context creation. */ LZ4F_errorCode_t LZ4F_createDecompressionContext(LZ4F_dctx** LZ4F_decompressionContextPtr, unsigned versionNumber) { LZ4F_dctx* const dctx = (LZ4F_dctx*)ALLOC_AND_ZERO(sizeof(LZ4F_dctx)); if (dctx == NULL) { /* failed allocation */ *LZ4F_decompressionContextPtr = NULL; return err0r(LZ4F_ERROR_allocation_failed); } dctx->version = versionNumber; *LZ4F_decompressionContextPtr = dctx; return LZ4F_OK_NoError; } LZ4F_errorCode_t LZ4F_freeDecompressionContext(LZ4F_dctx* dctx) { LZ4F_errorCode_t result = LZ4F_OK_NoError; if (dctx != NULL) { /* can accept NULL input, like free() */ result = (LZ4F_errorCode_t)dctx->dStage; FREEMEM(dctx->tmpIn); FREEMEM(dctx->tmpOutBuffer); FREEMEM(dctx); } return result; } /*==--- Streaming Decompression operations ---==*/ void LZ4F_resetDecompressionContext(LZ4F_dctx* dctx) { dctx->dStage = dstage_getFrameHeader; dctx->dict = NULL; dctx->dictSize = 0; } /*! LZ4F_decodeHeader() : * input : `src` points at the **beginning of the frame** * output : set internal values of dctx, such as * dctx->frameInfo and dctx->dStage. * Also allocates internal buffers. 
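 * For reference, the layout parsed here follows the LZ4 frame specification :
 *     MagicNb(4B) | FLG(1B) | BD(1B) | [ContentSize(8B)] | [DictID(4B)] | HC(1B)
 * where bracketed fields are present only when the matching FLG bits are set.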
* @return : nb Bytes read from src (necessarily <= srcSize) * or an error code (testable with LZ4F_isError()) */ static size_t LZ4F_decodeHeader(LZ4F_dctx* dctx, const void* src, size_t srcSize) { unsigned blockMode, blockChecksumFlag, contentSizeFlag, contentChecksumFlag, dictIDFlag, blockSizeID; size_t frameHeaderSize; const BYTE* srcPtr = (const BYTE*)src; /* need to decode header to get frameInfo */ if (srcSize < minFHSize) return err0r(LZ4F_ERROR_frameHeader_incomplete); /* minimal frame header size */ MEM_INIT(&(dctx->frameInfo), 0, sizeof(dctx->frameInfo)); /* special case : skippable frames */ if ((LZ4F_readLE32(srcPtr) & 0xFFFFFFF0U) == LZ4F_MAGIC_SKIPPABLE_START) { dctx->frameInfo.frameType = LZ4F_skippableFrame; if (src == (void*)(dctx->header)) { dctx->tmpInSize = srcSize; dctx->tmpInTarget = 8; dctx->dStage = dstage_storeSFrameSize; return srcSize; } else { dctx->dStage = dstage_getSFrameSize; return 4; } } /* control magic number */ #ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION if (LZ4F_readLE32(srcPtr) != LZ4F_MAGICNUMBER) return err0r(LZ4F_ERROR_frameType_unknown); #endif dctx->frameInfo.frameType = LZ4F_frame; /* Flags */ { U32 const FLG = srcPtr[4]; U32 const version = (FLG>>6) & _2BITS; blockChecksumFlag = (FLG>>4) & _1BIT; blockMode = (FLG>>5) & _1BIT; contentSizeFlag = (FLG>>3) & _1BIT; contentChecksumFlag = (FLG>>2) & _1BIT; dictIDFlag = FLG & _1BIT; /* validate */ if (((FLG>>1)&_1BIT) != 0) return err0r(LZ4F_ERROR_reservedFlag_set); /* Reserved bit */ if (version != 1) return err0r(LZ4F_ERROR_headerVersion_wrong); /* Version Number, only supported value */ } /* Frame Header Size */ frameHeaderSize = minFHSize + (contentSizeFlag?8:0) + (dictIDFlag?4:0); if (srcSize < frameHeaderSize) { /* not enough input to fully decode frame header */ if (srcPtr != dctx->header) memcpy(dctx->header, srcPtr, srcSize); dctx->tmpInSize = srcSize; dctx->tmpInTarget = frameHeaderSize; dctx->dStage = dstage_storeFrameHeader; return srcSize; } { U32 const BD = srcPtr[5]; blockSizeID = (BD>>4) & _3BITS; /* validate */ if (((BD>>7)&_1BIT) != 0) return err0r(LZ4F_ERROR_reservedFlag_set); /* Reserved bit */ if (blockSizeID < 4) return err0r(LZ4F_ERROR_maxBlockSize_invalid); /* 4-7 only supported values for the time being */ if (((BD>>0)&_4BITS) != 0) return err0r(LZ4F_ERROR_reservedFlag_set); /* Reserved bits */ } /* check header */ assert(frameHeaderSize > 5); #ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION { BYTE const HC = LZ4F_headerChecksum(srcPtr+4, frameHeaderSize-5); if (HC != srcPtr[frameHeaderSize-1]) return err0r(LZ4F_ERROR_headerChecksum_invalid); } #endif /* save */ dctx->frameInfo.blockMode = (LZ4F_blockMode_t)blockMode; dctx->frameInfo.blockChecksumFlag = (LZ4F_blockChecksum_t)blockChecksumFlag; dctx->frameInfo.contentChecksumFlag = (LZ4F_contentChecksum_t)contentChecksumFlag; dctx->frameInfo.blockSizeID = (LZ4F_blockSizeID_t)blockSizeID; dctx->maxBlockSize = LZ4F_getBlockSize(blockSizeID); if (contentSizeFlag) dctx->frameRemainingSize = dctx->frameInfo.contentSize = LZ4F_readLE64(srcPtr+6); if (dictIDFlag) dctx->frameInfo.dictID = LZ4F_readLE32(srcPtr + frameHeaderSize - 5); dctx->dStage = dstage_init; return frameHeaderSize; } /*! 
LZ4F_headerSize() : * @return : size of frame header * or an error code, which can be tested using LZ4F_isError() */ size_t LZ4F_headerSize(const void* src, size_t srcSize) { if (src == NULL) return err0r(LZ4F_ERROR_srcPtr_wrong); /* minimal srcSize to determine header size */ if (srcSize < LZ4F_MIN_SIZE_TO_KNOW_HEADER_LENGTH) return err0r(LZ4F_ERROR_frameHeader_incomplete); /* special case : skippable frames */ if ((LZ4F_readLE32(src) & 0xFFFFFFF0U) == LZ4F_MAGIC_SKIPPABLE_START) return 8; /* control magic number */ #ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION if (LZ4F_readLE32(src) != LZ4F_MAGICNUMBER) return err0r(LZ4F_ERROR_frameType_unknown); #endif /* Frame Header Size */ { BYTE const FLG = ((const BYTE*)src)[4]; U32 const contentSizeFlag = (FLG>>3) & _1BIT; U32 const dictIDFlag = FLG & _1BIT; return minFHSize + (contentSizeFlag?8:0) + (dictIDFlag?4:0); } } /*! LZ4F_getFrameInfo() : * This function extracts frame parameters (max blockSize, frame checksum, etc.). * Usage is optional. Objective is to provide relevant information for allocation purposes. * This function works in 2 situations : * - At the beginning of a new frame, in which case it will decode this information from `srcBuffer`, and start the decoding process. * Amount of input data provided must be large enough to successfully decode the frame header. * The header size is variable, but is guaranteed to be <= LZ4F_HEADER_SIZE_MAX bytes. It's possible to provide more input data than this minimum. * - After decoding has been started. In this case, no input is read; frame parameters are extracted from dctx. * The number of bytes consumed from srcBuffer will be updated within *srcSizePtr (necessarily <= original value). * Decompression must resume from (srcBuffer + *srcSizePtr). * @return : a hint about how many srcSize bytes LZ4F_decompress() expects for next call, * or an error code which can be tested using LZ4F_isError() * note 1 : in case of error, dctx is not modified. Decoding operations can resume from where they stopped. * note 2 : frame parameters are *copied into* an already allocated LZ4F_frameInfo_t structure.
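 * Illustrative sketch (an example, not library code; assumes `src`/`srcSize`
 * point at the start of a frame and `dctx` was freshly created) :
 *     LZ4F_frameInfo_t info;
 *     size_t consumed = srcSize;
 *     size_t const hint = LZ4F_getFrameInfo(dctx, &info, src, &consumed);
 *     if (LZ4F_isError(hint)) return hint;
 *     (decoding then resumes from src + consumed)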
*/ LZ4F_errorCode_t LZ4F_getFrameInfo(LZ4F_dctx* dctx, LZ4F_frameInfo_t* frameInfoPtr, const void* srcBuffer, size_t* srcSizePtr) { LZ4F_STATIC_ASSERT(dstage_getFrameHeader < dstage_storeFrameHeader); if (dctx->dStage > dstage_storeFrameHeader) { /* frameInfo already decoded */ size_t o=0, i=0; *srcSizePtr = 0; *frameInfoPtr = dctx->frameInfo; /* returns : recommended nb of bytes for LZ4F_decompress() */ return LZ4F_decompress(dctx, NULL, &o, NULL, &i, NULL); } else { if (dctx->dStage == dstage_storeFrameHeader) { /* frame decoding already started, in the middle of header => automatic fail */ *srcSizePtr = 0; return err0r(LZ4F_ERROR_frameDecoding_alreadyStarted); } else { size_t const hSize = LZ4F_headerSize(srcBuffer, *srcSizePtr); if (LZ4F_isError(hSize)) { *srcSizePtr=0; return hSize; } if (*srcSizePtr < hSize) { *srcSizePtr=0; return err0r(LZ4F_ERROR_frameHeader_incomplete); } { size_t decodeResult = LZ4F_decodeHeader(dctx, srcBuffer, hSize); if (LZ4F_isError(decodeResult)) { *srcSizePtr = 0; } else { *srcSizePtr = decodeResult; decodeResult = BHSize; /* block header size */ } *frameInfoPtr = dctx->frameInfo; return decodeResult; } } } } /* LZ4F_updateDict() : * only used for LZ4F_blockLinked mode */ static void LZ4F_updateDict(LZ4F_dctx* dctx, const BYTE* dstPtr, size_t dstSize, const BYTE* dstBufferStart, unsigned withinTmp) { if (dctx->dictSize==0) dctx->dict = (const BYTE*)dstPtr; /* priority to dictionary continuity */ if (dctx->dict + dctx->dictSize == dstPtr) { /* dictionary continuity, directly within dstBuffer */ dctx->dictSize += dstSize; return; } assert(dstPtr >= dstBufferStart); if ((size_t)(dstPtr - dstBufferStart) + dstSize >= 64 KB) { /* history in dstBuffer becomes large enough to become dictionary */ dctx->dict = (const BYTE*)dstBufferStart; dctx->dictSize = (size_t)(dstPtr - dstBufferStart) + dstSize; return; } assert(dstSize < 64 KB); /* if dstSize >= 64 KB, dictionary would be set into dstBuffer directly */ /* dstBuffer does not contain whole useful history (64 KB), so it must be saved within tmpOut */ if ((withinTmp) && (dctx->dict == dctx->tmpOutBuffer)) { /* continue history within tmpOutBuffer */ /* withinTmp expectation : content of [dstPtr,dstSize] is same as [dict+dictSize,dstSize], so we just extend it */ assert(dctx->dict + dctx->dictSize == dctx->tmpOut + dctx->tmpOutStart); dctx->dictSize += dstSize; return; } if (withinTmp) { /* copy relevant dict portion in front of tmpOut within tmpOutBuffer */ size_t const preserveSize = (size_t)(dctx->tmpOut - dctx->tmpOutBuffer); size_t copySize = 64 KB - dctx->tmpOutSize; const BYTE* const oldDictEnd = dctx->dict + dctx->dictSize - dctx->tmpOutStart; if (dctx->tmpOutSize > 64 KB) copySize = 0; if (copySize > preserveSize) copySize = preserveSize; memcpy(dctx->tmpOutBuffer + preserveSize - copySize, oldDictEnd - copySize, copySize); dctx->dict = dctx->tmpOutBuffer; dctx->dictSize = preserveSize + dctx->tmpOutStart + dstSize; return; } if (dctx->dict == dctx->tmpOutBuffer) { /* copy dst into tmp to complete dict */ if (dctx->dictSize + dstSize > dctx->maxBufferSize) { /* tmp buffer not large enough */ size_t const preserveSize = 64 KB - dstSize; memcpy(dctx->tmpOutBuffer, dctx->dict + dctx->dictSize - preserveSize, preserveSize); dctx->dictSize = preserveSize; } memcpy(dctx->tmpOutBuffer + dctx->dictSize, dstPtr, dstSize); dctx->dictSize += dstSize; return; } /* join dict & dest into tmp */ { size_t preserveSize = 64 KB - dstSize; if (preserveSize > dctx->dictSize) preserveSize = dctx->dictSize; 
memcpy(dctx->tmpOutBuffer, dctx->dict + dctx->dictSize - preserveSize, preserveSize); memcpy(dctx->tmpOutBuffer + preserveSize, dstPtr, dstSize); dctx->dict = dctx->tmpOutBuffer; dctx->dictSize = preserveSize + dstSize; } } /*! LZ4F_decompress() : * Call this function repetitively to regenerate the data compressed within srcBuffer. * The function will attempt to decode up to *srcSizePtr bytes from srcBuffer * into dstBuffer of capacity *dstSizePtr. * * The number of bytes regenerated into dstBuffer will be provided within *dstSizePtr (necessarily <= original value). * * The number of bytes effectively read from srcBuffer will be provided within *srcSizePtr (necessarily <= original value). * If the number of bytes read is < the number of bytes provided, then the decompression operation is not complete. * Remaining data will have to be presented again in a subsequent invocation. * * The function result is a hint of the better srcSize to use for the next call to LZ4F_decompress. * Schematically, it's the size of the current (or remaining) compressed block + header of next block. * Respecting the hint provides a small boost to performance, since it allows less buffer shuffling. * Note that this is just a hint, and it's always possible to provide any srcSize value. * When a frame is fully decoded, @return will be 0. * If decompression failed, @return is an error code which can be tested using LZ4F_isError(). */ size_t LZ4F_decompress(LZ4F_dctx* dctx, void* dstBuffer, size_t* dstSizePtr, const void* srcBuffer, size_t* srcSizePtr, const LZ4F_decompressOptions_t* decompressOptionsPtr) { LZ4F_decompressOptions_t optionsNull; const BYTE* const srcStart = (const BYTE*)srcBuffer; const BYTE* const srcEnd = srcStart + *srcSizePtr; const BYTE* srcPtr = srcStart; BYTE* const dstStart = (BYTE*)dstBuffer; BYTE* const dstEnd = dstStart + *dstSizePtr; BYTE* dstPtr = dstStart; const BYTE* selectedIn = NULL; unsigned doAnotherStage = 1; size_t nextSrcSizeHint = 1; MEM_INIT(&optionsNull, 0, sizeof(optionsNull)); if (decompressOptionsPtr==NULL) decompressOptionsPtr = &optionsNull; *srcSizePtr = 0; *dstSizePtr = 0; /* behaves as a state machine */ while (doAnotherStage) { switch(dctx->dStage) { case dstage_getFrameHeader: if ((size_t)(srcEnd-srcPtr) >= maxFHSize) { /* enough to decode - shortcut */ size_t const hSize = LZ4F_decodeHeader(dctx, srcPtr, (size_t)(srcEnd-srcPtr)); /* will update dStage appropriately */ if (LZ4F_isError(hSize)) return hSize; srcPtr += hSize; break; } dctx->tmpInSize = 0; if (srcEnd-srcPtr == 0) return minFHSize; /* 0-size input */ dctx->tmpInTarget = minFHSize; /* minimum size to decode header */ dctx->dStage = dstage_storeFrameHeader; /* fall-through */ case dstage_storeFrameHeader: { size_t const sizeToCopy = MIN(dctx->tmpInTarget - dctx->tmpInSize, (size_t)(srcEnd - srcPtr)); memcpy(dctx->header + dctx->tmpInSize, srcPtr, sizeToCopy); dctx->tmpInSize += sizeToCopy; srcPtr += sizeToCopy; } if (dctx->tmpInSize < dctx->tmpInTarget) { nextSrcSizeHint = (dctx->tmpInTarget - dctx->tmpInSize) + BHSize; /* rest of header + nextBlockHeader */ doAnotherStage = 0; /* not enough src data, ask for some more */ break; } { size_t const hSize = LZ4F_decodeHeader(dctx, dctx->header, dctx->tmpInTarget); /* will update dStage appropriately */ if (LZ4F_isError(hSize)) return hSize; } break; case dstage_init: if (dctx->frameInfo.contentChecksumFlag) (void)XXH32_reset(&(dctx->xxh), 0); /* internal buffers allocation */ { size_t const bufferNeeded = dctx->maxBlockSize + ((dctx->frameInfo.blockMode==LZ4F_blockLinked) ?
128 KB : 0); if (bufferNeeded > dctx->maxBufferSize) { /* tmp buffers too small */ dctx->maxBufferSize = 0; /* ensure allocation will be re-attempted on next entry*/ FREEMEM(dctx->tmpIn); dctx->tmpIn = (BYTE*)ALLOC(dctx->maxBlockSize + BFSize /* block checksum */); if (dctx->tmpIn == NULL) return err0r(LZ4F_ERROR_allocation_failed); FREEMEM(dctx->tmpOutBuffer); dctx->tmpOutBuffer= (BYTE*)ALLOC(bufferNeeded); if (dctx->tmpOutBuffer== NULL) return err0r(LZ4F_ERROR_allocation_failed); dctx->maxBufferSize = bufferNeeded; } } dctx->tmpInSize = 0; dctx->tmpInTarget = 0; dctx->tmpOut = dctx->tmpOutBuffer; dctx->tmpOutStart = 0; dctx->tmpOutSize = 0; dctx->dStage = dstage_getBlockHeader; /* fall-through */ case dstage_getBlockHeader: if ((size_t)(srcEnd - srcPtr) >= BHSize) { selectedIn = srcPtr; srcPtr += BHSize; } else { /* not enough input to read cBlockSize field */ dctx->tmpInSize = 0; dctx->dStage = dstage_storeBlockHeader; } if (dctx->dStage == dstage_storeBlockHeader) /* can be skipped */ case dstage_storeBlockHeader: { size_t const remainingInput = (size_t)(srcEnd - srcPtr); size_t const wantedData = BHSize - dctx->tmpInSize; size_t const sizeToCopy = MIN(wantedData, remainingInput); memcpy(dctx->tmpIn + dctx->tmpInSize, srcPtr, sizeToCopy); srcPtr += sizeToCopy; dctx->tmpInSize += sizeToCopy; if (dctx->tmpInSize < BHSize) { /* not enough input for cBlockSize */ nextSrcSizeHint = BHSize - dctx->tmpInSize; doAnotherStage = 0; break; } selectedIn = dctx->tmpIn; } /* if (dctx->dStage == dstage_storeBlockHeader) */ /* decode block header */ { size_t const nextCBlockSize = LZ4F_readLE32(selectedIn) & 0x7FFFFFFFU; size_t const crcSize = dctx->frameInfo.blockChecksumFlag * BFSize; if (nextCBlockSize==0) { /* frameEnd signal, no more block */ dctx->dStage = dstage_getSuffix; break; } if (nextCBlockSize > dctx->maxBlockSize) return err0r(LZ4F_ERROR_maxBlockSize_invalid); if (LZ4F_readLE32(selectedIn) & LZ4F_BLOCKUNCOMPRESSED_FLAG) { /* next block is uncompressed */ dctx->tmpInTarget = nextCBlockSize; if (dctx->frameInfo.blockChecksumFlag) { (void)XXH32_reset(&dctx->blockChecksum, 0); } dctx->dStage = dstage_copyDirect; break; } /* next block is a compressed block */ dctx->tmpInTarget = nextCBlockSize + crcSize; dctx->dStage = dstage_getCBlock; if (dstPtr==dstEnd || srcPtr==srcEnd) { nextSrcSizeHint = BHSize + nextCBlockSize + crcSize; doAnotherStage = 0; } break; } case dstage_copyDirect: /* uncompressed block */ { size_t const minBuffSize = MIN((size_t)(srcEnd-srcPtr), (size_t)(dstEnd-dstPtr)); size_t const sizeToCopy = MIN(dctx->tmpInTarget, minBuffSize); memcpy(dstPtr, srcPtr, sizeToCopy); if (dctx->frameInfo.blockChecksumFlag) { (void)XXH32_update(&dctx->blockChecksum, srcPtr, sizeToCopy); } if (dctx->frameInfo.contentChecksumFlag) (void)XXH32_update(&dctx->xxh, srcPtr, sizeToCopy); if (dctx->frameInfo.contentSize) dctx->frameRemainingSize -= sizeToCopy; /* history management (linked blocks only)*/ if (dctx->frameInfo.blockMode == LZ4F_blockLinked) LZ4F_updateDict(dctx, dstPtr, sizeToCopy, dstStart, 0); srcPtr += sizeToCopy; dstPtr += sizeToCopy; if (sizeToCopy == dctx->tmpInTarget) { /* all done */ if (dctx->frameInfo.blockChecksumFlag) { dctx->tmpInSize = 0; dctx->dStage = dstage_getBlockChecksum; } else dctx->dStage = dstage_getBlockHeader; /* new block */ break; } dctx->tmpInTarget -= sizeToCopy; /* need to copy more */ nextSrcSizeHint = dctx->tmpInTarget + +(dctx->frameInfo.blockChecksumFlag ? 
BFSize : 0) + BHSize /* next header size */; doAnotherStage = 0; break; } /* check block checksum for recently transferred uncompressed block */ case dstage_getBlockChecksum: { const void* crcSrc; if ((srcEnd-srcPtr >= 4) && (dctx->tmpInSize==0)) { crcSrc = srcPtr; srcPtr += 4; } else { size_t const stillToCopy = 4 - dctx->tmpInSize; size_t const sizeToCopy = MIN(stillToCopy, (size_t)(srcEnd-srcPtr)); memcpy(dctx->header + dctx->tmpInSize, srcPtr, sizeToCopy); dctx->tmpInSize += sizeToCopy; srcPtr += sizeToCopy; if (dctx->tmpInSize < 4) { /* all input consumed */ doAnotherStage = 0; break; } crcSrc = dctx->header; } { U32 const readCRC = LZ4F_readLE32(crcSrc); U32 const calcCRC = XXH32_digest(&dctx->blockChecksum); #ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION if (readCRC != calcCRC) return err0r(LZ4F_ERROR_blockChecksum_invalid); #else (void)readCRC; (void)calcCRC; #endif } } dctx->dStage = dstage_getBlockHeader; /* new block */ break; case dstage_getCBlock: if ((size_t)(srcEnd-srcPtr) < dctx->tmpInTarget) { dctx->tmpInSize = 0; dctx->dStage = dstage_storeCBlock; break; } /* input large enough to read full block directly */ selectedIn = srcPtr; srcPtr += dctx->tmpInTarget; if (0) /* jump over next block */ case dstage_storeCBlock: { size_t const wantedData = dctx->tmpInTarget - dctx->tmpInSize; size_t const inputLeft = (size_t)(srcEnd-srcPtr); size_t const sizeToCopy = MIN(wantedData, inputLeft); memcpy(dctx->tmpIn + dctx->tmpInSize, srcPtr, sizeToCopy); dctx->tmpInSize += sizeToCopy; srcPtr += sizeToCopy; if (dctx->tmpInSize < dctx->tmpInTarget) { /* need more input */ nextSrcSizeHint = (dctx->tmpInTarget - dctx->tmpInSize) + (dctx->frameInfo.blockChecksumFlag ? BFSize : 0) + BHSize /* next header size */; doAnotherStage = 0; break; } selectedIn = dctx->tmpIn; } /* At this stage, input is large enough to decode a block */ if (dctx->frameInfo.blockChecksumFlag) { dctx->tmpInTarget -= 4; assert(selectedIn != NULL); /* selectedIn is defined at this stage (either srcPtr, or dctx->tmpIn) */ { U32 const readBlockCrc = LZ4F_readLE32(selectedIn + dctx->tmpInTarget); U32 const calcBlockCrc = XXH32(selectedIn, dctx->tmpInTarget, 0); #ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION if (readBlockCrc != calcBlockCrc) return err0r(LZ4F_ERROR_blockChecksum_invalid); #else (void)readBlockCrc; (void)calcBlockCrc; #endif } } if ((size_t)(dstEnd-dstPtr) >= dctx->maxBlockSize) { const char* dict = (const char*)dctx->dict; size_t dictSize = dctx->dictSize; int decodedSize; if (dict && dictSize > 1 GB) { /* the dictSize param is an int, avoid truncation / sign issues */ dict += dictSize - 64 KB; dictSize = 64 KB; } /* enough capacity in `dst` to decompress directly there */ decodedSize = LZ4_decompress_safe_usingDict( (const char*)selectedIn, (char*)dstPtr, (int)dctx->tmpInTarget, (int)dctx->maxBlockSize, dict, (int)dictSize); if (decodedSize < 0) return err0r(LZ4F_ERROR_GENERIC); /* decompression failed */ if (dctx->frameInfo.contentChecksumFlag) XXH32_update(&(dctx->xxh), dstPtr, (size_t)decodedSize); if (dctx->frameInfo.contentSize) dctx->frameRemainingSize -= (size_t)decodedSize; /* dictionary management */ if (dctx->frameInfo.blockMode==LZ4F_blockLinked) LZ4F_updateDict(dctx, dstPtr, (size_t)decodedSize, dstStart, 0); dstPtr += decodedSize; dctx->dStage = dstage_getBlockHeader; break; } /* not enough place into dst : decode into tmpOut */ /* ensure enough place for tmpOut */ if (dctx->frameInfo.blockMode == LZ4F_blockLinked) { if (dctx->dict == dctx->tmpOutBuffer) { if (dctx->dictSize > 128 KB) { 
memcpy(dctx->tmpOutBuffer, dctx->dict + dctx->dictSize - 64 KB, 64 KB); dctx->dictSize = 64 KB; } dctx->tmpOut = dctx->tmpOutBuffer + dctx->dictSize; } else { /* dict not within tmp */ size_t const reservedDictSpace = MIN(dctx->dictSize, 64 KB); dctx->tmpOut = dctx->tmpOutBuffer + reservedDictSpace; } } /* Decode block */ { const char* dict = (const char*)dctx->dict; size_t dictSize = dctx->dictSize; int decodedSize; if (dict && dictSize > 1 GB) { /* the dictSize param is an int, avoid truncation / sign issues */ dict += dictSize - 64 KB; dictSize = 64 KB; } decodedSize = LZ4_decompress_safe_usingDict( (const char*)selectedIn, (char*)dctx->tmpOut, (int)dctx->tmpInTarget, (int)dctx->maxBlockSize, dict, (int)dictSize); if (decodedSize < 0) /* decompression failed */ return err0r(LZ4F_ERROR_decompressionFailed); if (dctx->frameInfo.contentChecksumFlag) XXH32_update(&(dctx->xxh), dctx->tmpOut, (size_t)decodedSize); if (dctx->frameInfo.contentSize) dctx->frameRemainingSize -= (size_t)decodedSize; dctx->tmpOutSize = (size_t)decodedSize; dctx->tmpOutStart = 0; dctx->dStage = dstage_flushOut; } /* fall-through */ case dstage_flushOut: /* flush decoded data from tmpOut to dstBuffer */ { size_t const sizeToCopy = MIN(dctx->tmpOutSize - dctx->tmpOutStart, (size_t)(dstEnd-dstPtr)); memcpy(dstPtr, dctx->tmpOut + dctx->tmpOutStart, sizeToCopy); /* dictionary management */ if (dctx->frameInfo.blockMode == LZ4F_blockLinked) LZ4F_updateDict(dctx, dstPtr, sizeToCopy, dstStart, 1 /*withinTmp*/); dctx->tmpOutStart += sizeToCopy; dstPtr += sizeToCopy; if (dctx->tmpOutStart == dctx->tmpOutSize) { /* all flushed */ dctx->dStage = dstage_getBlockHeader; /* get next block */ break; } /* could not flush everything : stop there, just request a block header */ doAnotherStage = 0; nextSrcSizeHint = BHSize; break; } case dstage_getSuffix: if (dctx->frameRemainingSize) return err0r(LZ4F_ERROR_frameSize_wrong); /* incorrect frame size decoded */ if (!dctx->frameInfo.contentChecksumFlag) { /* no checksum, frame is completed */ nextSrcSizeHint = 0; LZ4F_resetDecompressionContext(dctx); doAnotherStage = 0; break; } if ((srcEnd - srcPtr) < 4) { /* not enough size for entire CRC */ dctx->tmpInSize = 0; dctx->dStage = dstage_storeSuffix; } else { selectedIn = srcPtr; srcPtr += 4; } if (dctx->dStage == dstage_storeSuffix) /* can be skipped */ case dstage_storeSuffix: { size_t const remainingInput = (size_t)(srcEnd - srcPtr); size_t const wantedData = 4 - dctx->tmpInSize; size_t const sizeToCopy = MIN(wantedData, remainingInput); memcpy(dctx->tmpIn + dctx->tmpInSize, srcPtr, sizeToCopy); srcPtr += sizeToCopy; dctx->tmpInSize += sizeToCopy; if (dctx->tmpInSize < 4) { /* not enough input to read complete suffix */ nextSrcSizeHint = 4 - dctx->tmpInSize; doAnotherStage=0; break; } selectedIn = dctx->tmpIn; } /* if (dctx->dStage == dstage_storeSuffix) */ /* case dstage_checkSuffix: */ /* no direct entry, avoid initialization risks */ { U32 const readCRC = LZ4F_readLE32(selectedIn); U32 const resultCRC = XXH32_digest(&(dctx->xxh)); #ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION if (readCRC != resultCRC) return err0r(LZ4F_ERROR_contentChecksum_invalid); #else (void)readCRC; (void)resultCRC; #endif nextSrcSizeHint = 0; LZ4F_resetDecompressionContext(dctx); doAnotherStage = 0; break; } case dstage_getSFrameSize: if ((srcEnd - srcPtr) >= 4) { selectedIn = srcPtr; srcPtr += 4; } else { /* not enough input to read cBlockSize field */ dctx->tmpInSize = 4; dctx->tmpInTarget = 8; dctx->dStage = dstage_storeSFrameSize; } if (dctx->dStage == 
dstage_storeSFrameSize) case dstage_storeSFrameSize: { size_t const sizeToCopy = MIN(dctx->tmpInTarget - dctx->tmpInSize, (size_t)(srcEnd - srcPtr) ); memcpy(dctx->header + dctx->tmpInSize, srcPtr, sizeToCopy); srcPtr += sizeToCopy; dctx->tmpInSize += sizeToCopy; if (dctx->tmpInSize < dctx->tmpInTarget) { /* not enough input to get full sBlockSize; wait for more */ nextSrcSizeHint = dctx->tmpInTarget - dctx->tmpInSize; doAnotherStage = 0; break; } selectedIn = dctx->header + 4; } /* if (dctx->dStage == dstage_storeSFrameSize) */ /* case dstage_decodeSFrameSize: */ /* no direct entry */ { size_t const SFrameSize = LZ4F_readLE32(selectedIn); dctx->frameInfo.contentSize = SFrameSize; dctx->tmpInTarget = SFrameSize; dctx->dStage = dstage_skipSkippable; break; } case dstage_skipSkippable: { size_t const skipSize = MIN(dctx->tmpInTarget, (size_t)(srcEnd-srcPtr)); srcPtr += skipSize; dctx->tmpInTarget -= skipSize; doAnotherStage = 0; nextSrcSizeHint = dctx->tmpInTarget; if (nextSrcSizeHint) break; /* still more to skip */ /* frame fully skipped : prepare context for a new frame */ LZ4F_resetDecompressionContext(dctx); break; } } /* switch (dctx->dStage) */ } /* while (doAnotherStage) */ /* preserve history within tmp whenever necessary */ LZ4F_STATIC_ASSERT((unsigned)dstage_init == 2); if ( (dctx->frameInfo.blockMode==LZ4F_blockLinked) /* next block will use up to 64KB from previous ones */ && (dctx->dict != dctx->tmpOutBuffer) /* dictionary is not already within tmp */ && (!decompressOptionsPtr->stableDst) /* cannot rely on dst data to remain there for next call */ && ((unsigned)(dctx->dStage)-2 < (unsigned)(dstage_getSuffix)-2) ) /* valid stages : [init ... getSuffix[ */ { if (dctx->dStage == dstage_flushOut) { size_t const preserveSize = (size_t)(dctx->tmpOut - dctx->tmpOutBuffer); size_t copySize = 64 KB - dctx->tmpOutSize; const BYTE* oldDictEnd = dctx->dict + dctx->dictSize - dctx->tmpOutStart; if (dctx->tmpOutSize > 64 KB) copySize = 0; if (copySize > preserveSize) copySize = preserveSize; if (copySize > 0) memcpy(dctx->tmpOutBuffer + preserveSize - copySize, oldDictEnd - copySize, copySize); dctx->dict = dctx->tmpOutBuffer; dctx->dictSize = preserveSize + dctx->tmpOutStart; } else { const BYTE* const oldDictEnd = dctx->dict + dctx->dictSize; size_t const newDictSize = MIN(dctx->dictSize, 64 KB); if (newDictSize > 0) memcpy(dctx->tmpOutBuffer, oldDictEnd - newDictSize, newDictSize); dctx->dict = dctx->tmpOutBuffer; dctx->dictSize = newDictSize; dctx->tmpOut = dctx->tmpOutBuffer + newDictSize; } } *srcSizePtr = (size_t)(srcPtr - srcStart); *dstSizePtr = (size_t)(dstPtr - dstStart); return nextSrcSizeHint; } /*! LZ4F_decompress_usingDict() : * Same as LZ4F_decompress(), using a predefined dictionary. * Dictionary is used "in place", without any preprocessing. * It must remain accessible throughout the entire frame decoding. */ size_t LZ4F_decompress_usingDict(LZ4F_dctx* dctx, void* dstBuffer, size_t* dstSizePtr, const void* srcBuffer, size_t* srcSizePtr, const void* dict, size_t dictSize, const LZ4F_decompressOptions_t* decompressOptionsPtr) { if (dctx->dStage <= dstage_init) { dctx->dict = (const BYTE*)dict; dctx->dictSize = dictSize; } return LZ4F_decompress(dctx, dstBuffer, dstSizePtr, srcBuffer, srcSizePtr, decompressOptionsPtr); } py-lz4framed-0.14.0/lz4/lz4frame.h000066400000000000000000000677611357043434000165440ustar00rootroot00000000000000/* LZ4 auto-framing library Header File Copyright (C) 2011-2017, Yann Collet. 
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. You can contact the author at : - LZ4 source repository : https://github.com/lz4/lz4 - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c */ /* LZ4F is a stand-alone API able to create and decode LZ4 frames * conformant with specification v1.6.1 in doc/lz4_Frame_format.md . * Generated frames are compatible with `lz4` CLI. * * LZ4F also offers streaming capabilities. * * lz4.h is not required when using lz4frame.h, * except to extract common constants such as LZ4_VERSION_NUMBER. * */ #ifndef LZ4F_H_09782039843 #define LZ4F_H_09782039843 #if defined (__cplusplus) extern "C" { #endif /* --- Dependency --- */ #include <stddef.h> /* size_t */ /** Introduction lz4frame.h implements LZ4 frame specification (doc/lz4_Frame_format.md). lz4frame.h provides frame compression functions that take care of encoding standard metadata alongside LZ4-compressed blocks. */ /*-*************************************************************** * Compiler specifics *****************************************************************/ /* LZ4_DLL_EXPORT : * Enable exporting of functions when building a Windows DLL * LZ4FLIB_API : * Control library symbols visibility.
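 * e.g., an illustrative (unofficial) Windows DLL build sketch with MSVC :
 *     cl /DLZ4_DLL_EXPORT=1 /LD lz4.c lz4hc.c xxhash.c lz4frame.c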
*/ #if defined(LZ4_DLL_EXPORT) && (LZ4_DLL_EXPORT==1) # define LZ4FLIB_API __declspec(dllexport) #elif defined(LZ4_DLL_IMPORT) && (LZ4_DLL_IMPORT==1) # define LZ4FLIB_API __declspec(dllimport) #elif defined(__GNUC__) && (__GNUC__ >= 4) # define LZ4FLIB_API __attribute__ ((__visibility__ ("default"))) #else # define LZ4FLIB_API #endif #ifdef LZ4F_DISABLE_DEPRECATE_WARNINGS # define LZ4F_DEPRECATE(x) x #else # if defined(_MSC_VER) # define LZ4F_DEPRECATE(x) x /* __declspec(deprecated) x - only works with C++ */ # elif defined(__clang__) || (defined(__GNUC__) && (__GNUC__ >= 6)) # define LZ4F_DEPRECATE(x) x __attribute__((deprecated)) # else # define LZ4F_DEPRECATE(x) x /* no deprecation warning for this compiler */ # endif #endif /*-************************************ * Error management **************************************/ typedef size_t LZ4F_errorCode_t; LZ4FLIB_API unsigned LZ4F_isError(LZ4F_errorCode_t code); /**< tells when a function result is an error code */ LZ4FLIB_API const char* LZ4F_getErrorName(LZ4F_errorCode_t code); /**< return error code string; for debugging */ /*-************************************ * Frame compression types **************************************/ /* #define LZ4F_ENABLE_OBSOLETE_ENUMS // uncomment to enable obsolete enums */ #ifdef LZ4F_ENABLE_OBSOLETE_ENUMS # define LZ4F_OBSOLETE_ENUM(x) , LZ4F_DEPRECATE(x) = LZ4F_##x #else # define LZ4F_OBSOLETE_ENUM(x) #endif /* The larger the block size, the (slightly) better the compression ratio, * though there are diminishing returns. * Larger blocks also increase memory usage on both compression and decompression sides. */ typedef enum { LZ4F_default=0, LZ4F_max64KB=4, LZ4F_max256KB=5, LZ4F_max1MB=6, LZ4F_max4MB=7 LZ4F_OBSOLETE_ENUM(max64KB) LZ4F_OBSOLETE_ENUM(max256KB) LZ4F_OBSOLETE_ENUM(max1MB) LZ4F_OBSOLETE_ENUM(max4MB) } LZ4F_blockSizeID_t; /* Linked blocks sharply reduce inefficiencies when using small blocks, * they compress better. * However, some LZ4 decoders are only compatible with independent blocks */ typedef enum { LZ4F_blockLinked=0, LZ4F_blockIndependent LZ4F_OBSOLETE_ENUM(blockLinked) LZ4F_OBSOLETE_ENUM(blockIndependent) } LZ4F_blockMode_t; typedef enum { LZ4F_noContentChecksum=0, LZ4F_contentChecksumEnabled LZ4F_OBSOLETE_ENUM(noContentChecksum) LZ4F_OBSOLETE_ENUM(contentChecksumEnabled) } LZ4F_contentChecksum_t; typedef enum { LZ4F_noBlockChecksum=0, LZ4F_blockChecksumEnabled } LZ4F_blockChecksum_t; typedef enum { LZ4F_frame=0, LZ4F_skippableFrame LZ4F_OBSOLETE_ENUM(skippableFrame) } LZ4F_frameType_t; #ifdef LZ4F_ENABLE_OBSOLETE_ENUMS typedef LZ4F_blockSizeID_t blockSizeID_t; typedef LZ4F_blockMode_t blockMode_t; typedef LZ4F_frameType_t frameType_t; typedef LZ4F_contentChecksum_t contentChecksum_t; #endif /*! LZ4F_frameInfo_t : * makes it possible to set or read frame parameters. * Structure must be first init to 0, using memset() or LZ4F_INIT_FRAMEINFO, * setting all parameters to default. 
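* Example (illustrative sketch) :
*   LZ4F_frameInfo_t info;
*   memset(&info, 0, sizeof(info));   ... every field now holds its default ...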
* It's then possible to update selectively some parameters */ typedef struct { LZ4F_blockSizeID_t blockSizeID; /* max64KB, max256KB, max1MB, max4MB; 0 == default */ LZ4F_blockMode_t blockMode; /* LZ4F_blockLinked, LZ4F_blockIndependent; 0 == default */ LZ4F_contentChecksum_t contentChecksumFlag; /* 1: frame terminated with 32-bit checksum of decompressed data; 0: disabled (default) */ LZ4F_frameType_t frameType; /* read-only field : LZ4F_frame or LZ4F_skippableFrame */ unsigned long long contentSize; /* Size of uncompressed content ; 0 == unknown */ unsigned dictID; /* Dictionary ID, sent by compressor to help decoder select correct dictionary; 0 == no dictID provided */ LZ4F_blockChecksum_t blockChecksumFlag; /* 1: each block followed by a checksum of block's compressed data; 0: disabled (default) */ } LZ4F_frameInfo_t; #define LZ4F_INIT_FRAMEINFO { LZ4F_default, LZ4F_blockLinked, LZ4F_noContentChecksum, LZ4F_frame, 0ULL, 0U, LZ4F_noBlockChecksum } /* v1.8.3+ */ /*! LZ4F_preferences_t : * makes it possible to supply advanced compression instructions to streaming interface. * Structure must be first init to 0, using memset() or LZ4F_INIT_PREFERENCES, * setting all parameters to default. * All reserved fields must be set to zero. */ typedef struct { LZ4F_frameInfo_t frameInfo; int compressionLevel; /* 0: default (fast mode); values > LZ4HC_CLEVEL_MAX count as LZ4HC_CLEVEL_MAX; values < 0 trigger "fast acceleration" */ unsigned autoFlush; /* 1: always flush; reduces usage of internal buffers */ unsigned favorDecSpeed; /* 1: parser favors decompression speed vs compression ratio. Only works for high compression modes (>= LZ4HC_CLEVEL_OPT_MIN) */ /* v1.8.2+ */ unsigned reserved[3]; /* must be zero for forward compatibility */ } LZ4F_preferences_t; #define LZ4F_INIT_PREFERENCES { LZ4F_INIT_FRAMEINFO, 0, 0u, 0u, { 0u, 0u, 0u } } /* v1.8.3+ */ /*-********************************* * Simple compression function ***********************************/ LZ4FLIB_API int LZ4F_compressionLevel_max(void); /* v1.8.0+ */ /*! LZ4F_compressFrameBound() : * Returns the maximum possible compressed size with LZ4F_compressFrame() given srcSize and preferences. * `preferencesPtr` is optional. It can be replaced by NULL, in which case, the function will assume default preferences. * Note : this result is only usable with LZ4F_compressFrame(). * It may also be used with LZ4F_compressUpdate() _if no flush() operation_ is performed. */ LZ4FLIB_API size_t LZ4F_compressFrameBound(size_t srcSize, const LZ4F_preferences_t* preferencesPtr); /*! LZ4F_compressFrame() : * Compress an entire srcBuffer into a valid LZ4 frame. * dstCapacity MUST be >= LZ4F_compressFrameBound(srcSize, preferencesPtr). * The LZ4F_preferences_t structure is optional : you can provide NULL as argument. All preferences will be set to default. * @return : number of bytes written into dstBuffer. 
* or an error code if it fails (can be tested using LZ4F_isError()) */ LZ4FLIB_API size_t LZ4F_compressFrame(void* dstBuffer, size_t dstCapacity, const void* srcBuffer, size_t srcSize, const LZ4F_preferences_t* preferencesPtr); /*-*********************************** * Advanced compression functions *************************************/ typedef struct LZ4F_cctx_s LZ4F_cctx; /* incomplete type */ typedef LZ4F_cctx* LZ4F_compressionContext_t; /* for compatibility with previous API version */ typedef struct { unsigned stableSrc; /* 1 == src content will remain present on future calls to LZ4F_compress(); skip copying src content within tmp buffer */ unsigned reserved[3]; } LZ4F_compressOptions_t; /*--- Resource Management ---*/ #define LZ4F_VERSION 100 /* This number can be used to check for an incompatible API breaking change */ LZ4FLIB_API unsigned LZ4F_getVersion(void); /*! LZ4F_createCompressionContext() : * The first thing to do is to create a compressionContext object, which will be used in all compression operations. * This is achieved using LZ4F_createCompressionContext(), which takes as argument a version. * The version provided MUST be LZ4F_VERSION. It is intended to track potential version mismatch, notably when using DLL. * The function will provide a pointer to a fully allocated LZ4F_cctx object. * If @return != zero, there was an error during context creation. * Object can release its memory using LZ4F_freeCompressionContext(); */ LZ4FLIB_API LZ4F_errorCode_t LZ4F_createCompressionContext(LZ4F_cctx** cctxPtr, unsigned version); LZ4FLIB_API LZ4F_errorCode_t LZ4F_freeCompressionContext(LZ4F_cctx* cctx); /*---- Compression ----*/ #define LZ4F_HEADER_SIZE_MIN 7 /* LZ4 Frame header size can vary, depending on selected paramaters */ #define LZ4F_HEADER_SIZE_MAX 19 /* Size in bytes of a block header in little-endian format. Highest bit indicates if block data is uncompressed */ #define LZ4F_BLOCK_HEADER_SIZE 4 /* Size in bytes of a block checksum footer in little-endian format. */ #define LZ4F_BLOCK_CHECKSUM_SIZE 4 /* Size in bytes of the content checksum. */ #define LZ4F_CONTENT_CHECKSUM_SIZE 4 /*! LZ4F_compressBegin() : * will write the frame header into dstBuffer. * dstCapacity must be >= LZ4F_HEADER_SIZE_MAX bytes. * `prefsPtr` is optional : you can provide NULL as argument, all preferences will then be set to default. * @return : number of bytes written into dstBuffer for the header * or an error code (which can be tested using LZ4F_isError()) */ LZ4FLIB_API size_t LZ4F_compressBegin(LZ4F_cctx* cctx, void* dstBuffer, size_t dstCapacity, const LZ4F_preferences_t* prefsPtr); /*! LZ4F_compressBound() : * Provides minimum dstCapacity required to guarantee success of * LZ4F_compressUpdate(), given a srcSize and preferences, for a worst case scenario. * When srcSize==0, LZ4F_compressBound() provides an upper bound for LZ4F_flush() and LZ4F_compressEnd() instead. * Note that the result is only valid for a single invocation of LZ4F_compressUpdate(). * When invoking LZ4F_compressUpdate() multiple times, * if the output buffer is gradually filled up instead of emptied and re-used from its start, * one must check if there is enough remaining capacity before each invocation, using LZ4F_compressBound(). * @return is always the same for a srcSize and prefsPtr. * prefsPtr is optional : when NULL is provided, preferences will be set to cover worst case scenario. * tech details : * @return includes the possibility that internal buffer might already be filled by up to (blockSize-1) bytes. 
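* Illustrative sketch of the capacity check described above (flushOutput() is a
* hypothetical caller-side helper, error handling elided) :
*   size_t const needed = LZ4F_compressBound(chunkSize, &prefs);
*   if (remainingCapacity < needed) flushOutput();
*   written = LZ4F_compressUpdate(cctx, dstPtr, remainingCapacity, chunk, chunkSize, NULL);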
* It also includes frame footer (ending + checksum), since it might be generated by LZ4F_compressEnd(). * @return doesn't include frame header, as it was already generated by LZ4F_compressBegin(). */ LZ4FLIB_API size_t LZ4F_compressBound(size_t srcSize, const LZ4F_preferences_t* prefsPtr); /*! LZ4F_compressUpdate() : * LZ4F_compressUpdate() can be called repetitively to compress as much data as necessary. * Important rule: dstCapacity MUST be large enough to ensure operation success even in worst case situations. * This value is provided by LZ4F_compressBound(). * If this condition is not respected, LZ4F_compress() will fail (result is an errorCode). * LZ4F_compressUpdate() doesn't guarantee error recovery. * When an error occurs, compression context must be freed or resized. * `cOptPtr` is optional : NULL can be provided, in which case all options are set to default. * @return : number of bytes written into `dstBuffer` (it can be zero, meaning input data was just buffered). * or an error code if it fails (which can be tested using LZ4F_isError()) */ LZ4FLIB_API size_t LZ4F_compressUpdate(LZ4F_cctx* cctx, void* dstBuffer, size_t dstCapacity, const void* srcBuffer, size_t srcSize, const LZ4F_compressOptions_t* cOptPtr); /*! LZ4F_flush() : * When data must be generated and sent immediately, without waiting for a block to be completely filled, * it's possible to call LZ4_flush(). It will immediately compress any data buffered within cctx. * `dstCapacity` must be large enough to ensure the operation will be successful. * `cOptPtr` is optional : it's possible to provide NULL, all options will be set to default. * @return : nb of bytes written into dstBuffer (can be zero, when there is no data stored within cctx) * or an error code if it fails (which can be tested using LZ4F_isError()) * Note : LZ4F_flush() is guaranteed to be successful when dstCapacity >= LZ4F_compressBound(0, prefsPtr). */ LZ4FLIB_API size_t LZ4F_flush(LZ4F_cctx* cctx, void* dstBuffer, size_t dstCapacity, const LZ4F_compressOptions_t* cOptPtr); /*! LZ4F_compressEnd() : * To properly finish an LZ4 frame, invoke LZ4F_compressEnd(). * It will flush whatever data remained within `cctx` (like LZ4_flush()) * and properly finalize the frame, with an endMark and a checksum. * `cOptPtr` is optional : NULL can be provided, in which case all options will be set to default. * @return : nb of bytes written into dstBuffer, necessarily >= 4 (endMark), * or an error code if it fails (which can be tested using LZ4F_isError()) * Note : LZ4F_compressEnd() is guaranteed to be successful when dstCapacity >= LZ4F_compressBound(0, prefsPtr). * A successful call to LZ4F_compressEnd() makes `cctx` available again for another compression task. */ LZ4FLIB_API size_t LZ4F_compressEnd(LZ4F_cctx* cctx, void* dstBuffer, size_t dstCapacity, const LZ4F_compressOptions_t* cOptPtr); /*-********************************* * Decompression functions ***********************************/ typedef struct LZ4F_dctx_s LZ4F_dctx; /* incomplete type */ typedef LZ4F_dctx* LZ4F_decompressionContext_t; /* compatibility with previous API versions */ typedef struct { unsigned stableDst; /* pledges that last 64KB decompressed data will remain available unmodified. This optimization skips storage operations in tmp buffers. */ unsigned reserved[3]; /* must be set to zero for forward compatibility */ } LZ4F_decompressOptions_t; /* Resource management */ /*! LZ4F_createDecompressionContext() : * Create an LZ4F_dctx object, to track all decompression operations. 
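* Example (sketch, error handling elided) :
*   LZ4F_dctx* dctx = NULL;
*   LZ4F_errorCode_t const err = LZ4F_createDecompressionContext(&dctx, LZ4F_VERSION);
*   if (LZ4F_isError(err)) { ... abort ... }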
* The version provided MUST be LZ4F_VERSION. * The function provides a pointer to an allocated and initialized LZ4F_dctx object. * The result is an errorCode, which can be tested using LZ4F_isError(). * dctx memory can be released using LZ4F_freeDecompressionContext(); * Result of LZ4F_freeDecompressionContext() indicates current state of decompressionContext when being released. * That is, it should be == 0 if decompression has been completed fully and correctly. */ LZ4FLIB_API LZ4F_errorCode_t LZ4F_createDecompressionContext(LZ4F_dctx** dctxPtr, unsigned version); LZ4FLIB_API LZ4F_errorCode_t LZ4F_freeDecompressionContext(LZ4F_dctx* dctx); /*-*********************************** * Streaming decompression functions *************************************/ #define LZ4F_MIN_SIZE_TO_KNOW_HEADER_LENGTH 5 /*! LZ4F_headerSize() : v1.9.0+ * Provide the header size of a frame starting at `src`. * `srcSize` must be >= LZ4F_MIN_SIZE_TO_KNOW_HEADER_LENGTH, * which is enough to decode the header length. * @return : size of frame header * or an error code, which can be tested using LZ4F_isError() * note : Frame header size is variable, but is guaranteed to be * >= LZ4F_HEADER_SIZE_MIN bytes, and <= LZ4F_HEADER_SIZE_MAX bytes. */ size_t LZ4F_headerSize(const void* src, size_t srcSize); /*! LZ4F_getFrameInfo() : * This function extracts frame parameters (max blockSize, dictID, etc.). * Its usage is optional: user can call LZ4F_decompress() directly. * * Extracted information will fill an existing LZ4F_frameInfo_t structure. * This can be useful for allocation and dictionary identification purposes. * * LZ4F_getFrameInfo() can work in the following situations : * * 1) At the beginning of a new frame, before any invocation of LZ4F_decompress(). * It will decode header from `srcBuffer`, * consuming the header and starting the decoding process. * * Input size must be large enough to contain the full frame header. * Frame header size can be known beforehand by LZ4F_headerSize(). * Frame header size is variable, but is guaranteed to be >= LZ4F_HEADER_SIZE_MIN bytes, * and not more than <= LZ4F_HEADER_SIZE_MAX bytes. * Hence, blindly providing LZ4F_HEADER_SIZE_MAX bytes or more will always work. * It's allowed to provide more input data than the header size, * LZ4F_getFrameInfo() will only consume the header. * * If input size is not large enough, * aka if it's smaller than header size, * function will fail and return an error code. * * 2) After decoding has been started, * it's possible to invoke LZ4F_getFrameInfo() anytime * to extract already decoded frame parameters stored within dctx. * * Note that, if decoding has barely started, * and not yet read enough information to decode the header, * LZ4F_getFrameInfo() will fail. * * The number of bytes consumed from srcBuffer will be updated in *srcSizePtr (necessarily <= original value). * LZ4F_getFrameInfo() only consumes bytes when decoding has not yet started, * and when decoding the header has been successful. * Decompression must then resume from (srcBuffer + *srcSizePtr). * * @return : a hint about how many srcSize bytes LZ4F_decompress() expects for next call, * or an error code which can be tested using LZ4F_isError(). * note 1 : in case of error, dctx is not modified. Decoding operation can resume from beginning safely. * note 2 : frame parameters are *copied into* an already allocated LZ4F_frameInfo_t structure. */ LZ4FLIB_API size_t LZ4F_getFrameInfo(LZ4F_dctx* dctx, LZ4F_frameInfo_t* frameInfoPtr, const void* srcBuffer, size_t* srcSizePtr); /*! 
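Example (illustrative sketch) : inspecting a frame before allocating output
 *   LZ4F_frameInfo_t info; size_t consumed = srcSize;
 *   size_t const hint = LZ4F_getFrameInfo(dctx, &info, srcBuffer, &consumed);
 *   ... on success, decoding resumes from srcBuffer + consumed ...
 */
/*!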
LZ4F_decompress() : * Call this function repetitively to regenerate compressed data from `srcBuffer`. * The function will read up to *srcSizePtr bytes from srcBuffer, * and decompress data into dstBuffer, of capacity *dstSizePtr. * * The nb of bytes consumed from srcBuffer will be written into *srcSizePtr (necessarily <= original value). * The nb of bytes decompressed into dstBuffer will be written into *dstSizePtr (necessarily <= original value). * * The function does not necessarily read all input bytes, so always check value in *srcSizePtr. * Unconsumed source data must be presented again in subsequent invocations. * * `dstBuffer` can freely change between each consecutive function invocation. * `dstBuffer` content will be overwritten. * * @return : an hint of how many `srcSize` bytes LZ4F_decompress() expects for next call. * Schematically, it's the size of the current (or remaining) compressed block + header of next block. * Respecting the hint provides some small speed benefit, because it skips intermediate buffers. * This is just a hint though, it's always possible to provide any srcSize. * * When a frame is fully decoded, @return will be 0 (no more data expected). * When provided with more bytes than necessary to decode a frame, * LZ4F_decompress() will stop reading exactly at end of current frame, and @return 0. * * If decompression failed, @return is an error code, which can be tested using LZ4F_isError(). * After a decompression error, the `dctx` context is not resumable. * Use LZ4F_resetDecompressionContext() to return to clean state. * * After a frame is fully decoded, dctx can be used again to decompress another frame. */ LZ4FLIB_API size_t LZ4F_decompress(LZ4F_dctx* dctx, void* dstBuffer, size_t* dstSizePtr, const void* srcBuffer, size_t* srcSizePtr, const LZ4F_decompressOptions_t* dOptPtr); /*! LZ4F_resetDecompressionContext() : added in v1.8.0 * In case of an error, the context is left in "undefined" state. * In which case, it's necessary to reset it, before re-using it. * This method can also be used to abruptly stop any unfinished decompression, * and start a new one using same context resources. */ LZ4FLIB_API void LZ4F_resetDecompressionContext(LZ4F_dctx* dctx); /* always successful */ #if defined (__cplusplus) } #endif #endif /* LZ4F_H_09782039843 */ #if defined(LZ4F_STATIC_LINKING_ONLY) && !defined(LZ4F_H_STATIC_09782039843) #define LZ4F_H_STATIC_09782039843 #if defined (__cplusplus) extern "C" { #endif /* These declarations are not stable and may change in the future. * They are therefore only safe to depend on * when the caller is statically linked against the library. * To access their declarations, define LZ4F_STATIC_LINKING_ONLY. * * By default, these symbols aren't published into shared/dynamic libraries. * You can override this behavior and force them to be published * by defining LZ4F_PUBLISH_STATIC_FUNCTIONS. * Use at your own risk. 
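* Typical usage (sketch) :
*   #define LZ4F_STATIC_LINKING_ONLY
*   #include "lz4frame.h"
* which makes e.g. LZ4F_getErrorCode() and the LZ4F_CDict API below visible.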
*/ #ifdef LZ4F_PUBLISH_STATIC_FUNCTIONS #define LZ4FLIB_STATIC_API LZ4FLIB_API #else #define LZ4FLIB_STATIC_API #endif /* --- Error List --- */ #define LZ4F_LIST_ERRORS(ITEM) \ ITEM(OK_NoError) \ ITEM(ERROR_GENERIC) \ ITEM(ERROR_maxBlockSize_invalid) \ ITEM(ERROR_blockMode_invalid) \ ITEM(ERROR_contentChecksumFlag_invalid) \ ITEM(ERROR_compressionLevel_invalid) \ ITEM(ERROR_headerVersion_wrong) \ ITEM(ERROR_blockChecksum_invalid) \ ITEM(ERROR_reservedFlag_set) \ ITEM(ERROR_allocation_failed) \ ITEM(ERROR_srcSize_tooLarge) \ ITEM(ERROR_dstMaxSize_tooSmall) \ ITEM(ERROR_frameHeader_incomplete) \ ITEM(ERROR_frameType_unknown) \ ITEM(ERROR_frameSize_wrong) \ ITEM(ERROR_srcPtr_wrong) \ ITEM(ERROR_decompressionFailed) \ ITEM(ERROR_headerChecksum_invalid) \ ITEM(ERROR_contentChecksum_invalid) \ ITEM(ERROR_frameDecoding_alreadyStarted) \ ITEM(ERROR_maxCode) #define LZ4F_GENERATE_ENUM(ENUM) LZ4F_##ENUM, /* enum list is exposed, to handle specific errors */ typedef enum { LZ4F_LIST_ERRORS(LZ4F_GENERATE_ENUM) _LZ4F_dummy_error_enum_for_c89_never_used } LZ4F_errorCodes; LZ4FLIB_STATIC_API LZ4F_errorCodes LZ4F_getErrorCode(size_t functionResult); LZ4FLIB_STATIC_API size_t LZ4F_getBlockSize(unsigned); /********************************** * Bulk processing dictionary API *********************************/ /* A Dictionary is useful for the compression of small messages (KB range). * It dramatically improves compression efficiency. * * LZ4 can ingest any input as dictionary, though only the last 64 KB are useful. * Best results are generally achieved by using Zstandard's Dictionary Builder * to generate a high-quality dictionary from a set of samples. * * Loading a dictionary has a cost, since it involves construction of tables. * The Bulk processing dictionary API makes it possible to share this cost * over an arbitrary number of compression jobs, even concurrently, * markedly improving compression latency for these cases. * * The same dictionary will have to be used on the decompression side * for decoding to be successful. * To help identify the correct dictionary at decoding stage, * the frame header allows optional embedding of a dictID field. */ typedef struct LZ4F_CDict_s LZ4F_CDict; /*! LZ4_createCDict() : * When compressing multiple messages / blocks using the same dictionary, it's recommended to load it just once. * LZ4_createCDict() will create a digested dictionary, ready to start future compression operations without startup delay. * LZ4_CDict can be created once and shared by multiple threads concurrently, since its usage is read-only. * `dictBuffer` can be released after LZ4_CDict creation, since its content is copied within CDict */ LZ4FLIB_STATIC_API LZ4F_CDict* LZ4F_createCDict(const void* dictBuffer, size_t dictSize); LZ4FLIB_STATIC_API void LZ4F_freeCDict(LZ4F_CDict* CDict); /*! LZ4_compressFrame_usingCDict() : * Compress an entire srcBuffer into a valid LZ4 frame using a digested Dictionary. * cctx must point to a context created by LZ4F_createCompressionContext(). * If cdict==NULL, compress without a dictionary. * dstBuffer MUST be >= LZ4F_compressFrameBound(srcSize, preferencesPtr). * If this condition is not respected, function will fail (@return an errorCode). * The LZ4F_preferences_t structure is optional : you may provide NULL as argument, * but it's not recommended, as it's the only way to provide dictID in the frame header. * @return : number of bytes written into dstBuffer. 
* or an error code if it fails (can be tested using LZ4F_isError()) */ LZ4FLIB_STATIC_API size_t LZ4F_compressFrame_usingCDict( LZ4F_cctx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, const LZ4F_CDict* cdict, const LZ4F_preferences_t* preferencesPtr); /*! LZ4F_compressBegin_usingCDict() : * Inits streaming dictionary compression, and writes the frame header into dstBuffer. * dstCapacity must be >= LZ4F_HEADER_SIZE_MAX bytes. * `prefsPtr` is optional : you may provide NULL as argument, * however, it's the only way to provide dictID in the frame header. * @return : number of bytes written into dstBuffer for the header, * or an error code (which can be tested using LZ4F_isError()) */ LZ4FLIB_STATIC_API size_t LZ4F_compressBegin_usingCDict( LZ4F_cctx* cctx, void* dstBuffer, size_t dstCapacity, const LZ4F_CDict* cdict, const LZ4F_preferences_t* prefsPtr); /*! LZ4F_decompress_usingDict() : * Same as LZ4F_decompress(), using a predefined dictionary. * Dictionary is used "in place", without any preprocessing. * It must remain accessible throughout the entire frame decoding. */ LZ4FLIB_STATIC_API size_t LZ4F_decompress_usingDict( LZ4F_dctx* dctxPtr, void* dstBuffer, size_t* dstSizePtr, const void* srcBuffer, size_t* srcSizePtr, const void* dict, size_t dictSize, const LZ4F_decompressOptions_t* decompressOptionsPtr); #if defined (__cplusplus) } #endif #endif /* defined(LZ4F_STATIC_LINKING_ONLY) && !defined(LZ4F_H_STATIC_09782039843) */ py-lz4framed-0.14.0/lz4/lz4frame_static.h000066400000000000000000000037741357043434000201050ustar00rootroot00000000000000/* LZ4 auto-framing library Header File for static linking only Copyright (C) 2011-2016, Yann Collet. BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. You can contact the author at : - LZ4 source repository : https://github.com/lz4/lz4 - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c */ #ifndef LZ4FRAME_STATIC_H_0398209384 #define LZ4FRAME_STATIC_H_0398209384 /* The declarations that formerly were made here have been merged into * lz4frame.h, protected by the LZ4F_STATIC_LINKING_ONLY macro. Going forward, * it is recommended to simply include that header directly. 
*/ #define LZ4F_STATIC_LINKING_ONLY #include "lz4frame.h" #endif /* LZ4FRAME_STATIC_H_0398209384 */ py-lz4framed-0.14.0/lz4/lz4hc.c000066400000000000000000002015211357043434000160170ustar00rootroot00000000000000/* LZ4 HC - High Compression Mode of LZ4 Copyright (C) 2011-2017, Yann Collet. BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. You can contact the author at : - LZ4 source repository : https://github.com/lz4/lz4 - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c */ /* note : lz4hc is not an independent module, it requires lz4.h/lz4.c for proper compilation */ /* ************************************* * Tuning Parameter ***************************************/ /*! HEAPMODE : * Select how default compression function will allocate workplace memory, * in stack (0:fastest), or in heap (1:requires malloc()). * Since workplace is rather large, heap mode is recommended. */ #ifndef LZ4HC_HEAPMODE # define LZ4HC_HEAPMODE 1 #endif /*=== Dependency ===*/ #define LZ4_HC_STATIC_LINKING_ONLY #include "lz4hc.h" /*=== Common LZ4 definitions ===*/ #if defined(__GNUC__) # pragma GCC diagnostic ignored "-Wunused-function" #endif #if defined (__clang__) # pragma clang diagnostic ignored "-Wunused-function" #endif /*=== Enums ===*/ typedef enum { noDictCtx, usingDictCtxHc } dictCtx_directive; #define LZ4_COMMONDEFS_ONLY #ifndef LZ4_SRC_INCLUDED #include "lz4.c" /* LZ4_count, constants, mem */ #endif /*=== Constants ===*/ #define OPTIMAL_ML (int)((ML_MASK-1)+MINMATCH) #define LZ4_OPT_NUM (1<<12) /*=== Macros ===*/ #define MIN(a,b) ( (a) < (b) ? (a) : (b) ) #define MAX(a,b) ( (a) > (b) ? 
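/* (build-time note, sketch : stack allocation for the default entry points can be selected with e.g. -DLZ4HC_HEAPMODE=0) */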
(a) : (b) ) #define HASH_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH*8)-LZ4HC_HASH_LOG)) #define DELTANEXTMAXD(p) chainTable[(p) & LZ4HC_MAXD_MASK] /* flexible, LZ4HC_MAXD dependent */ #define DELTANEXTU16(table, pos) table[(U16)(pos)] /* faster */ /* Make fields passed to, and updated by LZ4HC_encodeSequence explicit */ #define UPDATABLE(ip, op, anchor) &ip, &op, &anchor static U32 LZ4HC_hashPtr(const void* ptr) { return HASH_FUNCTION(LZ4_read32(ptr)); } /************************************** * HC Compression **************************************/ static void LZ4HC_clearTables (LZ4HC_CCtx_internal* hc4) { MEM_INIT((void*)hc4->hashTable, 0, sizeof(hc4->hashTable)); MEM_INIT(hc4->chainTable, 0xFF, sizeof(hc4->chainTable)); } static void LZ4HC_init_internal (LZ4HC_CCtx_internal* hc4, const BYTE* start) { uptrval startingOffset = (uptrval)(hc4->end - hc4->base); if (startingOffset > 1 GB) { LZ4HC_clearTables(hc4); startingOffset = 0; } startingOffset += 64 KB; hc4->nextToUpdate = (U32) startingOffset; hc4->base = start - startingOffset; hc4->end = start; hc4->dictBase = start - startingOffset; hc4->dictLimit = (U32) startingOffset; hc4->lowLimit = (U32) startingOffset; } /* Update chains up to ip (excluded) */ LZ4_FORCE_INLINE void LZ4HC_Insert (LZ4HC_CCtx_internal* hc4, const BYTE* ip) { U16* const chainTable = hc4->chainTable; U32* const hashTable = hc4->hashTable; const BYTE* const base = hc4->base; U32 const target = (U32)(ip - base); U32 idx = hc4->nextToUpdate; while (idx < target) { U32 const h = LZ4HC_hashPtr(base+idx); size_t delta = idx - hashTable[h]; if (delta>LZ4_DISTANCE_MAX) delta = LZ4_DISTANCE_MAX; DELTANEXTU16(chainTable, idx) = (U16)delta; hashTable[h] = idx; idx++; } hc4->nextToUpdate = target; } /** LZ4HC_countBack() : * @return : negative value, nb of common bytes before ip/match */ LZ4_FORCE_INLINE int LZ4HC_countBack(const BYTE* const ip, const BYTE* const match, const BYTE* const iMin, const BYTE* const mMin) { int back = 0; int const min = (int)MAX(iMin - ip, mMin - match); assert(min <= 0); assert(ip >= iMin); assert((size_t)(ip-iMin) < (1U<<31)); assert(match >= mMin); assert((size_t)(match - mMin) < (1U<<31)); while ( (back > min) && (ip[back-1] == match[back-1]) ) back--; return back; } #if defined(_MSC_VER) # define LZ4HC_rotl32(x,r) _rotl(x,r) #else # define LZ4HC_rotl32(x,r) ((x << r) | (x >> (32 - r))) #endif static U32 LZ4HC_rotatePattern(size_t const rotate, U32 const pattern) { size_t const bitsToRotate = (rotate & (sizeof(pattern) - 1)) << 3; if (bitsToRotate == 0) return pattern; return LZ4HC_rotl32(pattern, (int)bitsToRotate); } /* LZ4HC_countPattern() : * pattern32 must be a sample of repetitive pattern of length 1, 2 or 4 (but not 3!) */ static unsigned LZ4HC_countPattern(const BYTE* ip, const BYTE* const iEnd, U32 const pattern32) { const BYTE* const iStart = ip; reg_t const pattern = (sizeof(pattern)==8) ? 
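/* on 64-bit targets, duplicate the 32-bit sample into both halves of the register,
 * so each whole-register compare below tests two pattern repetitions at once */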
(reg_t)pattern32 + (((reg_t)pattern32) << 32) : pattern32; while (likely(ip < iEnd-(sizeof(pattern)-1))) { reg_t const diff = LZ4_read_ARCH(ip) ^ pattern; if (!diff) { ip+=sizeof(pattern); continue; } ip += LZ4_NbCommonBytes(diff); return (unsigned)(ip - iStart); } if (LZ4_isLittleEndian()) { reg_t patternByte = pattern; while ((ip<iEnd) && (*ip == (BYTE)patternByte)) { ip++; patternByte >>= 8; } } else { /* big endian */ U32 bitOffset = (sizeof(pattern)*8) - 8; while (ip < iEnd) { BYTE const byte = (BYTE)(pattern >> bitOffset); if (*ip != byte) break; ip ++; bitOffset -= 8; } } return (unsigned)(ip - iStart); } /* LZ4HC_reverseCountPattern() : * pattern must be a sample of repetitive pattern of length 1, 2 or 4 (but not 3!) * read using natural platform endianness */ static unsigned LZ4HC_reverseCountPattern(const BYTE* ip, const BYTE* const iLow, U32 pattern) { const BYTE* const iStart = ip; while (likely(ip >= iLow+4)) { if (LZ4_read32(ip-4) != pattern) break; ip -= 4; } { const BYTE* bytePtr = (const BYTE*)(&pattern) + 3; /* works for any endianness */ while (likely(ip>iLow)) { if (ip[-1] != *bytePtr) break; ip--; bytePtr--; } } return (unsigned)(iStart - ip); } /* LZ4HC_protectDictEnd() : * Checks if the match is in the last 3 bytes of the dictionary, so reading the * 4 byte MINMATCH would overflow. * @returns true if the match index is okay. */ static int LZ4HC_protectDictEnd(U32 const dictLimit, U32 const matchIndex) { return ((U32)((dictLimit - 1) - matchIndex) >= 3); } typedef enum { rep_untested, rep_not, rep_confirmed } repeat_state_e; typedef enum { favorCompressionRatio=0, favorDecompressionSpeed } HCfavor_e; LZ4_FORCE_INLINE int LZ4HC_InsertAndGetWiderMatch ( LZ4HC_CCtx_internal* hc4, const BYTE* const ip, const BYTE* const iLowLimit, const BYTE* const iHighLimit, int longest, const BYTE** matchpos, const BYTE** startpos, const int maxNbAttempts, const int patternAnalysis, const int chainSwap, const dictCtx_directive dict, const HCfavor_e favorDecSpeed) { U16* const chainTable = hc4->chainTable; U32* const HashTable = hc4->hashTable; const LZ4HC_CCtx_internal * const dictCtx = hc4->dictCtx; const BYTE* const base = hc4->base; const U32 dictLimit = hc4->dictLimit; const BYTE* const lowPrefixPtr = base + dictLimit; const U32 ipIndex = (U32)(ip - base); const U32 lowestMatchIndex = (hc4->lowLimit + (LZ4_DISTANCE_MAX + 1) > ipIndex) ? hc4->lowLimit : ipIndex - LZ4_DISTANCE_MAX; const BYTE* const dictBase = hc4->dictBase; int const lookBackLength = (int)(ip-iLowLimit); int nbAttempts = maxNbAttempts; U32 matchChainPos = 0; U32 const pattern = LZ4_read32(ip); U32 matchIndex; repeat_state_e repeat = rep_untested; size_t srcPatternLength = 0; DEBUGLOG(7, "LZ4HC_InsertAndGetWiderMatch"); /* First Match */ LZ4HC_Insert(hc4, ip); matchIndex = HashTable[LZ4HC_hashPtr(ip)]; DEBUGLOG(7, "First match at index %u / %u (lowestMatchIndex)", matchIndex, lowestMatchIndex); while ((matchIndex>=lowestMatchIndex) && (nbAttempts)) { int matchLength=0; nbAttempts--; assert(matchIndex < ipIndex); if (favorDecSpeed && (ipIndex - matchIndex < 8)) { /* do nothing */ } else if (matchIndex >= dictLimit) { /* within current Prefix */ const BYTE* const matchPtr = base + matchIndex; assert(matchPtr >= lowPrefixPtr); assert(matchPtr < ip); assert(longest >= 1); if (LZ4_read16(iLowLimit + longest - 1) == LZ4_read16(matchPtr - lookBackLength + longest - 1)) { if (LZ4_read32(matchPtr) == pattern) { int const back = lookBackLength ?
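/* try to extend the match backwards over pending literals, never past iLowLimit nor the prefix start */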
LZ4HC_countBack(ip, matchPtr, iLowLimit, lowPrefixPtr) : 0; matchLength = MINMATCH + (int)LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, iHighLimit); matchLength -= back; if (matchLength > longest) { longest = matchLength; *matchpos = matchPtr + back; *startpos = ip + back; } } } } else { /* lowestMatchIndex <= matchIndex < dictLimit */ const BYTE* const matchPtr = dictBase + matchIndex; if (LZ4_read32(matchPtr) == pattern) { const BYTE* const dictStart = dictBase + hc4->lowLimit; int back = 0; const BYTE* vLimit = ip + (dictLimit - matchIndex); if (vLimit > iHighLimit) vLimit = iHighLimit; matchLength = (int)LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, vLimit) + MINMATCH; if ((ip+matchLength == vLimit) && (vLimit < iHighLimit)) matchLength += LZ4_count(ip+matchLength, lowPrefixPtr, iHighLimit); back = lookBackLength ? LZ4HC_countBack(ip, matchPtr, iLowLimit, dictStart) : 0; matchLength -= back; if (matchLength > longest) { longest = matchLength; *matchpos = base + matchIndex + back; /* virtual pos, relative to ip, to retrieve offset */ *startpos = ip + back; } } } if (chainSwap && matchLength==longest) { /* better match => select a better chain */ assert(lookBackLength==0); /* search forward only */ if (matchIndex + (U32)longest <= ipIndex) { int const kTrigger = 4; U32 distanceToNextMatch = 1; int const end = longest - MINMATCH + 1; int step = 1; int accel = 1 << kTrigger; int pos; for (pos = 0; pos < end; pos += step) { U32 const candidateDist = DELTANEXTU16(chainTable, matchIndex + (U32)pos); step = (accel++ >> kTrigger); if (candidateDist > distanceToNextMatch) { distanceToNextMatch = candidateDist; matchChainPos = (U32)pos; accel = 1 << kTrigger; } } if (distanceToNextMatch > 1) { if (distanceToNextMatch > matchIndex) break; /* avoid overflow */ matchIndex -= distanceToNextMatch; continue; } } } { U32 const distNextMatch = DELTANEXTU16(chainTable, matchIndex); if (patternAnalysis && distNextMatch==1 && matchChainPos==0) { U32 const matchCandidateIdx = matchIndex-1; /* may be a repeated pattern */ if (repeat == rep_untested) { if ( ((pattern & 0xFFFF) == (pattern >> 16)) & ((pattern & 0xFF) == (pattern >> 24)) ) { repeat = rep_confirmed; srcPatternLength = LZ4HC_countPattern(ip+sizeof(pattern), iHighLimit, pattern) + sizeof(pattern); } else { repeat = rep_not; } } if ( (repeat == rep_confirmed) && (matchCandidateIdx >= lowestMatchIndex) && LZ4HC_protectDictEnd(dictLimit, matchCandidateIdx) ) { const int extDict = matchCandidateIdx < dictLimit; const BYTE* const matchPtr = (extDict ? dictBase : base) + matchCandidateIdx; if (LZ4_read32(matchPtr) == pattern) { /* good candidate */ const BYTE* const dictStart = dictBase + hc4->lowLimit; const BYTE* const iLimit = extDict ? dictBase + dictLimit : iHighLimit; size_t forwardPatternLength = LZ4HC_countPattern(matchPtr+sizeof(pattern), iLimit, pattern) + sizeof(pattern); if (extDict && matchPtr + forwardPatternLength == iLimit) { U32 const rotatedPattern = LZ4HC_rotatePattern(forwardPatternLength, pattern); forwardPatternLength += LZ4HC_countPattern(lowPrefixPtr, iHighLimit, rotatedPattern); } { const BYTE* const lowestMatchPtr = extDict ? 
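/* the backward pattern scan must stop at the start of the ext dictionary, or of the prefix, respectively */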
dictStart : lowPrefixPtr; size_t backLength = LZ4HC_reverseCountPattern(matchPtr, lowestMatchPtr, pattern); size_t currentSegmentLength; if (!extDict && matchPtr - backLength == lowPrefixPtr && hc4->lowLimit < dictLimit) { U32 const rotatedPattern = LZ4HC_rotatePattern((U32)(-(int)backLength), pattern); backLength += LZ4HC_reverseCountPattern(dictBase + dictLimit, dictStart, rotatedPattern); } /* Limit backLength not go further than lowestMatchIndex */ backLength = matchCandidateIdx - MAX(matchCandidateIdx - (U32)backLength, lowestMatchIndex); assert(matchCandidateIdx - backLength >= lowestMatchIndex); currentSegmentLength = backLength + forwardPatternLength; /* Adjust to end of pattern if the source pattern fits, otherwise the beginning of the pattern */ if ( (currentSegmentLength >= srcPatternLength) /* current pattern segment large enough to contain full srcPatternLength */ && (forwardPatternLength <= srcPatternLength) ) { /* haven't reached this position yet */ U32 const newMatchIndex = matchCandidateIdx + (U32)forwardPatternLength - (U32)srcPatternLength; /* best position, full pattern, might be followed by more match */ if (LZ4HC_protectDictEnd(dictLimit, newMatchIndex)) matchIndex = newMatchIndex; else { /* Can only happen if started in the prefix */ assert(newMatchIndex >= dictLimit - 3 && newMatchIndex < dictLimit && !extDict); matchIndex = dictLimit; } } else { U32 const newMatchIndex = matchCandidateIdx - (U32)backLength; /* farthest position in current segment, will find a match of length currentSegmentLength + maybe some back */ if (!LZ4HC_protectDictEnd(dictLimit, newMatchIndex)) { assert(newMatchIndex >= dictLimit - 3 && newMatchIndex < dictLimit && !extDict); matchIndex = dictLimit; } else { matchIndex = newMatchIndex; if (lookBackLength==0) { /* no back possible */ size_t const maxML = MIN(currentSegmentLength, srcPatternLength); if ((size_t)longest < maxML) { assert(base + matchIndex < ip); if (ip - (base+matchIndex) > LZ4_DISTANCE_MAX) break; assert(maxML < 2 GB); longest = (int)maxML; *matchpos = base + matchIndex; /* virtual pos, relative to ip, to retrieve offset */ *startpos = ip; } { U32 const distToNextPattern = DELTANEXTU16(chainTable, matchIndex); if (distToNextPattern > matchIndex) break; /* avoid overflow */ matchIndex -= distToNextPattern; } } } } } continue; } } } } /* PA optimization */ /* follow current chain */ matchIndex -= DELTANEXTU16(chainTable, matchIndex + matchChainPos); } /* while ((matchIndex>=lowestMatchIndex) && (nbAttempts)) */ if ( dict == usingDictCtxHc && nbAttempts && ipIndex - lowestMatchIndex < LZ4_DISTANCE_MAX) { size_t const dictEndOffset = (size_t)(dictCtx->end - dictCtx->base); U32 dictMatchIndex = dictCtx->hashTable[LZ4HC_hashPtr(ip)]; assert(dictEndOffset <= 1 GB); matchIndex = dictMatchIndex + lowestMatchIndex - (U32)dictEndOffset; while (ipIndex - matchIndex <= LZ4_DISTANCE_MAX && nbAttempts--) { const BYTE* const matchPtr = dictCtx->base + dictMatchIndex; if (LZ4_read32(matchPtr) == pattern) { int mlt; int back = 0; const BYTE* vLimit = ip + (dictEndOffset - dictMatchIndex); if (vLimit > iHighLimit) vLimit = iHighLimit; mlt = (int)LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, vLimit) + MINMATCH; back = lookBackLength ? 
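/* backward extension against the dictionary context is bounded by its prefix start */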
LZ4HC_countBack(ip, matchPtr, iLowLimit, dictCtx->base + dictCtx->dictLimit) : 0; mlt -= back; if (mlt > longest) { longest = mlt; *matchpos = base + matchIndex + back; *startpos = ip + back; } } { U32 const nextOffset = DELTANEXTU16(dictCtx->chainTable, dictMatchIndex); dictMatchIndex -= nextOffset; matchIndex -= nextOffset; } } } return longest; } LZ4_FORCE_INLINE int LZ4HC_InsertAndFindBestMatch(LZ4HC_CCtx_internal* const hc4, /* Index table will be updated */ const BYTE* const ip, const BYTE* const iLimit, const BYTE** matchpos, const int maxNbAttempts, const int patternAnalysis, const dictCtx_directive dict) { const BYTE* uselessPtr = ip; /* note : LZ4HC_InsertAndGetWiderMatch() is able to modify the starting position of a match (*startpos), * but this won't be the case here, as we define iLowLimit==ip, * so LZ4HC_InsertAndGetWiderMatch() won't be allowed to search past ip */ return LZ4HC_InsertAndGetWiderMatch(hc4, ip, ip, iLimit, MINMATCH-1, matchpos, &uselessPtr, maxNbAttempts, patternAnalysis, 0 /*chainSwap*/, dict, favorCompressionRatio); } /* LZ4HC_encodeSequence() : * @return : 0 if ok, * 1 if buffer issue detected */ LZ4_FORCE_INLINE int LZ4HC_encodeSequence ( const BYTE** ip, BYTE** op, const BYTE** anchor, int matchLength, const BYTE* const match, limitedOutput_directive limit, BYTE* oend) { size_t length; BYTE* const token = (*op)++; #if defined(LZ4_DEBUG) && (LZ4_DEBUG >= 6) static const BYTE* start = NULL; static U32 totalCost = 0; U32 const pos = (start==NULL) ? 0 : (U32)(*anchor - start); U32 const ll = (U32)(*ip - *anchor); U32 const llAdd = (ll>=15) ? ((ll-15) / 255) + 1 : 0; U32 const mlAdd = (matchLength>=19) ? ((matchLength-19) / 255) + 1 : 0; U32 const cost = 1 + llAdd + ll + 2 + mlAdd; if (start==NULL) start = *anchor; /* only works for single segment */ /* g_debuglog_enable = (pos >= 2228) & (pos <= 2262); */ DEBUGLOG(6, "pos:%7u -- literals:%3u, match:%4i, offset:%5u, cost:%3u + %u", pos, (U32)(*ip - *anchor), matchLength, (U32)(*ip-match), cost, totalCost); totalCost += cost; #endif /* Encode Literal length */ length = (size_t)(*ip - *anchor); if ((limit) && ((*op + (length / 255) + length + (2 + 1 + LASTLITERALS)) > oend)) return 1; /* Check output limit */ if (length >= RUN_MASK) { size_t len = length - RUN_MASK; *token = (RUN_MASK << ML_BITS); for(; len >= 255 ; len -= 255) *(*op)++ = 255; *(*op)++ = (BYTE)len; } else { *token = (BYTE)(length << ML_BITS); } /* Copy Literals */ LZ4_wildCopy8(*op, *anchor, (*op) + length); *op += length; /* Encode Offset */ assert( (*ip - match) <= LZ4_DISTANCE_MAX ); /* note : consider providing offset as a value, rather than as a pointer difference */ LZ4_writeLE16(*op, (U16)(*ip-match)); *op += 2; /* Encode MatchLength */ assert(matchLength >= MINMATCH); length = (size_t)matchLength - MINMATCH; if ((limit) && (*op + (length / 255) + (1 + LASTLITERALS) > oend)) return 1; /* Check output limit */ if (length >= ML_MASK) { *token += ML_MASK; length -= ML_MASK; for(; length >= 510 ; length -= 510) { *(*op)++ = 255; *(*op)++ = 255; } if (length >= 255) { length -= 255; *(*op)++ = 255; } *(*op)++ = (BYTE)length; } else { *token += (BYTE)(length); } /* Prepare next loop */ *ip += matchLength; *anchor = *ip; return 0; } LZ4_FORCE_INLINE int LZ4HC_compress_hashChain ( LZ4HC_CCtx_internal* const ctx, const char* const source, char* const dest, int* srcSizePtr, int const maxOutputSize, unsigned maxNbAttempts, const limitedOutput_directive limit, const dictCtx_directive dict ) { const int inputSize = *srcSizePtr; const int 
patternAnalysis = (maxNbAttempts > 128); /* levels 9+ */ const BYTE* ip = (const BYTE*) source; const BYTE* anchor = ip; const BYTE* const iend = ip + inputSize; const BYTE* const mflimit = iend - MFLIMIT; const BYTE* const matchlimit = (iend - LASTLITERALS); BYTE* optr = (BYTE*) dest; BYTE* op = (BYTE*) dest; BYTE* oend = op + maxOutputSize; int ml0, ml, ml2, ml3; const BYTE* start0; const BYTE* ref0; const BYTE* ref = NULL; const BYTE* start2 = NULL; const BYTE* ref2 = NULL; const BYTE* start3 = NULL; const BYTE* ref3 = NULL; /* init */ *srcSizePtr = 0; if (limit == fillOutput) oend -= LASTLITERALS; /* Hack to support the LZ4 format restriction */ if (inputSize < LZ4_minLength) goto _last_literals; /* Input too small, no compression (all literals) */ /* Main Loop */ while (ip <= mflimit) { ml = LZ4HC_InsertAndFindBestMatch(ctx, ip, matchlimit, &ref, maxNbAttempts, patternAnalysis, dict); if (ml<MINMATCH) { ip++; continue; } /* saved, in case we would skip too much */ start0 = ip; ref0 = ref; ml0 = ml; _Search2: if (ip+ml <= mflimit) { ml2 = LZ4HC_InsertAndGetWiderMatch(ctx, ip + ml - 2, ip + 0, matchlimit, ml, &ref2, &start2, maxNbAttempts, patternAnalysis, 0, dict, favorCompressionRatio); } else { ml2 = ml; } if (ml2 == ml) { /* No better match => encode ML1 */ optr = op; if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, ref, limit, oend)) goto _dest_overflow; continue; } if (start0 < ip) { /* first match was skipped at least once */ if (start2 < ip + ml0) { /* squeezing ML1 between ML0(original ML1) and ML2 */ ip = start0; ref = ref0; ml = ml0; /* restore initial ML1 */ } } /* Here, start0==ip */ if ((start2 - ip) < 3) { /* First Match too small : removed */ ml = ml2; ip = start2; ref = ref2; goto _Search2; } _Search3: /* At this stage, we have : * ml2 > ml1, and * ip1+3 <= ip2 (usually < ip1+ml1) */ if ((start2 - ip) < OPTIMAL_ML) { int correction; int new_ml = ml; if (new_ml > OPTIMAL_ML) new_ml = OPTIMAL_ML; if (ip+new_ml > start2 + ml2 - MINMATCH) new_ml = (int)(start2 - ip) + ml2 - MINMATCH; correction = new_ml - (int)(start2 - ip); if (correction > 0) { start2 += correction; ref2 += correction; ml2 -= correction; } } /* Now, we have start2 = ip+new_ml, with new_ml = min(ml, OPTIMAL_ML=18) */ if (start2 + ml2 <= mflimit) { ml3 = LZ4HC_InsertAndGetWiderMatch(ctx, start2 + ml2 - 3, start2, matchlimit, ml2, &ref3, &start3, maxNbAttempts, patternAnalysis, 0, dict, favorCompressionRatio); } else { ml3 = ml2; } if (ml3 == ml2) { /* No better match => encode ML1 and ML2 */ /* ip & ref are known; Now for ml */ if (start2 < ip+ml) ml = (int)(start2 - ip); /* Now, encode 2 sequences */ optr = op; if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, ref, limit, oend)) goto _dest_overflow; ip = start2; optr = op; if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml2, ref2, limit, oend)) goto _dest_overflow; continue; } if (start3 < ip+ml+3) { /* Not enough space for match 2 : remove it */ if (start3 >= (ip+ml)) { /* can write Seq1 immediately ==> Seq2 is removed, so Seq3 becomes Seq1 */ if (start2 < ip+ml) { int correction = (int)(ip+ml - start2); start2 += correction; ref2 += correction; ml2 -= correction; if (ml2 < MINMATCH) { start2 = start3; ref2 = ref3; ml2 = ml3; } } optr = op; if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, ref, limit, oend)) goto _dest_overflow; ip = start3; ref = ref3; ml = ml3; start0 = start2; ref0 = ref2; ml0 = ml2; goto _Search2; } start2 = start3; ref2 = ref3; ml2 = ml3; goto _Search3; } /* * OK, now we have 3 ascending matches; * let's write the first one ML1. * ip & ref are known; Now decide ml.
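* (i.e. trim ML1 so that it ends where ML2 starts, while leaving at least MINMATCH bytes to ML2)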
*/ if (start2 < ip+ml) { if ((start2 - ip) < OPTIMAL_ML) { int correction; if (ml > OPTIMAL_ML) ml = OPTIMAL_ML; if (ip + ml > start2 + ml2 - MINMATCH) ml = (int)(start2 - ip) + ml2 - MINMATCH; correction = ml - (int)(start2 - ip); if (correction > 0) { start2 += correction; ref2 += correction; ml2 -= correction; } } else { ml = (int)(start2 - ip); } } optr = op; if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, ref, limit, oend)) goto _dest_overflow; /* ML2 becomes ML1 */ ip = start2; ref = ref2; ml = ml2; /* ML3 becomes ML2 */ start2 = start3; ref2 = ref3; ml2 = ml3; /* let's find a new ML3 */ goto _Search3; } _last_literals: /* Encode Last Literals */ { size_t lastRunSize = (size_t)(iend - anchor); /* literals */ size_t litLength = (lastRunSize + 255 - RUN_MASK) / 255; size_t const totalSize = 1 + litLength + lastRunSize; if (limit == fillOutput) oend += LASTLITERALS; /* restore correct value */ if (limit && (op + totalSize > oend)) { if (limit == limitedOutput) return 0; /* Check output limit */ /* adapt lastRunSize to fill 'dest' */ lastRunSize = (size_t)(oend - op) - 1; litLength = (lastRunSize + 255 - RUN_MASK) / 255; lastRunSize -= litLength; } ip = anchor + lastRunSize; if (lastRunSize >= RUN_MASK) { size_t accumulator = lastRunSize - RUN_MASK; *op++ = (RUN_MASK << ML_BITS); for(; accumulator >= 255 ; accumulator -= 255) *op++ = 255; *op++ = (BYTE) accumulator; } else { *op++ = (BYTE)(lastRunSize << ML_BITS); } memcpy(op, anchor, lastRunSize); op += lastRunSize; } /* End */ *srcSizePtr = (int) (((const char*)ip) - source); return (int) (((char*)op)-dest); _dest_overflow: if (limit == fillOutput) { op = optr; /* restore correct out pointer */ goto _last_literals; } return 0; } static int LZ4HC_compress_optimal( LZ4HC_CCtx_internal* ctx, const char* const source, char* dst, int* srcSizePtr, int dstCapacity, int const nbSearches, size_t sufficient_len, const limitedOutput_directive limit, int const fullUpdate, const dictCtx_directive dict, HCfavor_e favorDecSpeed); LZ4_FORCE_INLINE int LZ4HC_compress_generic_internal ( LZ4HC_CCtx_internal* const ctx, const char* const src, char* const dst, int* const srcSizePtr, int const dstCapacity, int cLevel, const limitedOutput_directive limit, const dictCtx_directive dict ) { typedef enum { lz4hc, lz4opt } lz4hc_strat_e; typedef struct { lz4hc_strat_e strat; U32 nbSearches; U32 targetLength; } cParams_t; static const cParams_t clTable[LZ4HC_CLEVEL_MAX+1] = { { lz4hc, 2, 16 }, /* 0, unused */ { lz4hc, 2, 16 }, /* 1, unused */ { lz4hc, 2, 16 }, /* 2, unused */ { lz4hc, 4, 16 }, /* 3 */ { lz4hc, 8, 16 }, /* 4 */ { lz4hc, 16, 16 }, /* 5 */ { lz4hc, 32, 16 }, /* 6 */ { lz4hc, 64, 16 }, /* 7 */ { lz4hc, 128, 16 }, /* 8 */ { lz4hc, 256, 16 }, /* 9 */ { lz4opt, 96, 64 }, /*10==LZ4HC_CLEVEL_OPT_MIN*/ { lz4opt, 512,128 }, /*11 */ { lz4opt,16384,LZ4_OPT_NUM }, /* 12==LZ4HC_CLEVEL_MAX */ }; DEBUGLOG(4, "LZ4HC_compress_generic(ctx=%p, src=%p, srcSize=%d)", ctx, src, *srcSizePtr); if (limit == fillOutput && dstCapacity < 1) return 0; /* Impossible to store anything */ if ((U32)*srcSizePtr > (U32)LZ4_MAX_INPUT_SIZE) return 0; /* Unsupported input size (too large or negative) */ ctx->end += *srcSizePtr; if (cLevel < 1) cLevel = LZ4HC_CLEVEL_DEFAULT; /* note : convention is different from lz4frame, maybe something to review */ cLevel = MIN(LZ4HC_CLEVEL_MAX, cLevel); { cParams_t const cParam = clTable[cLevel]; HCfavor_e const favor = ctx->favorDecSpeed ? 
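/* favorDecSpeed is only honored by the lz4opt strategy (levels >= LZ4HC_CLEVEL_OPT_MIN in the table above) */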
favorDecompressionSpeed : favorCompressionRatio; int result; if (cParam.strat == lz4hc) { result = LZ4HC_compress_hashChain(ctx, src, dst, srcSizePtr, dstCapacity, cParam.nbSearches, limit, dict); } else { assert(cParam.strat == lz4opt); result = LZ4HC_compress_optimal(ctx, src, dst, srcSizePtr, dstCapacity, (int)cParam.nbSearches, cParam.targetLength, limit, cLevel == LZ4HC_CLEVEL_MAX, /* ultra mode */ dict, favor); } if (result <= 0) ctx->dirty = 1; return result; } } static void LZ4HC_setExternalDict(LZ4HC_CCtx_internal* ctxPtr, const BYTE* newBlock); static int LZ4HC_compress_generic_noDictCtx ( LZ4HC_CCtx_internal* const ctx, const char* const src, char* const dst, int* const srcSizePtr, int const dstCapacity, int cLevel, limitedOutput_directive limit ) { assert(ctx->dictCtx == NULL); return LZ4HC_compress_generic_internal(ctx, src, dst, srcSizePtr, dstCapacity, cLevel, limit, noDictCtx); } static int LZ4HC_compress_generic_dictCtx ( LZ4HC_CCtx_internal* const ctx, const char* const src, char* const dst, int* const srcSizePtr, int const dstCapacity, int cLevel, limitedOutput_directive limit ) { const size_t position = (size_t)(ctx->end - ctx->base) - ctx->lowLimit; assert(ctx->dictCtx != NULL); if (position >= 64 KB) { ctx->dictCtx = NULL; return LZ4HC_compress_generic_noDictCtx(ctx, src, dst, srcSizePtr, dstCapacity, cLevel, limit); } else if (position == 0 && *srcSizePtr > 4 KB) { memcpy(ctx, ctx->dictCtx, sizeof(LZ4HC_CCtx_internal)); LZ4HC_setExternalDict(ctx, (const BYTE *)src); ctx->compressionLevel = (short)cLevel; return LZ4HC_compress_generic_noDictCtx(ctx, src, dst, srcSizePtr, dstCapacity, cLevel, limit); } else { return LZ4HC_compress_generic_internal(ctx, src, dst, srcSizePtr, dstCapacity, cLevel, limit, usingDictCtxHc); } } static int LZ4HC_compress_generic ( LZ4HC_CCtx_internal* const ctx, const char* const src, char* const dst, int* const srcSizePtr, int const dstCapacity, int cLevel, limitedOutput_directive limit ) { if (ctx->dictCtx == NULL) { return LZ4HC_compress_generic_noDictCtx(ctx, src, dst, srcSizePtr, dstCapacity, cLevel, limit); } else { return LZ4HC_compress_generic_dictCtx(ctx, src, dst, srcSizePtr, dstCapacity, cLevel, limit); } } int LZ4_sizeofStateHC(void) { return (int)sizeof(LZ4_streamHC_t); } #ifndef _MSC_VER /* for some reason, Visual fails the aligment test on 32-bit x86 : * it reports an aligment of 8-bytes, * while actually aligning LZ4_streamHC_t on 4 bytes. */ static size_t LZ4_streamHC_t_alignment(void) { struct { char c; LZ4_streamHC_t t; } t_a; return sizeof(t_a) - sizeof(t_a.t); } #endif /* state is presumed correctly initialized, * in which case its size and alignment have already been validate */ int LZ4_compress_HC_extStateHC_fastReset (void* state, const char* src, char* dst, int srcSize, int dstCapacity, int compressionLevel) { LZ4HC_CCtx_internal* const ctx = &((LZ4_streamHC_t*)state)->internal_donotuse; #ifndef _MSC_VER /* for some reason, Visual fails the aligment test on 32-bit x86 : * it reports an aligment of 8-bytes, * while actually aligning LZ4_streamHC_t on 4 bytes. 
*/ assert(((size_t)state & (LZ4_streamHC_t_alignment() - 1)) == 0); /* check alignment */ #endif if (((size_t)(state)&(sizeof(void*)-1)) != 0) return 0; /* Error : state is not aligned for pointers (32 or 64 bits) */ LZ4_resetStreamHC_fast((LZ4_streamHC_t*)state, compressionLevel); LZ4HC_init_internal (ctx, (const BYTE*)src); if (dstCapacity < LZ4_compressBound(srcSize)) return LZ4HC_compress_generic (ctx, src, dst, &srcSize, dstCapacity, compressionLevel, limitedOutput); else return LZ4HC_compress_generic (ctx, src, dst, &srcSize, dstCapacity, compressionLevel, notLimited); } int LZ4_compress_HC_extStateHC (void* state, const char* src, char* dst, int srcSize, int dstCapacity, int compressionLevel) { LZ4_streamHC_t* const ctx = LZ4_initStreamHC(state, sizeof(*ctx)); if (ctx==NULL) return 0; /* init failure */ return LZ4_compress_HC_extStateHC_fastReset(state, src, dst, srcSize, dstCapacity, compressionLevel); } int LZ4_compress_HC(const char* src, char* dst, int srcSize, int dstCapacity, int compressionLevel) { #if defined(LZ4HC_HEAPMODE) && LZ4HC_HEAPMODE==1 LZ4_streamHC_t* const statePtr = (LZ4_streamHC_t*)ALLOC(sizeof(LZ4_streamHC_t)); #else LZ4_streamHC_t state; LZ4_streamHC_t* const statePtr = &state; #endif int const cSize = LZ4_compress_HC_extStateHC(statePtr, src, dst, srcSize, dstCapacity, compressionLevel); #if defined(LZ4HC_HEAPMODE) && LZ4HC_HEAPMODE==1 FREEMEM(statePtr); #endif return cSize; } /* state is presumed sized correctly (>= sizeof(LZ4_streamHC_t)) */ int LZ4_compress_HC_destSize(void* state, const char* source, char* dest, int* sourceSizePtr, int targetDestSize, int cLevel) { LZ4_streamHC_t* const ctx = LZ4_initStreamHC(state, sizeof(*ctx)); if (ctx==NULL) return 0; /* init failure */ LZ4HC_init_internal(&ctx->internal_donotuse, (const BYTE*) source); LZ4_setCompressionLevel(ctx, cLevel); return LZ4HC_compress_generic(&ctx->internal_donotuse, source, dest, sourceSizePtr, targetDestSize, cLevel, fillOutput); } /************************************** * Streaming Functions **************************************/ /* allocation */ LZ4_streamHC_t* LZ4_createStreamHC(void) { LZ4_streamHC_t* const LZ4_streamHCPtr = (LZ4_streamHC_t*)ALLOC(sizeof(LZ4_streamHC_t)); if (LZ4_streamHCPtr==NULL) return NULL; LZ4_initStreamHC(LZ4_streamHCPtr, sizeof(*LZ4_streamHCPtr)); /* full initialization, malloc'ed buffer can be full of garbage */ return LZ4_streamHCPtr; } int LZ4_freeStreamHC (LZ4_streamHC_t* LZ4_streamHCPtr) { DEBUGLOG(4, "LZ4_freeStreamHC(%p)", LZ4_streamHCPtr); if (!LZ4_streamHCPtr) return 0; /* support free on NULL */ FREEMEM(LZ4_streamHCPtr); return 0; } LZ4_streamHC_t* LZ4_initStreamHC (void* buffer, size_t size) { LZ4_streamHC_t* const LZ4_streamHCPtr = (LZ4_streamHC_t*)buffer; if (buffer == NULL) return NULL; if (size < sizeof(LZ4_streamHC_t)) return NULL; #ifndef _MSC_VER /* for some reason, Visual fails the aligment test on 32-bit x86 : * it reports an aligment of 8-bytes, * while actually aligning LZ4_streamHC_t on 4 bytes. 
*/ if (((size_t)buffer) & (LZ4_streamHC_t_alignment() - 1)) return NULL; /* alignment check */ #endif /* if compilation fails here, LZ4_STREAMHCSIZE must be increased */ LZ4_STATIC_ASSERT(sizeof(LZ4HC_CCtx_internal) <= LZ4_STREAMHCSIZE); DEBUGLOG(4, "LZ4_initStreamHC(%p, %u)", LZ4_streamHCPtr, (unsigned)size); /* end-base will trigger a clearTable on starting compression */ LZ4_streamHCPtr->internal_donotuse.end = (const BYTE *)(ptrdiff_t)-1; LZ4_streamHCPtr->internal_donotuse.base = NULL; LZ4_streamHCPtr->internal_donotuse.dictCtx = NULL; LZ4_streamHCPtr->internal_donotuse.favorDecSpeed = 0; LZ4_streamHCPtr->internal_donotuse.dirty = 0; LZ4_setCompressionLevel(LZ4_streamHCPtr, LZ4HC_CLEVEL_DEFAULT); return LZ4_streamHCPtr; } /* just a stub */ void LZ4_resetStreamHC (LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel) { LZ4_initStreamHC(LZ4_streamHCPtr, sizeof(*LZ4_streamHCPtr)); LZ4_setCompressionLevel(LZ4_streamHCPtr, compressionLevel); } void LZ4_resetStreamHC_fast (LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel) { DEBUGLOG(4, "LZ4_resetStreamHC_fast(%p, %d)", LZ4_streamHCPtr, compressionLevel); if (LZ4_streamHCPtr->internal_donotuse.dirty) { LZ4_initStreamHC(LZ4_streamHCPtr, sizeof(*LZ4_streamHCPtr)); } else { /* preserve end - base : can trigger clearTable's threshold */ LZ4_streamHCPtr->internal_donotuse.end -= (uptrval)LZ4_streamHCPtr->internal_donotuse.base; LZ4_streamHCPtr->internal_donotuse.base = NULL; LZ4_streamHCPtr->internal_donotuse.dictCtx = NULL; } LZ4_setCompressionLevel(LZ4_streamHCPtr, compressionLevel); } void LZ4_setCompressionLevel(LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel) { DEBUGLOG(5, "LZ4_setCompressionLevel(%p, %d)", LZ4_streamHCPtr, compressionLevel); if (compressionLevel < 1) compressionLevel = LZ4HC_CLEVEL_DEFAULT; if (compressionLevel > LZ4HC_CLEVEL_MAX) compressionLevel = LZ4HC_CLEVEL_MAX; LZ4_streamHCPtr->internal_donotuse.compressionLevel = (short)compressionLevel; } void LZ4_favorDecompressionSpeed(LZ4_streamHC_t* LZ4_streamHCPtr, int favor) { LZ4_streamHCPtr->internal_donotuse.favorDecSpeed = (favor!=0); } /* LZ4_loadDictHC() : * LZ4_streamHCPtr is presumed properly initialized */ int LZ4_loadDictHC (LZ4_streamHC_t* LZ4_streamHCPtr, const char* dictionary, int dictSize) { LZ4HC_CCtx_internal* const ctxPtr = &LZ4_streamHCPtr->internal_donotuse; DEBUGLOG(4, "LZ4_loadDictHC(%p, %p, %d)", LZ4_streamHCPtr, dictionary, dictSize); assert(LZ4_streamHCPtr != NULL); if (dictSize > 64 KB) { dictionary += (size_t)dictSize - 64 KB; dictSize = 64 KB; } /* need a full initialization, there are bad side-effects when using resetFast() */ { int const cLevel = ctxPtr->compressionLevel; LZ4_initStreamHC(LZ4_streamHCPtr, sizeof(*LZ4_streamHCPtr)); LZ4_setCompressionLevel(LZ4_streamHCPtr, cLevel); } LZ4HC_init_internal (ctxPtr, (const BYTE*)dictionary); ctxPtr->end = (const BYTE*)dictionary + dictSize; if (dictSize >= 4) LZ4HC_Insert (ctxPtr, ctxPtr->end-3); return dictSize; } void LZ4_attach_HC_dictionary(LZ4_streamHC_t *working_stream, const LZ4_streamHC_t *dictionary_stream) { working_stream->internal_donotuse.dictCtx = dictionary_stream != NULL ? 
&(dictionary_stream->internal_donotuse) : NULL; } /* compression */ static void LZ4HC_setExternalDict(LZ4HC_CCtx_internal* ctxPtr, const BYTE* newBlock) { DEBUGLOG(4, "LZ4HC_setExternalDict(%p, %p)", ctxPtr, newBlock); if (ctxPtr->end >= ctxPtr->base + ctxPtr->dictLimit + 4) LZ4HC_Insert (ctxPtr, ctxPtr->end-3); /* Referencing remaining dictionary content */ /* Only one memory segment for extDict, so any previous extDict is lost at this stage */ ctxPtr->lowLimit = ctxPtr->dictLimit; ctxPtr->dictLimit = (U32)(ctxPtr->end - ctxPtr->base); ctxPtr->dictBase = ctxPtr->base; ctxPtr->base = newBlock - ctxPtr->dictLimit; ctxPtr->end = newBlock; ctxPtr->nextToUpdate = ctxPtr->dictLimit; /* match referencing will resume from there */ /* cannot reference an extDict and a dictCtx at the same time */ ctxPtr->dictCtx = NULL; } static int LZ4_compressHC_continue_generic (LZ4_streamHC_t* LZ4_streamHCPtr, const char* src, char* dst, int* srcSizePtr, int dstCapacity, limitedOutput_directive limit) { LZ4HC_CCtx_internal* const ctxPtr = &LZ4_streamHCPtr->internal_donotuse; DEBUGLOG(4, "LZ4_compressHC_continue_generic(ctx=%p, src=%p, srcSize=%d)", LZ4_streamHCPtr, src, *srcSizePtr); assert(ctxPtr != NULL); /* auto-init if forgotten */ if (ctxPtr->base == NULL) LZ4HC_init_internal (ctxPtr, (const BYTE*) src); /* Check overflow */ if ((size_t)(ctxPtr->end - ctxPtr->base) > 2 GB) { size_t dictSize = (size_t)(ctxPtr->end - ctxPtr->base) - ctxPtr->dictLimit; if (dictSize > 64 KB) dictSize = 64 KB; LZ4_loadDictHC(LZ4_streamHCPtr, (const char*)(ctxPtr->end) - dictSize, (int)dictSize); } /* Check if blocks follow each other */ if ((const BYTE*)src != ctxPtr->end) LZ4HC_setExternalDict(ctxPtr, (const BYTE*)src); /* Check overlapping input/dictionary space */ { const BYTE* sourceEnd = (const BYTE*) src + *srcSizePtr; const BYTE* const dictBegin = ctxPtr->dictBase + ctxPtr->lowLimit; const BYTE* const dictEnd = ctxPtr->dictBase + ctxPtr->dictLimit; if ((sourceEnd > dictBegin) && ((const BYTE*)src < dictEnd)) { if (sourceEnd > dictEnd) sourceEnd = dictEnd; ctxPtr->lowLimit = (U32)(sourceEnd - ctxPtr->dictBase); if (ctxPtr->dictLimit - ctxPtr->lowLimit < 4) ctxPtr->lowLimit = ctxPtr->dictLimit; } } return LZ4HC_compress_generic (ctxPtr, src, dst, srcSizePtr, dstCapacity, ctxPtr->compressionLevel, limit); } int LZ4_compress_HC_continue (LZ4_streamHC_t* LZ4_streamHCPtr, const char* src, char* dst, int srcSize, int dstCapacity) { if (dstCapacity < LZ4_compressBound(srcSize)) return LZ4_compressHC_continue_generic (LZ4_streamHCPtr, src, dst, &srcSize, dstCapacity, limitedOutput); else return LZ4_compressHC_continue_generic (LZ4_streamHCPtr, src, dst, &srcSize, dstCapacity, notLimited); } int LZ4_compress_HC_continue_destSize (LZ4_streamHC_t* LZ4_streamHCPtr, const char* src, char* dst, int* srcSizePtr, int targetDestSize) { return LZ4_compressHC_continue_generic(LZ4_streamHCPtr, src, dst, srcSizePtr, targetDestSize, fillOutput); } /* dictionary saving */ int LZ4_saveDictHC (LZ4_streamHC_t* LZ4_streamHCPtr, char* safeBuffer, int dictSize) { LZ4HC_CCtx_internal* const streamPtr = &LZ4_streamHCPtr->internal_donotuse; int const prefixSize = (int)(streamPtr->end - (streamPtr->base + streamPtr->dictLimit)); DEBUGLOG(4, "LZ4_saveDictHC(%p, %p, %d)", LZ4_streamHCPtr, safeBuffer, dictSize); if (dictSize > 64 KB) dictSize = 64 KB; if (dictSize < 4) dictSize = 0; if (dictSize > prefixSize) dictSize = prefixSize; memmove(safeBuffer, streamPtr->end - dictSize, dictSize); { U32 const endIndex = (U32)(streamPtr->end - streamPtr->base); 
streamPtr->end = (const BYTE*)safeBuffer + dictSize; streamPtr->base = streamPtr->end - endIndex; streamPtr->dictLimit = endIndex - (U32)dictSize; streamPtr->lowLimit = endIndex - (U32)dictSize; if (streamPtr->nextToUpdate < streamPtr->dictLimit) streamPtr->nextToUpdate = streamPtr->dictLimit; } return dictSize; } /*************************************************** * Deprecated Functions ***************************************************/ /* These functions currently generate deprecation warnings */ /* Wrappers for deprecated compression functions */ int LZ4_compressHC(const char* src, char* dst, int srcSize) { return LZ4_compress_HC (src, dst, srcSize, LZ4_compressBound(srcSize), 0); } int LZ4_compressHC_limitedOutput(const char* src, char* dst, int srcSize, int maxDstSize) { return LZ4_compress_HC(src, dst, srcSize, maxDstSize, 0); } int LZ4_compressHC2(const char* src, char* dst, int srcSize, int cLevel) { return LZ4_compress_HC (src, dst, srcSize, LZ4_compressBound(srcSize), cLevel); } int LZ4_compressHC2_limitedOutput(const char* src, char* dst, int srcSize, int maxDstSize, int cLevel) { return LZ4_compress_HC(src, dst, srcSize, maxDstSize, cLevel); } int LZ4_compressHC_withStateHC (void* state, const char* src, char* dst, int srcSize) { return LZ4_compress_HC_extStateHC (state, src, dst, srcSize, LZ4_compressBound(srcSize), 0); } int LZ4_compressHC_limitedOutput_withStateHC (void* state, const char* src, char* dst, int srcSize, int maxDstSize) { return LZ4_compress_HC_extStateHC (state, src, dst, srcSize, maxDstSize, 0); } int LZ4_compressHC2_withStateHC (void* state, const char* src, char* dst, int srcSize, int cLevel) { return LZ4_compress_HC_extStateHC(state, src, dst, srcSize, LZ4_compressBound(srcSize), cLevel); } int LZ4_compressHC2_limitedOutput_withStateHC (void* state, const char* src, char* dst, int srcSize, int maxDstSize, int cLevel) { return LZ4_compress_HC_extStateHC(state, src, dst, srcSize, maxDstSize, cLevel); } int LZ4_compressHC_continue (LZ4_streamHC_t* ctx, const char* src, char* dst, int srcSize) { return LZ4_compress_HC_continue (ctx, src, dst, srcSize, LZ4_compressBound(srcSize)); } int LZ4_compressHC_limitedOutput_continue (LZ4_streamHC_t* ctx, const char* src, char* dst, int srcSize, int maxDstSize) { return LZ4_compress_HC_continue (ctx, src, dst, srcSize, maxDstSize); } /* Deprecated streaming functions */ int LZ4_sizeofStreamStateHC(void) { return LZ4_STREAMHCSIZE; } /* state is presumed correctly sized, aka >= sizeof(LZ4_streamHC_t) * @return : 0 on success, !=0 if error */ int LZ4_resetStreamStateHC(void* state, char* inputBuffer) { LZ4_streamHC_t* const hc4 = LZ4_initStreamHC(state, sizeof(*hc4)); if (hc4 == NULL) return 1; /* init failed */ LZ4HC_init_internal (&hc4->internal_donotuse, (const BYTE*)inputBuffer); return 0; } void* LZ4_createHC (const char* inputBuffer) { LZ4_streamHC_t* const hc4 = LZ4_createStreamHC(); if (hc4 == NULL) return NULL; /* not enough memory */ LZ4HC_init_internal (&hc4->internal_donotuse, (const BYTE*)inputBuffer); return hc4; } int LZ4_freeHC (void* LZ4HC_Data) { if (!LZ4HC_Data) return 0; /* support free on NULL */ FREEMEM(LZ4HC_Data); return 0; } int LZ4_compressHC2_continue (void* LZ4HC_Data, const char* src, char* dst, int srcSize, int cLevel) { return LZ4HC_compress_generic (&((LZ4_streamHC_t*)LZ4HC_Data)->internal_donotuse, src, dst, &srcSize, 0, cLevel, notLimited); } int LZ4_compressHC2_limitedOutput_continue (void* LZ4HC_Data, const char* src, char* dst, int srcSize, int dstCapacity, int cLevel) { return 
LZ4HC_compress_generic (&((LZ4_streamHC_t*)LZ4HC_Data)->internal_donotuse, src, dst, &srcSize, dstCapacity, cLevel, limitedOutput); } char* LZ4_slideInputBufferHC(void* LZ4HC_Data) { LZ4_streamHC_t *ctx = (LZ4_streamHC_t*)LZ4HC_Data; const BYTE *bufferStart = ctx->internal_donotuse.base + ctx->internal_donotuse.lowLimit; LZ4_resetStreamHC_fast(ctx, ctx->internal_donotuse.compressionLevel); /* avoid const char * -> char * conversion warning :( */ return (char *)(uptrval)bufferStart; } /* ================================================ * LZ4 Optimal parser (levels [LZ4HC_CLEVEL_OPT_MIN - LZ4HC_CLEVEL_MAX]) * ===============================================*/ typedef struct { int price; int off; int mlen; int litlen; } LZ4HC_optimal_t; /* price in bytes */ LZ4_FORCE_INLINE int LZ4HC_literalsPrice(int const litlen) { int price = litlen; assert(litlen >= 0); if (litlen >= (int)RUN_MASK) price += 1 + ((litlen-(int)RUN_MASK) / 255); return price; } /* requires mlen >= MINMATCH */ LZ4_FORCE_INLINE int LZ4HC_sequencePrice(int litlen, int mlen) { int price = 1 + 2 ; /* token + 16-bit offset */ assert(litlen >= 0); assert(mlen >= MINMATCH); price += LZ4HC_literalsPrice(litlen); if (mlen >= (int)(ML_MASK+MINMATCH)) price += 1 + ((mlen-(int)(ML_MASK+MINMATCH)) / 255); return price; } typedef struct { int off; int len; } LZ4HC_match_t; LZ4_FORCE_INLINE LZ4HC_match_t LZ4HC_FindLongerMatch(LZ4HC_CCtx_internal* const ctx, const BYTE* ip, const BYTE* const iHighLimit, int minLen, int nbSearches, const dictCtx_directive dict, const HCfavor_e favorDecSpeed) { LZ4HC_match_t match = { 0 , 0 }; const BYTE* matchPtr = NULL; /* note : LZ4HC_InsertAndGetWiderMatch() is able to modify the starting position of a match (*startpos), * but this won't be the case here, as we define iLowLimit==ip, * so LZ4HC_InsertAndGetWiderMatch() won't be allowed to search past ip */ int matchLength = LZ4HC_InsertAndGetWiderMatch(ctx, ip, ip, iHighLimit, minLen, &matchPtr, &ip, nbSearches, 1 /*patternAnalysis*/, 1 /*chainSwap*/, dict, favorDecSpeed); if (matchLength <= minLen) return match; if (favorDecSpeed) { if ((matchLength>18) & (matchLength<=36)) matchLength=18; /* favor shortcut */ } match.len = matchLength; match.off = (int)(ip-matchPtr); return match; } static int LZ4HC_compress_optimal ( LZ4HC_CCtx_internal* ctx, const char* const source, char* dst, int* srcSizePtr, int dstCapacity, int const nbSearches, size_t sufficient_len, const limitedOutput_directive limit, int const fullUpdate, const dictCtx_directive dict, const HCfavor_e favorDecSpeed) { #define TRAILING_LITERALS 3 LZ4HC_optimal_t opt[LZ4_OPT_NUM + TRAILING_LITERALS]; /* ~64 KB, which is a bit large for stack... 
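 *
 * For orientation (an illustrative aside, not upstream text) : the optimal parser
 * below prices each candidate in output bytes, using LZ4HC_literalsPrice() and
 * LZ4HC_sequencePrice() defined above. For example, with RUN_MASK==15 :
 *     LZ4HC_literalsPrice(20)          == 20 + 1 + (20-15)/255 == 21 bytes,
 *     LZ4HC_sequencePrice(0, MINMATCH) == 1 + 2                ==  3 bytes
 * (token + 16-bit offset) -- the "min seq price" used in the search loop below.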
*/
    const BYTE* ip = (const BYTE*) source;
    const BYTE* anchor = ip;
    const BYTE* const iend = ip + *srcSizePtr;
    const BYTE* const mflimit = iend - MFLIMIT;
    const BYTE* const matchlimit = iend - LASTLITERALS;
    BYTE* op = (BYTE*) dst;
    BYTE* opSaved = (BYTE*) dst;
    BYTE* oend = op + dstCapacity;

    /* init */
    DEBUGLOG(5, "LZ4HC_compress_optimal(dst=%p, dstCapa=%u)", dst, (unsigned)dstCapacity);
    *srcSizePtr = 0;
    if (limit == fillOutput) oend -= LASTLITERALS;   /* Hack to support the LZ4 format restriction */
    if (sufficient_len >= LZ4_OPT_NUM) sufficient_len = LZ4_OPT_NUM-1;

    /* Main Loop */
    assert(ip - anchor < LZ4_MAX_INPUT_SIZE);
    while (ip <= mflimit) {
         int const llen = (int)(ip - anchor);
         int best_mlen, best_off;
         int cur, last_match_pos = 0;

         LZ4HC_match_t const firstMatch = LZ4HC_FindLongerMatch(ctx, ip, matchlimit, MINMATCH-1, nbSearches, dict, favorDecSpeed);
         if (firstMatch.len==0) { ip++; continue; }

         if ((size_t)firstMatch.len > sufficient_len) {
             /* good enough solution : immediate encoding */
             int const firstML = firstMatch.len;
             const BYTE* const matchPos = ip - firstMatch.off;
             opSaved = op;
             if ( LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), firstML, matchPos, limit, oend) )   /* updates ip, op and anchor */
                 goto _dest_overflow;
             continue;
         }

         /* set prices for first positions (literals) */
         {   int rPos;
             for (rPos = 0 ; rPos < MINMATCH ; rPos++) {
                 int const cost = LZ4HC_literalsPrice(llen + rPos);
                 opt[rPos].mlen = 1;
                 opt[rPos].off = 0;
                 opt[rPos].litlen = llen + rPos;
                 opt[rPos].price = cost;
                 DEBUGLOG(7, "rPos:%3i => price:%3i (litlen=%i) -- initial setup",
                             rPos, cost, opt[rPos].litlen);
         }   }
         /* set prices using initial match */
         {   int mlen = MINMATCH;
             int const matchML = firstMatch.len;   /* necessarily < sufficient_len < LZ4_OPT_NUM */
             int const offset = firstMatch.off;
             assert(matchML < LZ4_OPT_NUM);
             for ( ; mlen <= matchML ; mlen++) {
                 int const cost = LZ4HC_sequencePrice(llen, mlen);
                 opt[mlen].mlen = mlen;
                 opt[mlen].off = offset;
                 opt[mlen].litlen = llen;
                 opt[mlen].price = cost;
                 DEBUGLOG(7, "rPos:%3i => price:%3i (matchlen=%i) -- initial setup",
                             mlen, cost, mlen);
         }   }
         last_match_pos = firstMatch.len;
         {   int addLit;
             for (addLit = 1; addLit <= TRAILING_LITERALS; addLit ++) {
                 opt[last_match_pos+addLit].mlen = 1;   /* literal */
                 opt[last_match_pos+addLit].off = 0;
                 opt[last_match_pos+addLit].litlen = addLit;
                 opt[last_match_pos+addLit].price = opt[last_match_pos].price + LZ4HC_literalsPrice(addLit);
                 DEBUGLOG(7, "rPos:%3i => price:%3i (litlen=%i) -- initial setup",
                             last_match_pos+addLit, opt[last_match_pos+addLit].price, addLit);
         }   }

         /* check further positions */
         for (cur = 1; cur < last_match_pos; cur++) {
             const BYTE* const curPtr = ip + cur;
             LZ4HC_match_t newMatch;

             if (curPtr > mflimit) break;
             DEBUGLOG(7, "rPos:%u[%u] vs [%u]%u",
                     cur, opt[cur].price, opt[cur+1].price, cur+1);
             if (fullUpdate) {
                 /* not useful to search here if next position has same (or lower) cost */
                 if ( (opt[cur+1].price <= opt[cur].price)
                   /* in some cases, next position has same cost, but cost rises sharply after, so a small match would still be beneficial */
                   && (opt[cur+MINMATCH].price < opt[cur].price + 3/*min seq price*/) )
                     continue;
             } else {
                 /* not useful to search here if next position has same (or lower) cost */
                 if (opt[cur+1].price <= opt[cur].price) continue;
             }

             DEBUGLOG(7, "search at rPos:%u", cur);
             if (fullUpdate)
                 newMatch = LZ4HC_FindLongerMatch(ctx, curPtr, matchlimit, MINMATCH-1, nbSearches, dict, favorDecSpeed);
             else
                 /* only test matches of minimum length; slightly faster, but misses a few bytes */
                 newMatch = LZ4HC_FindLongerMatch(ctx,
curPtr, matchlimit, last_match_pos - cur, nbSearches, dict, favorDecSpeed); if (!newMatch.len) continue; if ( ((size_t)newMatch.len > sufficient_len) || (newMatch.len + cur >= LZ4_OPT_NUM) ) { /* immediate encoding */ best_mlen = newMatch.len; best_off = newMatch.off; last_match_pos = cur + 1; goto encode; } /* before match : set price with literals at beginning */ { int const baseLitlen = opt[cur].litlen; int litlen; for (litlen = 1; litlen < MINMATCH; litlen++) { int const price = opt[cur].price - LZ4HC_literalsPrice(baseLitlen) + LZ4HC_literalsPrice(baseLitlen+litlen); int const pos = cur + litlen; if (price < opt[pos].price) { opt[pos].mlen = 1; /* literal */ opt[pos].off = 0; opt[pos].litlen = baseLitlen+litlen; opt[pos].price = price; DEBUGLOG(7, "rPos:%3i => price:%3i (litlen=%i)", pos, price, opt[pos].litlen); } } } /* set prices using match at position = cur */ { int const matchML = newMatch.len; int ml = MINMATCH; assert(cur + newMatch.len < LZ4_OPT_NUM); for ( ; ml <= matchML ; ml++) { int const pos = cur + ml; int const offset = newMatch.off; int price; int ll; DEBUGLOG(7, "testing price rPos %i (last_match_pos=%i)", pos, last_match_pos); if (opt[cur].mlen == 1) { ll = opt[cur].litlen; price = ((cur > ll) ? opt[cur - ll].price : 0) + LZ4HC_sequencePrice(ll, ml); } else { ll = 0; price = opt[cur].price + LZ4HC_sequencePrice(0, ml); } assert((U32)favorDecSpeed <= 1); if (pos > last_match_pos+TRAILING_LITERALS || price <= opt[pos].price - (int)favorDecSpeed) { DEBUGLOG(7, "rPos:%3i => price:%3i (matchlen=%i)", pos, price, ml); assert(pos < LZ4_OPT_NUM); if ( (ml == matchML) /* last pos of last match */ && (last_match_pos < pos) ) last_match_pos = pos; opt[pos].mlen = ml; opt[pos].off = offset; opt[pos].litlen = ll; opt[pos].price = price; } } } /* complete following positions with literals */ { int addLit; for (addLit = 1; addLit <= TRAILING_LITERALS; addLit ++) { opt[last_match_pos+addLit].mlen = 1; /* literal */ opt[last_match_pos+addLit].off = 0; opt[last_match_pos+addLit].litlen = addLit; opt[last_match_pos+addLit].price = opt[last_match_pos].price + LZ4HC_literalsPrice(addLit); DEBUGLOG(7, "rPos:%3i => price:%3i (litlen=%i)", last_match_pos+addLit, opt[last_match_pos+addLit].price, addLit); } } } /* for (cur = 1; cur <= last_match_pos; cur++) */ assert(last_match_pos < LZ4_OPT_NUM + TRAILING_LITERALS); best_mlen = opt[last_match_pos].mlen; best_off = opt[last_match_pos].off; cur = last_match_pos - best_mlen; encode: /* cur, last_match_pos, best_mlen, best_off must be set */ assert(cur < LZ4_OPT_NUM); assert(last_match_pos >= 1); /* == 1 when only one candidate */ DEBUGLOG(6, "reverse traversal, looking for shortest path (last_match_pos=%i)", last_match_pos); { int candidate_pos = cur; int selected_matchLength = best_mlen; int selected_offset = best_off; while (1) { /* from end to beginning */ int const next_matchLength = opt[candidate_pos].mlen; /* can be 1, means literal */ int const next_offset = opt[candidate_pos].off; DEBUGLOG(7, "pos %i: sequence length %i", candidate_pos, selected_matchLength); opt[candidate_pos].mlen = selected_matchLength; opt[candidate_pos].off = selected_offset; selected_matchLength = next_matchLength; selected_offset = next_offset; if (next_matchLength > candidate_pos) break; /* last match elected, first match to encode */ assert(next_matchLength > 0); /* can be 1, means literal */ candidate_pos -= next_matchLength; } } /* encode all recorded sequences in order */ { int rPos = 0; /* relative position (to ip) */ while (rPos < last_match_pos) { int 
const ml = opt[rPos].mlen; int const offset = opt[rPos].off; if (ml == 1) { ip++; rPos++; continue; } /* literal; note: can end up with several literals, in which case, skip them */ rPos += ml; assert(ml >= MINMATCH); assert((offset >= 1) && (offset <= LZ4_DISTANCE_MAX)); opSaved = op; if ( LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, ip - offset, limit, oend) ) /* updates ip, op and anchor */ goto _dest_overflow; } } } /* while (ip <= mflimit) */ _last_literals: /* Encode Last Literals */ { size_t lastRunSize = (size_t)(iend - anchor); /* literals */ size_t litLength = (lastRunSize + 255 - RUN_MASK) / 255; size_t const totalSize = 1 + litLength + lastRunSize; if (limit == fillOutput) oend += LASTLITERALS; /* restore correct value */ if (limit && (op + totalSize > oend)) { if (limit == limitedOutput) return 0; /* Check output limit */ /* adapt lastRunSize to fill 'dst' */ lastRunSize = (size_t)(oend - op) - 1; litLength = (lastRunSize + 255 - RUN_MASK) / 255; lastRunSize -= litLength; } ip = anchor + lastRunSize; if (lastRunSize >= RUN_MASK) { size_t accumulator = lastRunSize - RUN_MASK; *op++ = (RUN_MASK << ML_BITS); for(; accumulator >= 255 ; accumulator -= 255) *op++ = 255; *op++ = (BYTE) accumulator; } else { *op++ = (BYTE)(lastRunSize << ML_BITS); } memcpy(op, anchor, lastRunSize); op += lastRunSize; } /* End */ *srcSizePtr = (int) (((const char*)ip) - source); return (int) ((char*)op-dst); _dest_overflow: if (limit == fillOutput) { op = opSaved; /* restore correct out pointer */ goto _last_literals; } return 0; } py-lz4framed-0.14.0/lz4/lz4hc.h000066400000000000000000000515551357043434000160360ustar00rootroot00000000000000/* LZ4 HC - High Compression Mode of LZ4 Header File Copyright (C) 2011-2017, Yann Collet. BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
You can contact the author at :
   - LZ4 source repository : https://github.com/lz4/lz4
   - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c
*/
#ifndef LZ4_HC_H_19834876238432
#define LZ4_HC_H_19834876238432

#if defined (__cplusplus)
extern "C" {
#endif

/* --- Dependency --- */
/* note : lz4hc requires lz4.h/lz4.c for compilation */
#include "lz4.h"   /* stddef, LZ4LIB_API, LZ4_DEPRECATED */


/* --- Useful constants --- */
#define LZ4HC_CLEVEL_MIN         3
#define LZ4HC_CLEVEL_DEFAULT     9
#define LZ4HC_CLEVEL_OPT_MIN    10
#define LZ4HC_CLEVEL_MAX        12


/*-************************************
 *  Block Compression
 **************************************/
/*! LZ4_compress_HC() :
 *  Compress data from `src` into `dst`, using the powerful but slower "HC" algorithm.
 * `dst` must be already allocated.
 *  Compression is guaranteed to succeed if `dstCapacity >= LZ4_compressBound(srcSize)` (see "lz4.h")
 *  Max supported `srcSize` value is LZ4_MAX_INPUT_SIZE (see "lz4.h")
 * `compressionLevel` : any value between 1 and LZ4HC_CLEVEL_MAX will work.
 *                      Values > LZ4HC_CLEVEL_MAX behave the same as LZ4HC_CLEVEL_MAX.
 * @return : the number of bytes written into 'dst'
 *           or 0 if compression fails.
 */
LZ4LIB_API int LZ4_compress_HC (const char* src, char* dst, int srcSize, int dstCapacity, int compressionLevel);


/* Note :
 *   Decompression functions are provided within "lz4.h" (BSD license)
 */


/*! LZ4_compress_HC_extStateHC() :
 *  Same as LZ4_compress_HC(), but using an externally allocated memory segment for `state`.
 * `state` size is provided by LZ4_sizeofStateHC().
 *  Memory segment must be aligned on 8-byte boundaries (which a normal malloc() should do properly).
 */
LZ4LIB_API int LZ4_sizeofStateHC(void);
LZ4LIB_API int LZ4_compress_HC_extStateHC(void* stateHC, const char* src, char* dst, int srcSize, int maxDstSize, int compressionLevel);


/*! LZ4_compress_HC_destSize() : v1.9.0+
 *  Will compress as much data as possible from `src`
 *  to fit into `targetDstSize` budget.
 *  Result is provided in 2 parts :
 * @return : the number of bytes written into 'dst' (necessarily <= targetDstSize)
 *           or 0 if compression fails.
 * `srcSizePtr` : on success, *srcSizePtr is updated to indicate how many bytes were read from `src`
 */
LZ4LIB_API int LZ4_compress_HC_destSize(void* stateHC,
                                  const char* src, char* dst,
                                        int* srcSizePtr, int targetDstSize,
                                        int compressionLevel);


/*-************************************
 *  Streaming Compression
 *  Bufferless synchronous API
 **************************************/
typedef union LZ4_streamHC_u LZ4_streamHC_t;   /* incomplete type (defined later) */

/*! LZ4_createStreamHC() and LZ4_freeStreamHC() :
 *  These functions create and release memory for LZ4 HC streaming state.
 *  Newly created states are automatically initialized.
 *  The same state can be used multiple times consecutively,
 *  starting with LZ4_resetStreamHC_fast() to start a new stream of blocks.
 */
LZ4LIB_API LZ4_streamHC_t* LZ4_createStreamHC(void);
LZ4LIB_API int             LZ4_freeStreamHC (LZ4_streamHC_t* streamHCPtr);

/*
  These functions compress data in successive blocks of any size,
  using previous blocks as dictionary, to improve compression ratio.
  One key assumption is that previous blocks (up to 64 KB) remain read-accessible while compressing next blocks.
  There is an exception for ring buffers, which can be smaller than 64 KB.
  Ring-buffer scenario is automatically detected and handled within LZ4_compress_HC_continue().

  Before starting compression, state must be allocated and properly initialized.
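  For orientation, here is a compact sketch of the sequence described in the
  paragraphs below (buffer names and sizes are illustrative additions, not
  upstream documentation) :

      LZ4_streamHC_t* const s = LZ4_createStreamHC();     // allocate + initialize
      int const cSize = LZ4_compress_HC_continue(s, in, out, inSize,
                                                 LZ4_compressBound(inSize));
      // ... further blocks; optionally LZ4_saveDictHC(s, dictBuf, 64 * 1024) ...
      LZ4_freeStreamHC(s);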
  LZ4_createStreamHC() does both, though compression level is set to LZ4HC_CLEVEL_DEFAULT.

  Selecting the compression level can be done with LZ4_resetStreamHC_fast() (starts a new stream)
  or LZ4_setCompressionLevel() (anytime, between blocks in the same stream) (experimental).
  LZ4_resetStreamHC_fast() only works on states which have been properly initialized at least once,
  which is automatically the case when state is created using LZ4_createStreamHC().

  After reset, a first "fictional block" can be designated as initial dictionary,
  using LZ4_loadDictHC() (Optional).

  Invoke LZ4_compress_HC_continue() to compress each successive block.
  The number of blocks is unlimited.
  Previous input blocks, including initial dictionary when present,
  must remain accessible and unmodified during compression.

  It's allowed to update compression level anytime between blocks,
  using LZ4_setCompressionLevel() (experimental).

  'dst' buffer should be sized to handle worst case scenarios
  (see LZ4_compressBound(), it ensures compression success).
  In case of failure, the API does not guarantee recovery, so the state _must_ be reset.
  To ensure compression success
  whenever `dst` buffer size cannot be made >= LZ4_compressBound(),
  consider using LZ4_compress_HC_continue_destSize().

  Whenever previous input blocks can't be preserved unmodified in-place during compression of next blocks,
  it's possible to copy the last blocks into a more stable memory space, using LZ4_saveDictHC().
  Return value of LZ4_saveDictHC() is the size of dictionary effectively saved into 'safeBuffer' (<= 64 KB)

  After completing a streaming compression,
  it's possible to start a new stream of blocks, using the same LZ4_streamHC_t state,
  just by resetting it, using LZ4_resetStreamHC_fast().
*/

LZ4LIB_API void LZ4_resetStreamHC_fast(LZ4_streamHC_t* streamHCPtr, int compressionLevel);   /* v1.9.0+ */
LZ4LIB_API int  LZ4_loadDictHC (LZ4_streamHC_t* streamHCPtr, const char* dictionary, int dictSize);

LZ4LIB_API int LZ4_compress_HC_continue (LZ4_streamHC_t* streamHCPtr,
                                   const char* src, char* dst,
                                         int srcSize, int maxDstSize);

/*! LZ4_compress_HC_continue_destSize() : v1.9.0+
 *  Similar to LZ4_compress_HC_continue(),
 *  but will read as much data as possible from `src`
 *  to fit into `targetDstSize` budget.
 *  Result is provided in 2 parts :
 * @return : the number of bytes written into 'dst' (necessarily <= targetDstSize)
 *           or 0 if compression fails.
 * `srcSizePtr` : on success, *srcSizePtr will be updated to indicate how many bytes were read from `src`.
 *           Note that this function may not consume the entire input.
 */
LZ4LIB_API int LZ4_compress_HC_continue_destSize(LZ4_streamHC_t* LZ4_streamHCPtr,
                                           const char* src, char* dst,
                                                 int* srcSizePtr, int targetDstSize);

LZ4LIB_API int LZ4_saveDictHC (LZ4_streamHC_t* streamHCPtr, char* safeBuffer, int maxDictSize);



/*^**********************************************
 * !!!!!!   STATIC LINKING ONLY   !!!!!!
 ***********************************************/

/*-******************************************************************
 * PRIVATE DEFINITIONS :
 * Do not use these definitions directly.
 * They are merely exposed to allow static allocation of `LZ4_streamHC_t`.
 * Declare an `LZ4_streamHC_t` directly, rather than any type below.
 * Even then, only do so in the context of static linking, as definitions may change between versions.
 *******************************************************************/

#define LZ4HC_DICTIONARY_LOGSIZE 16
#define LZ4HC_MAXD (1<<LZ4HC_DICTIONARY_LOGSIZE)
#define LZ4HC_MAXD_MASK (LZ4HC_MAXD - 1)

#define LZ4HC_HASH_LOG 15
#define LZ4HC_HASHTABLESIZE (1 << LZ4HC_HASH_LOG)
#define LZ4HC_HASH_MASK (LZ4HC_HASHTABLESIZE - 1)


#if defined(__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
#include <stdint.h>

typedef struct LZ4HC_CCtx_internal LZ4HC_CCtx_internal;
struct LZ4HC_CCtx_internal
{
    uint32_t   hashTable[LZ4HC_HASHTABLESIZE];
    uint16_t   chainTable[LZ4HC_MAXD];
    const uint8_t* end;         /* next block here to continue on current prefix */
    const uint8_t* base;        /* All indexes relative to this position */
    const uint8_t* dictBase;    /* alternate base for extDict */
    uint32_t   dictLimit;       /* below that point, need extDict */
    uint32_t   lowLimit;        /* below that point, no more dict */
    uint32_t   nextToUpdate;    /* index from which to continue dictionary update */
    short      compressionLevel;
    int8_t     favorDecSpeed;   /* favor decompression speed if this flag is set,
                                   otherwise, favor compression ratio */
    int8_t     dirty;           /* stream has to be fully reset if this flag is set */
    const LZ4HC_CCtx_internal* dictCtx;
};

#else

typedef struct LZ4HC_CCtx_internal LZ4HC_CCtx_internal;
struct LZ4HC_CCtx_internal
{
    unsigned int   hashTable[LZ4HC_HASHTABLESIZE];
    unsigned short chainTable[LZ4HC_MAXD];
    const unsigned char* end;        /* next block here to continue on current prefix */
    const unsigned char* base;       /* All indexes relative to this position */
    const unsigned char* dictBase;   /* alternate base for extDict */
    unsigned int   dictLimit;        /* below that point, need extDict */
    unsigned int   lowLimit;         /* below that point, no more dict */
    unsigned int   nextToUpdate;     /* index from which to continue dictionary update */
    short          compressionLevel;
    char           favorDecSpeed;    /* favor decompression speed if this flag is set,
                                        otherwise, favor compression ratio */
    char           dirty;            /* stream has to be fully reset if this flag is set */
    const LZ4HC_CCtx_internal* dictCtx;
};

#endif


/* Do not use these definitions directly !
 * Declare or allocate an LZ4_streamHC_t instead.
 */
#define LZ4_STREAMHCSIZE       (4*LZ4HC_HASHTABLESIZE + 2*LZ4HC_MAXD + 56 + ((sizeof(void*)==16) ? 56 : 0) /* AS400*/ )   /* 262200 or 262256*/
#define LZ4_STREAMHCSIZE_SIZET (LZ4_STREAMHCSIZE / sizeof(size_t))
union LZ4_streamHC_u {
    size_t table[LZ4_STREAMHCSIZE_SIZET];
    LZ4HC_CCtx_internal internal_donotuse;
};   /* previously typedef'd to LZ4_streamHC_t */

/* LZ4_streamHC_t :
 * This structure allows static allocation of LZ4 HC streaming state.
 * This can be used to allocate statically, on stack, or as part of a larger structure.
 *
 * Such state **must** be initialized using LZ4_initStreamHC() before first use.
 *
 * Note that invoking LZ4_initStreamHC() is not required when
 * the state was created using LZ4_createStreamHC() (which is recommended).
 * Using the normal builder, a newly created state is automatically initialized.
 *
 * Static allocation shall only be used in combination with static linking.
 */

/* LZ4_initStreamHC() : v1.9.0+
 * Required before first use of a statically allocated LZ4_streamHC_t.
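 * A minimal sketch, assuming stack allocation (names are illustrative,
 * not part of the upstream documentation) :
 *     LZ4_streamHC_t state;
 *     LZ4_streamHC_t* const s = LZ4_initStreamHC(&state, sizeof(state));
 *     if (s == NULL) { ... }   // NULL signals a size or alignment problem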
* Before v1.9.0 : use LZ4_resetStreamHC() instead */ LZ4LIB_API LZ4_streamHC_t* LZ4_initStreamHC (void* buffer, size_t size); /*-************************************ * Deprecated Functions **************************************/ /* see lz4.h LZ4_DISABLE_DEPRECATE_WARNINGS to turn off deprecation warnings */ /* deprecated compression functions */ LZ4_DEPRECATED("use LZ4_compress_HC() instead") LZ4LIB_API int LZ4_compressHC (const char* source, char* dest, int inputSize); LZ4_DEPRECATED("use LZ4_compress_HC() instead") LZ4LIB_API int LZ4_compressHC_limitedOutput (const char* source, char* dest, int inputSize, int maxOutputSize); LZ4_DEPRECATED("use LZ4_compress_HC() instead") LZ4LIB_API int LZ4_compressHC2 (const char* source, char* dest, int inputSize, int compressionLevel); LZ4_DEPRECATED("use LZ4_compress_HC() instead") LZ4LIB_API int LZ4_compressHC2_limitedOutput(const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel); LZ4_DEPRECATED("use LZ4_compress_HC_extStateHC() instead") LZ4LIB_API int LZ4_compressHC_withStateHC (void* state, const char* source, char* dest, int inputSize); LZ4_DEPRECATED("use LZ4_compress_HC_extStateHC() instead") LZ4LIB_API int LZ4_compressHC_limitedOutput_withStateHC (void* state, const char* source, char* dest, int inputSize, int maxOutputSize); LZ4_DEPRECATED("use LZ4_compress_HC_extStateHC() instead") LZ4LIB_API int LZ4_compressHC2_withStateHC (void* state, const char* source, char* dest, int inputSize, int compressionLevel); LZ4_DEPRECATED("use LZ4_compress_HC_extStateHC() instead") LZ4LIB_API int LZ4_compressHC2_limitedOutput_withStateHC(void* state, const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel); LZ4_DEPRECATED("use LZ4_compress_HC_continue() instead") LZ4LIB_API int LZ4_compressHC_continue (LZ4_streamHC_t* LZ4_streamHCPtr, const char* source, char* dest, int inputSize); LZ4_DEPRECATED("use LZ4_compress_HC_continue() instead") LZ4LIB_API int LZ4_compressHC_limitedOutput_continue (LZ4_streamHC_t* LZ4_streamHCPtr, const char* source, char* dest, int inputSize, int maxOutputSize); /* Obsolete streaming functions; degraded functionality; do not use! * * In order to perform streaming compression, these functions depended on data * that is no longer tracked in the state. They have been preserved as well as * possible: using them will still produce a correct output. However, use of * LZ4_slideInputBufferHC() will truncate the history of the stream, rather * than preserve a window-sized chunk of history. */ LZ4_DEPRECATED("use LZ4_createStreamHC() instead") LZ4LIB_API void* LZ4_createHC (const char* inputBuffer); LZ4_DEPRECATED("use LZ4_saveDictHC() instead") LZ4LIB_API char* LZ4_slideInputBufferHC (void* LZ4HC_Data); LZ4_DEPRECATED("use LZ4_freeStreamHC() instead") LZ4LIB_API int LZ4_freeHC (void* LZ4HC_Data); LZ4_DEPRECATED("use LZ4_compress_HC_continue() instead") LZ4LIB_API int LZ4_compressHC2_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize, int compressionLevel); LZ4_DEPRECATED("use LZ4_compress_HC_continue() instead") LZ4LIB_API int LZ4_compressHC2_limitedOutput_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel); LZ4_DEPRECATED("use LZ4_createStreamHC() instead") LZ4LIB_API int LZ4_sizeofStreamStateHC(void); LZ4_DEPRECATED("use LZ4_initStreamHC() instead") LZ4LIB_API int LZ4_resetStreamStateHC(void* state, char* inputBuffer); /* LZ4_resetStreamHC() is now replaced by LZ4_initStreamHC(). 
 * The intention is to emphasize the difference with LZ4_resetStreamHC_fast(),
 * which is now the recommended function to start a new stream of blocks,
 * but cannot be used to initialize a memory segment containing arbitrary garbage data.
 *
 * It is recommended to switch to LZ4_initStreamHC().
 * LZ4_resetStreamHC() will generate deprecation warnings in a future version.
 */
LZ4LIB_API void LZ4_resetStreamHC (LZ4_streamHC_t* streamHCPtr, int compressionLevel);


#if defined (__cplusplus)
}
#endif

#endif /* LZ4_HC_H_19834876238432 */


/*-**************************************************
 * !!!!!     STATIC LINKING ONLY     !!!!!
 * Following definitions are considered experimental.
 * They should not be linked from DLL,
 * as there is no guarantee of API stability yet.
 * Prototypes will be promoted to "stable" status
 * after successful usage in real-life scenarios.
 ***************************************************/
#ifdef LZ4_HC_STATIC_LINKING_ONLY   /* protection macro */
#ifndef LZ4_HC_SLO_098092834
#define LZ4_HC_SLO_098092834

#define LZ4_STATIC_LINKING_ONLY   /* LZ4LIB_STATIC_API */
#include "lz4.h"

#if defined (__cplusplus)
extern "C" {
#endif

/*! LZ4_setCompressionLevel() : v1.8.0+ (experimental)
 *  It's possible to change compression level
 *  between successive invocations of LZ4_compress_HC_continue*()
 *  for dynamic adaptation.
 */
LZ4LIB_STATIC_API void LZ4_setCompressionLevel(
    LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel);

/*! LZ4_favorDecompressionSpeed() : v1.8.2+ (experimental)
 *  Opt. Parser will favor decompression speed over compression ratio.
 *  Only applicable to levels >= LZ4HC_CLEVEL_OPT_MIN.
 */
LZ4LIB_STATIC_API void LZ4_favorDecompressionSpeed(
    LZ4_streamHC_t* LZ4_streamHCPtr, int favor);

/*! LZ4_resetStreamHC_fast() : v1.9.0+
 *  When an LZ4_streamHC_t is known to be in an internally coherent state,
 *  it can often be prepared for a new compression with almost no work, only
 *  sometimes falling back to the full, expensive reset that is always required
 *  when the stream is in an indeterminate state (i.e., the reset performed by
 *  LZ4_resetStreamHC()).
 *
 *  LZ4_streamHCs are guaranteed to be in a valid state when:
 *  - returned from LZ4_createStreamHC()
 *  - reset by LZ4_resetStreamHC()
 *  - memset(stream, 0, sizeof(LZ4_streamHC_t))
 *  - the stream was in a valid state and was reset by LZ4_resetStreamHC_fast()
 *  - the stream was in a valid state and was then used in any compression call
 *    that returned success
 *  - the stream was in an indeterminate state and was used in a compression
 *    call that fully reset the state (LZ4_compress_HC_extStateHC()) and that
 *    returned success
 *
 *  Note:
 *  A stream that was last used in a compression call that returned an error
 *  may be passed to this function. However, it will be fully reset, which will
 *  clear any existing history and settings from the context.
 */
LZ4LIB_STATIC_API void LZ4_resetStreamHC_fast(
    LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel);

/*! LZ4_compress_HC_extStateHC_fastReset() :
 *  A variant of LZ4_compress_HC_extStateHC().
 *
 *  Using this variant avoids an expensive initialization step. It is only safe
 *  to call if the state buffer is known to be correctly initialized already
 *  (see above comment on LZ4_resetStreamHC_fast() for a definition of
 *  "correctly initialized"). From a high level, the difference is that this
 *  function initializes the provided state with a call to
 *  LZ4_resetStreamHC_fast() while LZ4_compress_HC_extStateHC() starts with a
 *  call to LZ4_resetStreamHC().
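 *
 *  A usage sketch (illustrative only; `state` is assumed to come from
 *  malloc(LZ4_sizeofStateHC()) and to have been initialized at least once,
 *  e.g. by a prior successful LZ4_compress_HC_extStateHC() call) :
 *      int const n = LZ4_compress_HC_extStateHC_fastReset(state,
 *                        src, dst, srcSize, dstCapacity, LZ4HC_CLEVEL_DEFAULT);
 *      if (n == 0) { ... }   // failure marks the state dirty; the next
 *                            // fast reset will then perform a full reset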
*/ LZ4LIB_STATIC_API int LZ4_compress_HC_extStateHC_fastReset ( void* state, const char* src, char* dst, int srcSize, int dstCapacity, int compressionLevel); /*! LZ4_attach_HC_dictionary() : * This is an experimental API that allows for the efficient use of a * static dictionary many times. * * Rather than re-loading the dictionary buffer into a working context before * each compression, or copying a pre-loaded dictionary's LZ4_streamHC_t into a * working LZ4_streamHC_t, this function introduces a no-copy setup mechanism, * in which the working stream references the dictionary stream in-place. * * Several assumptions are made about the state of the dictionary stream. * Currently, only streams which have been prepared by LZ4_loadDictHC() should * be expected to work. * * Alternatively, the provided dictionary stream pointer may be NULL, in which * case any existing dictionary stream is unset. * * A dictionary should only be attached to a stream without any history (i.e., * a stream that has just been reset). * * The dictionary will remain attached to the working stream only for the * current stream session. Calls to LZ4_resetStreamHC(_fast) will remove the * dictionary context association from the working stream. The dictionary * stream (and source buffer) must remain in-place / accessible / unchanged * through the lifetime of the stream session. */ LZ4LIB_STATIC_API void LZ4_attach_HC_dictionary( LZ4_streamHC_t *working_stream, const LZ4_streamHC_t *dictionary_stream); #if defined (__cplusplus) } #endif #endif /* LZ4_HC_SLO_098092834 */ #endif /* LZ4_HC_STATIC_LINKING_ONLY */ py-lz4framed-0.14.0/lz4/xxhash.c000066400000000000000000001023751357043434000163050ustar00rootroot00000000000000/* * xxHash - Fast Hash algorithm * Copyright (C) 2012-2016, Yann Collet * * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: * * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following disclaimer * in the documentation and/or other materials provided with the * distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * You can contact the author at : * - xxHash homepage: http://www.xxhash.com * - xxHash source repository : https://github.com/Cyan4973/xxHash */ /* ************************************* * Tuning parameters ***************************************/ /*!XXH_FORCE_MEMORY_ACCESS : * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable. 
 * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
 * The switch below allows selecting a different access method for improved performance.
 * Method 0 (default) : use `memcpy()`. Safe and portable.
 * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable).
 *            This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
 * Method 2 : direct access. This method doesn't depend on compiler but violates the C standard.
 *            It can generate buggy code on targets which do not support unaligned memory accesses.
 *            But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6)
 * See http://stackoverflow.com/a/32095106/646947 for details.
 * Prefer these methods in priority order (0 > 1 > 2)
 */
#ifndef XXH_FORCE_MEMORY_ACCESS   /* can be defined externally, on command line for example */
# if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \
                        || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) \
                        || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
#   define XXH_FORCE_MEMORY_ACCESS 2
# elif (defined(__INTEL_COMPILER) && !defined(_WIN32)) || \
  (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) \
                    || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) \
                    || defined(__ARM_ARCH_7S__) ))
#   define XXH_FORCE_MEMORY_ACCESS 1
# endif
#endif

/*!XXH_ACCEPT_NULL_INPUT_POINTER :
 * If input pointer is NULL, xxHash default behavior is to dereference it, triggering a segfault.
 * When this macro is enabled, xxHash actively checks input for null pointer.
 * If it is, result for null input pointers is the same as a null-length input.
 */
#ifndef XXH_ACCEPT_NULL_INPUT_POINTER   /* can be defined externally */
# define XXH_ACCEPT_NULL_INPUT_POINTER 0
#endif

/*!XXH_FORCE_NATIVE_FORMAT :
 * By default, xxHash library provides endian-independent Hash values, based on little-endian convention.
 * Results are therefore identical for little-endian and big-endian CPU.
 * This comes at a performance cost for big-endian CPU, since some swapping is required to emulate little-endian format.
 * Should endian-independence be of no importance for your application, you may set the #define below to 1,
 * to improve speed for Big-endian CPU.
 * This option has no impact on Little_Endian CPU.
 */
#ifndef XXH_FORCE_NATIVE_FORMAT   /* can be defined externally */
# define XXH_FORCE_NATIVE_FORMAT 0
#endif

/*!XXH_FORCE_ALIGN_CHECK :
 * This is a minor performance trick, only useful with lots of very small keys.
 * It means : check for aligned/unaligned input.
 * The check costs one initial branch per hash;
 * set it to 0 when the input is guaranteed to be aligned,
 * or when alignment doesn't matter for performance.
 */
#ifndef XXH_FORCE_ALIGN_CHECK   /* can be defined externally */
# if defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64)
#   define XXH_FORCE_ALIGN_CHECK 0
# else
#   define XXH_FORCE_ALIGN_CHECK 1
# endif
#endif


/* *************************************
 *  Includes & Memory related functions
 ***************************************/
/*! Modify the local functions below should you wish to use some other memory routines
 *  for malloc(), free() */
#include <stdlib.h>
static void* XXH_malloc(size_t s) { return malloc(s); }
static void  XXH_free  (void* p)  { free(p); }
/*! and for memcpy() */
#include <string.h>
static void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcpy(dest,src,size); }

#include <assert.h>   /* assert */

#define XXH_STATIC_LINKING_ONLY
#include "xxhash.h"


/* *************************************
 *  Compiler Specific Options
 ***************************************/
#ifdef _MSC_VER   /* Visual Studio */
# pragma warning(disable : 4127)   /* disable: C4127: conditional expression is constant */
# define FORCE_INLINE static __forceinline
#else
# if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
#  ifdef __GNUC__
#   define FORCE_INLINE static inline __attribute__((always_inline))
#  else
#   define FORCE_INLINE static inline
#  endif
# else
#  define FORCE_INLINE static
# endif /* __STDC_VERSION__ */
#endif


/* *************************************
 *  Basic Types
 ***************************************/
#ifndef MEM_MODULE
# if !defined (__VMS) \
  && (defined (__cplusplus) \
  || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
#   include <stdint.h>
    typedef uint8_t  BYTE;
    typedef uint16_t U16;
    typedef uint32_t U32;
# else
    typedef unsigned char  BYTE;
    typedef unsigned short U16;
    typedef unsigned int   U32;
# endif
#endif

#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2))

/* Force direct memory access. Only works on CPU which support unaligned memory access in hardware */
static U32 XXH_read32(const void* memPtr) { return *(const U32*) memPtr; }

#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1))

/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
/* currently only defined for gcc and icc */
typedef union { U32 u32; } __attribute__((packed)) unalign;
static U32 XXH_read32(const void* ptr) { return ((const unalign*)ptr)->u32; }

#else

/* portable and safe solution. Generally efficient.
* see : http://stackoverflow.com/a/32095106/646947 */ static U32 XXH_read32(const void* memPtr) { U32 val; memcpy(&val, memPtr, sizeof(val)); return val; } #endif /* XXH_FORCE_DIRECT_MEMORY_ACCESS */ /* **************************************** * Compiler-specific Functions and Macros ******************************************/ #define XXH_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) /* Note : although _rotl exists for minGW (GCC under windows), performance seems poor */ #if defined(_MSC_VER) # define XXH_rotl32(x,r) _rotl(x,r) # define XXH_rotl64(x,r) _rotl64(x,r) #else # define XXH_rotl32(x,r) ((x << r) | (x >> (32 - r))) # define XXH_rotl64(x,r) ((x << r) | (x >> (64 - r))) #endif #if defined(_MSC_VER) /* Visual Studio */ # define XXH_swap32 _byteswap_ulong #elif XXH_GCC_VERSION >= 403 # define XXH_swap32 __builtin_bswap32 #else static U32 XXH_swap32 (U32 x) { return ((x << 24) & 0xff000000 ) | ((x << 8) & 0x00ff0000 ) | ((x >> 8) & 0x0000ff00 ) | ((x >> 24) & 0x000000ff ); } #endif /* ************************************* * Architecture Macros ***************************************/ typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianess; /* XXH_CPU_LITTLE_ENDIAN can be defined externally, for example on the compiler command line */ #ifndef XXH_CPU_LITTLE_ENDIAN static int XXH_isLittleEndian(void) { const union { U32 u; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */ return one.c[0]; } # define XXH_CPU_LITTLE_ENDIAN XXH_isLittleEndian() #endif /* *************************** * Memory reads *****************************/ typedef enum { XXH_aligned, XXH_unaligned } XXH_alignment; FORCE_INLINE U32 XXH_readLE32_align(const void* ptr, XXH_endianess endian, XXH_alignment align) { if (align==XXH_unaligned) return endian==XXH_littleEndian ? XXH_read32(ptr) : XXH_swap32(XXH_read32(ptr)); else return endian==XXH_littleEndian ? *(const U32*)ptr : XXH_swap32(*(const U32*)ptr); } FORCE_INLINE U32 XXH_readLE32(const void* ptr, XXH_endianess endian) { return XXH_readLE32_align(ptr, endian, XXH_unaligned); } static U32 XXH_readBE32(const void* ptr) { return XXH_CPU_LITTLE_ENDIAN ? 
XXH_swap32(XXH_read32(ptr)) : XXH_read32(ptr); } /* ************************************* * Macros ***************************************/ #define XXH_STATIC_ASSERT(c) { enum { XXH_sa = 1/(int)(!!(c)) }; } /* use after variable declarations */ XXH_PUBLIC_API unsigned XXH_versionNumber (void) { return XXH_VERSION_NUMBER; } /* ******************************************************************* * 32-bit hash functions *********************************************************************/ static const U32 PRIME32_1 = 2654435761U; static const U32 PRIME32_2 = 2246822519U; static const U32 PRIME32_3 = 3266489917U; static const U32 PRIME32_4 = 668265263U; static const U32 PRIME32_5 = 374761393U; static U32 XXH32_round(U32 seed, U32 input) { seed += input * PRIME32_2; seed = XXH_rotl32(seed, 13); seed *= PRIME32_1; return seed; } /* mix all bits */ static U32 XXH32_avalanche(U32 h32) { h32 ^= h32 >> 15; h32 *= PRIME32_2; h32 ^= h32 >> 13; h32 *= PRIME32_3; h32 ^= h32 >> 16; return(h32); } #define XXH_get32bits(p) XXH_readLE32_align(p, endian, align) static U32 XXH32_finalize(U32 h32, const void* ptr, size_t len, XXH_endianess endian, XXH_alignment align) { const BYTE* p = (const BYTE*)ptr; #define PROCESS1 \ h32 += (*p++) * PRIME32_5; \ h32 = XXH_rotl32(h32, 11) * PRIME32_1 ; #define PROCESS4 \ h32 += XXH_get32bits(p) * PRIME32_3; \ p+=4; \ h32 = XXH_rotl32(h32, 17) * PRIME32_4 ; switch(len&15) /* or switch(bEnd - p) */ { case 12: PROCESS4; /* fallthrough */ case 8: PROCESS4; /* fallthrough */ case 4: PROCESS4; return XXH32_avalanche(h32); case 13: PROCESS4; /* fallthrough */ case 9: PROCESS4; /* fallthrough */ case 5: PROCESS4; PROCESS1; return XXH32_avalanche(h32); case 14: PROCESS4; /* fallthrough */ case 10: PROCESS4; /* fallthrough */ case 6: PROCESS4; PROCESS1; PROCESS1; return XXH32_avalanche(h32); case 15: PROCESS4; /* fallthrough */ case 11: PROCESS4; /* fallthrough */ case 7: PROCESS4; /* fallthrough */ case 3: PROCESS1; /* fallthrough */ case 2: PROCESS1; /* fallthrough */ case 1: PROCESS1; /* fallthrough */ case 0: return XXH32_avalanche(h32); } assert(0); return h32; /* reaching this point is deemed impossible */ } FORCE_INLINE U32 XXH32_endian_align(const void* input, size_t len, U32 seed, XXH_endianess endian, XXH_alignment align) { const BYTE* p = (const BYTE*)input; const BYTE* bEnd = p + len; U32 h32; #if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1) if (p==NULL) { len=0; bEnd=p=(const BYTE*)(size_t)16; } #endif if (len>=16) { const BYTE* const limit = bEnd - 15; U32 v1 = seed + PRIME32_1 + PRIME32_2; U32 v2 = seed + PRIME32_2; U32 v3 = seed + 0; U32 v4 = seed - PRIME32_1; do { v1 = XXH32_round(v1, XXH_get32bits(p)); p+=4; v2 = XXH32_round(v2, XXH_get32bits(p)); p+=4; v3 = XXH32_round(v3, XXH_get32bits(p)); p+=4; v4 = XXH32_round(v4, XXH_get32bits(p)); p+=4; } while (p < limit); h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7) + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18); } else { h32 = seed + PRIME32_5; } h32 += (U32)len; return XXH32_finalize(h32, p, len&15, endian, align); } XXH_PUBLIC_API unsigned int XXH32 (const void* input, size_t len, unsigned int seed) { #if 0 /* Simple version, good for code maintenance, but unfortunately slow for small inputs */ XXH32_state_t state; XXH32_reset(&state, seed); XXH32_update(&state, input, len); return XXH32_digest(&state); #else XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; if (XXH_FORCE_ALIGN_CHECK) { if ((((size_t)input) & 3) == 0) { /* Input is 4-bytes aligned, leverage the speed benefit 
*/ if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned); else return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned); } } if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned); else return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned); #endif } /*====== Hash streaming ======*/ XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void) { return (XXH32_state_t*)XXH_malloc(sizeof(XXH32_state_t)); } XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr) { XXH_free(statePtr); return XXH_OK; } XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dstState, const XXH32_state_t* srcState) { memcpy(dstState, srcState, sizeof(*dstState)); } XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, unsigned int seed) { XXH32_state_t state; /* using a local state to memcpy() in order to avoid strict-aliasing warnings */ memset(&state, 0, sizeof(state)); state.v1 = seed + PRIME32_1 + PRIME32_2; state.v2 = seed + PRIME32_2; state.v3 = seed + 0; state.v4 = seed - PRIME32_1; /* do not write into reserved, planned to be removed in a future version */ memcpy(statePtr, &state, sizeof(state) - sizeof(state.reserved)); return XXH_OK; } FORCE_INLINE XXH_errorcode XXH32_update_endian(XXH32_state_t* state, const void* input, size_t len, XXH_endianess endian) { if (input==NULL) #if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1) return XXH_OK; #else return XXH_ERROR; #endif { const BYTE* p = (const BYTE*)input; const BYTE* const bEnd = p + len; state->total_len_32 += (unsigned)len; state->large_len |= (len>=16) | (state->total_len_32>=16); if (state->memsize + len < 16) { /* fill in tmp buffer */ XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, len); state->memsize += (unsigned)len; return XXH_OK; } if (state->memsize) { /* some data left from previous update */ XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, 16-state->memsize); { const U32* p32 = state->mem32; state->v1 = XXH32_round(state->v1, XXH_readLE32(p32, endian)); p32++; state->v2 = XXH32_round(state->v2, XXH_readLE32(p32, endian)); p32++; state->v3 = XXH32_round(state->v3, XXH_readLE32(p32, endian)); p32++; state->v4 = XXH32_round(state->v4, XXH_readLE32(p32, endian)); } p += 16-state->memsize; state->memsize = 0; } if (p <= bEnd-16) { const BYTE* const limit = bEnd - 16; U32 v1 = state->v1; U32 v2 = state->v2; U32 v3 = state->v3; U32 v4 = state->v4; do { v1 = XXH32_round(v1, XXH_readLE32(p, endian)); p+=4; v2 = XXH32_round(v2, XXH_readLE32(p, endian)); p+=4; v3 = XXH32_round(v3, XXH_readLE32(p, endian)); p+=4; v4 = XXH32_round(v4, XXH_readLE32(p, endian)); p+=4; } while (p<=limit); state->v1 = v1; state->v2 = v2; state->v3 = v3; state->v4 = v4; } if (p < bEnd) { XXH_memcpy(state->mem32, p, (size_t)(bEnd-p)); state->memsize = (unsigned)(bEnd-p); } } return XXH_OK; } XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* state_in, const void* input, size_t len) { XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) return XXH32_update_endian(state_in, input, len, XXH_littleEndian); else return XXH32_update_endian(state_in, input, len, XXH_bigEndian); } FORCE_INLINE U32 XXH32_digest_endian (const XXH32_state_t* state, XXH_endianess endian) { U32 h32; if (state->large_len) { h32 = 
XXH_rotl32(state->v1, 1) + XXH_rotl32(state->v2, 7) + XXH_rotl32(state->v3, 12) + XXH_rotl32(state->v4, 18); } else { h32 = state->v3 /* == seed */ + PRIME32_5; } h32 += state->total_len_32; return XXH32_finalize(h32, state->mem32, state->memsize, endian, XXH_aligned); } XXH_PUBLIC_API unsigned int XXH32_digest (const XXH32_state_t* state_in) { XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) return XXH32_digest_endian(state_in, XXH_littleEndian); else return XXH32_digest_endian(state_in, XXH_bigEndian); } /*====== Canonical representation ======*/ /*! Default XXH result types are basic unsigned 32 and 64 bits. * The canonical representation follows human-readable write convention, aka big-endian (large digits first). * These functions allow transformation of hash result into and from its canonical format. * This way, hash values can be written into a file or buffer, remaining comparable across different systems. */ XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash) { XXH_STATIC_ASSERT(sizeof(XXH32_canonical_t) == sizeof(XXH32_hash_t)); if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap32(hash); memcpy(dst, &hash, sizeof(*dst)); } XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src) { return XXH_readBE32(src); } #ifndef XXH_NO_LONG_LONG /* ******************************************************************* * 64-bit hash functions *********************************************************************/ /*====== Memory access ======*/ #ifndef MEM_MODULE # define MEM_MODULE # if !defined (__VMS) \ && (defined (__cplusplus) \ || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) ) # include <stdint.h> typedef uint64_t U64; # else /* if compiler doesn't support unsigned long long, replace by another 64-bit type */ typedef unsigned long long U64; # endif #endif #if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2)) /* Force direct memory access. Only works on CPU which support unaligned memory access in hardware */ static U64 XXH_read64(const void* memPtr) { return *(const U64*) memPtr; } #elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1)) /* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ /* currently only defined for gcc and icc */ typedef union { U32 u32; U64 u64; } __attribute__((packed)) unalign64; static U64 XXH_read64(const void* ptr) { return ((const unalign64*)ptr)->u64; } #else /* portable and safe solution. Generally efficient. * see : http://stackoverflow.com/a/32095106/646947 */ static U64 XXH_read64(const void* memPtr) { U64 val; memcpy(&val, memPtr, sizeof(val)); return val; } #endif /* XXH_FORCE_DIRECT_MEMORY_ACCESS */ #if defined(_MSC_VER) /* Visual Studio */ # define XXH_swap64 _byteswap_uint64 #elif XXH_GCC_VERSION >= 403 # define XXH_swap64 __builtin_bswap64 #else static U64 XXH_swap64 (U64 x) { return ((x << 56) & 0xff00000000000000ULL) | ((x << 40) & 0x00ff000000000000ULL) | ((x << 24) & 0x0000ff0000000000ULL) | ((x << 8) & 0x000000ff00000000ULL) | ((x >> 8) & 0x00000000ff000000ULL) | ((x >> 24) & 0x0000000000ff0000ULL) | ((x >> 40) & 0x000000000000ff00ULL) | ((x >> 56) & 0x00000000000000ffULL); } #endif FORCE_INLINE U64 XXH_readLE64_align(const void* ptr, XXH_endianess endian, XXH_alignment align) { if (align==XXH_unaligned) return endian==XXH_littleEndian ?
XXH_read64(ptr) : XXH_swap64(XXH_read64(ptr)); else return endian==XXH_littleEndian ? *(const U64*)ptr : XXH_swap64(*(const U64*)ptr); } FORCE_INLINE U64 XXH_readLE64(const void* ptr, XXH_endianess endian) { return XXH_readLE64_align(ptr, endian, XXH_unaligned); } static U64 XXH_readBE64(const void* ptr) { return XXH_CPU_LITTLE_ENDIAN ? XXH_swap64(XXH_read64(ptr)) : XXH_read64(ptr); } /*====== xxh64 ======*/ static const U64 PRIME64_1 = 11400714785074694791ULL; static const U64 PRIME64_2 = 14029467366897019727ULL; static const U64 PRIME64_3 = 1609587929392839161ULL; static const U64 PRIME64_4 = 9650029242287828579ULL; static const U64 PRIME64_5 = 2870177450012600261ULL; static U64 XXH64_round(U64 acc, U64 input) { acc += input * PRIME64_2; acc = XXH_rotl64(acc, 31); acc *= PRIME64_1; return acc; } static U64 XXH64_mergeRound(U64 acc, U64 val) { val = XXH64_round(0, val); acc ^= val; acc = acc * PRIME64_1 + PRIME64_4; return acc; } static U64 XXH64_avalanche(U64 h64) { h64 ^= h64 >> 33; h64 *= PRIME64_2; h64 ^= h64 >> 29; h64 *= PRIME64_3; h64 ^= h64 >> 32; return h64; } #define XXH_get64bits(p) XXH_readLE64_align(p, endian, align) static U64 XXH64_finalize(U64 h64, const void* ptr, size_t len, XXH_endianess endian, XXH_alignment align) { const BYTE* p = (const BYTE*)ptr; #define PROCESS1_64 \ h64 ^= (*p++) * PRIME64_5; \ h64 = XXH_rotl64(h64, 11) * PRIME64_1; #define PROCESS4_64 \ h64 ^= (U64)(XXH_get32bits(p)) * PRIME64_1; \ p+=4; \ h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3; #define PROCESS8_64 { \ U64 const k1 = XXH64_round(0, XXH_get64bits(p)); \ p+=8; \ h64 ^= k1; \ h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4; \ } switch(len&31) { case 24: PROCESS8_64; /* fallthrough */ case 16: PROCESS8_64; /* fallthrough */ case 8: PROCESS8_64; return XXH64_avalanche(h64); case 28: PROCESS8_64; /* fallthrough */ case 20: PROCESS8_64; /* fallthrough */ case 12: PROCESS8_64; /* fallthrough */ case 4: PROCESS4_64; return XXH64_avalanche(h64); case 25: PROCESS8_64; /* fallthrough */ case 17: PROCESS8_64; /* fallthrough */ case 9: PROCESS8_64; PROCESS1_64; return XXH64_avalanche(h64); case 29: PROCESS8_64; /* fallthrough */ case 21: PROCESS8_64; /* fallthrough */ case 13: PROCESS8_64; /* fallthrough */ case 5: PROCESS4_64; PROCESS1_64; return XXH64_avalanche(h64); case 26: PROCESS8_64; /* fallthrough */ case 18: PROCESS8_64; /* fallthrough */ case 10: PROCESS8_64; PROCESS1_64; PROCESS1_64; return XXH64_avalanche(h64); case 30: PROCESS8_64; /* fallthrough */ case 22: PROCESS8_64; /* fallthrough */ case 14: PROCESS8_64; /* fallthrough */ case 6: PROCESS4_64; PROCESS1_64; PROCESS1_64; return XXH64_avalanche(h64); case 27: PROCESS8_64; /* fallthrough */ case 19: PROCESS8_64; /* fallthrough */ case 11: PROCESS8_64; PROCESS1_64; PROCESS1_64; PROCESS1_64; return XXH64_avalanche(h64); case 31: PROCESS8_64; /* fallthrough */ case 23: PROCESS8_64; /* fallthrough */ case 15: PROCESS8_64; /* fallthrough */ case 7: PROCESS4_64; /* fallthrough */ case 3: PROCESS1_64; /* fallthrough */ case 2: PROCESS1_64; /* fallthrough */ case 1: PROCESS1_64; /* fallthrough */ case 0: return XXH64_avalanche(h64); } /* impossible to reach */ assert(0); return 0; /* unreachable, but some compilers complain without it */ } FORCE_INLINE U64 XXH64_endian_align(const void* input, size_t len, U64 seed, XXH_endianess endian, XXH_alignment align) { const BYTE* p = (const BYTE*)input; const BYTE* bEnd = p + len; U64 h64; #if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1) if (p==NULL) { len=0; 
bEnd=p=(const BYTE*)(size_t)32; } #endif if (len>=32) { const BYTE* const limit = bEnd - 32; U64 v1 = seed + PRIME64_1 + PRIME64_2; U64 v2 = seed + PRIME64_2; U64 v3 = seed + 0; U64 v4 = seed - PRIME64_1; do { v1 = XXH64_round(v1, XXH_get64bits(p)); p+=8; v2 = XXH64_round(v2, XXH_get64bits(p)); p+=8; v3 = XXH64_round(v3, XXH_get64bits(p)); p+=8; v4 = XXH64_round(v4, XXH_get64bits(p)); p+=8; } while (p<=limit); h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18); h64 = XXH64_mergeRound(h64, v1); h64 = XXH64_mergeRound(h64, v2); h64 = XXH64_mergeRound(h64, v3); h64 = XXH64_mergeRound(h64, v4); } else { h64 = seed + PRIME64_5; } h64 += (U64) len; return XXH64_finalize(h64, p, len, endian, align); } XXH_PUBLIC_API unsigned long long XXH64 (const void* input, size_t len, unsigned long long seed) { #if 0 /* Simple version, good for code maintenance, but unfortunately slow for small inputs */ XXH64_state_t state; XXH64_reset(&state, seed); XXH64_update(&state, input, len); return XXH64_digest(&state); #else XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; if (XXH_FORCE_ALIGN_CHECK) { if ((((size_t)input) & 7)==0) { /* Input is aligned, let's leverage the speed advantage */ if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned); else return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned); } } if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned); else return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned); #endif } /*====== Hash Streaming ======*/ XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void) { return (XXH64_state_t*)XXH_malloc(sizeof(XXH64_state_t)); } XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr) { XXH_free(statePtr); return XXH_OK; } XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* dstState, const XXH64_state_t* srcState) { memcpy(dstState, srcState, sizeof(*dstState)); } XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH64_state_t* statePtr, unsigned long long seed) { XXH64_state_t state; /* using a local state to memcpy() in order to avoid strict-aliasing warnings */ memset(&state, 0, sizeof(state)); state.v1 = seed + PRIME64_1 + PRIME64_2; state.v2 = seed + PRIME64_2; state.v3 = seed + 0; state.v4 = seed - PRIME64_1; /* do not write into reserved, planned to be removed in a future version */ memcpy(statePtr, &state, sizeof(state) - sizeof(state.reserved)); return XXH_OK; } FORCE_INLINE XXH_errorcode XXH64_update_endian (XXH64_state_t* state, const void* input, size_t len, XXH_endianess endian) { if (input==NULL) #if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1) return XXH_OK; #else return XXH_ERROR; #endif { const BYTE* p = (const BYTE*)input; const BYTE* const bEnd = p + len; state->total_len += len; if (state->memsize + len < 32) { /* fill in tmp buffer */ XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, len); state->memsize += (U32)len; return XXH_OK; } if (state->memsize) { /* tmp buffer is full */ XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, 32-state->memsize); state->v1 = XXH64_round(state->v1, XXH_readLE64(state->mem64+0, endian)); state->v2 = XXH64_round(state->v2, XXH_readLE64(state->mem64+1, endian)); state->v3 = XXH64_round(state->v3, XXH_readLE64(state->mem64+2, endian)); state->v4 = XXH64_round(state->v4, XXH_readLE64(state->mem64+3, 
endian)); p += 32-state->memsize; state->memsize = 0; } if (p+32 <= bEnd) { const BYTE* const limit = bEnd - 32; U64 v1 = state->v1; U64 v2 = state->v2; U64 v3 = state->v3; U64 v4 = state->v4; do { v1 = XXH64_round(v1, XXH_readLE64(p, endian)); p+=8; v2 = XXH64_round(v2, XXH_readLE64(p, endian)); p+=8; v3 = XXH64_round(v3, XXH_readLE64(p, endian)); p+=8; v4 = XXH64_round(v4, XXH_readLE64(p, endian)); p+=8; } while (p<=limit); state->v1 = v1; state->v2 = v2; state->v3 = v3; state->v4 = v4; } if (p < bEnd) { XXH_memcpy(state->mem64, p, (size_t)(bEnd-p)); state->memsize = (unsigned)(bEnd-p); } } return XXH_OK; } XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* state_in, const void* input, size_t len) { XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) return XXH64_update_endian(state_in, input, len, XXH_littleEndian); else return XXH64_update_endian(state_in, input, len, XXH_bigEndian); } FORCE_INLINE U64 XXH64_digest_endian (const XXH64_state_t* state, XXH_endianess endian) { U64 h64; if (state->total_len >= 32) { U64 const v1 = state->v1; U64 const v2 = state->v2; U64 const v3 = state->v3; U64 const v4 = state->v4; h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18); h64 = XXH64_mergeRound(h64, v1); h64 = XXH64_mergeRound(h64, v2); h64 = XXH64_mergeRound(h64, v3); h64 = XXH64_mergeRound(h64, v4); } else { h64 = state->v3 /*seed*/ + PRIME64_5; } h64 += (U64) state->total_len; return XXH64_finalize(h64, state->mem64, (size_t)state->total_len, endian, XXH_aligned); } XXH_PUBLIC_API unsigned long long XXH64_digest (const XXH64_state_t* state_in) { XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) return XXH64_digest_endian(state_in, XXH_littleEndian); else return XXH64_digest_endian(state_in, XXH_bigEndian); } /*====== Canonical representation ======*/ XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash) { XXH_STATIC_ASSERT(sizeof(XXH64_canonical_t) == sizeof(XXH64_hash_t)); if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap64(hash); memcpy(dst, &hash, sizeof(*dst)); } XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src) { return XXH_readBE64(src); } #endif /* XXH_NO_LONG_LONG */ py-lz4framed-0.14.0/lz4/xxhash.h000066400000000000000000000322321357043434000163040ustar00rootroot00000000000000/* xxHash - Extremely Fast Hash algorithm Header File Copyright (C) 2012-2016, Yann Collet. BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. You can contact the author at : - xxHash source repository : https://github.com/Cyan4973/xxHash */ /* Notice extracted from xxHash homepage : xxHash is an extremely fast Hash algorithm, running at RAM speed limits. It also successfully passes all tests from the SMHasher suite. Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 Duo @3GHz) Name Speed Q.Score Author xxHash 5.4 GB/s 10 CrapWow 3.2 GB/s 2 Andrew MurmurHash 3a 2.7 GB/s 10 Austin Appleby SpookyHash 2.0 GB/s 10 Bob Jenkins SBox 1.4 GB/s 9 Bret Mulvey Lookup3 1.2 GB/s 9 Bob Jenkins SuperFastHash 1.2 GB/s 1 Paul Hsieh CityHash64 1.05 GB/s 10 Pike & Alakuijala FNV 0.55 GB/s 5 Fowler, Noll, Vo CRC32 0.43 GB/s 9 MD5-32 0.33 GB/s 10 Ronald L. Rivest SHA1-32 0.28 GB/s 10 Q.Score is a measure of quality of the hash function. It depends on successfully passing SMHasher test set. 10 is a perfect score. A 64-bit version, named XXH64, is available since r35. It offers much better speed, but for 64-bit applications only. Name Speed on 64 bits Speed on 32 bits XXH64 13.8 GB/s 1.9 GB/s XXH32 6.8 GB/s 6.0 GB/s */ #ifndef XXHASH_H_5627135585666179 #define XXHASH_H_5627135585666179 1 #if defined (__cplusplus) extern "C" { #endif /* **************************** * Definitions ******************************/ #include <stddef.h> /* size_t */ typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode; /* **************************** * API modifier ******************************/ /** XXH_INLINE_ALL (and XXH_PRIVATE_API) * This is useful to include xxhash functions in `static` mode * in order to inline them, and remove their symbol from the public list. * Inlining can offer dramatic performance improvement on small keys. * Methodology : * #define XXH_INLINE_ALL * #include "xxhash.h" * `xxhash.c` is automatically included. * It's not useful to compile and link it as a separate module. */ #if defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API) # ifndef XXH_STATIC_LINKING_ONLY # define XXH_STATIC_LINKING_ONLY # endif # if defined(__GNUC__) # define XXH_PUBLIC_API static __inline __attribute__((unused)) # elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) # define XXH_PUBLIC_API static inline # elif defined(_MSC_VER) # define XXH_PUBLIC_API static __inline # else /* this version may generate warnings for unused static functions */ # define XXH_PUBLIC_API static # endif #else # define XXH_PUBLIC_API /* do nothing */ #endif /* XXH_INLINE_ALL || XXH_PRIVATE_API */ /*! XXH_NAMESPACE, aka Namespace Emulation : * * If you want to include _and expose_ xxHash functions from within your own library, * but also want to avoid symbol collisions with other libraries which may also include xxHash, * * you can use XXH_NAMESPACE, to automatically prefix any public symbol from xxhash library * with the value of XXH_NAMESPACE (therefore, avoid NULL and numeric values).
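 * For example (a sketch; the MYLIB_ prefix is illustrative): building with -DXXH_NAMESPACE=MYLIB_
 * makes the macros below rename the public symbol XXH32 to MYLIB_XXH32 in the compiled library.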
* * Note that no change is required within the calling program as long as it includes `xxhash.h` : * regular symbol name will be automatically translated by this header. */ #ifdef XXH_NAMESPACE # define XXH_CAT(A,B) A##B # define XXH_NAME2(A,B) XXH_CAT(A,B) # define XXH_versionNumber XXH_NAME2(XXH_NAMESPACE, XXH_versionNumber) # define XXH32 XXH_NAME2(XXH_NAMESPACE, XXH32) # define XXH32_createState XXH_NAME2(XXH_NAMESPACE, XXH32_createState) # define XXH32_freeState XXH_NAME2(XXH_NAMESPACE, XXH32_freeState) # define XXH32_reset XXH_NAME2(XXH_NAMESPACE, XXH32_reset) # define XXH32_update XXH_NAME2(XXH_NAMESPACE, XXH32_update) # define XXH32_digest XXH_NAME2(XXH_NAMESPACE, XXH32_digest) # define XXH32_copyState XXH_NAME2(XXH_NAMESPACE, XXH32_copyState) # define XXH32_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH32_canonicalFromHash) # define XXH32_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH32_hashFromCanonical) # define XXH64 XXH_NAME2(XXH_NAMESPACE, XXH64) # define XXH64_createState XXH_NAME2(XXH_NAMESPACE, XXH64_createState) # define XXH64_freeState XXH_NAME2(XXH_NAMESPACE, XXH64_freeState) # define XXH64_reset XXH_NAME2(XXH_NAMESPACE, XXH64_reset) # define XXH64_update XXH_NAME2(XXH_NAMESPACE, XXH64_update) # define XXH64_digest XXH_NAME2(XXH_NAMESPACE, XXH64_digest) # define XXH64_copyState XXH_NAME2(XXH_NAMESPACE, XXH64_copyState) # define XXH64_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH64_canonicalFromHash) # define XXH64_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH64_hashFromCanonical) #endif /* ************************************* * Version ***************************************/ #define XXH_VERSION_MAJOR 0 #define XXH_VERSION_MINOR 6 #define XXH_VERSION_RELEASE 5 #define XXH_VERSION_NUMBER (XXH_VERSION_MAJOR *100*100 + XXH_VERSION_MINOR *100 + XXH_VERSION_RELEASE) XXH_PUBLIC_API unsigned XXH_versionNumber (void); /*-********************************************************************** * 32-bit hash ************************************************************************/ typedef unsigned int XXH32_hash_t; /*! XXH32() : Calculate the 32-bit hash of sequence "length" bytes stored at memory address "input". The memory between input & input+length must be valid (allocated and read-accessible). "seed" can be used to alter the result predictably. Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s */ XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t length, unsigned int seed); /*====== Streaming ======*/ typedef struct XXH32_state_s XXH32_state_t; /* incomplete type */ XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void); XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr); XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dst_state, const XXH32_state_t* src_state); XXH_PUBLIC_API XXH_errorcode XXH32_reset (XXH32_state_t* statePtr, unsigned int seed); XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length); XXH_PUBLIC_API XXH32_hash_t XXH32_digest (const XXH32_state_t* statePtr); /* * Streaming functions generate the xxHash of an input provided in multiple segments. * Note that, for small input, they are slower than single-call functions, due to state management. * For small inputs, prefer `XXH32()` and `XXH64()`, which are better optimized. * * XXH state must first be allocated, using XXH*_createState() . * * Start a new hash by initializing state with a seed, using XXH*_reset(). * * Then, feed the hash state by calling XXH*_update() as many times as necessary. 
* The function returns an error code, with 0 meaning OK, and any other value meaning there is an error. * * Finally, a hash value can be produced anytime, by using XXH*_digest(). * This function returns the nn-bits hash as an int or long long. * * It's still possible to continue inserting input into the hash state after a digest, * and generate some new hashes later on, by calling again XXH*_digest(). * * When done, free XXH state space if it was allocated dynamically. */ /*====== Canonical representation ======*/ typedef struct { unsigned char digest[4]; } XXH32_canonical_t; XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash); XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src); /* Default result type for XXH functions are primitive unsigned 32 and 64 bits. * The canonical representation uses human-readable write convention, aka big-endian (large digits first). * These functions allow transformation of hash result into and from its canonical format. * This way, hash values can be written into a file / memory, and remain comparable on different systems and programs. */ #ifndef XXH_NO_LONG_LONG /*-********************************************************************** * 64-bit hash ************************************************************************/ typedef unsigned long long XXH64_hash_t; /*! XXH64() : Calculate the 64-bit hash of sequence of length "len" stored at memory address "input". "seed" can be used to alter the result predictably. This function runs faster on 64-bit systems, but slower on 32-bit systems (see benchmark). */ XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t length, unsigned long long seed); /*====== Streaming ======*/ typedef struct XXH64_state_s XXH64_state_t; /* incomplete type */ XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void); XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr); XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* dst_state, const XXH64_state_t* src_state); XXH_PUBLIC_API XXH_errorcode XXH64_reset (XXH64_state_t* statePtr, unsigned long long seed); XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length); XXH_PUBLIC_API XXH64_hash_t XXH64_digest (const XXH64_state_t* statePtr); /*====== Canonical representation ======*/ typedef struct { unsigned char digest[8]; } XXH64_canonical_t; XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash); XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src); #endif /* XXH_NO_LONG_LONG */ #ifdef XXH_STATIC_LINKING_ONLY /* ================================================================================================ This section contains declarations which are not guaranteed to remain stable. They may change in future versions, becoming incompatible with a different version of the library. These declarations should only be used with static linking. Never use them in association with dynamic linking ! =================================================================================================== */ /* These definitions are only present to allow * static allocation of XXH state, on stack or in a struct for example. * Never **ever** use members directly. 
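 * A sketch of such static allocation (with XXH_STATIC_LINKING_ONLY defined; buf and len are illustrative):
 *   XXH64_state_t state;
 *   XXH64_reset(&state, 0);
 *   XXH64_update(&state, buf, len);
 *   XXH64_hash_t const hash = XXH64_digest(&state);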
*/ #if !defined (__VMS) \ && (defined (__cplusplus) \ || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) ) # include struct XXH32_state_s { uint32_t total_len_32; uint32_t large_len; uint32_t v1; uint32_t v2; uint32_t v3; uint32_t v4; uint32_t mem32[4]; uint32_t memsize; uint32_t reserved; /* never read nor write, might be removed in a future version */ }; /* typedef'd to XXH32_state_t */ struct XXH64_state_s { uint64_t total_len; uint64_t v1; uint64_t v2; uint64_t v3; uint64_t v4; uint64_t mem64[4]; uint32_t memsize; uint32_t reserved[2]; /* never read nor write, might be removed in a future version */ }; /* typedef'd to XXH64_state_t */ # else struct XXH32_state_s { unsigned total_len_32; unsigned large_len; unsigned v1; unsigned v2; unsigned v3; unsigned v4; unsigned mem32[4]; unsigned memsize; unsigned reserved; /* never read nor write, might be removed in a future version */ }; /* typedef'd to XXH32_state_t */ # ifndef XXH_NO_LONG_LONG /* remove 64-bit support */ struct XXH64_state_s { unsigned long long total_len; unsigned long long v1; unsigned long long v2; unsigned long long v3; unsigned long long v4; unsigned long long mem64[4]; unsigned memsize; unsigned reserved[2]; /* never read nor write, might be removed in a future version */ }; /* typedef'd to XXH64_state_t */ # endif # endif #if defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API) # include "xxhash.c" /* include xxhash function bodies as `static`, for inlining */ #endif #endif /* XXH_STATIC_LINKING_ONLY */ #if defined (__cplusplus) } #endif #endif /* XXHASH_H_5627135585666179 */ py-lz4framed-0.14.0/lz4framed/000077500000000000000000000000001357043434000160055ustar00rootroot00000000000000py-lz4framed-0.14.0/lz4framed/__init__.py000066400000000000000000000213331357043434000201200ustar00rootroot00000000000000# Copyright (c) 2016 Iotic Labs Ltd. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # https://github.com/Iotic-Labs/py-lz4framed/blob/master/LICENSE # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. """lz4 frame compression library, bound to lz4 C implementation Example usage: # To compress compressed = lz4framed.compress(b'binary data') # To decode uncompressed = lz4framed.decompress(compressed) To use a file-like objects as input/output, use the provided Compressor & Decompressor classes instead or manually utilise the context-using low-level methods. All methods are thread safe unless stated. 
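The low-level functions can also be driven by hand, e.g. (a minimal sketch of roughly what the Compressor class does internally; error handling omitted):

    ctx = lz4framed.create_compression_context()
    header = lz4framed.compress_begin(ctx)
    payload = lz4framed.compress_update(ctx, b'binary data')  # may be b'' whilst lz4 buffers
    footer = lz4framed.compress_end(ctx)  # flushes any remaining data and finalises the frame
    compressed = header + payload + footer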
""" from threading import Lock from _lz4framed import ( # noqa (unused import) LZ4F_BLOCKSIZE_DEFAULT, LZ4F_BLOCKSIZE_MAX64KB, LZ4F_BLOCKSIZE_MAX256KB, LZ4F_BLOCKSIZE_MAX1MB, LZ4F_BLOCKSIZE_MAX4MB, LZ4F_COMPRESSION_MIN, LZ4F_COMPRESSION_MIN_HC, LZ4F_COMPRESSION_MAX, LZ4F_ERROR_GENERIC, LZ4F_ERROR_maxBlockSize_invalid, LZ4F_ERROR_blockMode_invalid, LZ4F_ERROR_contentChecksumFlag_invalid, LZ4F_ERROR_compressionLevel_invalid, LZ4F_ERROR_headerVersion_wrong, LZ4F_ERROR_blockChecksum_invalid, LZ4F_ERROR_reservedFlag_set, LZ4F_ERROR_allocation_failed, LZ4F_ERROR_srcSize_tooLarge, LZ4F_ERROR_dstMaxSize_tooSmall, LZ4F_ERROR_frameHeader_incomplete, LZ4F_ERROR_frameType_unknown, LZ4F_ERROR_frameSize_wrong, LZ4F_ERROR_srcPtr_wrong, LZ4F_ERROR_decompressionFailed, LZ4F_ERROR_headerChecksum_invalid, LZ4F_ERROR_contentChecksum_invalid, LZ4F_ERROR_frameDecoding_alreadyStarted, LZ4F_VERSION, LZ4_VERSION, __version__, Lz4FramedError, Lz4FramedNoDataError, compress, decompress, create_compression_context, compress_begin, compress_update, compress_end, create_decompression_context, get_frame_info, decompress_update, get_block_size ) from .compat import Iterable as __Iterable class Compressor(object): """Iteratively compress data in lz4-framed - can be used as a context manager if writing to a file, e.g.: with open('myFile', 'wb') as f: # Context automatically finalises frame on completion, unless an exception occurs with Compressor(f) as c: try: while (...): c.update(moreData) except Lz4FramedNoDataError: pass Alternatively, with output from relevant methods: c = Compressor() while (...): try: someOutput.append(c.update(moreData)) except Lz4FramedNoDataError: pass # Finalise frame someOutput.append(c.end()) """ def __init__(self, fp=None, block_size_id=LZ4F_BLOCKSIZE_DEFAULT, block_mode_linked=True, checksum=False, autoflush=False, level=LZ4F_COMPRESSION_MIN, block_checksum=False): """ Args: fp: File like object (supporting write() method) to write compressed data to. If not set, data will be returned by the update(), flush() and end() methods. block_size_id (int): Compression block size identifier. One of the LZ4F_BLOCKSIZE_* constants block_mode_linked (bool): Whether compression blocks are linked checksum (bool): Whether to produce frame checksum autoflush (bool): Whether to return (or write to fp) compressed data on each update() call rather than waiting for internal buffer to be filled. (This reduces internal buffer size.) level (int): Compression level. Values lower than 3 (including negative ones) use fast compression. Recommended range for hc compression is between 4 and 9, with a maximum of LZ4_COMPRESSION_MAX. block_checksum (bool): Whether to produce checksum after each block """ self.__ctx = create_compression_context() self.__lock = Lock() if fp is None: self.__write = None elif not callable(fp.write): raise TypeError('fp.write not callable') else: self.__write = fp.write self.__header = compress_begin(self.__ctx, block_size_id=block_size_id, block_mode_linked=block_mode_linked, checksum=checksum, autoflush=autoflush, level=level, block_checksum=block_checksum) def __enter__(self): if self.__write is None: raise ValueError('Context only usable when fp supplied') return self def __exit__(self, exc_type, exc_value, traceback): self.end() def update(self, b): # pylint: disable=method-hidden,invalid-name """Compress data given in b, returning compressed result either from this function or writing to fp). Note: sometimes output might be zero length (if being buffered by lz4). 
Raises Lz4FramedNoDataError if input is of zero length.""" with self.__lock: output = compress_update(self.__ctx, b) if self.__write: self.__write(self.__header) self.__header = None self.__write(output) self.update = self.__updateNextWrite return None header = self.__header self.__header = None self.update = self.__updateNextReturn return header + output # post-first update methods so do not require header write & fp checks def __updateNextWrite(self, b): # pylint: disable=invalid-name self.__write(compress_update(self.__ctx, b)) def __updateNextReturn(self, b): # pylint: disable=invalid-name return compress_update(self.__ctx, b) def end(self): """Finalise lz4 frame, outputting any remaining as return from this function or by writing to fp)""" with self.__lock: if self.__write: self.__write(compress_end(self.__ctx)) return None return compress_end(self.__ctx) class Decompressor(__Iterable): # pylint: disable=super-init-not-called """Iteratively decompress blocks of an lz4-frame from a file-like object, e.g.: with open('myFile', 'rb') as f: try: for chunk in Decompressor(f): decoded.append(chunk) except Lz4FramedNoDataError: # Frame incomplete - error case The decompressor will automatically choose a meaningful read size. Note that some iterator calls might return zero-length data. The iterator raises LZ4FNoDataError if input (from fp.read) is of zero length, before decompression finished. """ def __init__(self, fp): """ Args: fp: File like object (supporting read() method) to read compressed data from. """ super(Decompressor, self).__init__() if fp is None: raise TypeError('fp') elif not callable(fp.read): raise TypeError('fp.read not callable') else: self.__read = fp.read self.__info = None self.__ctx = create_decompression_context() self.__lock = Lock() def __iter__(self): ctx = self.__ctx read = self.__read input_hint = 15 # enough to read largest header chunk_size = 32 # output chunk size, will be increased once block size known with self.__lock: output = decompress_update(ctx, read(input_hint), chunk_size) try: self.__info = info = get_frame_info(ctx) except Lz4FramedError as ex: if ex.args[1] != LZ4F_ERROR_frameHeader_incomplete: # should not happen since have read 15 bytes raise else: chunk_size = get_block_size(info['block_size_id']) input_hint = output.pop() # return any data as part of header read, if present for element in output: yield element while input_hint > 0: output = decompress_update(ctx, read(input_hint), chunk_size) input_hint = output.pop() for element in output: yield element @property def frame_info(self): """See get_frame_info(). Note: This will return None if not enough data has been read yet to decode header (typically at least one read from iterator).""" return self.__info py-lz4framed-0.14.0/lz4framed/__main__.py000066400000000000000000000061601357043434000201020ustar00rootroot00000000000000# Copyright (c) 2016 Iotic Labs Ltd. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # https://github.com/Iotic-Labs/py-lz4framed/blob/master/LICENSE # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
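# Example invocations of this module (a sketch; file names are illustrative):
#   python -m lz4framed compress somefile somefile.lz4
#   python -m lz4framed compress - somefile.lz4       # compress stdin, appending to somefile.lz4
#   python -m lz4framed decompress somefile.lz4       # decompressed output written to stdout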
"""(de)compresses to/from lz4-framed data""" from __future__ import print_function from sys import argv, stderr from .compat import STDIN_RAW, STDOUT_RAW from . import Compressor, Decompressor, Lz4FramedError, Lz4FramedNoDataError, get_block_size def __error(*args, **kwargs): print(*args, file=stderr, **kwargs) def do_compress(in_stream, out_stream): read = in_stream.read read_size = get_block_size() try: with Compressor(out_stream) as compressor: try: while True: compressor.update(read(read_size)) # empty read result supplied to update() except Lz4FramedNoDataError: pass # input stream exception except EOFError: pass except Lz4FramedError as ex: __error('Compression error: %s' % ex) return 8 return 0 def do_decompress(in_stream, out_stream): write = out_stream.write try: for chunk in Decompressor(in_stream): write(chunk) except Lz4FramedError as ex: __error('Compression error: %s' % ex) return 8 return 0 __ACTION = frozenset(('compress', 'decompress')) def main(): # noqa (complexity) if not (3 <= len(argv) <= 4 and argv[1] in __ACTION): print("""USAGE: lz4framed (compress|decompress) (INFILE|-) [OUTFILE] (De)compresses an lz4 frame. Input is read from INFILE unless set to '-', in which case stdin is used. If OUTFILE is not specified, output goes to stdout.""", file=stderr) return 1 compress = (argv[1] == 'compress') in_file = out_file = None try: # input if argv[2] == '-': in_stream = STDIN_RAW else: try: in_stream = in_file = open(argv[2], 'rb') except IOError as ex: __error('Failed to open input file for reading: %s' % ex) return 2 # output if len(argv) == 3: out_stream = STDOUT_RAW else: try: out_stream = out_file = open(argv[3], 'ab') except IOError as ex: __error('Failed to open output file for appending: %s' % ex) return 4 return (do_compress if compress else do_decompress)(in_stream, out_stream) except IOError as ex: __error('I/O failure: %s' % ex) finally: if in_file: in_file.close() if out_file: out_file.close() if __name__ == "__main__": exit(main()) py-lz4framed-0.14.0/lz4framed/compat.py000066400000000000000000000045131357043434000176450ustar00rootroot00000000000000# Copyright (c) 2016 Iotic Labs Ltd. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # https://github.com/Iotic-Labs/py-lz4framed/blob/master/LICENSE # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # Original six.py copyright notice, on which snippets herein are based: # # Copyright (c) 2010-2015 Benjamin Peterson # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in all # copies or substantial portions of the Software. 
# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. """Python v2.7 (NOT 2.6) compatibility""" # pylint: disable=unused-import,invalid-name,wrong-import-order from sys import stderr, stdout, stdin, version_info try: # pylint: disable=no-name-in-module from collections.abc import Iterable except ImportError: from collections import Iterable # noqa PY2 = (version_info[0] == 2) if PY2: STDIN_RAW = stdin STDOUT_RAW = stdout STDERR_RAW = stderr else: STDIN_RAW = getattr(stdin, 'buffer', stdin) STDOUT_RAW = getattr(stdout, 'buffer', stdout) STDERR_RAW = getattr(stderr, 'buffer', stderr) py-lz4framed-0.14.0/lz4framed/py-lz4framed.c000066400000000000000000001234041357043434000204730ustar00rootroot00000000000000/* * Copyright (c) 2016 Iotic Labs Ltd. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * https://github.com/Iotic-Labs/py-lz4framed/blob/master/LICENSE * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ // byte/string argument parsing size as Py_ssize_t (e.g. via PyArg_ParseTupleAndKeywords) #define PY_SSIZE_T_CLEAN #include #include #define LZ4F_STATIC_LINKING_ONLY #include "lz4frame.h" #include "lz4hc.h" /******************************************************************************/ #define UNUSED(x) (void)(x) #define QUOTE(str) #str #define EXPAND_AND_QUOTE(str) QUOTE(str) #define MAX(x, y) (x) >= (y) ? (x) : (y) #define KB *(1<<10) #define MB *(1<<20) // Due to negative levels now being supported, this no longer is particularly meaningful. 
#define LZ4_COMPRESSION_MIN 0 #define LZ4_COMPRESSION_MIN_HC LZ4HC_CLEVEL_MIN #define LZ4_COMPRESSION_MAX LZ4HC_CLEVEL_MAX #define _BAIL_ON_LZ4_ERROR(code, without_gil) {\ size_t __err;\ if (without_gil) {\ Py_BEGIN_ALLOW_THREADS;\ __err = (code);\ Py_END_ALLOW_THREADS;\ } else {\ __err = (code);\ }\ if (LZ4F_isError(__err)) {\ PyObject *num = NULL, *str = NULL, *tuple = NULL;\ if ((num = PyLong_FromSize_t(-(int)__err)) &&\ (str = PyUnicode_FromString(LZ4F_getErrorName(__err))) &&\ (tuple = PyTuple_Pack(2, str, num))) {\ PyErr_SetObject(LZ4FError, tuple);\ /* backup method in case object creation fails */\ } else {\ PyErr_Format(LZ4FError, "[%d] %s", -(int)__err, LZ4F_getErrorName(__err));\ }\ Py_XDECREF(tuple);\ Py_XDECREF(num);\ Py_XDECREF(str);\ goto bail;\ }\ } #define BAIL_ON_LZ4_ERROR(code) _BAIL_ON_LZ4_ERROR((code), 0) #ifdef WITH_THREAD #include #define LZ4FRAMED_LOCK_FLAG int lock_acquired = 0 #define ENTER_LZ4FRAMED(ctx) \ if (!lock_acquired) {\ Py_BEGIN_ALLOW_THREADS;\ PyThread_acquire_lock((ctx)->lock, 1);\ Py_END_ALLOW_THREADS;\ lock_acquired = 1;\ } #define EXIT_LZ4FRAMED(ctx) \ if (NULL != (ctx) && lock_acquired) {\ PyThread_release_lock((ctx)->lock);\ lock_acquired = 0;\ } #define BAIL_ON_LZ4_ERROR_NOGIL(code) _BAIL_ON_LZ4_ERROR((code), 1) #else #define LZ4FRAMED_LOCK_FLAG #define ENTER_LZ4FRAMED(ctx) #define EXIT_LZ4FRAMED(ctx) #define BAIL_ON_LZ4_ERROR_NOGIL(code) BAIL_ON_LZ4_ERROR(code) #endif // How large buffers have to be at least to release GIL #define NOGIL_COMPRESS_INPUT_SIZE_THRESHOLD 8*1024 #define NOGIL_DECOMPRESS_INPUT_SIZE_THRESHOLD 8*1024 #define NOGIL_DECOMPRESS_OUTPUT_SIZE_THRESHOLD 8*1024 #define BAIL_ON_NULL(result) \ if (NULL == (result)) {\ goto bail;\ } #define BAIL_ON_NONZERO(result) \ if (result) {\ goto bail;\ } #define COMPRESSION_CAPSULE_NAME "_lz4fcctx" #define DECOMPRESSION_CAPSULE_NAME "_lz4fdctx" PyDoc_STRVAR(__lz4f_error__doc__, "Raised when an lz4-specific error occurs. Arguments are the error message and associated code."); static PyObject *LZ4FError = NULL; PyDoc_STRVAR(__lz4f_no_data_error__doc__, "Raised by compress_update() and compress() when data supplied is of zero length"); static PyObject *LZ4FNoDataError = NULL; /* Hold compression context together with preferences, so compress_update & compress_end can calculate right output size * based on actualy preferences previously set via compress_begin (rather than defaults). The lock is used to preserve * thread safety when releasing GIL. 
*/ typedef struct { LZ4F_compressionContext_t ctx; LZ4F_preferences_t prefs; #ifdef WITH_THREAD PyThread_type_lock lock; #endif } _lz4f_cctx_t; typedef struct { LZ4F_decompressionContext_t ctx; #ifdef WITH_THREAD PyThread_type_lock lock; #endif } _lz4f_dctx_t; static LZ4F_preferences_t prefs_defaults = {{0, 0, 0, 0, 0, 0, 0}, 0, 0, 0, {0}}; /******************************************************************************/ static int _valid_lz4f_block_size_id(int id) { switch (id) { case LZ4F_default: case LZ4F_max64KB: case LZ4F_max256KB: case LZ4F_max1MB: case LZ4F_max4MB: return 1; default: return 0; } } static size_t _lz4f_block_size_from_id(int id) { static const size_t blockSizes[4] = { 64 KB, 256 KB, 1 MB, 4 MB }; if (!_valid_lz4f_block_size_id(id)) { return 0; } if (id == LZ4F_default) { id = LZ4F_max64KB; } id -= 4; return blockSizes[id]; } /******************************************************************************/ PyDoc_STRVAR(_lz4framed_get_block_size__doc__, "get_block_size(id=LZ4F_BLOCKSIZE_DEFAULT) -> int\n" "\n" "Returns block size in bytes for the given lz4 block size id\n" "\n" "Args:\n" " id (int): One of LZ4F_BLOCKSIZE_* constants, e.g. retrieved via get_frame_info()\n"); #define FUNC_DEF_GET_BLOCK_SIZE {"get_block_size", (PyCFunction)_lz4framed_get_block_size, METH_VARARGS,\ _lz4framed_get_block_size__doc__} static PyObject* _lz4framed_get_block_size(PyObject *self, PyObject *args) { int block_id = LZ4F_default; PyObject *byte_count = NULL; UNUSED(self); if (!PyArg_ParseTuple(args, "|i:get_block_size", &block_id)) { goto bail; } if (!_valid_lz4f_block_size_id(block_id)) { PyErr_Format(PyExc_ValueError, "id (%d) invalid", block_id); goto bail; } BAIL_ON_NULL(byte_count = PyLong_FromSize_t(_lz4f_block_size_from_id(block_id))); return byte_count; bail: Py_XDECREF(byte_count); return NULL; } /******************************************************************************/ PyDoc_STRVAR(_lz4framed_compress__doc__, "compress(b, block_size_id=LZ4F_BLOCKSIZE_DEFAULT, block_mode_linked=True,\n" " checksum=False, level=0, block_checksum=False) -> bytes\n" "\n" "Compresses the data given in b, returning the compressed and lz4-framed\n" "result.\n" "\n" "Args:\n" " b (bytes-like object): The object containing data to compress\n" " block_size_id (int): Compression block size identifier, one of the\n" " LZ4F_BLOCKSIZE_* constants\n" " block_mode_linked (bool): Whether compression blocks are linked. Better compression\n" " is achieved in linked mode.\n" " checksum (bool): Whether to produce frame checksum\n" " level (int): Compression level. Values lower than LZ4F_COMPRESSION_MIN_HC (including\n" " negative ones) use fast compression. Recommended range for hc compression\n" " is between 4 and 9, with a maximum of LZ4F_COMPRESSION_MAX.\n" " block_checksum (bool): Whether to produce checksum after each block.\n" "\n" "Raises:\n" " LZ4FNoDataError: If provided data is of zero length. 
(Useful for ending compression loop.)\n" " Lz4FramedError: If a compression failure occured"); #define FUNC_DEF_COMPRESS {"compress", (PyCFunction)_lz4framed_compress, METH_VARARGS | METH_KEYWORDS,\ _lz4framed_compress__doc__} static PyObject* _lz4framed_compress(PyObject *self, PyObject *args, PyObject *kwargs) { #if PY_MAJOR_VERSION >= 3 static const char *format = "y*|iiiii:compress"; #else static const char *format = "s*|iiiii:compress"; #endif static char *keywords[] = {"b", "block_size_id", "block_mode_linked", "checksum", "level", "block_checksum", NULL}; LZ4F_preferences_t prefs = prefs_defaults; Py_buffer input; int input_held = 0; // whether Py_buffer (input) needs to be released int block_id = LZ4F_default; int block_mode_linked = 1; int block_checksum = 0; int checksum = 0; int compression_level = LZ4_COMPRESSION_MIN; PyObject *output = NULL; char * output_str; size_t output_len; UNUSED(self); if (!PyArg_ParseTupleAndKeywords(args, kwargs, format, keywords, &input, &block_id, &block_mode_linked, &checksum, &compression_level, &block_checksum)) { goto bail; } input_held = 1; if (!PyBuffer_IsContiguous(&input, 'C')) { PyErr_SetString(PyExc_ValueError, "input not contiguous"); goto bail; } if (input.len <= 0) { PyErr_SetNone(LZ4FNoDataError); goto bail; } if (!_valid_lz4f_block_size_id(block_id)) { PyErr_Format(PyExc_ValueError, "block_size_id (%d) invalid", block_id); goto bail; } if (compression_level > LZ4_COMPRESSION_MAX) { PyErr_Format(PyExc_ValueError, "level (%d) invalid", compression_level); goto bail; } prefs.frameInfo.contentSize = input.len; prefs.frameInfo.blockMode = block_mode_linked ? LZ4F_blockLinked : LZ4F_blockIndependent; prefs.frameInfo.blockSizeID = block_id; prefs.frameInfo.blockChecksumFlag = block_checksum ? LZ4F_blockChecksumEnabled : LZ4F_noBlockChecksum; prefs.frameInfo.contentChecksumFlag = checksum ? LZ4F_contentChecksumEnabled : LZ4F_noContentChecksum; prefs.compressionLevel = compression_level; BAIL_ON_LZ4_ERROR(output_len = LZ4F_compressFrameBound(input.len, &prefs)); BAIL_ON_NULL(output = PyBytes_FromStringAndSize(NULL, output_len)); BAIL_ON_NULL(output_str = PyBytes_AsString(output)); if (input.len < NOGIL_COMPRESS_INPUT_SIZE_THRESHOLD) { BAIL_ON_LZ4_ERROR(output_len = LZ4F_compressFrame(output_str, output_len, input.buf, input.len, &prefs)); } else { BAIL_ON_LZ4_ERROR_NOGIL(output_len = LZ4F_compressFrame(output_str, output_len, input.buf, input.len, &prefs)); } // output length might be shorter than estimated BAIL_ON_NONZERO(_PyBytes_Resize(&output, output_len)); PyBuffer_Release(&input); input_held = 0; return output; bail: if (input_held) { PyBuffer_Release(&input); } Py_XDECREF(output); return NULL; } /******************************************************************************/ PyDoc_STRVAR(_lz4framed_decompress__doc__, "decompress(b, buffer_size=1024) -> bytes\n" "\n" "Decompresses framed lz4 blocks from the data given in *b*, returning the\n" "uncompressed result. For large payloads consider using Decompressor class\n" "to decompress in chunks.\n" "\n" "Args:\n" " b (bytes-like object): The object containing lz4-framed data to decompress\n" " buffer_size (int): Initial size of buffer in bytes for decompressed\n" " result. This is useful if the frame is not expected\n" " to indicate uncompressed length of data. If\n" " buffer_size is not large enough, it will be doubled\n" " until the resulting data fits. 
If the frame states\n" " uncompressed size or if len(b) > buffer_size, this\n" " parameter is ignored.\n" "\n" "Raises:\n" " LZ4FNoDataError: If provided data is of zero length\n" " Lz4FramedError: If a decompression failure occured"); #define FUNC_DEF_DECOMPRESS {"decompress", (PyCFunction)_lz4framed_decompress, METH_VARARGS | METH_KEYWORDS,\ _lz4framed_decompress__doc__} static PyObject* _lz4framed_decompress(PyObject *self, PyObject *args, PyObject *kwargs) { #if PY_MAJOR_VERSION >= 3 static const char *format = "y*|i:decompress"; #else static const char *format = "s*|i:decompress"; #endif static char *keywords[] = {"b", "buffer_size", NULL}; LZ4F_decompressionContext_t ctx = NULL; LZ4F_decompressOptions_t opt = {0, {0}}; LZ4F_frameInfo_t frame_info; Py_buffer input; int input_held = 0; // whether Py_buffer (input) needs to be released const char *input_pos; // position in input size_t input_remaining; // bytes remaining in input size_t input_read; // used by LZ4 functions to indicate how many bytes were / can be read size_t input_size_hint; // LZ4 hint to how many bytes make up the remaining block + next header int buffer_size = 1024; PyObject *output = NULL; char *output_pos; // position in output size_t output_len; // size of output size_t output_remaining; // bytes still available in output size_t output_written; // used by LZ4 to indicate how many bytes were / can be written UNUSED(self); if (!PyArg_ParseTupleAndKeywords(args, kwargs, format, keywords, &input, &buffer_size)) { goto bail; } input_held = 1; if (!PyBuffer_IsContiguous(&input, 'C')) { PyErr_SetString(PyExc_ValueError, "input not contiguous"); goto bail; } if (input.len <= 0) { PyErr_SetNone(LZ4FNoDataError); goto bail; } if (buffer_size <= 0) { PyErr_Format(PyExc_ValueError, "buffer_size (%d) invalid", buffer_size); goto bail; } input_read = input_remaining = input.len; input_pos = input.buf; BAIL_ON_LZ4_ERROR(LZ4F_createDecompressionContext(&ctx, LZ4F_VERSION)); // retrieve uncompressed data size BAIL_ON_LZ4_ERROR(input_size_hint = LZ4F_getFrameInfo(ctx, &frame_info, input_pos, &input_read)); input_pos += input_read; input_remaining = input_read = input_remaining - input_read; if (frame_info.contentSize) { output_len = frame_info.contentSize; // Prevent LZ4 from buffering output - works if uncompressed size known since output does not have to be resized opt.stableDst = 1; } else { // uncompressed size is always at least that of compressed output_len = MAX((size_t) buffer_size, input_remaining); } // set up initial output buffer BAIL_ON_NULL(output = PyBytes_FromStringAndSize(NULL, output_len)); BAIL_ON_NULL(output_pos = PyBytes_AsString(output)); output_written = output_remaining = output_len; while (1) { // Decompress next chunk (Releasing GIL if input is very small could be inefficient) if (input_read < NOGIL_DECOMPRESS_INPUT_SIZE_THRESHOLD) { BAIL_ON_LZ4_ERROR(input_size_hint = LZ4F_decompress(ctx, output_pos, &output_written, input_pos, &input_read, &opt)); } else { BAIL_ON_LZ4_ERROR_NOGIL(input_size_hint = LZ4F_decompress(ctx, output_pos, &output_written, input_pos, &input_read, &opt)); } output_pos += output_written; output_written = output_remaining = (output_remaining - output_written); // decompression complete (i.e. 
all data provided & fits within output buffer) if (!input_size_hint) { output_len -= output_remaining; break; } input_pos += input_read; input_read = input_remaining = (input_remaining - input_read); // destination too small if (input_remaining) { if (frame_info.contentSize) { // if frame specifies size, should never have to enlarge BAIL_ON_NONZERO(PyErr_WarnEx(PyExc_RuntimeWarning, "lz4frame contentSize mismatch", 2)); } output_remaining += output_len; output_written = output_remaining; output_len *= 2; BAIL_ON_NONZERO(_PyBytes_Resize(&output, output_len)); BAIL_ON_NULL(output_pos = PyBytes_AsString(output)); output_pos += (output_len - output_remaining); // insufficient data } else { PyErr_SetString(PyExc_ValueError, "frame incomplete"); goto bail; } } BAIL_ON_NONZERO(_PyBytes_Resize(&output, output_len)); PyBuffer_Release(&input); input_held = 0; LZ4F_freeDecompressionContext(ctx); return output; bail: if (input_held) { PyBuffer_Release(&input); } Py_XDECREF(output); LZ4F_freeDecompressionContext(ctx); return NULL; } /******************************************************************************/ static void _cctx_capsule_destructor(PyObject *py_ctx) { _lz4f_cctx_t *cctx = (_lz4f_cctx_t*)PyCapsule_GetPointer(py_ctx, COMPRESSION_CAPSULE_NAME); if (NULL != cctx) { // ignoring errors here since shouldn't throw exception in destructor LZ4F_freeCompressionContext(cctx->ctx); #ifdef WITH_THREAD PyThread_free_lock(cctx->lock); #endif PyMem_Del(cctx); } } static void _dctx_capsule_destructor(PyObject *py_ctx) { _lz4f_dctx_t *dctx = (_lz4f_dctx_t*)PyCapsule_GetPointer(py_ctx, DECOMPRESSION_CAPSULE_NAME); if (NULL != dctx) { // ignoring errors here since shouldn't throw exception in destructor LZ4F_freeDecompressionContext(dctx->ctx); #ifdef WITH_THREAD PyThread_free_lock(dctx->lock); #endif PyMem_Del(dctx); } } /******************************************************************************/ PyDoc_STRVAR(_lz4framed_create_compression_context__doc__, "create_compression_context() -> PyCapsule\n" "\n" "Create compression context for use in chunked compression.\n"); #define FUNC_DEF_CREATE_CCTX {"create_compression_context", _lz4framed_create_compression_context, METH_NOARGS,\ _lz4framed_create_compression_context__doc__} static PyObject* _lz4framed_create_compression_context(PyObject *self, PyObject *args) { _lz4f_cctx_t *cctx = NULL; PyObject *ctx_capsule = NULL; UNUSED(self); UNUSED(args); if (NULL == (cctx = PyMem_New(_lz4f_cctx_t, 1))) { PyErr_NoMemory(); goto bail; } cctx->ctx = NULL; cctx->prefs = prefs_defaults; #ifdef WITH_THREAD if (NULL == (cctx->lock = PyThread_allocate_lock())) { PyErr_SetString(PyExc_RuntimeError, "Failed to allocate lock"); goto bail; } #endif BAIL_ON_LZ4_ERROR(LZ4F_createCompressionContext(&(cctx->ctx), LZ4F_VERSION)); BAIL_ON_NULL(ctx_capsule = PyCapsule_New(cctx, COMPRESSION_CAPSULE_NAME, _cctx_capsule_destructor)); return ctx_capsule; bail: // this must NOT be freed once capsule exists (since destructor responsible for freeing) if (cctx) { LZ4F_freeCompressionContext(cctx->ctx); #ifdef WITH_THREAD if (cctx->lock) { PyThread_free_lock(cctx->lock); } #endif PyMem_Del(cctx); } return NULL; } /******************************************************************************/ PyDoc_STRVAR(_lz4framed_create_decompression_context__doc__, "create_decompression_context() -> PyCapsule\n" "\n" "Create decompression context for use in chunked decompression.\n"); #define FUNC_DEF_CREATE_DCTX {"create_decompression_context", _lz4framed_create_decompression_context, METH_NOARGS,\
_lz4framed_create_decompression_context__doc__} static PyObject* _lz4framed_create_decompression_context(PyObject *self, PyObject *args) { _lz4f_dctx_t *dctx = NULL; PyObject *dctx_capsule; UNUSED(self); UNUSED(args); if (NULL == (dctx = PyMem_New(_lz4f_dctx_t, 1))) { PyErr_NoMemory(); goto bail; } dctx->ctx = NULL; #ifdef WITH_THREAD if (NULL == (dctx->lock = PyThread_allocate_lock())) { PyErr_SetString(PyExc_RuntimeError, "Failed to allocate lock"); goto bail; } #endif BAIL_ON_LZ4_ERROR(LZ4F_createDecompressionContext(&(dctx->ctx), LZ4F_VERSION)); BAIL_ON_NULL(dctx_capsule = PyCapsule_New(dctx, DECOMPRESSION_CAPSULE_NAME, _dctx_capsule_destructor)); return dctx_capsule; bail: // this must NOT be freed once capsule exists (since destructor responsible for freeing) if (dctx) { LZ4F_freeDecompressionContext(dctx->ctx); #ifdef WITH_THREAD if (dctx->lock) { PyThread_free_lock(dctx->lock); } #endif PyMem_Del(dctx); } return NULL; } /******************************************************************************/ PyDoc_STRVAR(_lz4framed_compress_begin__doc__, "compress_begin(ctx, block_size_id=LZ4F_BLOCKSIZE_DEFAULT, block_mode_linked=True,\n" " checksum=False, autoflush=False, level=0, block_checksum=False) -> bytes\n" "\n" "Generates and returns frame header, sets compression options.\n" "\n" "Args:\n" " ctx: Compression context\n" " block_size_id (int): Compression block size identifier, one of the\n" " LZ4F_BLOCKSIZE_* constants. Use get_block_size() to\n" " determine size in bytes.\n" " block_mode_linked (bool): Whether compression blocks are linked\n" " checksum (bool): Whether to produce frame checksum\n" " autoflush (bool): Whether to flush output on update() calls rather than buffering\n" " incomplete blocks internally.\n" " level (int): Compression level. Values lower than LZ4F_COMPRESSION_MIN_HC (including\n" " negative ones) use fast compression. 
Recommended range for hc compression\n" " is between 4 and 9, with a maximum of LZ4F_COMPRESSION_MAX.\n" " block_checksum (bool): Whether to produce checksum after each block.\n" "\n" "Raises:\n" " Lz4FramedError: If a compression failure occurred"); #define FUNC_DEF_COMPRESS_BEGIN {"compress_begin", (PyCFunction)_lz4framed_compress_begin,\ METH_VARARGS | METH_KEYWORDS, _lz4framed_compress_begin__doc__} static PyObject* _lz4framed_compress_begin(PyObject *self, PyObject *args, PyObject *kwargs) { static const char *format = "O|iiiiii:compress_begin"; static char *keywords[] = {"ctx", "block_size_id", "block_mode_linked", "checksum", "autoflush", "level", "block_checksum", NULL}; _lz4f_cctx_t *cctx = NULL; PyObject *ctx_capsule; int block_id = LZ4F_default; int block_mode_linked = 1; int block_checksum = 0; int checksum = 0; int autoflush = 0; int compression_level = LZ4_COMPRESSION_MIN; PyObject *output = NULL; char *output_str; size_t output_len = LZ4F_HEADER_SIZE_MAX; LZ4FRAMED_LOCK_FLAG; UNUSED(self); if (!PyArg_ParseTupleAndKeywords(args, kwargs, format, keywords, &ctx_capsule, &block_id, &block_mode_linked, &checksum, &autoflush, &compression_level, &block_checksum)) { goto bail; } if (!PyCapsule_IsValid(ctx_capsule, COMPRESSION_CAPSULE_NAME)) { PyErr_SetString(PyExc_ValueError, "ctx invalid"); goto bail; } if (!_valid_lz4f_block_size_id(block_id)) { PyErr_Format(PyExc_ValueError, "block_size_id (%d) invalid", block_id); goto bail; } if (compression_level > LZ4_COMPRESSION_MAX) { PyErr_Format(PyExc_ValueError, "level (%d) invalid", compression_level); goto bail; } // Guaranteed to succeed due to PyCapsule_IsValid check above cctx = PyCapsule_GetPointer(ctx_capsule, COMPRESSION_CAPSULE_NAME); ENTER_LZ4FRAMED(cctx); cctx->prefs.frameInfo.blockMode = block_mode_linked ? LZ4F_blockLinked : LZ4F_blockIndependent; cctx->prefs.frameInfo.blockSizeID = block_id; cctx->prefs.frameInfo.blockChecksumFlag = block_checksum ? LZ4F_blockChecksumEnabled : LZ4F_noBlockChecksum; cctx->prefs.frameInfo.contentChecksumFlag = checksum ? LZ4F_contentChecksumEnabled : LZ4F_noContentChecksum; cctx->prefs.compressionLevel = compression_level; cctx->prefs.autoFlush = autoflush ? 1 : 0; BAIL_ON_NULL(output = PyBytes_FromStringAndSize(NULL, output_len)); BAIL_ON_NULL(output_str = PyBytes_AsString(output)); // not worth releasing GIL here since only writing header BAIL_ON_LZ4_ERROR(output_len = LZ4F_compressBegin(cctx->ctx, output_str, output_len, &(cctx->prefs))); EXIT_LZ4FRAMED(cctx); BAIL_ON_NONZERO(_PyBytes_Resize(&output, output_len)); return output; bail: EXIT_LZ4FRAMED(cctx); Py_XDECREF(output); return NULL; } /******************************************************************************/ PyDoc_STRVAR(_lz4framed_compress_update__doc__, "compress_update(ctx, b) -> bytes\n" "\n" "Compresses and returns the given data. Note: return can be zero-length if autoflush\n" "parameter is not set via compress_begin(). Once all data has been compressed,\n" "compress_end() must be called (to flush any remaining data and finalise frame).\n" "\n" "Args:\n" " ctx: Compression context\n" " b (bytes-like object): The object containing data to compress\n" "\n" "Raises:\n" " LZ4FNoDataError: If provided data is of zero length.
(Useful for ending compression loop.)\n" " Lz4FramedError: If a compression failure occurred"); #define FUNC_DEF_COMPRESS_UPDATE {"compress_update", (PyCFunction)_lz4framed_compress_update, METH_VARARGS,\ _lz4framed_compress_update__doc__} static PyObject* _lz4framed_compress_update(PyObject *self, PyObject *args) { #if PY_MAJOR_VERSION >= 3 static const char *format = "Oy*:compress_update"; #else static const char *format = "Os*:compress_update"; #endif _lz4f_cctx_t *cctx = NULL; PyObject *ctx_capsule; Py_buffer input; int input_held = 0; // whether Py_buffer (input) needs to be released PyObject *output = NULL; char *output_str; size_t output_len; LZ4FRAMED_LOCK_FLAG; UNUSED(self); if (!PyArg_ParseTuple(args, format, &ctx_capsule, &input)) { goto bail; } input_held = 1; // buffer acquired - must be released on every exit path if (!PyCapsule_IsValid(ctx_capsule, COMPRESSION_CAPSULE_NAME)) { PyErr_SetString(PyExc_ValueError, "ctx invalid"); goto bail; } if (!PyBuffer_IsContiguous(&input, 'C')) { PyErr_SetString(PyExc_ValueError, "input not contiguous"); goto bail; } if (input.len <= 0) { PyErr_SetNone(LZ4FNoDataError); goto bail; } // Guaranteed to succeed due to PyCapsule_IsValid check above cctx = PyCapsule_GetPointer(ctx_capsule, COMPRESSION_CAPSULE_NAME); ENTER_LZ4FRAMED(cctx); BAIL_ON_LZ4_ERROR(output_len = LZ4F_compressBound(input.len, &(cctx->prefs))); BAIL_ON_NULL(output = PyBytes_FromStringAndSize(NULL, output_len)); BAIL_ON_NULL(output_str = PyBytes_AsString(output)); if (input.len < NOGIL_COMPRESS_INPUT_SIZE_THRESHOLD) { BAIL_ON_LZ4_ERROR(output_len = LZ4F_compressUpdate(cctx->ctx, output_str, output_len, input.buf, input.len, NULL)); } else { BAIL_ON_LZ4_ERROR_NOGIL(output_len = LZ4F_compressUpdate(cctx->ctx, output_str, output_len, input.buf, input.len, NULL)); } EXIT_LZ4FRAMED(cctx); BAIL_ON_NONZERO(_PyBytes_Resize(&output, output_len)); PyBuffer_Release(&input); input_held = 0; return output; bail: EXIT_LZ4FRAMED(cctx); if (input_held) { PyBuffer_Release(&input); } Py_XDECREF(output); return NULL; } /******************************************************************************/ PyDoc_STRVAR(_lz4framed_compress_end__doc__, "compress_end(ctx) -> bytes\n" "\n" "Flushes any remaining compressed data, finalises frame and returns said data. After\n" "successful compression the context can be re-used for another frame.
Note: Calling\n" "this function without having written any data (via compress_update()) will produce\n" "an invalid frame.\n" "\n" "Args:\n" " ctx: Compression context\n" "\n" "Raises:\n" " Lz4FramedError: If a compression failure occurred"); #define FUNC_DEF_COMPRESS_END {"compress_end", (PyCFunction)_lz4framed_compress_end, METH_O,\ _lz4framed_compress_end__doc__} static PyObject* _lz4framed_compress_end(PyObject *self, PyObject *arg) { _lz4f_cctx_t *cctx = NULL; PyObject *output = NULL; char *output_str; size_t output_len; LZ4FRAMED_LOCK_FLAG; UNUSED(self); if (!PyCapsule_IsValid(arg, COMPRESSION_CAPSULE_NAME)) { PyErr_SetString(PyExc_ValueError, "ctx invalid"); goto bail; } // Guaranteed to succeed due to PyCapsule_IsValid check above cctx = PyCapsule_GetPointer(arg, COMPRESSION_CAPSULE_NAME); ENTER_LZ4FRAMED(cctx); BAIL_ON_LZ4_ERROR(output_len = LZ4F_compressBound(0, &(cctx->prefs))); BAIL_ON_NULL(output = PyBytes_FromStringAndSize(NULL, output_len)); BAIL_ON_NULL(output_str = PyBytes_AsString(output)); // not worth releasing GIL since should have less than a block left to write BAIL_ON_LZ4_ERROR(output_len = LZ4F_compressEnd(cctx->ctx, output_str, output_len, NULL)); EXIT_LZ4FRAMED(cctx); BAIL_ON_NONZERO(_PyBytes_Resize(&output, output_len)); return output; bail: EXIT_LZ4FRAMED(cctx); Py_XDECREF(output); return NULL; } /******************************************************************************/ PyDoc_STRVAR(_lz4framed_get_frame_info__doc__, "get_frame_info(ctx) -> dict\n" "\n" "Retrieves frame header information. This method can be called at any point during the\n" "decompression process. If the header has not been parsed yet due to lack of data, one can\n" "expect an Lz4FramedError exception with error code LZ4F_ERROR_HEADER_INCOMPLETE. On success\n" "the method returns a dict with the following keys:\n" " input_hint (int) - How many bytes to provide to the next decompress_update() call for optimal\n" " performance (due to not having to use internal buffers)\n" " length (int) - Uncompressed length of data (or zero if unknown)\n" " block_size_id (int) - One of LZ4F_BLOCKSIZE_* constants\n" " block_mode_linked (bool) - Whether blocks in frame are linked\n" " checksum (bool) - Whether the frame has a checksum (which will be verified)\n" "\n" "Args:\n" " ctx: Decompression context\n" "\n" "Raises:\n" " Lz4FramedError: If a decompression failure occurred"); #define FUNC_DEF_GET_FRAME_INFO {"get_frame_info", (PyCFunction)_lz4framed_get_frame_info, METH_O,\ _lz4framed_get_frame_info__doc__} static PyObject* _lz4framed_get_frame_info(PyObject *self, PyObject *arg) { _lz4f_dctx_t *dctx = NULL; LZ4F_frameInfo_t frameInfo; size_t input_hint; size_t input_read = 0; PyObject *dict = NULL; PyObject *item = NULL; LZ4FRAMED_LOCK_FLAG; UNUSED(self); if (!PyCapsule_IsValid(arg, DECOMPRESSION_CAPSULE_NAME)) { PyErr_SetString(PyExc_ValueError, "ctx invalid"); goto bail; } // Guaranteed to succeed due to PyCapsule_IsValid check above dctx = PyCapsule_GetPointer(arg, DECOMPRESSION_CAPSULE_NAME); ENTER_LZ4FRAMED(dctx); BAIL_ON_LZ4_ERROR(input_hint = LZ4F_getFrameInfo(dctx->ctx, &frameInfo, NULL, &input_read)); BAIL_ON_NULL(dict = PyDict_New()); BAIL_ON_NULL(item = PyLong_FromSize_t(input_hint)); BAIL_ON_NONZERO(PyDict_SetItemString(dict, "input_hint", item)); Py_CLEAR(item); BAIL_ON_NULL(item = PyLong_FromUnsignedLongLong(frameInfo.contentSize)); BAIL_ON_NONZERO(PyDict_SetItemString(dict, "length", item)); Py_CLEAR(item); BAIL_ON_NULL(item = PyLong_FromLong(frameInfo.blockSizeID));
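// blockSizeID maps directly onto the LZ4F_BLOCKSIZE_* constants exported by this module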
BAIL_ON_NONZERO(PyDict_SetItemString(dict, "block_size_id", item)); Py_CLEAR(item); BAIL_ON_NULL(item = PyBool_FromLong(frameInfo.blockMode == LZ4F_blockLinked)); BAIL_ON_NONZERO(PyDict_SetItemString(dict, "block_mode_linked", item)); Py_CLEAR(item); BAIL_ON_NULL(item = PyBool_FromLong(frameInfo.contentChecksumFlag == LZ4F_contentChecksumEnabled)); BAIL_ON_NONZERO(PyDict_SetItemString(dict, "checksum", item)); Py_CLEAR(item); EXIT_LZ4FRAMED(dctx); return dict; bail: EXIT_LZ4FRAMED(dctx); // necessary for item if dict assignment fails Py_XDECREF(item); Py_XDECREF(dict); return NULL; } /******************************************************************************/ PyDoc_STRVAR(_lz4framed_decompress_update__doc__, "decompress_update(ctx, b, chunk_len=65536) -> list\n" "\n" "Decompresses parts of an lz4 frame from data given in *b*, returning the\n" "uncompressed result as a list of chunks, with the last element being input_hint\n" "(i.e. how many bytes to ideally expect on the next call). Once input_hint is\n" "zero, decompression of the whole frame is complete. Note: A call may return\n" "no chunks if the input supplied so far does not complete a block.\n" "\n" "Args:\n" " ctx: Decompression context\n" " b (bytes-like object): The object containing lz4-framed data to decompress\n" " chunk_len (int): Size of uncompressed chunks in bytes. If not all of the\n" " data fits in one chunk, multiple will be used. Ideally\n" " only one chunk is required per call of this method - this can\n" " be determined from block_size_id via get_frame_info() call.\n" "\n" "Raises:\n" " Lz4FramedError: If a decompression failure occurred"); #define FUNC_DEF_DECOMPRESS_UPDATE {"decompress_update", (PyCFunction)_lz4framed_decompress_update,\ METH_VARARGS | METH_KEYWORDS, _lz4framed_decompress_update__doc__} static PyObject* _lz4framed_decompress_update(PyObject *self, PyObject *args, PyObject *kwargs) { #if PY_MAJOR_VERSION >= 3 static const char *format = "Oy*|i:decompress_update"; #else static const char *format = "Os*|i:decompress_update"; #endif static char *keywords[] = {"ctx", "b", "chunk_len", NULL}; _lz4f_dctx_t *dctx = NULL; PyObject *dctx_capsule; Py_buffer input; int input_held = 0; // whether Py_buffer (input) needs to be released const char *input_pos; // position in input size_t input_remaining; // bytes remaining in input size_t input_read; // used by LZ4 functions to indicate how many bytes were / can be read size_t input_size_hint = 1; // LZ4 hint to how many bytes make up the remaining block + next header size_t chunk_len = 65536; // size of chunks PyObject *list = NULL; // function return PyObject *size_hint = NULL; // python object of input_size_hint PyObject *chunk = NULL; char *chunk_pos = NULL; // position in current chunk size_t chunk_remaining; // space remaining in chunk size_t chunk_written; // used by lz4 to indicate how much has been written LZ4FRAMED_LOCK_FLAG; UNUSED(self); if (!PyArg_ParseTupleAndKeywords(args, kwargs, format, keywords, &dctx_capsule, &input, &chunk_len)) { goto bail; } input_held = 1; // buffer acquired - must be released on every exit path if (!PyCapsule_IsValid(dctx_capsule, DECOMPRESSION_CAPSULE_NAME)) { PyErr_SetString(PyExc_ValueError, "ctx invalid"); goto bail; } if (!PyBuffer_IsContiguous(&input, 'C')) { PyErr_SetString(PyExc_ValueError, "input not contiguous"); goto bail; } if (input.len <= 0) { PyErr_SetNone(LZ4FNoDataError); goto bail; } if (chunk_len <= 0) { PyErr_SetString(PyExc_ValueError, "chunk_len invalid"); goto bail; } // Guaranteed to succeed due to PyCapsule_IsValid check above dctx = PyCapsule_GetPointer(dctx_capsule,
DECOMPRESSION_CAPSULE_NAME); input_read = input_remaining = input.len; input_pos = input.buf; // output list BAIL_ON_NULL(list = PyList_New(0)); // first chunk BAIL_ON_NULL(chunk = PyBytes_FromStringAndSize(NULL, chunk_len)); BAIL_ON_NULL(chunk_pos = PyBytes_AsString(chunk)); chunk_written = chunk_remaining = chunk_len; ENTER_LZ4FRAMED(dctx); while (input_remaining && input_size_hint) { // add another chunk for more data when current one full if (!chunk_remaining) { // append previous (full) chunk to list BAIL_ON_NONZERO(PyList_Append(list, chunk)); Py_CLEAR(chunk); // create next chunk BAIL_ON_NULL(chunk = PyBytes_FromStringAndSize(NULL, chunk_len)); BAIL_ON_NULL(chunk_pos = PyBytes_AsString(chunk)); chunk_written = chunk_remaining = chunk_len; } if (chunk_written < NOGIL_DECOMPRESS_OUTPUT_SIZE_THRESHOLD) { BAIL_ON_LZ4_ERROR(input_size_hint = LZ4F_decompress(dctx->ctx, chunk_pos, &chunk_written, input_pos, &input_read, NULL)); } else { BAIL_ON_LZ4_ERROR_NOGIL(input_size_hint = LZ4F_decompress(dctx->ctx, chunk_pos, &chunk_written, input_pos, &input_read, NULL)); } chunk_pos += chunk_written; chunk_written = chunk_remaining = (chunk_remaining - chunk_written); input_pos += input_read; input_read = input_remaining = (input_remaining - input_read); } EXIT_LZ4FRAMED(dctx); // append & reduce size of final chunk (if contains any data) if (chunk_remaining < chunk_len) { BAIL_ON_NONZERO(_PyBytes_Resize(&chunk, chunk_len - chunk_remaining)); BAIL_ON_NONZERO(PyList_Append(list, chunk)); } // append input size hint to list BAIL_ON_NULL(size_hint = PyLong_FromSize_t(input_size_hint)); BAIL_ON_NONZERO(PyList_Append(list, size_hint)); PyBuffer_Release(&input); input_held = 0; Py_CLEAR(chunk); Py_CLEAR(size_hint); return list; bail: EXIT_LZ4FRAMED(dctx); if (input_held) { PyBuffer_Release(&input); } Py_XDECREF(chunk); Py_XDECREF(size_hint); Py_XDECREF(list); return NULL; } /******************************************************************************/ static PyMethodDef Lz4framedMethods[] = { FUNC_DEF_GET_BLOCK_SIZE, FUNC_DEF_COMPRESS, FUNC_DEF_DECOMPRESS, FUNC_DEF_CREATE_CCTX, FUNC_DEF_CREATE_DCTX, FUNC_DEF_COMPRESS_BEGIN, FUNC_DEF_COMPRESS_UPDATE, FUNC_DEF_COMPRESS_END, FUNC_DEF_GET_FRAME_INFO, FUNC_DEF_DECOMPRESS_UPDATE, {NULL, NULL, 0, NULL} }; struct module_state { PyObject *error; }; #if PY_MAJOR_VERSION >= 3 #define GETSTATE(m) ((struct module_state*)PyModule_GetState(m)) #else #define GETSTATE(m) (&_state) static struct module_state _state; #endif #if PY_MAJOR_VERSION >= 3 static int myextension_traverse(PyObject *m, visitproc visit, void *arg) { Py_VISIT(GETSTATE(m)->error); return 0; } static int myextension_clear(PyObject *m) { Py_CLEAR(GETSTATE(m)->error); return 0; } static struct PyModuleDef moduledef = { PyModuleDef_HEAD_INIT, "_lz4framed", NULL, sizeof(struct module_state), Lz4framedMethods, NULL, myextension_traverse, myextension_clear, NULL }; #define INITERROR return NULL PyObject* PyInit__lz4framed(void) #else #define INITERROR return void init_lz4framed(void) #endif { struct module_state *state = NULL; #if PY_MAJOR_VERSION >= 3 PyObject *module = PyModule_Create(&moduledef); #else PyObject *module = Py_InitModule("_lz4framed", Lz4framedMethods); #endif BAIL_ON_NULL(module); BAIL_ON_NULL(state = GETSTATE(module)); BAIL_ON_NULL(state->error = PyErr_NewException("_lz4framed.Error", NULL, NULL)); BAIL_ON_NULL(LZ4FError = PyErr_NewExceptionWithDoc("_lz4framed.Lz4FramedError", __lz4f_error__doc__, NULL, NULL)); BAIL_ON_NULL(LZ4FNoDataError = 
PyErr_NewExceptionWithDoc("_lz4framed.Lz4FramedNoDataError", __lz4f_no_data_error__doc__, NULL, NULL)); Py_INCREF(LZ4FError); Py_INCREF(LZ4FNoDataError); // non-zero returns indicate error if (PyModule_AddObject(module, "Lz4FramedError", LZ4FError) || PyModule_AddObject(module, "Lz4FramedNoDataError", LZ4FNoDataError) || PyModule_AddStringConstant(module, "__version__", EXPAND_AND_QUOTE(VERSION)) || PyModule_AddStringConstant(module, "LZ4_VERSION", LZ4_VERSION_STRING) || PyModule_AddIntMacro(module, LZ4F_VERSION) || PyModule_AddIntMacro(module, LZ4F_ERROR_GENERIC) || PyModule_AddIntMacro(module, LZ4F_ERROR_maxBlockSize_invalid) || PyModule_AddIntMacro(module, LZ4F_ERROR_blockMode_invalid) || PyModule_AddIntMacro(module, LZ4F_ERROR_contentChecksumFlag_invalid) || PyModule_AddIntMacro(module, LZ4F_ERROR_compressionLevel_invalid) || PyModule_AddIntMacro(module, LZ4F_ERROR_headerVersion_wrong) || PyModule_AddIntMacro(module, LZ4F_ERROR_blockChecksum_invalid) || PyModule_AddIntMacro(module, LZ4F_ERROR_reservedFlag_set) || PyModule_AddIntMacro(module, LZ4F_ERROR_allocation_failed) || PyModule_AddIntMacro(module, LZ4F_ERROR_srcSize_tooLarge) || PyModule_AddIntMacro(module, LZ4F_ERROR_dstMaxSize_tooSmall) || PyModule_AddIntMacro(module, LZ4F_ERROR_frameHeader_incomplete) || PyModule_AddIntMacro(module, LZ4F_ERROR_frameType_unknown) || PyModule_AddIntMacro(module, LZ4F_ERROR_frameSize_wrong) || PyModule_AddIntMacro(module, LZ4F_ERROR_srcPtr_wrong) || PyModule_AddIntMacro(module, LZ4F_ERROR_decompressionFailed) || PyModule_AddIntMacro(module, LZ4F_ERROR_headerChecksum_invalid) || PyModule_AddIntMacro(module, LZ4F_ERROR_contentChecksum_invalid) || PyModule_AddIntMacro(module, LZ4F_ERROR_frameDecoding_alreadyStarted) || PyModule_AddIntConstant(module, "LZ4F_BLOCKSIZE_DEFAULT", LZ4F_default) || PyModule_AddIntConstant(module, "LZ4F_BLOCKSIZE_MAX64KB", LZ4F_max64KB) || PyModule_AddIntConstant(module, "LZ4F_BLOCKSIZE_MAX256KB", LZ4F_max256KB) || PyModule_AddIntConstant(module, "LZ4F_BLOCKSIZE_MAX1MB", LZ4F_max1MB) || PyModule_AddIntConstant(module, "LZ4F_BLOCKSIZE_MAX4MB", LZ4F_max4MB) || PyModule_AddIntConstant(module, "LZ4F_COMPRESSION_MIN", LZ4_COMPRESSION_MIN) || PyModule_AddIntConstant(module, "LZ4F_COMPRESSION_MIN_HC", LZ4_COMPRESSION_MIN_HC) || PyModule_AddIntConstant(module, "LZ4F_COMPRESSION_MAX", LZ4_COMPRESSION_MAX)) { goto bail; } #if PY_MAJOR_VERSION >= 3 return module; #else return; #endif bail: Py_XINCREF(LZ4FError); Py_XINCREF(LZ4FNoDataError); Py_XDECREF(module); INITERROR; } py-lz4framed-0.14.0/pylint.rc000066400000000000000000000304121357043434000157620ustar00rootroot00000000000000[MASTER] # Specify a configuration file. #rcfile= # Python code to execute, usually for sys.path manipulation such as # pygtk.require(). #init-hook= # Add files or directories to the blacklist. They should be base names, not # paths. ignore=CVS # Pickle collected data for later comparisons. persistent=no # List of plugins (as comma separated values of python modules names) to load, # usually to register additional checkers. load-plugins= # Use multiple processes to speed up Pylint. jobs=4 # Allow loading of arbitrary C extensions. Extensions are imported into the # active Python interpreter and may run arbitrary code. unsafe-load-any-extension=no # A comma-separated list of package or module names from where C extensions may # be loaded. Extensions are loading into the active Python interpreter and may # run arbitrary code extension-pkg-whitelist=_lz4framed # Allow optimization of some AST trees. 
This will activate a peephole AST # optimizer, which will apply various small optimizations. For instance, it can # be used to obtain the result of joining multiple strings with the addition # operator. Joining a lot of strings can lead to a maximum recursion error in # Pylint and this flag can prevent that. It has one side effect, the resulting # AST will be different than the one from reality. optimize-ast=no [MESSAGES CONTROL] # Only show warnings with the listed confidence levels. Leave empty to show # all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED confidence= # Enable the message, report, category or checker with the given id(s). You can # either give multiple identifier separated by comma (,) or put this option # multiple time. See also the "--disable" option for examples. #enable= # Disable the message, report, category or checker with the given id(s). You # can either give multiple identifiers separated by comma (,) or put this # option multiple times (only on the command line, not in the configuration # file where it should appear only once).You can also use "--disable=all" to # disable everything first and then reenable specific checks. For example, if # you want to run only the similarities checker, you can use "--disable=all # --enable=similarities". If you want to run only the classes checker, but have # no Warning level messages displayed, use"--disable=all --enable=classes # --disable=W" #disable=execfile-builtin,import-star-module-level,no-absolute-import,unicode-builtin,xrange-builtin,raising-string,coerce-builtin,long-builtin,old-octal-literal,dict-iter-method,delslice-method,intern-builtin,oct-method,using-cmp-argument,next-method-called,range-builtin-not-iterating,standarderror-builtin,input-builtin,old-raise-syntax,zip-builtin-not-iterating,backtick,file-builtin,suppressed-message,buffer-builtin,print-statement,dict-view-method,hex-method,coerce-method,raw_input-builtin,old-ne-operator,useless-suppression,apply-builtin,cmp-builtin,setslice-method,parameter-unpacking,filter-builtin-not-iterating,reduce-builtin,old-division,reload-builtin,unichr-builtin,long-suffix,nonzero-method,unpacking-in-except,basestring-builtin,indexing-exception,map-builtin-not-iterating,round-builtin,metaclass-assignment,getslice-method,cmp-method # Custom exclude list: disable=too-few-public-methods,W0511,locally-disabled,missing-docstring,useless-object-inheritance [REPORTS] # Set the output format. Available formats are text, parseable, colorized, msvs # (visual studio) and html. You can also give a reporter class, eg # mypackage.mymodule.MyReporterClass. output-format=text # Put messages in a separate file for each module / package specified on the # command line instead of printing them on stdout. Reports (if any) will be # written in a file name "pylint_global.[txt|html]". files-output=no # Tells whether to display a full report or only the messages reports=no # Python expression which should return a note less than 10 (10 is the highest # note). You have access to the variables errors warning, statement which # respectively contain the number of errors / warnings messages and the total # number of statements analyzed. This is used by the global evaluation report # (RP0004). evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10) # Template used to display messages. This is a python new-style format string # used to format the message information. 
See doc for all details #msg-template= [LOGGING] # Logging modules to check that the string format arguments are in logging # function parameter format logging-modules=logging [SIMILARITIES] # Minimum lines number of a similarity. min-similarity-lines=4 # Ignore comments when computing similarities. ignore-comments=yes # Ignore docstrings when computing similarities. ignore-docstrings=yes # Ignore imports when computing similarities. ignore-imports=no # Activate the evaluation score. score=no [FORMAT] # Maximum number of characters on a single line. max-line-length=120 # Regexp for a line that is allowed to be longer than the limit. ignore-long-lines=^\s*(# )??$ # Allow the body of an if to be on the same line as the test if there is no # else. single-line-if-stmt=no # List of optional constructs for which whitespace checking is disabled. `dict- # separator` is used to allow tabulation in dicts, etc.: {1 : 1,\n222: 2}. # `trailing-comma` allows a space between comma and closing bracket: (a, ). # `empty-line` allows space-only lines. no-space-check=trailing-comma,dict-separator # Maximum number of lines in a module max-module-lines=1000 # String used as indentation unit. This is usually " " (4 spaces) or "\t" (1 # tab). indent-string=' ' # Number of spaces of indent required inside a hanging or continued line. indent-after-paren=4 # Expected format of line ending, e.g. empty (any line ending), LF or CRLF. expected-line-ending-format=LF [BASIC] # List of builtins function names that should not be used, separated by a comma bad-functions=map,filter # Good variable names which should always be accepted, separated by a comma good-names=i,j,k,ex,Run,_,setUp,fp # Bad variable names which should always be refused, separated by a comma bad-names=foo,bar,baz,toto,tutu,tata # Colon-delimited sets of names that determine each other's naming style when # the name regexes allow several styles. 
name-group= # Include a hint for the correct naming format with invalid-name include-naming-hint=no # Regular expression matching correct attribute names attr-rgx=[a-z_][a-z0-9_]{2,30}$ # Naming hint for attribute names attr-name-hint=[a-z_][a-z0-9_]{2,30}$ # Regular expression matching correct class names class-rgx=[A-Z_][a-zA-Z0-9]+$ # Naming hint for class names class-name-hint=[A-Z_][a-zA-Z0-9]+$ # Regular expression matching correct class attribute names class-attribute-rgx=([A-Za-z_][A-Za-z0-9_]{2,30}|(__.*__))$ # Naming hint for class attribute names class-attribute-name-hint=([A-Za-z_][A-Za-z0-9_]{2,30}|(__.*__))$ # Regular expression matching correct argument names argument-rgx=[a-z_][a-z0-9_]{2,30}$ # Naming hint for argument names argument-name-hint=[a-z_][a-z0-9_]{2,30}$ # Regular expression matching correct variable names variable-rgx=[a-z_][a-z0-9_]{2,30}$ # Naming hint for variable names variable-name-hint=[a-z_][a-z0-9_]{2,30}$ # Regular expression matching correct function names function-rgx=[a-z_][a-z0-9_]{2,30}$ # Naming hint for function names function-name-hint=[a-z_][a-z0-9_]{2,30}$ # Regular expression matching correct module names module-rgx=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$ # Naming hint for module names module-name-hint=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$ # Regular expression matching correct inline iteration names inlinevar-rgx=[A-Za-z_][A-Za-z0-9_]*$ # Naming hint for inline iteration names inlinevar-name-hint=[A-Za-z_][A-Za-z0-9_]*$ # Regular expression matching correct constant names const-rgx=(([A-Z_][A-Z0-9_]*)|(__.*__))$ # Naming hint for constant names const-name-hint=(([A-Z_][A-Z0-9_]*)|(__.*__))$ # Regular expression matching correct method names method-rgx=[a-z_][a-z0-9_]{2,30}$ # Naming hint for method names method-name-hint=[a-z_][a-z0-9_]{2,30}$ # Regular expression which should only match function or class names that do # not require a docstring. no-docstring-rgx=^_ # Minimum line length for functions/classes that require docstrings, shorter # ones are exempt. docstring-min-length=-1 [TYPECHECK] # Tells whether missing members accessed in mixin class should be ignored. A # mixin class is detected if its name ends with "mixin" (case insensitive). ignore-mixin-members=yes # List of module names for which member attributes should not be checked # (useful for modules/projects where namespaces are manipulated during runtime # and thus existing member attributes cannot be deduced by static analysis. It # supports qualified module names, as well as Unix pattern matching. ignored-modules= # List of classes names for which member attributes should not be checked # (useful for classes with attributes dynamically set). This supports can work # with qualified names. ignored-classes= # List of members which are set dynamically and missed by pylint inference # system, and so shouldn't trigger E1101 when accessed. Python regular # expressions are accepted. generated-members= [MISCELLANEOUS] # List of note tags to take in consideration, separated by a comma. notes=FIXME,XXX,TODO [VARIABLES] # Tells whether we should check for unused import in __init__ files. init-import=no # A regular expression matching the name of dummy variables (i.e. expectedly # not used). dummy-variables-rgx=_$|dummy # List of additional names supposed to be defined in builtins. Remember that # you should avoid to define new builtins when possible. additional-builtins= # List of strings which can identify a callback function by name. 
A callback # name must start or end with one of those strings. callbacks= [ELIF] # Maximum number of nested blocks for function / method body max-nested-blocks=5 [SPELLING] # Spelling dictionary name. Available dictionaries: none. To make it working # install python-enchant package. spelling-dict= # List of comma separated words that should not be checked. spelling-ignore-words= # A path to a file that contains private dictionary; one word per line. spelling-private-dict-file= # Tells whether to store unknown words to indicated private dictionary in # --spelling-private-dict-file option instead of raising a message. spelling-store-unknown-words=no [DESIGN] # Maximum number of arguments for function / method max-args=8 # Argument names that match this expression will be ignored. Default to name # with leading underscore ignored-argument-names=_.* # Maximum number of locals for function / method body max-locals=15 # Maximum number of return / yield for function / method body max-returns=6 # Maximum number of branch for function / method body max-branches=12 # Maximum number of statements in function / method body max-statements=50 # Maximum number of parents for a class (see R0901). max-parents=7 # Maximum number of attributes for a class (see R0902). max-attributes=7 # Minimum number of public methods for a class (see R0903). min-public-methods=2 # Maximum number of public methods for a class (see R0904). max-public-methods=20 # Maximum number of boolean expressions in a if statement max-bool-expr=5 [IMPORTS] # Deprecated modules which should not be used, separated by a comma deprecated-modules=optparse # Create a graph of every (i.e. internal and external) dependencies in the # given file (report RP0402 must not be disabled) import-graph= # Create a graph of external dependencies in the given file (report RP0402 must # not be disabled) ext-import-graph= # Create a graph of internal dependencies in the given file (report RP0402 must # not be disabled) int-import-graph= [CLASSES] # List of method names used to declare (i.e. assign) instance attributes. defining-attr-methods=__init__,__new__,setUp # List of valid names for the first argument in a class method. valid-classmethod-first-arg=cls # List of valid names for the first argument in a metaclass class method. valid-metaclass-classmethod-first-arg=mcs # List of member names, which should be excluded from the protected access # warning. exclude-protected=_asdict,_fields,_replace,_source,_make [EXCEPTIONS] # Exceptions that will emit a warning when being caught. Defaults to # "Exception" overgeneral-exceptions=Exception py-lz4framed-0.14.0/setup.py000066400000000000000000000061111357043434000156260ustar00rootroot00000000000000# Copyright (c) 2016 Iotic Labs Ltd. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # https://github.com/Iotic-Labs/py-lz4framed/blob/master/LICENSE # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
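# Note: the Extension below statically compiles the bundled lz4 sources (lz4/ directory) into the _lz4framed module rather than linking against a system-wide liblz4.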
# pylint: disable=import-error,wrong-import-order,ungrouped-imports from __future__ import print_function import os # Allow for environments without setuptools try: from setuptools import setup, Extension except ImportError: from ez_setup import use_setuptools use_setuptools() from setuptools import setup, Extension def load_description(filename): script_dir = os.path.abspath(os.path.dirname(__file__)) with open(os.path.join(script_dir, filename), 'r') as infile: return infile.read() VERSION = '0.14.0' setup( name='py-lz4framed', version=VERSION, description='LZ4Frame library for Python (via C bindings)', long_description=load_description('README.md'), long_description_content_type='text/markdown', author='Iotic Labs Ltd', author_email='info@iotic-labs.com', maintainer='Iotic Labs Ltd', maintainer_email='vilnis.termanis@iotic-labs.com', url='https://github.com/Iotic-Labs/py-lz4framed', license='Apache License 2.0', packages=['lz4framed'], zip_safe=False, ext_modules=[ Extension('_lz4framed', [ # lz4 library 'lz4/lz4.c', 'lz4/lz4hc.c', 'lz4/lz4frame.c', 'lz4/xxhash.c', 'lz4framed/py-lz4framed.c', ], extra_compile_args=[ '-Ilz4', '-std=c99', '-DXXH_NAMESPACE=PLZ4F_', '-DVERSION=%s' % VERSION, # For testing only - some of these are GCC-specific # '-Wall', # '-Wextra', # '-Wundef', # '-Wshadow', # '-Wcast-align', # '-Wcast-qual', # '-Wstrict-prototypes', # '-pedantic' ])], keywords=['lz4framed', 'lz4frame', 'lz4'], classifiers=[ 'Development Status :: 5 - Production/Stable', 'License :: OSI Approved :: Apache Software License', 'Intended Audience :: Developers', 'Programming Language :: C', 'Programming Language :: Python', 'Programming Language :: Python :: 2.7', 'Programming Language :: Python :: 3.2', 'Programming Language :: Python :: 3.3', 'Programming Language :: Python :: 3.4', 'Programming Language :: Python :: 3.5', 'Programming Language :: Python :: 3.6', 'Programming Language :: Python :: 3.7', 'Topic :: Software Development :: Libraries', 'Topic :: Software Development :: Libraries :: Python Modules' ] ) py-lz4framed-0.14.0/test.py000066400000000000000000000422551357043434000154560ustar00rootroot00000000000000# Copyright (c) 2016 Iotic Labs Ltd. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # https://github.com/Iotic-Labs/py-lz4framed/blob/master/LICENSE # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
"""Note: These tests are not meant to verify all of lz4's behaviour, only the Python functionality""" from sys import version_info from unittest import TestCase from contextlib import contextmanager from io import BytesIO, SEEK_END from lz4framed import (LZ4F_BLOCKSIZE_DEFAULT, LZ4F_BLOCKSIZE_MAX64KB, LZ4F_BLOCKSIZE_MAX256KB, LZ4F_BLOCKSIZE_MAX1MB, LZ4F_BLOCKSIZE_MAX4MB, LZ4F_COMPRESSION_MAX, LZ4F_ERROR_GENERIC, LZ4F_ERROR_contentChecksum_invalid, LZ4F_ERROR_frameType_unknown, LZ4F_ERROR_srcPtr_wrong, Lz4FramedError, Lz4FramedNoDataError, compress, decompress, create_compression_context, compress_begin, compress_update, compress_end, create_decompression_context, get_frame_info, decompress_update, get_block_size, Compressor, Decompressor) PY2 = version_info[0] < 3 SHORT_INPUT = b'abcdefghijklmnopqrstuvwxyz0123456789' LONG_INPUT = SHORT_INPUT * (10**5) LEVEL_ACCELERATED_MAX = -10 class TestHelperMixin(object): def setUp(self): super(TestHelperMixin, self).setUp() if PY2: # avoid deprecation warning self.assertRaisesRegex = self.assertRaisesRegexp # pylint: disable=invalid-name def check_compress_short(self, *args, **kwargs): self.assertEqual(SHORT_INPUT, decompress(compress(SHORT_INPUT, *args, **kwargs))) def check_compress_long(self, *args, **kwargs): self.assertEqual(LONG_INPUT, decompress(compress(LONG_INPUT, *args, **kwargs))) @contextmanager def assertRaisesLz4FramedError(self, code): # pylint: disable=invalid-name try: yield except Lz4FramedError as ex: self.assertEqual(ex.args[1], code, 'Lz4FramedError code mismatch: [%d]: %s' % (ex.args[1], ex.args[0])) else: self.fail('Lz4FramedError not raised') class TestCompress(TestHelperMixin, TestCase): def test_compress_minimal(self): with self.assertRaises(TypeError): compress() with self.assertRaises(Lz4FramedNoDataError): compress(b'') self.check_compress_short() def test_compress_block_size(self): with self.assertRaises(TypeError): compress(SHORT_INPUT, block_size_id='1') with self.assertRaises(ValueError): compress(SHORT_INPUT, block_size_id=-1) for block_size in (LZ4F_BLOCKSIZE_DEFAULT, LZ4F_BLOCKSIZE_MAX64KB, LZ4F_BLOCKSIZE_MAX256KB, LZ4F_BLOCKSIZE_MAX1MB, LZ4F_BLOCKSIZE_MAX4MB): self.check_compress_short(block_size_id=block_size) self.check_compress_long(block_size_id=block_size) def test_compress_linked_mode(self): with self.assertRaises(TypeError): compress(SHORT_INPUT, block_mode_linked=None) self.check_compress_short(block_mode_linked=True) self.check_compress_short(block_mode_linked=False) def test_compress_checksum(self): with self.assertRaises(TypeError): compress(SHORT_INPUT, checksum=None) self.check_compress_short(checksum=True) self.check_compress_short(checksum=False) for data in (SHORT_INPUT, LONG_INPUT): with self.assertRaisesLz4FramedError(LZ4F_ERROR_contentChecksum_invalid): # invalid checksum decompress(compress(data, checksum=True)[:-1] + b'0') def test_compress_block_checksum(self): with self.assertRaises(TypeError): compress(SHORT_INPUT, block_checksum=None) self.check_compress_short(block_checksum=True) self.check_compress_short(block_checksum=False) def test_compress_level(self): with self.assertRaises(TypeError): compress(SHORT_INPUT, level='1') # negative values designate accelerattion for level in range(LEVEL_ACCELERATED_MAX, LZ4F_COMPRESSION_MAX + 1): self.check_compress_short(level=level) # large input, fast & hc levels (levels > 10 (v1.7.5) are significantly slower) self.check_compress_long(level=0) self.check_compress_long(level=10) def test_compress_memoryview(self): view = memoryview(LONG_INPUT) 
self.assertEqual(view, decompress(compress(view))) class TestDecompress(TestHelperMixin, TestCase): def test_decompress_minimal(self): with self.assertRaises(TypeError): decompress() with self.assertRaises(Lz4FramedNoDataError): decompress(b'') self.check_compress_short() def test_decompress_buffer_size(self): out = compress(SHORT_INPUT) with self.assertRaises(TypeError): decompress(out, buffer_size='1') with self.assertRaises(ValueError): decompress(out, buffer_size=0) out = compress(LONG_INPUT) for buffer_size in range(1, 1025, 128): self.assertEqual(LONG_INPUT, decompress(out, buffer_size=buffer_size)) def test_decompress_invalid_input(self): with self.assertRaisesLz4FramedError(LZ4F_ERROR_frameType_unknown): decompress(b'invalidheader') with self.assertRaisesRegex(ValueError, 'frame incomplete'): decompress(compress(SHORT_INPUT)[:-5]) # incomplete data (length not specified in header) with BytesIO() as out: with Compressor(out) as compressor: compressor.update(SHORT_INPUT) output = out.getvalue() with self.assertRaisesRegex(ValueError, 'frame incomplete'): decompress(output[:-20]) def test_decompress_memoryview(self): view = memoryview(compress(LONG_INPUT)) self.assertEqual(LONG_INPUT, decompress(view)) class TestLowLevelFunctions(TestHelperMixin, TestCase): def test_get_block_size(self): with self.assertRaises(TypeError): get_block_size('1') with self.assertRaises(ValueError): get_block_size(1) self.assertEqual(get_block_size(), get_block_size(LZ4F_BLOCKSIZE_DEFAULT)) for size in (LZ4F_BLOCKSIZE_MAX64KB, LZ4F_BLOCKSIZE_MAX256KB, LZ4F_BLOCKSIZE_MAX1MB, LZ4F_BLOCKSIZE_MAX4MB): self.assertEqual(get_block_size(size), 1 << (8 + (2 * size))) def test_create_contexts(self): for func in (create_compression_context, create_decompression_context): self.assertIsNotNone(func()) def test_get_frame_info(self): with self.assertRaises(TypeError): get_frame_info() with self.assertRaises(ValueError): get_frame_info(create_compression_context()) ctx = create_decompression_context() with self.assertRaisesLz4FramedError(LZ4F_ERROR_srcPtr_wrong): get_frame_info(ctx) # compress with non-default arguments, check info structure args = {'checksum': True, 'block_size_id': LZ4F_BLOCKSIZE_MAX256KB, 'block_mode_linked': False} # Using long input since lz4 adjusts block size if input is smaller than one block decompress_update(ctx, compress(LONG_INPUT, **args)[:15]) info = get_frame_info(ctx) self.assertTrue(info.pop('input_hint', 0) > 0) args['length'] = len(LONG_INPUT) self.assertEqual(info, args) def __compress_begin(self, **kwargs): ctx = create_compression_context() header = compress_begin(ctx, **kwargs) self.assertTrue(7 <= len(header) <= 15) return ctx, header def test_compress_begin(self): with self.assertRaises(TypeError): compress_begin() with self.assertRaises(ValueError): compress_begin(create_decompression_context()) def test_compress_begin_block_size(self): with self.assertRaises(TypeError): self.__compress_begin(block_size_id='1') with self.assertRaises(ValueError): self.__compress_begin(block_size_id=-1) for size in (LZ4F_BLOCKSIZE_DEFAULT, LZ4F_BLOCKSIZE_MAX64KB, LZ4F_BLOCKSIZE_MAX256KB, LZ4F_BLOCKSIZE_MAX1MB, LZ4F_BLOCKSIZE_MAX4MB): self.__compress_begin(block_size_id=size) def test_compress_begin_linked_mode(self): with self.assertRaises(TypeError): self.__compress_begin(block_mode_linked=None) self.__compress_begin(block_mode_linked=True) self.__compress_begin(block_mode_linked=False) def test_compress_begin_checksum(self): with self.assertRaises(TypeError): self.__compress_begin(checksum=None)
self.__compress_begin(checksum=True) self.__compress_begin(checksum=False) def test_compress_begin_level(self): with self.assertRaises(TypeError): self.__compress_begin(level='1') for level in range(LEVEL_ACCELERATED_MAX, LZ4F_COMPRESSION_MAX + 1): self.__compress_begin(level=level) def test_compress_update_invalid(self): with self.assertRaises(TypeError): compress_update() with self.assertRaises(TypeError): compress_update(1) # invalid context with self.assertRaises(ValueError): compress_update(create_decompression_context(), b' ') # data before compress_begin called with self.assertRaisesLz4FramedError(LZ4F_ERROR_GENERIC): compress_update(create_compression_context(), b' ') ctx, _ = self.__compress_begin() # invalid data with self.assertRaises(TypeError): compress_update(ctx, 1) # empty data with self.assertRaises(Lz4FramedNoDataError): compress_update(ctx, b'') def test_compress_end(self): with self.assertRaises(TypeError): compress_end() with self.assertRaises(ValueError): compress_end(create_decompression_context()) ctx, header = self.__compress_begin() self.assertEqual(b'', decompress(header + compress_end(ctx))) ctx, header = self.__compress_begin() data = compress_update(ctx, SHORT_INPUT) self.assertEqual(decompress(header + data + compress_end(ctx)), SHORT_INPUT) def __compress_with_data_and_args(self, data, **kwargs): ctx, header = self.__compress_begin(**kwargs) in_raw = BytesIO(data) out = BytesIO(header) out.seek(0, SEEK_END) try: while True: out.write(compress_update(ctx, in_raw.read(1024))) except Lz4FramedNoDataError: pass out.write(compress_end(ctx)) self.assertEqual(decompress(out.getvalue()), data) def test_compress(self): func = self.__compress_with_data_and_args for size in (LZ4F_BLOCKSIZE_DEFAULT, LZ4F_BLOCKSIZE_MAX64KB, LZ4F_BLOCKSIZE_MAX256KB, LZ4F_BLOCKSIZE_MAX1MB, LZ4F_BLOCKSIZE_MAX4MB): func(LONG_INPUT, block_size_id=size) for arg in ('block_mode_linked', 'checksum'): for value in (False, True): func(LONG_INPUT, **{arg: value}) for level in range(LEVEL_ACCELERATED_MAX, LZ4F_COMPRESSION_MAX + 1): func(SHORT_INPUT, level=level) func(memoryview(LONG_INPUT)) def test_decompress_update_invalid(self): with self.assertRaises(TypeError): decompress_update() with self.assertRaises(TypeError): decompress_update(1) # invalid context with self.assertRaises(ValueError): decompress_update(create_compression_context(), b' ') ctx = create_decompression_context() with self.assertRaises(TypeError): decompress_update(ctx, b' ', chunk_len='1') with self.assertRaises(ValueError): decompress_update(ctx, b' ', chunk_len=0) in_raw = compress(LONG_INPUT, checksum=True) ret = decompress_update(ctx, in_raw[:512], chunk_len=2) # input_hint self.assertTrue(ret.pop() > 0) # chunk length self.assertTrue(len(ret) > 0) self.assertTrue(all(1 <= len(chunk) <= 2 for chunk in ret)) # invalid input (from start of frame) with self.assertRaisesLz4FramedError(LZ4F_ERROR_GENERIC): decompress_update(ctx, in_raw) # checksum invalid in_raw = in_raw[:-4] + b'1234' ctx = create_decompression_context() with self.assertRaisesLz4FramedError(LZ4F_ERROR_contentChecksum_invalid): decompress_update(ctx, in_raw) def test_decompress_update_memoryview(self): # pylint: disable=invalid-name ctx = create_decompression_context() data = decompress_update(ctx, memoryview(compress(LONG_INPUT))) self.assertEqual(b''.join(data[:-1]), LONG_INPUT) class TestCompressor(TestHelperMixin, TestCase): """Note: Low-level methods supporting Compressor class have been tested in TestLowLevelFunctions""" def test_compressor_init(self): with 
self.assertRaisesRegex(AttributeError, 'has no attribute \'write\''): Compressor('1') # non-callable write attribute class Empty(object): write = 1 with self.assertRaises(TypeError): Compressor(Empty()) # cannot use context without fp with self.assertRaises(ValueError): with Compressor() as _: # noqa (unused variable) pass def test_compressor__no_fp(self): in_bytes = BytesIO(LONG_INPUT) out_bytes = BytesIO() compressor = Compressor() try: while True: out_bytes.write(compressor.update(in_bytes.read(1024))) # raised by compressor.update() on empty data argument except Lz4FramedNoDataError: pass out_bytes.write(compressor.end()) self.assertEqual(decompress(out_bytes.getvalue()), LONG_INPUT) def test_compressor_fp(self): self.__fp_test() def __fp_test(self, in_raw=LONG_INPUT, **kwargs): in_bytes = BytesIO(in_raw) out_bytes = BytesIO() with Compressor(out_bytes, **kwargs) as compressor: try: while True: compressor.update(in_bytes.read(1024)) # raised by compressor.update() on empty data argument except Lz4FramedNoDataError: pass self.assertEqual(decompress(out_bytes.getvalue()), in_raw) def test_compressor_block_size(self): for block_size in (LZ4F_BLOCKSIZE_DEFAULT, LZ4F_BLOCKSIZE_MAX64KB, LZ4F_BLOCKSIZE_MAX256KB, LZ4F_BLOCKSIZE_MAX1MB, LZ4F_BLOCKSIZE_MAX4MB): self.__fp_test(block_size_id=block_size) def test_compressor_checksum(self): self.__fp_test(checksum=False) self.__fp_test(checksum=True) def test_compressor_autoflush(self): self.__fp_test(autoflush=True) self.__fp_test(autoflush=False) def test_compressor_level(self): for level in range(LEVEL_ACCELERATED_MAX, LZ4F_COMPRESSION_MAX + 1): self.__fp_test(in_raw=SHORT_INPUT, level=level) self.__fp_test(level=0) # levels > 10 (v1.7.5) are significantly slower self.__fp_test(level=10) class TestDecompressor(TestHelperMixin, TestCase): def test_decompressor_init(self): with self.assertRaises(TypeError): Decompressor() # pylint: disable=no-value-for-parameter with self.assertRaisesRegex(AttributeError, 'has no attribute \'read\''): Decompressor('1') # non-callable read attribute class Empty(object): read = 1 with self.assertRaises(TypeError): Decompressor(Empty()) def test_decompressor_fp(self): # levels > 10 (v1.7.5) are significantly slower for level in (LEVEL_ACCELERATED_MAX, 10): out_bytes = BytesIO() for chunk in Decompressor(BytesIO(compress(LONG_INPUT, level=level))): out_bytes.write(chunk) self.assertEqual(out_bytes.getvalue(), LONG_INPUT) # incomplete frame out_bytes.seek(0) out_bytes.truncate() with self.assertRaises(Lz4FramedNoDataError): for chunk in Decompressor(BytesIO(compress(LONG_INPUT)[:-32])): out_bytes.write(chunk) # some data should have been written out_bytes.seek(0, SEEK_END) self.assertTrue(out_bytes.tell() > 0) # def pympler_run(iterations=20): # from unittest import main # from pympler import tracker # from gc import collect # tracker = tracker.SummaryTracker() # for i in range(iterations): # try: # main() # except SystemExit: # pass # if i % 2: # collect() # tracker.print_diff() # if __name__ == '__main__': # pympler_run()
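# Illustrative sketch (left commented out, like the block above): a chunked
# round-trip through the low-level API exercised by these tests - roughly what
# the Compressor/Decompressor classes wrap. The helper name is hypothetical; it
# relies only on names already imported at the top of this module.
# def _example_chunked_roundtrip(data=LONG_INPUT, chunk_size=1024):
#     ctx = create_compression_context()
#     out = BytesIO(compress_begin(ctx, checksum=True))  # frame header
#     out.seek(0, SEEK_END)
#     for i in range(0, len(data), chunk_size):
#         out.write(compress_update(ctx, data[i:i + chunk_size]))
#     out.write(compress_end(ctx))  # flush remaining data & finalise the frame
#     dctx = create_decompression_context()
#     chunks = decompress_update(dctx, out.getvalue())
#     assert chunks.pop() == 0  # input_hint of zero marks a complete frame
#     assert b''.join(chunks) == data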