pax_global_header00006660000000000000000000000064143614264440014522gustar00rootroot0000000000000052 comment=9d27c91770608e470c2667fb1a5c5159d3c6b214 reedsolomon-1.7.0/000077500000000000000000000000001436142644400140555ustar00rootroot00000000000000reedsolomon-1.7.0/.coveragerc000066400000000000000000000003461436142644400162010ustar00rootroot00000000000000[run] relative_files = True branch = True include = reedsolo.py omit = home/* [report] include = reedsolo.py omit = */python?.?/* */site-packages/nose/* */opt/python/pypy* */tests/* show_missing = True reedsolomon-1.7.0/.github/000077500000000000000000000000001436142644400154155ustar00rootroot00000000000000reedsolomon-1.7.0/.github/workflows/000077500000000000000000000000001436142644400174525ustar00rootroot00000000000000reedsolomon-1.7.0/.github/workflows/ci-build.yml000066400000000000000000000044421436142644400216710ustar00rootroot00000000000000# This workflow will install Python dependencies and run tests with a variety of Python versions # It uses the Python Package GitHub Actions workflow. # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python # and https://www.youtube.com/watch?v=l6fV09z5XHk name: ci-build on: push: branches: - master # $default-branch only works in Workflows templates, not in Workflows, see https://stackoverflow.com/questions/64781462/github-actions-default-branch-variable pull_request: branches: - master jobs: build: runs-on: ${{ matrix.os }} strategy: fail-fast: false matrix: python-version: ["2.7", "3.7", "3.8", "3.9", "3.10", "3.11", "3.12.0-alpha.3", pypy-2.7, pypy-3.8] #os: [ ubuntu-latest, windows-latest, macos-latest ] # jobs that run on Windows and macOS runners that GitHub hosts consume minutes at 2 and 10 times the rate that jobs on Linux runners consume respectively. 
os: [ ubuntu-latest ] steps: - uses: actions/checkout@v3 - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v3 with: python-version: ${{ matrix.python-version }} cache: 'pip' # You can test your matrix by printing the current Python version - name: Display Python version run: | python -c "import sys; print(sys.version)" - name: Install dependencies run: | python -m pip install --upgrade pip python -m pip install pytest pytest-cov if [ ${{ matrix.python-version }} <= 3.7 ]; then python -m pip install 'coverage<4'; else python -m pip install coverage; fi python -m pip install coveralls if [ -f requirements.txt ]; then pip install -r requirements.txt; fi - name: Compile the Cython extension (only on Python 3.10) if: ${{ matrix.python-version == 3.10 }} run: | pip install --install-option='--no-cython-compile' cython python setup.py develop --cythonize - name: Test with pytest run: | coverage run --branch -m pytest . -v coverage report -m - name: Send coverage to Coveralls env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} if: ${{ matrix.python-version >= 3 }} run: coveralls --service=github reedsolomon-1.7.0/.gitignore000066400000000000000000000001051436142644400160410ustar00rootroot00000000000000.project .pydevproject *.py[c|d] __pycache__ *.egg-info dist/ build/ reedsolomon-1.7.0/LICENSE000066400000000000000000000045241436142644400150670ustar00rootroot00000000000000The software is available under your choice of Unlicense (SPDX short identifier: Unlicense) or MIT No Attribution License (SPDX short identifier: MIT-0): ----- <> This is free and unencumbered software released into the public domain. Anyone is free to copy, modify, publish, use, compile, sell, or distribute this software, either in source code form or as a compiled binary, for any purpose, commercial or non-commercial, and by any means. 
In jurisdictions that recognize copyright laws, the author or authors of this software dedicate any and all copyright interest in the software to the public domain. We make this dedication for the benefit of the public at large and to the detriment of our heirs and successors. We intend this dedication to be an overt act of relinquishment in perpetuity of all present and future rights to this software under copyright law. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. For more information, please refer to <> ----- <> Copyright 2013-2023 Tomer Filiba & Stephen Larroque Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
<> reedsolomon-1.7.0/MANIFEST.in000066400000000000000000000002471436142644400156160ustar00rootroot00000000000000include LICENSE include README.rst include changelog.txt include *.pyx # include the cython implementation sourcecode in the wheel, so that the user can compile later reedsolomon-1.7.0/Makefile000066400000000000000000000073311436142644400155210ustar00rootroot00000000000000# This Makefile runs tests and builds the package to upload to pypi # To use this Makefile, pip install py-make # You also need to pip install also other required modules: `pip install flake8 nose coverage twine pytest pytest-cov` # Up to Python 3.9 included, nosetests was used, but from 3.10 onward, support for it was dropped since it is not maintained anymore, so that pytest and pytest-cov are used instead. # Then, cd to this folder, and type `pymake -p` to list all commands, then `pymake ` to run the related entry. # To test on multiple Python versions, install them, install also the C++ redistributables for each (so that Cython works), and then type `pymake testtox`. # To pymake buildupload (deploy on pypi), you need to `pip install cython` and install a C++ compiler, on Windows and with Python 3.7 you need Microsoft Visual C++ 14.0. Get it with "Microsoft Visual C++ Build Tools": https://visualstudio.microsoft.com/fr/visual-cpp-build-tools/ # for Python 3.10, read the updated instructions at: https://wiki.python.org/moin/WindowsCompilers .PHONY: alltests all flake8 test testsetup testsetuppost testcoverage testtox distclean coverclean prebuildclean clean toxclean installdev install build buildupload pypi help help: @+make -p alltests: @+make test @+make flake8 @+make testsetup all: @+make alltests @+make build flake8: @+flake8 -j 8 --count --statistics --exit-zero . 
test: # Build the Cython extension python setup.py build_ext --inplace # Run the tests python -m unittest discover tests testnobinary: # Run the tests python -m unittest discover tests #pytest --cov-branch testnose: nosetests -vv --with-coverage testtox: # Test for multiple Python versions tox --skip-missing-interpreters -p all testsetup: python setup.py check --metadata --restructuredtext --strict testsetuppost: twine check "dist/*" testcoverage: # This is the preferred way to run the tests since Python 3.10 @+make coverclean # Build the Cython extension #python setup.py build_ext --inplace --cythonize # unnecessary to call build_ext --inplace now python setup.py develop --cythonize # Run the tests # nosetests reedsolo --with-coverage --cover-package=reedsolo --cover-erase --cover-min-percentage=80 -d -v # With PyTest, it is now necessary to first install the python module so that it is found (--cov=) #python setup.py develop #pytest --cov-report term-missing --cov-config=.coveragerc --cov=. tests/ --cov-branch #python setup.py develop --uninstall coverage run --branch -m pytest . 
-v coverage report -m distclean: @+make coverclean @+make prebuildclean @+make clean @+make toxclean prebuildclean: @+python -c "import shutil; shutil.rmtree('build', True)" @+python -c "import shutil; shutil.rmtree('dist', True)" @+python -c "import shutil; shutil.rmtree('reedsolo.egg-info', True)" coverclean: @+python -c "import os; os.remove('.coverage') if os.path.exists('.coverage') else None" @+python -c "import shutil; shutil.rmtree('__pycache__', True)" @+python -c "import shutil; shutil.rmtree('tests/__pycache__', True)" clean: @+python -c "import os, glob; [os.remove(i) for i in glob.glob('*.py[co]')]" @+python -c "import os, glob; [os.remove(i) for i in glob.glob('tests/*.py[co]')]" toxclean: @+python -c "import shutil; shutil.rmtree('.tox', True)" installdev: @+python setup.py develop --uninstall @+python setup.py develop install: @+python setup.py install build: @+make prebuildclean @+make testsetup @+python setup.py sdist bdist_wheel # @+python setup.py bdist_wininst pymake testsetuppost # @+make does not work here, dunno why pypi: twine upload dist/* buildupload: @+make build @+make pypi reedsolomon-1.7.0/README.rst000066400000000000000000000526211436142644400155520ustar00rootroot00000000000000Reed Solomon ============ |PyPI-Status| |PyPI-Versions| |PyPI-Downloads| |Build-Status| |Coverage| |Conda-Forge-Status| |Conda-Forge-Platforms| |Conda-Forge-Downloads| A pythonic `universal errors-and-erasures Reed-Solomon Codec `_ to protect your data from errors and bitrot. It includes a pure python implementation and an optional speed-optimized Cython/C extension. This is a burst-type implementation, so that it supports any Galois field higher than 2^3, but not binary streams. 
Burst errors are non-random errors that more often happen on data storage mediums such as hard drives, hence this library is better suited for data storage protection, and less for streams noise correction, although it also works for this purpose but with a bit of overhead (since it works with bytes only, instead of bits). Based on the wonderful tutorial at `Wikiversity `_, written by "Bobmath" and "LRQ3000". If you are just starting with Reed-Solomon error correction codes, the Wikiversity article is a good beginner's introduction. ------------------------------------ .. contents:: Table of contents :backlinks: top :local: Installation ------------ For the latest stable release, install with: .. code:: sh pip install --upgrade reedsolo For the latest development release (do not use in production!), use: .. code:: sh pip install --upgrade git+https://github.com/tomerfiliba/reedsolomon If you have some issues installing through pip, maybe this command may help: .. code:: sh pip install reedsolo --no-binary={reedsolo} By default, only a pure-python implementation is installed. If you have Cython and a C++ compiler, a faster cythonized binary can be optionally built with: .. code:: sh pip install --upgrade reedsolo --install-option="--cythonize" --verbose or locally with: .. code:: sh python setup.py install --cythonize The setup.py will then try to build the Cython optimized module ``creedsolo.pyx`` if Cython is installed, which can then be imported as `import creedsolo` instead of `import reedsolo`, with the same features between both modules. As an alternative, use `conda `_ to install a compiled version for various platforms: .. code:: sh conda install -c conda-forge reedsolo Usage ----- Basic usage with high-level RSCodec class ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. 
code:: python # Initialization >>> from reedsolo import RSCodec, ReedSolomonError >>> rsc = RSCodec(10) # 10 ecc symbols # Encoding # just a list of numbers/symbols: >>> rsc.encode([1,2,3,4]) b'\x01\x02\x03\x04,\x9d\x1c+=\xf8h\xfa\x98M' # bytearrays are accepted and the output will be matched: >>> rsc.encode(bytearray([1,2,3,4])) bytearray(b'\x01\x02\x03\x04,\x9d\x1c+=\xf8h\xfa\x98M') # encoding a byte string is as easy: >>> rsc.encode(b'hello world') b'hello world\xed%T\xc4\xfd\xfd\x89\xf3\xa8\xaa' # Note: strings of any length, even if longer than the Galois field, will be encoded as well using transparent chunking. # Decoding (repairing) >>> rsc.decode(b'hello world\xed%T\xc4\xfd\xfd\x89\xf3\xa8\xaa')[0] # original b'hello world' >>> rsc.decode(b'heXlo worXd\xed%T\xc4\xfdX\x89\xf3\xa8\xaa')[0] # 3 errors b'hello world' >>> rsc.decode(b'hXXXo worXd\xed%T\xc4\xfdX\x89\xf3\xa8\xaa')[0] # 5 errors b'hello world' >>> rsc.decode(b'hXXXo worXd\xed%T\xc4\xfdXX\xf3\xa8\xaa')[0] # 6 errors - fail Traceback (most recent call last): ... reedsolo.ReedSolomonError: Too many (or few) errors found by Chien Search for the errata locator polynomial! **Important upgrade notice for pre-1.0 users:** Note that ``RSCodec.decode()`` returns 3 variables: 1. the decoded (corrected) message 2. the decoded message and error correction code (which is itself also corrected) 3. and the list of positions of the errata (errors and erasures) Here is how to use these outputs: .. 
code:: python >>> tampered_msg = b'heXlo worXd\xed%T\xc4\xfdX\x89\xf3\xa8\xaa' >>> decoded_msg, decoded_msgecc, errata_pos = rsc.decode(tampered_msg) >>> print(decoded_msg) # decoded/corrected message bytearray(b'hello world') >>> print(decoded_msgecc) # decoded/corrected message and ecc symbols bytearray(b'hello world\xed%T\xc4\xfd\xfd\x89\xf3\xa8\xaa') >>> print(errata_pos) # errata_pos is returned as a bytearray, hardly intelligible bytearray(b'\x10\t\x02') >>> print(list(errata_pos)) # convert to a list to get the errata positions as integer indices [16, 9, 2] Since we failed to decode with 6 errors with a codec set with 10 error correction code (ecc) symbols, let's try to use a bigger codec, with 12 ecc symbols. .. code:: python >>> rsc = RSCodec(12) # using 2 more ecc symbols (to correct max 6 errors or 12 erasures) >>> rsc.encode(b'hello world') b'hello world?Ay\xb2\xbc\xdc\x01q\xb9\xe3\xe2=' >>> rsc.decode(b'hello worXXXXy\xb2XX\x01q\xb9\xe3\xe2=')[0] # 6 errors - ok, but any more would fail b'hello world' >>> rsc.decode(b'helXXXXXXXXXXy\xb2XX\x01q\xb9\xe3\xe2=', erase_pos=[3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 15, 16])[0] # 12 erasures - OK b'hello world' This shows that we can decode twice as many erasures (where we provide the location of errors ourselves) than errors (with unknown locations). This is the cost of error correction compared to erasure correction. To get the maximum number of errors *or* erasures that can be independently corrected (ie, not simultaneously): .. code:: python >>> maxerrors, maxerasures = rsc.maxerrata(verbose=True) This codec can correct up to 6 errors and 12 erasures independently >>> print(maxerrors, maxerasures) 6 12 To get the maximum number of errors *and* erasures that can be simultaneously corrected, you need to specify the number of errors or erasures you expect: .. 
code:: python >>> maxerrors, maxerasures = rsc.maxerrata(erasures=6, verbose=True) # we know the number of erasures, will calculate how many errors we can afford This codec can correct up to 3 errors and 6 erasures simultaneously >>> print(maxerrors, maxerasures) 3 6 >>> maxerrors, maxerasures = rsc.maxerrata(errors=5, verbose=True) # we know the number of errors, will calculate how many erasures we can afford This codec can correct up to 5 errors and 2 erasures simultaneously >>> print(maxerrors, maxerasures) 5 2 Note that if a chunk has more errors and erasures than the Singleton Bound as calculated by the ``maxerrata()`` method, the codec will try to raise a ``ReedSolomonError`` exception, but may very well not detect any error either (this is a theoretical limitation of error correction codes). In other words, error correction codes are unreliable to detect if a chunk of a message is corrupted beyond the Singleton Bound. If you want more reliability in errata detection, use a checksum or hash such as SHA or MD5 on your message, these are much more reliable and have no bounds on the number of errata (the only potential issue is with collision but the probability is very very low). Note: to catch a ``ReedSolomonError`` exception, do not forget to import it first with: ``from reedsolo import ReedSolomonError`` To check if a message is tampered given its error correction symbols, without decoding, use the ``check()`` method: .. 
code:: python # Checking >> rsc.check(b'hello worXXXXy\xb2XX\x01q\xb9\xe3\xe2=') # Tampered message will return False [False] >> rmes, rmesecc, errata_pos = rsc.decode(b'hello worXXXXy\xb2XX\x01q\xb9\xe3\xe2=') >> rsc.check(rmesecc) # Corrected or untampered message will return True [True] >> print('Number of detected errors and erasures: %i, their positions: %s' % (len(errata_pos), list(errata_pos))) Number of detected errors and erasures: 6, their positions: [16, 15, 12, 11, 10, 9] By default, most Reed-Solomon codecs are limited to characters that can be encoded in 256 bits and with a length of maximum 256 characters. But this codec is universal, you can reduce or increase the length and maximum character value by increasing the Galois Field: .. code:: python # To use longer chunks or bigger values than 255 (may be very slow) >> rsc = RSCodec(12, nsize=4095) # always use a power of 2 minus 1 >> rsc = RSCodec(12, c_exp=12) # alternative way to set nsize=4095 >> mes = 'a' * (4095-12) >> mesecc = rsc.encode(mes) >> mesecc[2] = 1 >> mesecc[-1] = 1 >> rmes, rmesecc, errata_pos = rsc.decode(mesecc) >> rsc.check(mesecc) [False] >> rsc.check(rmesecc) [True] Note that the ``RSCodec`` class supports transparent chunking, so you don't need to increase the Galois Field to support longer messages, but characters will still be limited to 256 bits (or whatever field you set with ``c_exp``). Low-level usage via direct access to math functions ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If you want full control, you can skip the API and directly use the library as-is. Here's how: First you need to init the precomputed tables: .. code:: python >> import reedsolo as rs >> rs.init_tables(0x11d) Pro tip: if you get the error: ValueError: byte must be in range(0, 256), please check that your prime polynomial is correct for your field. Pro tip2: by default, you can only encode messages of max length and max symbol value = 256. 
If you want to encode bigger messages, please use the following (where c_exp is the exponent of your Galois Field, eg, 12 = max length 2^12 = 4096): .. code:: python >> prim = rs.find_prime_polys(c_exp=12, fast_primes=True, single=True) >> rs.init_tables(c_exp=12, prim=prim) Let's define our RS message and ecc size: .. code:: python >> n = 255 # length of total message+ecc >> nsym = 12 # length of ecc >> mes = "a" * (n-nsym) # generate a sample message To optimize, you can precompute the generator polynomial: .. code:: python >> gen = rs.rs_generator_poly_all(n) Then to encode: .. code:: python >> mesecc = rs.rs_encode_msg(mes, nsym, gen=gen[nsym]) Let's tamper our message: .. code:: python >> mesecc[1] = 0 To decode: .. code:: python >> rmes, recc, errata_pos = rs.rs_correct_msg(mesecc, nsym, erase_pos=erase_pos) Note that both the message and the ecc are corrected (if possible of course). Pro tip: if you know a few erasures positions, you can specify them in a list ``erase_pos`` to double the repair power. But you can also just specify an empty list. You can check how many errors and/or erasures were corrected, which can be useful to design adaptive bitrate algorithms: .. code:: python >> print('A total of %i errata were corrected over all chunks of this message.' % len(errata_pos)) If the decoding fails, it will normally automatically check and raise a ReedSolomonError exception that you can handle. However if you want to manually check if the repaired message is correct, you can do so: .. code:: python >> rs.rs_check(rmes + recc, nsym) Note: if you want to use multiple reedsolomon with different parameters, you need to backup the globals and restore them before calling reedsolo functions: .. code:: python >> rs.init_tables() >> global gf_log, gf_exp, field_charac >> bak_gf_log, bak_gf_exp, bak_field_charac = gf_log, gf_exp, field_charac Then at anytime, you can do: .. 
code:: python >> global gf_log, gf_exp, field_charac >> gf_log, gf_exp, field_charac = bak_gf_log, bak_gf_exp, bak_field_charac >> mesecc = rs.rs_encode_msg(mes, nsym) >> rmes, recc, errata_pos = rs.rs_correct_msg(mesecc, nsym) The globals backup is not necessary if you use RSCodec, it will be automatically managed. Read the sourcecode's comments for more info about how it works, and for the various parameters you can setup if you need to interface with other RS codecs. Extended description -------------------- The code of wikiversity is here consolidated into a nice API with exceptions handling. The algorithm can correct up to 2*e+v <= nsym, where e is the number of errors, v the number of erasures and nsym = n-k = the number of ECC (error correction code) symbols. This means that you can either correct exactly floor(nsym/2) errors, or nsym erasures (errors where you know the position), and a combination of both errors and erasures. This is called the Singleton Bound, and is the maximum/optimal theoretical number of erasures and errors any error correction algorithm can correct (although there are experimental approaches to go a bit further, named list decoding, not implemented here, but feel free to do pull request!). The code should work on pretty much any reasonable version of python (2.4-3.7), but I'm only testing on 2.7 and 3.7. Python 3.8 should work except for Cython which is currently incompatible with this version. The codec has quite reasonable performances if you either use PyPy on the pure-python implementation (reedsolo.py) or either if you compile the Cython extension creedsolo.pyx (which is about 2x faster than PyPy). You can expect encoding rates of several MB/s. This library is also thoroughly unit tested so that nearly any encoding/decoding case should be covered. The codec is universal, meaning that it can decode any message encoded by another RS encoder as long as you provide the correct parameters. 
Note however that if you use higher fields (ie, bigger c_exp), the algorithms will be slower, first because we cannot then use the optimized bytearray() structure but only array.array('i', ...), and also because Reed-Solomon's complexity is quadratic (both in encoding and decoding), so this means that the longer your messages, the longer it will take to encode/decode (quadratically!). The algorithm itself can handle messages of a length up to (2^c_exp)-1 symbols per message (or chunk), including the ECC symbols, and each symbol can have a value of up to (2^c_exp)-1 (indeed, both the message length and the maximum value for one character is constrained by the same mathematical reason). By default, we use the field GF(2^8), which means that you are limited to values between 0 and 255 (perfect to represent a single hexadecimal symbol on computers, so you can encode any binary stream) and limited to messages+ecc of maximum length 255. However, you can "chunk" longer messages to fit them into the message length limit. The ``RSCodec`` class will automatically apply chunking, by splitting longer messages into chunks and encode/decode them separately; it shouldn't make a difference from an API perspective (ie, from your POV). To use the Cython implementation, you need to ``pip install cython`` and to install a C++ compiler (Microsoft Visual C++ 14.x for Windows and Python 3.10+), read the up-to-date instructions in the `official wiki `_. Then you can simply cd to the root of the folder where creedsolo.pyx is, and type ``python setup.py build_ext --inplace --cythonize``. Alternatively, you can generate just the C++ code by typing `cython -3 creedsolo.pyx`. 
When building a distributable egg or installing the module from source, the Cython module can be transpiled and compiled if both Cython and a C compiler are installed and the ``--cythonize`` flag is supplied to the setup.py, otherwise by default only the pure-python implementation and the `.pyx` cython source code will be included, but the binary won't be in the wheel. Then, use `import RSCodec from creedsolo` instead of importing from the `reedsolo` module, and finally only feed `bytearray()` objects to the `RSCodec` object. Exclusively using bytearrays is one of the reasons creedsolo is faster than reedsolo. You can convert any string by specifying the encoding: `bytearray("Hello World", "UTF-8")`. Note that there is an inherent limitation of the C implementation which cannot work with higher galois fields than 8 (= characters of max 255 value) because the C implementation only works with bytearrays, and bytearrays only support characters up to 255. If you want to use higher galois fields, you need to use the pure python version, which includes a fake `_bytearray` function that overloads the standard bytearray in case galois fields higher than 8 are used to `init_tables()`, or rewrite the C implementation to use lists instead of bytearrays (which will be MUCH slower so this defeats the purpose and you are better off simply using the pure python version under PyPy - an older version of the C implementation was doing just that, and without bytearrays, all performance gains were lost, hence why the bytearrays were kept despite the limitations). Edge cases ------------- Although sanity checks are implemented whenever possible and when they are not too much resource consuming, there are a few cases where messages will not be decoded correctly without raising an exception: * If an incorrect erasure location is provided, the decoding algorithm will just trust the provided locations and create a syndrome that will be wrong, resulting in an incorrect decoded message. 
In case reliability is critical, always use the check() method after decoding to check the decoding did not go wrong. * Reed-Solomon algorithm is limited by the Singleton Bound, which limits not only its capacity to correct errors and erasures relatively to the number of error correction symbols, but also its ability to check if the message can be decoded or not. Indeed, if the number of errors and erasures are greater than the Singleton Bound, the decoder has no way to mathematically know for sure whether there is an error at all, it may very well be a valid message (although not the message you expect, but mathematically valid nevertheless). Hence, when the message is tampered beyond the Singleton Bound, the decoder may raise an exception, but it may also return a mathematically valid but still tampered message. Using the check() method cannot fix that either. To work around this issue, a solution is to use parity or hashing functions in parallel to the Reed-Solomon codec: use the Reed-Solomon codec to repair messages, use the parity or hashing function to check if there is any error. Due to how parity and hashing functions work, they are much less likely to produce a false negative than the Reed-Solomon algorithm. This is a general rule: error correction codes are efficient at correcting messages but not at detecting errors, hashing and parity functions are the adequate tool for this purpose. Recommended reading ------------------- * "`Reed-Solomon codes for coders `_", free practical beginner's tutorial with Python code examples on WikiVersity. Partially written by one of the authors of the present software. * "Algebraic codes for data transmission", Blahut, Richard E., 2003, Cambridge university press. `Readable online on Google Books `_. This book was pivotal in helping to understand the intricacies of the universal Berlekamp-Massey algorithm (see figures 7.5 and 7.10). Authors ------- This module was conceived and developed by Tomer Filiba in 2012. 
It was further extended and is currently maintained by Stephen Karl Larroque since 2015. And several other contributors helped improve and make it more robust: |Contributors| For a list of all contributors, please see `the GitHub Contributors graph `_ and the `commits history `_. License ------- This software is released under your choice of the Unlicense or the MIT-0 (MIT No Attribution) License. Both licenses are `public-domain-equivalent licenses `_, as intended by the original author Tomer Filiba. .. |PyPI-Status| image:: https://img.shields.io/pypi/v/reedsolo.svg :target: https://pypi.org/project/reedsolo .. |PyPI-Versions| image:: https://img.shields.io/pypi/pyversions/reedsolo.svg?logo=python&logoColor=white :target: https://pypi.org/project/reedsolo .. |PyPI-Downloads| image:: https://img.shields.io/pypi/dm/reedsolo.svg?label=pypi%20downloads&logo=python&logoColor=white :target: https://pypi.org/project/reedsolo .. |Build-Status| image:: https://github.com/tomerfiliba/reedsolomon/actions/workflows/ci-build.yml/badge.svg?event=push :target: https://github.com/tomerfiliba/reedsolomon/actions/workflows/ci-build.yml .. |Coverage| image:: https://coveralls.io/repos/tomerfiliba/reedsolomon/badge.svg?branch=master&service=github :target: https://coveralls.io/github/tomerfiliba/reedsolomon?branch=master .. |Conda-Forge-Status| image:: https://img.shields.io/conda/vn/conda-forge/reedsolo.svg :target: https://anaconda.org/conda-forge/reedsolo .. |Conda-Forge-Platforms| image:: https://anaconda.org/conda-forge/reedsolo/badges/platforms.svg :target: https://anaconda.org/conda-forge/reedsolo .. |Conda-Forge-Downloads| image:: https://anaconda.org/conda-forge/reedsolo/badges/downloads.svg :target: https://anaconda.org/conda-forge/reedsolo .. 
|Contributors| image:: https://contrib.rocks/image?repo=tomerfiliba/reedsolomon :target: https://github.com/tomerfiliba/reedsolomon/graphs/contributors reedsolomon-1.7.0/build.sh000077500000000000000000000002041436142644400155070ustar00rootroot00000000000000#!/bin/sh rm -rf build/ dist/ *.egg-info/ python setup.py register sdist --formats=gztar,zip bdist_wininst --plat-name=win32 upload reedsolomon-1.7.0/changelog.txt000066400000000000000000000043601436142644400165500ustar00rootroot00000000000000Modification notes by lrq3000 07/2015: - more precomputing (generator polynomial(s), log of generator polynomial, etc.) - faster encoding (particularly with PyPy and Cython) - more, more and more comments - reordered the code a bit and separated it into 4 sections for cleaner overview. - renamed variables for more understable code (closer to the academic books nomenclatura). - added a few gf and poly functions like gf_poly_div to complete the set of possible actions (in case someone wants to extend the lib in the future). - everything is a bytearray now, for faster computation - xrange() for faster computation in Python 2, but reverts automatically to range() with Python 3 - added public rs_check() function to check if a message with ecc is tampered. - added a fast cython implementation (about 2x faster than pure-python run with PyPy 2.5) - (hopefully) fixed the decoding so that decoding under the bound 2*e+v <= (n-k) is always guaranted - implemented more RS parameters so that it is now a universal Reed-Solomon codec. 
Modification notes by rotorgit 2015-07-03: - added test unit - compatibility with Python 3+ Modification notes by rotorgit 2015-02-03: - made the following changes to reedsolo by Tomer Filiba (TF) in order to support ADSB UAT FEC standard as specified in: http://adsb.tc.faa.gov/WG5_Meetings/Meeting27/UAT-DO-282B-FRAC.pdf - TF code is based on wikiversity RS code, so the mods are applicable there as well - there were two changes needed to support ADSB UAT FEC decoding: 1. non-zero "first consecutive root" (fcr): implicitly hard-coded as fcr=0 in previous version, needed fcr=120 for ADSB UAT 2. "primitive polynomial": hard-coded as 0x11d in previous version, needed 0x187 for ADSB UAT - both above params were hard-coded and are now user-definable (during class instantiation), with defaults equal to old values to prevent breakage of existing code - there are many online resources for rs, but the best (most practical) for me was: http://downloads.bbc.co.uk/rd/pubs/whp/whp-pdf-files/WHP031.pdf - as noted above, the wikiversity discussion and examples ignore/skip the critical features that must be modified for ADSB UAT support Modifications notes by tomerfiliba 07/06/2012: Initial release. reedsolomon-1.7.0/creedsolo.pyx000066400000000000000000001774121436142644400166120ustar00rootroot00000000000000# -*- coding: utf-8 -*- #!python #cython: language_level=3 # Copyright (c) 2012-2015 Tomer Filiba # Copyright (c) 2015 rotorgit # Copyright (c) 2015-2022 Stephen Larroque ''' Reed Solomon ============ A cython implementation of an `universal errors-and-erasures Reed-Solomon Codec `_ , based on the wonderful tutorial at `wikiversity `_, written by "Bobmath" and "LRQ3000". The code of wikiversity is here consolidated into a nice API with exceptions handling. The algorithm can correct up to 2*e+v <= nsym, where e is the number of errors, v the number of erasures and nsym = n-k = the number of ECC (error correction code) symbols. 
This means that you can either correct exactly floor(nsym/2) errors, or nsym erasures (errors where you know the position), and a combination of both errors and erasures. The code should work on pretty much any reasonable version of python (2.4-3.2), but I'm only testing on 2.5-3.2. .. note:: The codec is universal, meaning that it can decode any message encoded by another RS encoder as long as you provide the correct parameters. Note however that even if the algorithms and calculations can support Galois Fields > 2^8, the current implementation is based on bytearray structures to get faster computations. But this is easily fixable, just change bytearray to array('i', [...]) and it should work flawlessly for any GF. The algorithm itself can handle messages up to (2^c_exp)-1 symbols, including the ECC symbols, and each symbol can only have a value of up to (2^c_exp)-1. By default, we use the field GF(2^8), which means that you are limited to values between 0 and 255 (perfect to represent a single hexadecimal symbol on computers, so you can encode any binary stream) and limited to messages+ecc of maximum length 255. However, you can "chunk" longer messages to fit them into the message length limit. The ``RSCodec`` class will automatically apply chunking, by splitting longer messages into chunks and encode/decode them separately; it shouldn't make a difference from an API perspective (ie, from your POV). :: >>> rs = RSCodec(10) >>> rs.encode([1,2,3,4]) b'\x01\x02\x03\x04,\x9d\x1c+=\xf8h\xfa\x98M' >>> rs.encode(b'hello world') b'hello world\xed%T\xc4\xfd\xfd\x89\xf3\xa8\xaa' >>> rs.decode(b'hello world\xed%T\xc4\xfd\xfd\x89\xf3\xa8\xaa') b'hello world' >>> rs.decode(b'heXlo worXd\xed%T\xc4\xfdX\x89\xf3\xa8\xaa') # 3 errors b'hello world' >>> rs.decode(b'hXXXo worXd\xed%T\xc4\xfdX\x89\xf3\xa8\xaa') # 5 errors b'hello world' >>> rs.decode(b'hXXXo worXd\xed%T\xc4\xfdXX\xf3\xa8\xaa') # 6 errors - fail Traceback (most recent call last): ... 
ReedSolomonError: Could not locate error >>> rs = RSCodec(12) >>> rs.encode(b'hello world') b'hello world?Ay\xb2\xbc\xdc\x01q\xb9\xe3\xe2=' >>> rs.decode(b'hello worXXXXy\xb2XX\x01q\xb9\xe3\xe2=') # 6 errors - ok b'hello world' If you want full control, you can skip the API and directly use the library as-is. Here's how: First you need to init the precomputed tables: >> init_tables(0x11d) Pro tip: if you get the error: ValueError: byte must be in range(0, 256), please check that your prime polynomial is correct for your field. Then to encode: >> mesecc = rs_encode_msg(mes, n-k) To decode: >> mes, ecc, errata_pos = rs_correct_msg(mes + ecc, n-k, erase_pos=erase_pos) If the decoding fails, it will normally automatically check and raise a ReedSolomonError exception that you can handle. However if you want to manually check if the repaired message is correct, you can do so: >> rsman.check(rmes + recc, k=k) Read the sourcecode's comments for more infos about how it works, and for the various parameters you can setup if you need to interface with other RS codecs. 
''' import cython cimport cython from cython.parallel import parallel, prange import itertools from cython.view cimport array as cvarray ################### INIT and stuff ################### try: bytearray except NameError: def bytearray(obj = 0, encoding = "latin-1"): if isinstance(obj, str): obj = [ord(ch) for ch in obj.encode("latin-1")] elif isinstance(obj, int): obj = [0] * obj return cvarray("B", obj) class ReedSolomonError(Exception): pass ctypedef unsigned char uint8_t # equivalent to (but works with Microsoft C compiler which does not support C99): from libc.stdint cimport uint8_t cdef uint8_t[::1] gf_exp = bytearray([1] * 512) # For efficiency, gf_exp[] has size 2*GF_SIZE, so that a simple multiplication of two numbers can be resolved without calling % field_charac cdef uint8_t[::1] gf_log = bytearray([0] * 256) cdef int field_charac = int(2**8 - 1) ################### GALOIS FIELD ELEMENTS MATHS ################### def rwh_primes1(n): # http://stackoverflow.com/questions/2068372/fastest-way-to-list-all-primes-below-n-in-python/3035188#3035188 ''' Returns a list of primes < n ''' sieve = [True] * (n/2) for i in xrange(3,int(n**0.5)+1,2): if sieve[i/2]: sieve[i*i/2::i] = [False] * ((n-i*i-1)/(2*i)+1) return [2] + [2*i+1 for i in xrange(1,n/2) if sieve[i]] def find_prime_polys(generator=2, c_exp=8, fast_primes=False, single=False): '''Compute the list of prime polynomials for the given generator and galois field characteristic exponent.''' # fast_primes will output less results but will be significantly faster. # single will output the first prime polynomial found, so if all you want is to just find one prime polynomial to generate the LUT for Reed-Solomon to work, then just use that. # A prime polynomial (necessarily irreducible) is necessary to reduce the multiplications in the Galois Field, so as to avoid overflows. # Why do we need a "prime polynomial"? Can't we just reduce modulo 255 (for GF(2^8) for example)? Because we need the values to be unique. 
# For example: if the generator (alpha) = 2 and c_exp = 8 (GF(2^8) == GF(256)), then the generated Galois Field (0, 1, a, a^1, a^2, ..., a^(p-1)) will be galois field it becomes 0, 1, 2, 4, 8, 16, etc. However, upon reaching 128, the next value will be doubled (ie, next power of 2), which will give 256. Then we must reduce, because we have overflowed above the maximum value of field_charac. But, if we modulo field_charac, this will generate 256 == 1. Then 2, 4, 8, 16, etc. giving us a repeating pattern of numbers. This is very bad, as it's then not anymore a bijection (ie, a non-zero value doesn't have a unique index). That's why we can't just modulo 255, but we need another number above 255, which is called the prime polynomial. # Why so much hassle? Because we are using precomputed look-up tables for multiplication: instead of multiplying a*b, we precompute alpha^a, alpha^b and alpha^(a+b), so that we can just use our lookup table at alpha^(a+b) and get our result. But just like in our original field we had 0,1,2,...,p-1 distinct unique values, in our "LUT" field using alpha we must have unique distinct values (we don't care that they are different from the original field as long as they are unique and distinct). That's why we need to avoid duplicated values, and to avoid duplicated values we need to use a prime irreducible polynomial. # Here is implemented a bruteforce approach to find all these prime polynomials, by generating every possible prime polynomials (ie, every integers between field_charac+1 and field_charac*2), and then we build the whole Galois Field, and we reject the candidate prime polynomial if it duplicates even one value or if it generates a value above field_charac (ie, cause an overflow). # Note that this algorithm is slow if the field is too big (above 12), because it's an exhaustive search algorithm. 
There are probabilistic approaches, and almost surely prime approaches, but there is no determistic polynomial time algorithm to find irreducible monic polynomials. More info can be found at: http://people.mpi-inf.mpg.de/~csaha/lectures/lec9.pdf # Another faster algorithm may be found at Adleman, Leonard M., and Hendrik W. Lenstra. "Finding irreducible polynomials over finite fields." Proceedings of the eighteenth annual ACM symposium on Theory of computing. ACM, 1986. # Prepare the finite field characteristic (2^p - 1), this also represent the maximum possible value in this field root_charac = 2 # we're in GF(2) field_charac = int(root_charac**c_exp - 1) field_charac_next = int(root_charac**(c_exp+1) - 1) prim_candidates = [] if fast_primes: prim_candidates = rwh_primes1(field_charac_next) # generate maybe prime polynomials and check later if they really are irreducible prim_candidates = [x for x in prim_candidates if x > field_charac] # filter out too small primes else: prim_candidates = xrange(field_charac+2, field_charac_next, root_charac) # try each possible prime polynomial, but skip even numbers (because divisible by 2 so necessarily not irreducible) # Start of the main loop correct_primes = [] for prim in prim_candidates: # try potential candidates primitive irreducible polys seen = bytearray(field_charac+1) # memory variable to indicate if a value was already generated in the field (value at index x is set to 1) or not (set to 0 by default) conflict = False # flag to know if there was at least one conflict # Second loop, build the whole Galois Field x = 1 for i in xrange(field_charac): # Compute the next value in the field (ie, the next power of alpha/generator) x = gf_mult_noLUT(x, generator, prim, field_charac+1) # Rejection criterion: if the value overflowed (above field_charac) or is a duplicate of a previously generated power of alpha, then we reject this polynomial (not prime) if x > field_charac or seen[x] == 1: conflict = True break # Else we flag 
this value as seen (to maybe detect future duplicates), and we continue onto the next power of alpha else: seen[x] = 1 # End of the second loop: if there's no conflict (no overflow nor duplicated value), this is a prime polynomial! if not conflict: correct_primes.append(prim) if single: return prim # Return the list of all prime polynomials return correct_primes # you can use the following to print the hexadecimal representation of each prime polynomial: print [hex(i) for i in correct_primes] def init_tables(prim=0x11d, generator=2, c_exp=8): '''Precompute the logarithm and anti-log tables for faster computation later, using the provided primitive polynomial. These tables are used for multiplication/division since addition/substraction are simple XOR operations inside GF of characteristic 2. The basic idea is quite simple: since b**(log_b(x), log_b(y)) == x * y given any number b (the base or generator of the logarithm), then we can use any number b to precompute logarithm and anti-log (exponentiation) tables to use for multiplying two numbers x and y. That's why when we use a different base/generator number, the log and anti-log tables are drastically different, but the resulting computations are the same given any such tables. For more infos, see https://en.wikipedia.org/wiki/Finite_field_arithmetic#Implementation_tricks ''' # generator is the generator number (the "increment" that will be used to walk through the field by multiplication, this must be a prime number). This is basically the base of the logarithm/anti-log tables. Also often noted "alpha" in academic books. # prim is the primitive/prime (binary) polynomial and must be irreducible (ie, it can't represented as the product of two smaller polynomials). It's a polynomial in the binary sense: each bit is a coefficient, but in fact it's an integer between field_charac+1 and field_charac*2, and not a list of gf values. 
The prime polynomial will be used to reduce the overflows back into the range of the Galois Field without duplicating values (all values should be unique). See the function find_prime_polys() and: http://research.swtch.com/field and http://www.pclviewer.com/rs2/galois.html # note that the choice of generator or prime polynomial doesn't matter very much: any two finite fields of size p^n have identical structure, even if they give the individual elements different names (ie, the coefficients of the codeword will be different, but the final result will be the same: you can always correct as many errors/erasures with any choice for those parameters). That's why it makes sense to refer to all the finite fields, and all decoders based on Reed-Solomon, of size p^n as one concept: GF(p^n). It can however impact sensibly the speed (because some parameters will generate sparser tables). # c_exp is the exponent for the field's characteristic GF(2^c_exp) global gf_exp, gf_log, field_charac field_charac = int(2**c_exp - 1) gf_exp = bytearray(field_charac * 2) # anti-log (exponential) table. The first two elements will always be [GF256int(1), generator] gf_log = bytearray(field_charac+1) # log table, log[0] is impossible and thus unused # For each possible value in the galois field 2^8, we will pre-compute the logarithm and anti-logarithm (exponential) of this value # To do that, we generate the Galois Field F(2^p) by building a list starting with the element 0 followed by the (p-1) successive powers of the generator a : 1, a, a^1, a^2, ..., a^(p-1). 
x = 1 for i in xrange(field_charac): # we could skip index 255 which is equal to index 0 because of modulo: g^255==g^0 gf_exp[i] = x # compute anti-log for this value and store it in a table gf_log[x] = i # compute log at the same time x = gf_mult_noLUT(x, generator, prim, field_charac+1) # If you use only generator==2 or a power of 2, you can use the following which is faster than gf_mult_noLUT(): #x <<= 1 # multiply by 2 (change 1 by another number y to multiply by a power of 2^y) #if x & 0x100: # similar to x >= 256, but a lot faster (because 0x100 == 256) #x ^= prim # substract the primary polynomial to the current value (instead of 255, so that we get a unique set made of coprime numbers), this is the core of the tables generation # Optimization: double the size of the anti-log table so that we don't need to mod 255 to stay inside the bounds (because we will mainly use this table for the multiplication of two GF numbers, no more). for i in xrange(field_charac, field_charac * 2): gf_exp[i] = gf_exp[i - field_charac] return [gf_log, gf_exp] cpdef uint8_t gf_add(uint8_t x, uint8_t y): return x ^ y cpdef uint8_t gf_sub(uint8_t x, uint8_t y): return x ^ y # in binary galois field, substraction is just the same as addition (since we mod 2) cpdef uint8_t gf_neg(uint8_t x): return x cpdef uint8_t gf_inverse(uint8_t x): return gf_exp[field_charac - gf_log[x]] # gf_inverse(x) == gf_div(1, x) cpdef uint8_t gf_mul(uint8_t x, uint8_t y): global gf_exp, gf_log if x == 0 or y == 0: return 0 cdef uint8_t lx = gf_log[x] cdef uint8_t ly = gf_log[y] cdef uint8_t z = (lx + ly) % field_charac cdef uint8_t ret = gf_exp[z] return ret cpdef uint8_t gf_div(uint8_t x, uint8_t y): if y == 0: raise ZeroDivisionError() if x == 0: return 0 return gf_exp[(gf_log[x] + field_charac - gf_log[y]) % field_charac] cpdef uint8_t gf_pow(uint8_t x, int power): cdef uint8_t x1 = gf_log[x] cdef uint8_t x2 = (x1 * power) % field_charac cdef uint8_t ret = gf_exp[x2] return ret def gf_mult_noLUT_slow(x, 
y, prim=0): '''Multiplication in Galois Fields without using a precomputed look-up table (and thus it's slower) by using the standard carry-less multiplication + modular reduction using an irreducible prime polynomial.''' ### Define bitwise carry-less operations as inner functions ### def cl_mult(x,y): '''Bitwise carry-less multiplication on integers''' z = 0 i = 0 while (y>>i) > 0: if y & (1<> bits: bits += 1 return bits def cl_div(dividend, divisor=None): '''Bitwise carry-less long division on integers and returns the remainder''' # Compute the position of the most significant bit for each integers dl1 = bit_length(dividend) dl2 = bit_length(divisor) # If the dividend is smaller than the divisor, just exit if dl1 < dl2: return dividend # Else, align the most significant 1 of the divisor to the most significant 1 of the dividend (by shifting the divisor) for i in xrange(dl1-dl2,-1,-1): # Check that the dividend is divisible (useless for the first iteration but important for the next ones) if dividend & (1 << i+dl2-1): # If divisible, then shift the divisor to align the most significant bits and XOR (carry-less substraction) dividend ^= divisor << i return dividend ### Main GF multiplication routine ### # Multiply the gf numbers result = cl_mult(x,y) # Then do a modular reduction (ie, remainder from the division) with an irreducible primitive polynomial so that it stays inside GF bounds if prim > 0: result = cl_div(result, prim) return result cpdef int gf_mult_noLUT(int x, int y, int prim=0, int field_charac_full=256, int carryless=True): '''Galois Field integer multiplication using Russian Peasant Multiplication algorithm (faster than the standard multiplication + modular reduction). 
If prim is 0 and carryless=False, then the function produces the result for a standard integers multiplication (no carry-less arithmetics nor modular reduction).''' r = 0 while y: # while y is above 0 if y & 1: r = r ^ x if carryless else r + x # y is odd, then add the corresponding x to r (the sum of all x's corresponding to odd y's will give the final product). Note that since we're in GF(2), the addition is in fact an XOR (very important because in GF(2) the multiplication and additions are carry-less, thus it changes the result!). y = y >> 1 # equivalent to y // 2 x = x << 1 # equivalent to x*2 if prim > 0 and x & field_charac_full: x = x ^ prim # GF modulo: if x >= 256 then apply modular reduction using the primitive polynomial (we just substract, but since the primitive number can be above 256 then we directly XOR). return r ################### GALOIS FIELD POLYNOMIALS MATHS ################### def gf_poly_scale(p, x): return bytearray([gf_mul(p[i], x) for i in xrange(len(p))]) def gf_poly_add(p, q): r = bytearray( max(len(p), len(q)) ) r[len(r)-len(p):len(r)] = p #for i in xrange(len(p)): #r[i + len(r) - len(p)] = p[i] for i in xrange(len(q)): r[i + len(r) - len(q)] ^= q[i] return r cpdef gf_poly_mul(p, q): '''Multiply two polynomials, inside Galois Field (but the procedure is generic). 
Optimized function by precomputation of log.''' cdef int i, j, x, y cdef uint8_t lq, qj cdef uint8_t[::1] p_t = bytearray(p) cdef uint8_t[::1] q_t = bytearray(q) # Pre-allocate the result array cdef uint8_t[::1] r = bytearray(p_t.shape[0] + q_t.shape[0] - 1) # Precompute the logarithm of p cdef uint8_t[::1] lp = bytearray(p_t.shape[0]) for i in xrange(p_t.shape[0]): lp[i] = gf_log[p_t[i]] # Compute the polynomial multiplication (just like the outer product of two vectors, we multiply each coefficients of p with all coefficients of q) for j in xrange(q_t.shape[0]): qj = q_t[j] # optimization: load the coefficient once if qj != 0: # log(0) is undefined, we need to check that lq = gf_log[q_t[j]] # Precache the logarithm of the current coefficient of q for i in xrange(p_t.shape[0]): if p_t[i] != 0: # log(0) is undefined, need to check that... r[i + j] ^= gf_exp[lp[i] + lq] # equivalent to: r[i + j] = gf_add(r[i+j], gf_mul(p[i], q[j])) return bytearray(r) def gf_poly_neg(poly): '''Returns the polynomial with all coefficients negated. In GF(2^p), negation does not change the coefficient, so we return the polynomial as-is.''' return poly def gf_poly_div(dividend, divisor): '''Fast polynomial division by using Extended Synthetic Division and optimized for GF(2^p) computations (doesn't work with standard polynomials outside of this galois field).''' # CAUTION: this function expects polynomials to follow the opposite convention at decoding: the terms must go from the biggest to lowest degree (while most other functions here expect a list from lowest to biggest degree). 
eg: 1 + 2x + 5x^2 = [5, 2, 1], NOT [1, 2, 5] cdef int i, j cdef uint8_t coef, lcoef cdef uint8_t[:] dividend_t = bytearray(dividend) cdef uint8_t[:] msg_out = bytearray(dividend) cdef uint8_t[:] divisor_t = bytearray(divisor) cdef uint8_t[::1] ldivisor_t = bytearray(len(divisor_t)) for j in xrange(divisor_t.shape[0]): ldivisor_t[j] = gf_log[divisor_t[j]] for i in xrange(dividend_t.shape[0] - (divisor_t.shape[0]-1)): coef = msg_out[i] # precaching if coef != 0: # log(0) is undefined, so we need to avoid that case explicitly (and it's also a good optimization) lcoef = gf_log[coef] for j in xrange(1, len(divisor)): # in synthetic division, we always skip the first coefficient of the divisior, because it's only used to normalize the dividend coefficient if divisor[j] != 0: # log(0) is undefined msg_out[i + j] ^= gf_exp[ldivisor_t[j] + lcoef] # equivalent to the more mathematically correct (but xoring directly is faster): msg_out[i + j] += -divisor[j] * coef # The resulting msg_out contains both the quotient and the remainder, the remainder being the size of the divisor (the remainder has necessarily the same degree as the divisor -- not length but degree == length-1 -- since it's what we couldn't divide from the dividend), so we compute the index where this separation is, and return the quotient and remainder. separator = -(len(divisor)-1) return msg_out[:separator], msg_out[separator:] # return quotient, remainder. def gf_poly_square(poly): '''Linear time implementation of polynomial squaring. For details, see paper: "A fast software implementation for arithmetic operations in GF (2n)". De Win, E., Bosselaers, A., Vandenberghe, S., De Gersem, P., & Vandewalle, J. (1996, January). In Advances in Cryptology - Asiacrypt'96 (pp. 65-76). 
Springer Berlin Heidelberg.''' length = len(poly) out = bytearray(2*length - 1) for i in xrange(length-1): p = poly[i] k = 2*i if p != 0: #out[k] = gf_exp[(2*gf_log[p]) % field_charac] # not necessary to modulo (2^r)-1 since gf_exp is duplicated up to 510. out[k] = gf_exp[2*gf_log[p]] #else: # not necessary since the output is already initialized to an array of 0 #out[k] = 0 out[2*length-2] = gf_exp[2*gf_log[poly[length-1]]] if out[0] == 0: out[0] = 2*poly[1] - 1 return out def gf_poly_eval(poly, uint8_t x): '''Evaluates a polynomial in GF(2^p) given the value for x. This is based on Horner's scheme for maximum efficiency.''' cdef int i cdef uint8_t y = poly[0] for i in xrange(1, len(poly)): y = gf_mul(y, x) ^ poly[i] return y ################### REED-SOLOMON ENCODING ################### def rs_generator_poly(nsym, fcr=0, generator=2): '''Generate an irreducible generator polynomial (necessary to encode a message into Reed-Solomon)''' cdef int i cdef uint8_t[:] g = bytearray([1]) for i in xrange(0, nsym): g = gf_poly_mul(g, [1, gf_pow(generator, i+fcr)]) return bytearray(g) def rs_generator_poly_all(max_nsym, fcr=0, generator=2): '''Generate all irreducible generator polynomials up to max_nsym (usually you can use n, the length of the message+ecc). Very useful to reduce processing time if you want to encode using variable schemes and nsym rates.''' g_all = {} g_all[0] = g_all[1] = [1] for nsym in xrange(max_nsym): g_all[nsym] = rs_generator_poly(nsym, fcr, generator) return g_all @cython.boundscheck(False) @cython.wraparound(False) @cython.initializedcheck(False) def rs_encode_msg(msg_in, int nsym, int fcr=0, int generator=2, gen=None): '''Reed-Solomon encoding using polynomial division, optimized in Cython. Kudos to DavidW: http://stackoverflow.com/questions/30363903/optimizing-a-reed-solomon-encoder-polynomial-division/''' # IMPORTANT: there's no checking of gen's value, and there's no auto generation either as to maximize speed. 
Thus you need to always provide it. If you fail to provide it, you will be greeted with the following error, which is NOT a bug: # >> cdef uint8_t[::1] msg_out = bytearray(msg_in_t) + bytearray(gen_t.shape[0]-1) # >> ValueError : negative count cdef uint8_t[::1] msg_in_t = bytearray(msg_in) # have to copy, unfortunately - can't make a memory view from a read only object #cdef uint8_t[::1] gen_t = array.array('i',gen) # convert list to array cdef uint8_t[::1] gen_t = gen cdef uint8_t[::1] msg_out = bytearray(msg_in_t) + bytearray(gen_t.shape[0]-1) cdef int i, j cdef uint8_t[::1] lgen = bytearray(gen_t.shape[0]) cdef uint8_t coef, lcoef with nogil: # Precompute the logarithm of every items in the generator for j in prange(gen_t.shape[0]): lgen[j] = gf_log[gen_t[j]] # Extended synthetic division main loop for i in xrange(msg_in_t.shape[0]): coef = msg_out[i] # Note that it's msg_out here, not msg_in. Thus, we reuse the updated value at each iteration (this is how Synthetic Division works, but instead of storing in a temporary register the intermediate values, we directly commit them to the output). # coef = gf_mul(msg_out[i], gf_inverse(gen[0])) # for general polynomial division (when polynomials are non-monic), the usual way of using synthetic division is to divide the divisor g(x) with its leading coefficient (call it a). In this implementation, this means:we need to compute: coef = msg_out[i] / gen[0] if coef != 0: # log(0) is undefined, so we need to manually check for this case. There's no need to check the divisor here because we know it can't be 0 since we generated it. 
lcoef = gf_log[coef] # precaching for j in prange(1, gen_t.shape[0]): # in synthetic division, we always skip the first coefficient of the divisior, because it's only used to normalize the dividend coefficient (which is here useless since the divisor, the generator polynomial, is always monic) msg_out[i + j] ^= gf_exp[lcoef + lgen[j]] # optimization, equivalent to gf_mul(gen[j], msg_out[i]) and we just substract it to msg_out[i+j] (but since we are in GF256, it's equivalent to an addition and to an XOR). In other words, this is simply a "multiply-accumulate operation" # Recopy the original message bytes (overwrites the part where the quotient was computed) msg_out[:msg_in_t.shape[0]] = msg_in_t # equivalent to c = mprime - b, where mprime is msg_in padded with [0]*nsym return bytearray(msg_out) ####### Attempt at precomputing multiplication and addition table to speedup encoding even more, but it's actually a bit slower... ########### gf_mul_arr = [bytearray(256) for _ in xrange(256)] gf_add_arr = [bytearray(256) for _ in xrange(256)] #gf_mul_arr = bytearray(256*256) #gf_add_arr = bytearray(256*256) def gf_precomp_tables(gf_exp=gf_exp, gf_log=gf_log): global gf_mul_arr, gf_add_arr for i in xrange(256): for j in xrange(256): gf_mul_arr[i][j] = gf_mul(i, j) gf_add_arr[i][j] = i ^ j #gf_mul_arr[i*256+j] = gf_mul(i, j) #gf_add_arr[i*256+j] = i ^ j return gf_mul_arr, gf_add_arr def rs_encode_msg_precomp(msg_in, nsym, fcr=0, generator=2, gen=None): '''Reed-Solomon encoding using polynomial division, better explained at http://research.swtch.com/field''' if len(msg_in) + nsym > field_charac: raise ValueError("Message is too long (%i when max is %i)" % (len(msg_in)+nsym, field_charac)) if gen is None: gen = rs_generator_poly(nsym, fcr, generator) msg_in = bytearray(msg_in) msg_out = bytearray(msg_in) + bytearray(len(gen)-1) # Alternative Numpy #msg_in = np.array(bytearray(msg_in)) #msg_out = np.pad(msg_in, (0, nsym), 'constant') #lgen = gf_log[gen] #for i in 
xrange(msg_in.size): #msg_out[i+1:i+len(gen)] = gf_add_arr[msg_out[i+1:i+len(gen)], gf_mul_arr[gen[1:], msg_out[i]]] # Fastest #mula = [gf_mul_arr[gen[j]] for j in xrange(len(gen))] for i in xrange(len(msg_in)): # [i for i in xrange(len(msg_in)) if msg_in[i] != 0] coef = msg_out[i] # coef = gf_mul(msg_out[i], gf_inverse(gen[0])) # for general polynomial division (when polynomials are non-monic), the usual way of using synthetic division is to divide the divisor g(x) with its leading coefficient (call it a). In this implementation, this means:we need to compute: coef = msg_out[i] / gen[0] if coef != 0: # coef 0 is normally undefined so we manage it manually here (and it also serves as an optimization btw) mula = gf_mul_arr[coef] for j in xrange(1, len(gen)): # optimization: can skip g0 because the first coefficient of the generator is always 1! (that's why we start at position 1) #msg_out[i + j] = gf_add_arr[msg_out[i+j]][gf_mul_arr[coef][gen[j]]] # slow, which is weird since it's only accessing lists #msg_out[i + j] ^= gf_mul_arr[coef][gen[j]] # faster msg_out[i + j] ^= mula[gen[j]] # fastest # Recopy the original message bytes msg_out[:len(msg_in)] = msg_in # equivalent to c = mprime - b, where mprime is msg_in padded with [0]*nsym return msg_out ############### end of precomputing attempt ########### ################### REED-SOLOMON DECODING ################### def rs_calc_syndromes(msg, nsym, fcr=0, generator=2): '''Given the received codeword msg and the number of error correcting symbols (nsym), computes the syndromes polynomial. Mathematically, it's essentially equivalent to a Fourrier Transform (Chien search being the inverse). ''' # Note the "[0] +" : we add a 0 coefficient for the lowest degree (the constant). This effectively shifts the syndrome, and will shift every computations depending on the syndromes (such as the errors locator polynomial, errors evaluator polynomial, etc. but not the errors positions). 
# This is not necessary as anyway syndromes are defined such as there are only non-zero coefficients (the only 0 is the shift of the constant here) and subsequent computations will/must account for the shift by skipping the first iteration (eg, the often seen range(1, n-k+1)), but you can also avoid prepending the 0 coeff and adapt every subsequent computations to start from 0 instead of 1. return [0] + [gf_poly_eval(msg, gf_pow(generator, i+fcr)) for i in xrange(nsym)] def rs_correct_errata(msg_in, synd, err_pos, fcr=0, generator=2): # err_pos is a list of the positions of the errors/erasures/errata '''Forney algorithm, computes the values (error magnitude) to correct the input message.''' global field_charac msg = bytearray(msg_in) # calculate errata locator polynomial to correct both errors and erasures (by combining the errors positions given by the error locator polynomial found by BM with the erasures positions given by caller) coef_pos = [len(msg) - 1 - p for p in err_pos] # need to convert the positions to coefficients degrees for the errata locator algo to work (eg: instead of [0, 1, 2] it will become [len(msg)-1, len(msg)-2, len(msg) -3]) err_loc = rs_find_errata_locator(coef_pos, generator) # calculate errata evaluator polynomial (often called Omega or Gamma in academic papers) err_eval = rs_find_error_evaluator(synd[::-1], err_loc, len(err_loc)-1)[::-1] # Second part of Chien search to get the error location polynomial X from the error positions in err_pos (the roots of the error locator polynomial, ie, where it evaluates to 0) X = [] # will store the position of the errors for i in xrange(len(coef_pos)): l = field_charac - coef_pos[i] X.append( gf_pow(generator, -l) ) # Forney algorithm: compute the magnitudes E = bytearray(len(msg)) # will store the values that need to be corrected (substracted) to the message containing errors. This is sometimes called the error magnitude polynomial. 
Xlength = len(X) for i, Xi in enumerate(X): Xi_inv = gf_inverse(Xi) # Compute the formal derivative of the error locator polynomial (see Blahut, Algebraic codes for data transmission, pp 196-197). # the formal derivative of the errata locator is used as the denominator of the Forney Algorithm, which simply says that the ith error value is given by error_evaluator(gf_inverse(Xi)) / error_locator_derivative(gf_inverse(Xi)). See Blahut, Algebraic codes for data transmission, pp 196-197. err_loc_prime_tmp = [] for j in xrange(Xlength): if j != i: err_loc_prime_tmp.append( gf_sub(1, gf_mul(Xi_inv, X[j])) ) # compute the product, which is the denominator of the Forney algorithm (errata locator derivative) err_loc_prime = 1 for coef in err_loc_prime_tmp: err_loc_prime = gf_mul(err_loc_prime, coef) # equivalent to: err_loc_prime = functools.reduce(gf_mul, err_loc_prime_tmp, 1) # Test if we could find the errata locator, else we raise an Exception (because else since we divide y by err_loc_prime to compute the magnitude, we will get a ZeroDivisionError exception otherwise) if err_loc_prime == 0: raise ReedSolomonError("Decoding failed: Forney algorithm could not properly detect where the errors are located (errata locator prime is 0).") # Compute y (evaluation of the errata evaluator polynomial) # This is a more faithful translation of the theoretical equation contrary to the old forney method. Here it is exactly copy/pasted from the included presentation decoding_rs.pdf: Yl = omega(Xl.inverse()) / prod(1 - Xj*Xl.inverse()) for j in len(X) (in the paper it's for j in s, but it's useless when len(X) < s because we compute neutral terms 1 for nothing, and wrong when correcting more than s erasures or erasures+errors since it prevents computing all required terms). 
# Thus here this method works with erasures too because firstly we fixed the equation to be like the theoretical one (don't know why it was modified in _old_forney(), if it's an optimization, it doesn't enhance anything), and secondly because we removed the product bound on s, which prevented computing errors and erasures above the s=(n-k)//2 bound. y = gf_poly_eval(err_eval[::-1], Xi_inv) # numerator of the Forney algorithm (errata evaluator evaluated) y = gf_mul(gf_pow(Xi, 1-fcr), y) # adjust to fcr parameter # Compute the magnitude magnitude = gf_div(y, err_loc_prime) # magnitude value of the error, calculated by the Forney algorithm (an equation in fact): dividing the errata evaluator with the errata locator derivative gives us the errata magnitude (ie, value to repair) the ith symbol E[err_pos[i]] = magnitude # store the magnitude for this error into the magnitude polynomial # Apply the correction of values to get our message corrected! (note that the ecc bytes also gets corrected!) # (this isn't the Forney algorithm, we just apply the result of decoding here) msg = gf_poly_add(msg, E) # equivalent to Ci = Ri - Ei where Ci is the correct message, Ri the received (senseword) message, and Ei the errata magnitudes (minus is replaced by XOR since it's equivalent in GF(2^p)). So in fact here we substract from the received message the errors magnitude, which logically corrects the value to what it should be. return msg def rs_find_error_locator(synd, nsym, erase_loc=None, erase_count=0): '''Find error/errata locator and evaluator polynomials with Berlekamp-Massey algorithm''' # The idea is that BM will iteratively estimate the error locator polynomial. # To do this, it will compute a Discrepancy term called Delta, which will tell us if the error locator polynomial needs an update or not # (hence why it's called discrepancy: it tells us when we are getting off board from the correct value). 
# Init the polynomials if erase_loc: # if the erasure locator polynomial is supplied, we init with its value, so that we include erasures in the final locator polynomial err_loc = bytearray(erase_loc) old_loc = bytearray(erase_loc) else: err_loc = bytearray([1]) # This is the main variable we want to fill, also called Sigma in other notations or more formally the errors/errata locator polynomial. old_loc = bytearray([1]) # BM is an iterative algorithm, and we need the errata locator polynomial of the previous iteration in order to update other necessary variables. #L = 0 # update flag variable, not needed here because we use an alternative equivalent way of checking if update is needed (but using the flag could potentially be faster depending on if using length(list) is taking linear time in your language, here in Python it's constant so it's as fast. # Fix the syndrome shifting: when computing the syndrome, some implementations may prepend a 0 coefficient for the lowest degree term (the constant). This is a case of syndrome shifting, thus the syndrome will be bigger than the number of ecc symbols (I don't know what purpose serves this shifting). If that's the case, then we need to account for the syndrome shifting when we use the syndrome such as inside BM, by skipping those prepended coefficients. # Another way to detect the shifting is to detect the 0 coefficients: by definition, a syndrome does not contain any 0 coefficient (except if there are no errors/erasures, in this case they are all 0). This however doesn't work with the modified Forney syndrome, which set to 0 the coefficients corresponding to erasures, leaving only the coefficients corresponding to errors. 
synd_shift = 0 if len(synd) > nsym: synd_shift = len(synd) - nsym for i in xrange(nsym-erase_count): # generally: nsym-erase_count == len(synd), except when you input a partial erase_loc and using the full syndrome instead of the Forney syndrome, in which case nsym-erase_count is more correct (len(synd) will fail badly with IndexError). if erase_loc: # if an erasures locator polynomial was provided to init the errors locator polynomial, then we must skip the FIRST erase_count iterations (not the last iterations, this is very important!) K = erase_count+i+synd_shift else: # if erasures locator is not provided, then either there's no erasures to account or we use the Forney syndromes, so we don't need to use erase_count nor erase_loc (the erasures have been trimmed out of the Forney syndromes). K = i+synd_shift # Compute the discrepancy Delta # Here is the close-to-the-books operation to compute the discrepancy Delta: it's a simple polynomial multiplication of error locator with the syndromes, and then we get the Kth element. #delta = gf_poly_mul(err_loc[::-1], synd)[K] # theoretically it should be gf_poly_add(synd[::-1], [1])[::-1] instead of just synd, but it seems it's not absolutely necessary to correctly decode. # But this can be optimized: since we only need the Kth element, we don't need to compute the polynomial multiplication for any other element but the Kth. Thus to optimize, we compute the polymul only at the item we need, skipping the rest (avoiding a nested loop, thus we are linear time instead of quadratic). # This optimization is actually described in several figures of the book "Algebraic codes for data transmission", Blahut, Richard E., 2003, Cambridge university press. delta = synd[K] for j in xrange(1, len(err_loc)): delta ^= gf_mul(err_loc[-(j+1)], synd[K - j]) # delta is also called discrepancy. Here we do a partial polynomial multiplication (ie, we compute the polynomial multiplication only for the term of degree K). 
Should be equivalent to brownanrs.polynomial.mul_at(). #print "delta", K, delta, list(gf_poly_mul(err_loc[::-1], synd)) # debugline # Shift polynomials to compute the next degree old_loc = old_loc + bytearray([0]) # Iteratively estimate the errata locator and evaluator polynomials if delta != 0: # Update only if there's a discrepancy if len(old_loc) > len(err_loc): # Rule B (rule A is implicitly defined because rule A just says that we skip any modification for this iteration) #if 2*L <= K+erase_count: # equivalent to len(old_loc) > len(err_loc), as long as L is correctly computed # Computing errata locator polynomial Sigma new_loc = gf_poly_scale(old_loc, delta) old_loc = gf_poly_scale(err_loc, gf_inverse(delta)) # effectively we are doing err_loc * 1/delta = err_loc // delta err_loc = new_loc # Update the update flag #L = K - L # incorrect: L = K - L - erase_count, this will lead to an uncorrect decoding in cases where it should correctly decode! # Update with the discrepancy err_loc = gf_poly_add(err_loc, gf_poly_scale(old_loc, delta)) # Check if the result is correct, that there's not too many errors to correct err_loc = list(itertools.dropwhile(lambda x: x == 0, err_loc)) # drop leading 0s, else errs will not be of the correct size errs = len(err_loc) - 1 if (errs-erase_count) * 2 + erase_count > nsym: raise ReedSolomonError("Too many errors to correct") return err_loc def rs_find_errata_locator(e_pos, generator=2): '''Compute the erasures/errors/errata locator polynomial from the erasures/errors/errata positions (the positions must be relative to the x coefficient, eg: "hello worldxxxxxxxxx" is tampered to "h_ll_ worldxxxxxxxxx" with xxxxxxxxx being the ecc of length n-k=9, here the string positions are [1, 4], but the coefficients are reversed since the ecc characters are placed as the first coefficients of the polynomial, thus the coefficients of the erased characters are n-1 - [1, 4] = [18, 15] = erasures_loc to be specified as an argument.''' # See: 
http://ocw.usu.edu/Electrical_and_Computer_Engineering/Error_Control_Coding/lecture7.pdf and Blahut, Richard E. "Transform techniques for error control codes." IBM Journal of Research and development 23.3 (1979): 299-315. http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.92.600&rep=rep1&type=pdf and also a MatLab implementation here: http://www.mathworks.com/matlabcentral/fileexchange/23567-reed-solomon-errors-and-erasures-decoder/content//RS_E_E_DEC.m e_loc = [1] # just to init because we will multiply, so it must be 1 so that the multiplication starts correctly without nulling any term # erasures_loc is very simple to compute: erasures_loc = prod(1 - x*alpha**i) for i in erasures_pos and where alpha is the alpha chosen to evaluate polynomials (here in this library it's gf(3)). To generate c*x where c is a constant, we simply generate a Polynomial([c, 0]) where 0 is the constant and c is positionned to be the coefficient for x^1. for i in e_pos: e_loc = gf_poly_mul( e_loc, gf_poly_add([1], [gf_pow(generator, i), 0]) ) return e_loc def rs_find_error_evaluator(synd, err_loc, nsym): '''Compute the error (or erasures if you supply sigma=erasures locator polynomial, or errata) evaluator polynomial Omega from the syndrome and the error/erasures/errata locator Sigma. 
Omega is already computed at the same time as Sigma inside the Berlekamp-Massey implemented above, but in case you modify Sigma, you can recompute Omega afterwards using this method, or just ensure that Omega computed by BM is correct given Sigma.''' # Omega(x) = [ Synd(x) * Error_loc(x) ] mod x^(n-k+1) _, remainder = gf_poly_div( gf_poly_mul(synd, err_loc), ([1] + [0]*(nsym+1)) ) # first multiply syndromes * errata_locator, then do a polynomial division to truncate the polynomial to the required length # Faster way that is equivalent #remainder = gf_poly_mul(synd, err_loc) # first multiply the syndromes with the errata locator polynomial #remainder = remainder[len(remainder)-(nsym+1):] # then divide by a polynomial of the length we want, which is equivalent to slicing the list (which represents the polynomial) return remainder def rs_find_errors(err_loc, nmess, generator=2): '''Find the roots (ie, where evaluation = zero) of error polynomial by bruteforce trial, this is a sort of Chien's search (but less efficient, Chien's search is a way to evaluate the polynomial such that each evaluation only takes constant time).''' # nmess = length of whole codeword (message + ecc symbols) errs = len(err_loc) - 1 err_pos = [] for i in xrange(nmess): # normally we should try all 2^8 possible values, but here we optimize to just check the interesting symbols if gf_poly_eval(err_loc, gf_pow(generator, i)) == 0: # It's a 0? 
Bingo, it's a root of the error locator polynomial, in other terms this is the location of an error err_pos.append(nmess - 1 - i) # Sanity check: the number of errors/errata positions found should be exactly the same as the length of the errata locator polynomial if len(err_pos) != errs: # TODO: to decode messages+ecc with length n > 255, we may try to use a bruteforce approach: the correct positions ARE in the final array j, but the problem is because we are above the Galois Field's range, there is a wraparound so that for example if j should be [0, 1, 2, 3], we will also get [255, 256, 257, 258] (because 258 % 255 == 3, same for the other values), so we can't discriminate. The issue is that fixing any errs_nb errors among those will always give a correct output message (in the sense that the syndrome will be all 0), so we may not even be able to check if that's correct or not, so I'm not sure the bruteforce approach may even be possible. raise ReedSolomonError("Too many (or few) errors found by Chien Search for the errata locator polynomial!") return err_pos def rs_forney_syndromes(synd, pos, nmess, generator=2): # Compute Forney syndromes, which computes a modified syndromes to compute only errors (erasures are trimmed out). Do not confuse this with Forney algorithm, which allows to correct the message based on the location of errors. erase_pos_reversed = [nmess-1-p for p in pos] # prepare the coefficient degree positions (instead of the erasures positions) # Optimized method, all operations are inlined fsynd = list(synd[1:]) # make a copy and trim the first coefficient which is always 0 by definition for i in xrange(len(pos)): x = gf_pow(generator, erase_pos_reversed[i]) for j in xrange(len(fsynd) - 1): fsynd[j] = gf_mul(fsynd[j], x) ^ fsynd[j + 1] #fsynd.pop() # useless? 
it doesn't change the results of computations to leave it there # Theoretical way of computing the modified Forney syndromes: fsynd = (erase_loc * synd) % x^(n-k) -- although the trimming by using x^(n-k) is maybe not necessary as many books do not even mention it (and it works without trimming) # See Shao, H. M., Truong, T. K., Deutsch, L. J., & Reed, I. S. (1986, April). A single chip VLSI Reed-Solomon decoder. In Acoustics, Speech, and Signal Processing, IEEE International Conference on ICASSP'86. (Vol. 11, pp. 2151-2154). IEEE.ISO 690 #erase_loc = rs_find_errata_locator(erase_pos_reversed, generator=generator) # computing the erasures locator polynomial #fsynd = gf_poly_mul(erase_loc[::-1], synd[1:]) # then multiply with the syndrome to get the untrimmed forney syndrome #fsynd = fsynd[len(pos):] # then trim the first erase_pos coefficients which are useless. Seems to be not necessary, but this reduces the computation time later in BM (thus it's an optimization). return fsynd def rs_correct_msg(msg_in, nsym, fcr=0, generator=2, erase_pos=None, only_erasures=False): '''Reed-Solomon main decoding function''' global field_charac if len(msg_in) > field_charac: # Note that it is in fact possible to encode/decode messages that are longer than field_charac, but because this will be above the field, this will generate more error positions during Chien Search than it should, because this will generate duplicate values, which should normally be prevented thank's to the prime polynomial reduction (eg, because it can't discriminate between error at position 1 or 256, both being exactly equal under galois field 2^8). So it's really not advised to do it, but it's possible (but then you're not guaranted to be able to correct any error/erasure on symbols with a position above the length of field_charac -- if you really need a bigger message without chunking, then you should better enlarge c_exp so that you get a bigger field). 
raise ValueError("Message is too long (%i when max is %i)" % (len(msg_in), field_charac)) msg_out = bytearray(msg_in) # copy of message # erasures: set them to null bytes for easier decoding (but this is not necessary, they will be corrected anyway, but debugging will be easier with null bytes because the error locator polynomial values will only depend on the errors locations, not their values) if erase_pos is None: erase_pos = [] else: for e_pos in erase_pos: msg_out[e_pos] = 0 # check if there are too many erasures if len(erase_pos) > nsym: raise ReedSolomonError("Too many erasures to correct") # prepare the syndrome polynomial using only errors (ie: errors = characters that were either replaced by null byte or changed to another character, but we don't know their positions) synd = rs_calc_syndromes(msg_out, nsym, fcr, generator) # check if there's any error/erasure in the input codeword. If not (all syndromes coefficients are 0), then just return the codeword as-is. if max(synd) == 0: return msg_out[:-nsym], msg_out[-nsym:], erase_pos # no errors # Find errors locations if only_erasures: err_pos = [] else: # compute the Forney syndromes, which hide the erasures from the original syndrome (so that BM will just have to deal with errors, not erasures) fsynd = rs_forney_syndromes(synd, erase_pos, len(msg_out), generator) # compute the error locator polynomial using Berlekamp-Massey err_loc = rs_find_error_locator(fsynd, nsym, erase_count=len(erase_pos)) # locate the message errors using Chien search (or bruteforce search) err_pos = rs_find_errors(err_loc[::-1], len(msg_out), generator) if err_pos is None: raise ReedSolomonError("Could not locate error") # Find errors values and apply them to correct the message # compute errata evaluator and errata magnitude polynomials, then correct errors and erasures msg_out = rs_correct_errata(msg_out, synd, (erase_pos + err_pos), fcr, generator) # note that we here use the original syndrome, not the forney syndrome (because we 
will correct both errors and erasures, so we need the full syndrome) # check if the final message is fully repaired synd = rs_calc_syndromes(msg_out, nsym, fcr, generator) if max(synd) > 0: raise ReedSolomonError("Could not correct message") # return the successfully decoded message return msg_out[:-nsym], msg_out[-nsym:], erase_pos + err_pos # also return the corrected ecc block so that the user can check(), and the position of errors to allow for adaptive bitrate algorithm to check how the number of errors vary def rs_correct_msg_nofsynd(msg_in, nsym, fcr=0, generator=2, erase_pos=None, only_erasures=False): '''Reed-Solomon main decoding function, without using the modified Forney syndromes''' global field_charac if len(msg_in) > field_charac: raise ValueError("Message is too long (%i when max is %i)" % (len(msg_in), field_charac)) msg_out = bytearray(msg_in) # copy of message # erasures: set them to null bytes for easier decoding (but this is not necessary, they will be corrected anyway, but debugging will be easier with null bytes because the error locator polynomial values will only depend on the errors locations, not their values) if erase_pos is None: erase_pos = [] else: for e_pos in erase_pos: msg_out[e_pos] = 0 # check if there are too many erasures if len(erase_pos) > nsym: raise ReedSolomonError("Too many erasures to correct") # prepare the syndrome polynomial using only errors (ie: errors = characters that were either replaced by null byte or changed to another character, but we don't know their positions) synd = rs_calc_syndromes(msg_out, nsym, fcr, generator) # check if there's any error/erasure in the input codeword. If not (all syndromes coefficients are 0), then just return the codeword as-is. 
if max(synd) == 0: return msg_out[:-nsym], msg_out[-nsym:], [] # no errors # prepare erasures locator and evaluator polynomials erase_loc = None #erase_eval = None erase_count = 0 if erase_pos: erase_count = len(erase_pos) erase_pos_reversed = [len(msg_out)-1-eras for eras in erase_pos] erase_loc = rs_find_errata_locator(erase_pos_reversed, generator=generator) #erase_eval = rs_find_error_evaluator(synd[::-1], erase_loc, len(erase_loc)-1) # prepare errors/errata locator polynomial if only_erasures: err_loc = erase_loc[::-1] #err_eval = erase_eval[::-1] else: err_loc = rs_find_error_locator(synd, nsym, erase_loc=erase_loc, erase_count=erase_count) err_loc = err_loc[::-1] #err_eval = rs_find_error_evaluator(synd[::-1], err_loc[::-1], len(err_loc)-1)[::-1] # find error/errata evaluator polynomial (not really necessary since we already compute it at the same time as the error locator poly in BM) # locate the message errors err_pos = rs_find_errors(err_loc, len(msg_out), generator) # find the roots of the errata locator polynomial (ie: the positions of the errors/errata) if err_pos is None: raise ReedSolomonError("Could not locate error") # compute errata evaluator and errata magnitude polynomials, then correct errors and erasures msg_out = rs_correct_errata(msg_out, synd, err_pos, fcr=fcr, generator=generator) # check if the final message is fully repaired synd = rs_calc_syndromes(msg_out, nsym, fcr, generator) if max(synd) > 0: raise ReedSolomonError("Could not correct message") # return the successfully decoded message return msg_out[:-nsym], msg_out[-nsym:], erase_pos + err_pos # also return the corrected ecc block so that the user can check(), and the position of errors to allow for adaptive bitrate algorithm to check how the number of errors vary def rs_check(msg, nsym, fcr=0, generator=2): '''Returns true if the message + ecc has no error of false otherwise (may not always catch a wrong decoding or a wrong message, particularly if there are too many errors -- 
above the Singleton bound --, but it usually does)''' return ( max(rs_calc_syndromes(msg, nsym, fcr, generator)) == 0 ) #=================================================================================================== # API #=================================================================================================== class RSCodec(object): ''' A Reed Solomon encoder/decoder. After initializing the object, use ``encode`` to encode a (byte)string to include the RS correction code, and pass such an encoded (byte)string to ``decode`` to extract the original message (if the number of errors allows for correct decoding). The ``nsym`` argument is the length of the correction code, and it determines the number of error bytes (if I understand this correctly, half of ``nsym`` is correctable) ''' ''' Modifications by rotorgit 2/3/2015: Added support for US FAA ADSB UAT RS FEC, by allowing user to specify different primitive polynomial and non-zero first consecutive root (fcr). For UAT/ADSB use, set fcr=120 and prim=0x187 when instantiating the class; leaving them out will default for previous values (0 and 0x11d) ''' def __init__(self, nsym=10, nsize=255, fcr=0, prim=0x11d, generator=2, c_exp=8): '''Initialize the Reed-Solomon codec. Note that different parameters change the internal values (the ecc symbols, look-up table values, etc) but not the output result (whether your message can be repaired or not, there is no influence of the parameters). Note also there are less checks here to be faster, so if you get weird errors, check aggainst the pure python implementation reedsolo.py to get more verbose errors.''' self.nsym = nsym # number of ecc symbols (ie, the repairing rate will be r=(nsym/2)/nsize, so for example if you have nsym=5 and nsize=10, you have a rate r=0.25, so you can correct up to 0.25% errors (or exactly 2 symbols out of 10), and 0.5% erasures (5 symbols out of 10). 
self.nsize = nsize # maximum length of one chunk (ie, message + ecc symbols after encoding, for the message alone it's nsize-nsym) self.fcr = fcr # first consecutive root, can be any value between 0 and (2**c_exp)-1 self.prim = prim # prime irreducible polynomial, use find_prime_polys() to find a prime poly self.generator = generator # generator integer, must be prime self.c_exp = c_exp # exponent of the field's characteristic. This both defines the maximum value per symbol and the maximum length of one chunk. By default it's GF(2^8), do not change if you're not sure what it means. # Initialize the look-up tables for easy and quick multiplication/division init_tables(prim, generator, c_exp) # Prepare the generator polynomials (because in this cython implementation, the encoding function does not automatically build the generator polynomial if missing) self.g_all = rs_generator_poly_all(nsize, fcr=fcr, generator=generator) def encode(self, data): '''Encode a message (ie, add the ecc symbols) using Reed-Solomon, whatever the length of the message because we use chunking''' if isinstance(data, str): data = bytearray(data, "latin-1") chunk_size = self.nsize - self.nsym enc = bytearray() for i in xrange(0, len(data), chunk_size): chunk = data[i:i+chunk_size] enc.extend(rs_encode_msg(chunk, self.nsym, fcr=self.fcr, generator=self.generator, gen=self.g_all[self.nsym])) return enc def decode(self, data, erase_pos=None, only_erasures=False): '''Repair a message, whatever its size is, by using chunking''' # erase_pos is a list of positions where you know (or greatly suspect at least) there is an erasure (ie, wrong character but you know it's at this position). Just input the list of all positions you know there are errors, and this method will automatically split the erasures positions to attach to the corresponding data chunk. 
if isinstance(data, str): data = bytearray(data, "latin-1") dec = bytearray() dec_full = bytearray() errata_pos_all = bytearray() for i in xrange(0, len(data), self.nsize): # Split the long message in a chunk chunk = data[i:i+self.nsize] # Extract the erasures for this chunk e_pos = [] if erase_pos: # First extract the erasures for this chunk (all erasures below the maximum chunk length) e_pos = [x for x in erase_pos if x < self.nsize] # Then remove the extract erasures from the big list and also decrement all subsequent positions values by nsize (the current chunk's size) so as to prepare the correct alignment for the next iteration erase_pos = [x - self.nsize for x in erase_pos if x >= self.nsize] # Decode/repair this chunk! rmes, recc, errata_pos = rs_correct_msg(chunk, self.nsym, fcr=self.fcr, generator=self.generator, erase_pos=e_pos, only_erasures=only_erasures) dec.extend(rmes) dec_full.extend(rmes+recc) errata_pos_all.extend(errata_pos) return dec, dec_full, errata_pos_all def check(self, data, nsym=None): '''Check if a message+ecc stream is not corrupted (or fully repaired). Note: may return a wrong result if number of errors > nsym.''' if not nsym: nsym = self.nsym if isinstance(data, str): data = bytearray(data) check = [] for i in xrange(0, len(data), self.nsize): # Split the long message in a chunk chunk = data[i:i+self.nsize] # Check and add the result in the list, we concatenate all results since we are chunking check.append(rs_check(chunk, nsym, fcr=self.fcr, generator=self.generator)) return check def maxerrata(self, errors=None, erasures=None, verbose=False): '''Return the Singleton Bound for the current codec, which is the max number of errata (errors and erasures) that the codec can decode/correct. Beyond the Singleton Bound (too many errors/erasures), the algorithm will try to raise an exception, but it may also not detect any problem with the message and return 0 errors. 
Hence why you should use checksums if your goal is to detect errors (as opposed to correcting them), as checksums have no bounds on the number of errors, the only limitation being the probability of collisions. By default, return a tuple wth the maximum number of errors (2nd output) OR erasures (2nd output) that can be corrected. If errors or erasures (not both) is specified as argument, computes the remaining **simultaneous** correction capacity (eg, if errors specified, compute the number of erasures that can be simultaneously corrected). Set verbose to True to get print a report.''' nsym = self.nsym # Compute the maximum number of errors OR erasures maxerrors = int(nsym/2) # always floor the number, we can't correct half a symbol, it's all or nothing maxerasures = nsym # Compute the maximum of simultaneous errors AND erasures if erasures is not None and erasures >= 0: # We know the erasures count, we want to know how many errors we can correct simultaneously if erasures > maxerasures: raise ReedSolomonError("Specified number of errors or erasures exceeding the Singleton Bound!") maxerrors = int((nsym-erasures)/2) if verbose: print('This codec can correct up to %i errors and %i erasures simultaneously' % (maxerrors, erasures)) # Return a tuple with the maximum number of simultaneously corrected errors and erasures return maxerrors, erasures if errors is not None and errors >= 0: # We know the errors count, we want to know how many erasures we can correct simultaneously if errors > maxerrors: raise ReedSolomonError("Specified number of errors or erasures exceeding the Singleton Bound!") maxerasures = int(nsym-(errors*2)) if verbose: print('This codec can correct up to %i errors and %i erasures simultaneously' % (errors, maxerasures)) # Return a tuple with the maximum number of simultaneously corrected errors and erasures return errors, maxerasures # Return a tuple with the maximum number of errors and erasures (independently corrected) if verbose: print('This 
codec can correct up to %i errors and %i erasures independently' % (maxerrors, maxerasures)) return maxerrors, maxerasures reedsolomon-1.7.0/reedsolo.py000066400000000000000000002125201436142644400162450ustar00rootroot00000000000000#!/usr/bin/env python # -*- coding: utf-8 -*- # Copyright (c) 2012-2015 Tomer Filiba # Copyright (c) 2015 rotorgit # Copyright (c) 2015-2022 Stephen Larroque ''' Reed Solomon ============ A pure-python `universal errors-and-erasures Reed-Solomon Codec `_ , based on the wonderful tutorial at `wikiversity `_, written by "Bobmath" and "LRQ3000". The code of wikiversity is here consolidated into a nice API with exceptions handling. The algorithm can correct up to 2*e+v <= nsym, where e is the number of errors, v the number of erasures and nsym = n-k = the number of ECC (error correction code) symbols. This means that you can either correct exactly floor(nsym/2) errors, or nsym erasures (errors where you know the position), and a combination of both errors and erasures. The code should work on pretty much any reasonable version of python (2.4-3.5), but I'm only testing on 2.7 - 3.4. .. note:: The codec is universal, meaning that it can decode any message encoded by another RS encoder as long as you provide the correct parameters. Note however that if you use higher fields (ie, bigger c_exp), the algorithms will be slower, first because we cannot then use the optimized bytearray() structure but only array.array('i', ...), and also because Reed-Solomon's complexity is quadratic (both in encoding and decoding), so this means that the longer your messages, the longer it will take to encode/decode (quadratically!). The algorithm itself can handle messages up to (2^c_exp)-1 symbols, including the ECC symbols, and each symbol can have a value of up to (2^c_exp)-1 (indeed, both the message length and the maximum value for one character is constrained by the same mathematical reason). 
By default, we use the field GF(2^8), which means that you are limited to values between 0 and 255 (perfect to represent a single hexadecimal symbol on computers, so you can encode any binary stream) and limited to messages+ecc of maximum length 255. However, you can "chunk" longer messages to fit them into the message length limit. The ``RSCodec`` class will automatically apply chunking, by splitting longer messages into chunks and encode/decode them separately; it shouldn't make a difference from an API perspective (ie, from your POV). :: # Initialization >>> from reedsolo import RSCodec >>> rsc = RSCodec(10) # 10 ecc symbols # Encoding >>> rsc.encode([1,2,3,4]) b'\x01\x02\x03\x04,\x9d\x1c+=\xf8h\xfa\x98M' >>> rsc.encode(bytearray([1,2,3,4])) bytearray(b'\x01\x02\x03\x04,\x9d\x1c+=\xf8h\xfa\x98M') >>> rsc.encode(b'hello world') b'hello world\xed%T\xc4\xfd\xfd\x89\xf3\xa8\xaa' # Note that chunking is supported transparently to encode any string length. # Decoding (repairing) >>> rsc.decode(b'hello world\xed%T\xc4\xfd\xfd\x89\xf3\xa8\xaa')[0] b'hello world' >>> rsc.decode(b'heXlo worXd\xed%T\xc4\xfdX\x89\xf3\xa8\xaa')[0] # 3 errors b'hello world' >>> rsc.decode(b'hXXXo worXd\xed%T\xc4\xfdX\x89\xf3\xa8\xaa')[0] # 5 errors b'hello world' >>> rsc.decode(b'hXXXo worXd\xed%T\xc4\xfdXX\xf3\xa8\xaa')[0] # 6 errors - fail Traceback (most recent call last): ... 
ReedSolomonError: Could not locate error >>> rsc = RSCodec(12) # using 2 more ecc symbols (to correct max 6 errors or 12 erasures) >>> rsc.encode(b'hello world') b'hello world?Ay\xb2\xbc\xdc\x01q\xb9\xe3\xe2=' >>> rsc.decode(b'hello worXXXXy\xb2XX\x01q\xb9\xe3\xe2=')[0] # 6 errors - ok b'hello world' >>> rsc.decode(b'helXXXXXXXXXXy\xb2XX\x01q\xb9\xe3\xe2=', erase_pos=[3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 15, 16])[0] # 12 erasures - OK b'hello world' # Checking >> rsc.check(b'hello worXXXXy\xb2XX\x01q\xb9\xe3\xe2=') [False] >> rmes, rmesecc = rsc.decode(b'hello worXXXXy\xb2XX\x01q\xb9\xe3\xe2=') >> rsc.check(rmesecc) [True] # To use longer chunks or bigger values than 255 (may be very slow) >> rsc = RSCodec(12, nsize=4095) # always use a power of 2 minus 1 >> rsc = RSCodec(12, c_exp=12) # alternative way to set nsize=4095 >> mes = 'a' * (4095-12) >> mesecc = rsc.encode(mes) >> mesecc[2] = 1 >> mesecc[-1] = 1 >> rmes, rmesecc = rsc.decode(mesecc) >> rsc.check(mesecc) [False] >> rsc.check(rmesecc) [True] If you want full control, you can skip the API and directly use the library as-is. Here's how: First you need to init the precomputed tables: >> import reedsolo as rs >> rs.init_tables(0x11d) Pro tip: if you get the error: ValueError: byte must be in range(0, 256), please check that your prime polynomial is correct for your field. Pro tip2: by default, you can only encode messages of max length and max symbol value = 256. 
If you want to encode bigger messages, please use the following (where c_exp is the exponent of your Galois Field, eg, 12 = max length 2^12 = 4096): >> prim = rs.find_prime_polys(c_exp=12, fast_primes=True, single=True) >> rs.init_tables(c_exp=12, prim=prim) Let's define our RS message and ecc size: >> n = 255 # length of total message+ecc >> nsym = 12 # length of ecc >> mes = "a" * (n-nsym) # generate a sample message To optimize, you can precompute the generator polynomial: >> gen = rs.rs_generator_poly_all(n) Then to encode: >> mesecc = rs.rs_encode_msg(mes, nsym, gen=gen[nsym]) Let's tamper our message: >> mesecc[1] = 0 To decode: >> rmes, recc, errata_pos = rs.rs_correct_msg(mesecc, nsym, erase_pos=erase_pos) Note that both the message and the ecc are corrected (if possible of course). Pro tip: if you know a few erasures positions, you can specify them in a list `erase_pos` to double the repair power. But you can also just specify an empty list. If the decoding fails, it will normally automatically check and raise a ReedSolomonError exception that you can handle. However if you want to manually check if the repaired message is correct, you can do so: >> rs.rs_check(rmes + recc, nsym) Note: if you want to use multiple reedsolomon with different parameters, you need to backup the globals and restore them before calling reedsolo functions: >> rs.init_tables() >> global gf_log, gf_exp, field_charac >> bak_gf_log, bak_gf_exp, bak_field_charac = gf_log, gf_exp, field_charac Then at anytime, you can do: >> global gf_log, gf_exp, field_charac >> gf_log, gf_exp, field_charac = bak_gf_log, bak_gf_exp, bak_field_charac >> mesecc = rs.rs_encode_msg(mes, nsym) >> rmes, recc, errata_pos = rs.rs_correct_msg(mesecc, nsym) The globals backup is not necessary if you use RSCodec, it will be automatically managed. Read the sourcecode's comments for more info about how it works, and for the various parameters you can setup if you need to interface with other RS codecs. 
'''

# TODO IMPORTANT: try to keep the same convention for the ordering of polynomials inside lists throughout the code and functions (because for now there are a lot of list reversing in order to make it work, you never know the order of a polynomial, ie, if the first coefficient is the major degree or the constant term...).

import itertools
import math


################### INIT and stuff ###################

try:  # pragma: no cover
    # Prefer the builtin bytearray when the interpreter provides one
    bytearray
    _bytearray = bytearray
except NameError:  # pragma: no cover
    # Fallback for very old interpreters without a builtin bytearray: emulate it with array('B')
    from array import array
    def _bytearray(obj = 0, encoding = "latin-1"):  # pragma: no cover
        '''Simple bytearray replacement'''
        # always use Latin-1 and not UTF8 because Latin-1 maps the first 256 characters to their bytevalue equivalents. UTF8 may mangle your data (particularly at value 128)
        if isinstance(obj, str):
            obj = [ord(ch) for ch in obj.encode(encoding)]
        elif isinstance(obj, int):
            obj = [0] * obj
        return array("B", obj)

try:  # pragma: no cover
    # compatibility with Python 2.7
    xrange
except NameError:  # pragma: no cover
    # compatibility with Python 3+
    xrange = range

class ReedSolomonError(Exception):
    '''Raised whenever Reed-Solomon encoding or decoding fails (eg, too many errors to correct).'''
    pass

gf_exp = _bytearray([1] * 512) # For efficiency, gf_exp[] has size 2*GF_SIZE, so that a simple multiplication of two numbers can be resolved without calling % 255. For more infos on how to generate this extended exponentiation table, see paper: "Fast software implementation of finite field operations", Cheng Huang and Lihao Xu, Washington University in St. Louis, Tech. Rep (2003).
gf_log = _bytearray(256)  # log table (inverse mapping of gf_exp), filled in by init_tables()
field_charac = int(2**8 - 1)  # field characteristic, 2^8 - 1 = 255 by default; redefined by init_tables() for other field sizes


################### GALOIS FIELD ELEMENTS MATHS ###################

def rwh_primes1(n):
    # http://stackoverflow.com/questions/2068372/fastest-way-to-list-all-primes-below-n-in-python/3035188#3035188
    ''' Returns a list of primes < n '''
    sieve = [True] * int(n/2)
    for i in xrange(3,int(n**0.5)+1,2):
        if sieve[int(i/2)]:
            sieve[int((i*i)/2)::i] = [False] * int((n-i*i-1)/(2*i)+1)
    return [2] + [2*i+1 for i in xrange(1,int(n/2)) if sieve[i]]

def find_prime_polys(generator=2, c_exp=8, fast_primes=False, single=False):
    '''Compute the list of prime polynomials for the given generator and galois field characteristic exponent.'''
    # fast_primes will output less results but will be significantly faster.
    # single will output the first prime polynomial found, so if all you want is to just find one prime polynomial to generate the LUT for Reed-Solomon to work, then just use that.
    # A prime polynomial (necessarily irreducible) is necessary to reduce the multiplications in the Galois Field, so as to avoid overflows.
    # Why do we need a "prime polynomial"? Can't we just reduce modulo 255 (for GF(2^8) for example)? Because we need the values to be unique. For example: if the generator (alpha) = 2 and c_exp = 8 (GF(2^8) == GF(256)), then the generated Galois Field (1, alpha, alpha^2, ..., alpha^(p-1)) becomes 1, 2, 4, 8, 16, etc. However, upon reaching 128, the next value will be doubled (ie, next power of 2), which will give 256: we have overflowed above the maximum value of 255, so we must reduce. But if we modulo 255, this will map 256 to 1, then 2, 4, 8, 16, etc., giving us a repeating pattern of numbers. This is very bad, as it's then not anymore a bijection (ie, a non-zero value doesn't have a unique index/logarithm). That's why we can't just modulo 255, but we need another number above 255, which is called the prime polynomial.
    # Why so much hassle? Because we are using precomputed look-up tables for multiplication: instead of multiplying a*b, we precompute alpha^a, alpha^b and alpha^(a+b), so that we can just use our lookup table at alpha^(a+b) and get our result. But just like the original field has 0,1,2,...,p-1 distinct unique values, in our "LUT" field using alpha we must have unique distinct values (we don't care that they are different from the original field as long as they are unique and distinct). That's why we need to avoid duplicated values, and to avoid duplicated values we need an irreducible prime polynomial.
    # Here is implemented a bruteforce approach to find all these prime polynomials, by generating every possible candidate (ie, every integer between field_charac+1 and field_charac*2), then building the whole Galois Field with it, and rejecting the candidate if it duplicates even one value or if it generates a value above field_charac (ie, causes an overflow).
    # Note that this exhaustive-search algorithm is slow if the field is too big (above 12). There are probabilistic approaches, and almost surely prime approaches, but there is no deterministic polynomial time algorithm to find irreducible monic polynomials. More info can be found at: http://people.mpi-inf.mpg.de/~csaha/lectures/lec9.pdf
    # Another faster algorithm may be found at Adleman, Leonard M., and Hendrik W. Lenstra. "Finding irreducible polynomials over finite fields." Proceedings of the eighteenth annual ACM symposium on Theory of computing. ACM, 1986.
# Prepare the finite field characteristic (2^p - 1), this also represent the maximum possible value in this field root_charac = 2 # we're in GF(2) field_charac = int(root_charac**c_exp - 1) field_charac_next = int(root_charac**(c_exp+1) - 1) prim_candidates = [] if fast_primes: prim_candidates = rwh_primes1(field_charac_next) # generate maybe prime polynomials and check later if they really are irreducible prim_candidates = [x for x in prim_candidates if x > field_charac] # filter out too small primes else: prim_candidates = xrange(field_charac+2, field_charac_next, root_charac) # try each possible prime polynomial, but skip even numbers (because divisible by 2 so necessarily not irreducible) # Start of the main loop correct_primes = [] for prim in prim_candidates: # try potential candidates primitive irreducible polys seen = _bytearray(field_charac+1) # memory variable to indicate if a value was already generated in the field (value at index x is set to 1) or not (set to 0 by default) conflict = False # flag to know if there was at least one conflict # Second loop, build the whole Galois Field x = 1 for i in xrange(field_charac): # Compute the next value in the field (ie, the next power of alpha/generator) x = gf_mult_noLUT(x, generator, prim, field_charac+1) # Rejection criterion: if the value overflowed (above field_charac) or is a duplicate of a previously generated power of alpha, then we reject this polynomial (not prime) if x > field_charac or seen[x] == 1: conflict = True break # Else we flag this value as seen (to maybe detect future duplicates), and we continue onto the next power of alpha else: seen[x] = 1 # End of the second loop: if there's no conflict (no overflow nor duplicated value), this is a prime polynomial! 
        if not conflict:
            correct_primes.append(prim)
            if single: return prim

    # Return the list of all prime polynomials
    return correct_primes # you can use the following to print the hexadecimal representation of each prime polynomial: print [hex(i) for i in correct_primes]

def init_tables(prim=0x11d, generator=2, c_exp=8):
    '''Precompute the logarithm and anti-log tables for faster computation later, using the provided primitive polynomial.
    These tables are used for multiplication/division since addition/substraction are simple XOR operations inside GF of characteristic 2.
    The basic idea is quite simple: since b**(log_b(x), log_b(y)) == x * y given any number b (the base or generator of the logarithm), then we can use any number b to precompute logarithm and anti-log (exponentiation) tables to use for multiplying two numbers x and y.
    That's why when we use a different base/generator number, the log and anti-log tables are drastically different, but the resulting computations are the same given any such tables.
    For more infos, see https://en.wikipedia.org/wiki/Finite_field_arithmetic#Implementation_tricks
    '''
    # generator is the generator number (the "increment" that will be used to walk through the field by multiplication, this must be a prime number). This is basically the base of the logarithm/anti-log tables. Also often noted "alpha" in academic books.
    # prim is the primitive/prime (binary) polynomial and must be irreducible (ie, it can't be represented as the product of two smaller polynomials). It's a polynomial in the binary sense: each bit is a coefficient, but in fact it's an integer between field_charac+1 and field_charac*2, and not a list of gf values. The prime polynomial will be used to reduce the overflows back into the range of the Galois Field without duplicating values (all values should be unique). See the function find_prime_polys() and: http://research.swtch.com/field and http://www.pclviewer.com/rs2/galois.html
    # note that the choice of generator or prime polynomial doesn't matter very much: any two finite fields of size p^n have identical structure, even if they give the individual elements different names (ie, the coefficients of the codeword will be different, but the final result will be the same: you can always correct as many errors/erasures with any choice for those parameters). It can however impact sensibly the speed (because some parameters will generate sparser tables).
    # c_exp is the exponent for the field's characteristic GF(2^c_exp)

    # Redefine _bytearray() in case we need to support integers or messages of length > 256
    global _bytearray
    if c_exp <= 8:
        _bytearray = bytearray
    else:
        from array import array
        def _bytearray(obj = 0, encoding = "latin-1"):
            '''Fake bytearray replacement, supporting int values above 255'''
            # always use Latin-1 and not UTF8 because Latin-1 maps the first 256 characters to their bytevalue equivalents. UTF8 may mangle your data (particularly at value 128)
            if isinstance(obj, str): # obj is a string, convert to list of ints
                obj = obj.encode(encoding)
                if isinstance(obj, str): # Py2 str: convert to list of ascii ints
                    obj = [ord(chr) for chr in obj]
                elif isinstance(obj, bytes): # Py3 bytes: characters are bytes, need to convert to int for array.array('i', obj)
                    obj = [int(chr) for chr in obj]
                else:
                    # NOTE(review): on Python 3 `raise(tuple)` raises a TypeError ("exceptions must derive from BaseException"), not a ValueError -- this should be `raise ValueError("Type of object not recognized!")`. Left byte-identical in this doc-only pass; confirm and fix upstream.
                    raise(ValueError, "Type of object not recognized!")
            elif isinstance(obj, int): # compatibility with list preallocation bytearray(int)
                obj = [0] * obj
            elif isinstance(obj, bytes):
                obj = [int(b) for b in obj]
            # Else obj is a list of int, it's ok
            return array("i", obj)

    # Init global tables
    global gf_exp, gf_log, field_charac
    field_charac = int(2**c_exp - 1)
    gf_exp = _bytearray(field_charac * 2) # anti-log (exponential) table. The first two elements will always be [GF256int(1), generator]
    gf_log = _bytearray(field_charac+1) # log table, log[0] is impossible and thus unused
    # For each possible value in the galois field 2^8, we will pre-compute the logarithm and anti-logarithm (exponential) of this value
    # To do that, we generate the Galois Field F(2^p) by building a list starting with the element 0 followed by the (p-1) successive powers of the generator alpha: 1, alpha, alpha^2, ..., alpha^(p-1).
    x = 1
    for i in xrange(field_charac): # we could skip index 255 which is equal to index 0 because of modulo: g^255==g^0 but either way, this does not change the later outputs (ie, the ecc symbols will be the same either way)
        gf_exp[i] = x # compute anti-log for this value and store it in a table
        gf_log[x] = i # compute log at the same time
        x = gf_mult_noLUT(x, generator, prim, field_charac+1)

        # If you use only generator==2 or a power of 2, you can use the following which is faster than gf_mult_noLUT():
        #x <<= 1 # multiply by 2 (change 1 by another number y to multiply by a power of 2^y)
        #if x & 0x100: # similar to x >= 256, but a lot faster (because 0x100 == 256)
            #x ^= prim # substract the primary polynomial to the current value (instead of 255, so that we get a unique set made of coprime numbers), this is the core of the tables generation

    # Optimization: double the size of the anti-log table so that we don't need to mod 255 to stay inside the bounds (because we will mainly use this table for the multiplication of two GF numbers, no more).
    for i in xrange(field_charac, field_charac * 2):
        gf_exp[i] = gf_exp[i - field_charac]

    return [gf_log, gf_exp, field_charac]

def gf_add(x, y):
    # addition in GF(2^p) is a simple XOR (carry-less addition modulo 2)
    return x ^ y

def gf_sub(x, y):
    return x ^ y # in binary galois field, substraction is just the same as addition (since we mod 2)

def gf_neg(x):
    # negation is the identity in GF(2^p): every element is its own additive inverse
    return x

def gf_inverse(x):
    return gf_exp[field_charac - gf_log[x]] # gf_inverse(x) == gf_div(1, x)

def gf_mul(x, y):
    # multiplication via the log/anti-log tables: x*y == alpha^(log(x) + log(y))
    if x == 0 or y == 0:
        return 0
    return gf_exp[(gf_log[x] + gf_log[y]) % field_charac]

def gf_div(x, y):
    if y == 0:
        raise ZeroDivisionError()
    if x == 0:
        return 0
    return gf_exp[(gf_log[x] + field_charac - gf_log[y]) % field_charac]

def gf_pow(x, power):
    # exponentiation via the tables: x^power == alpha^(log(x) * power)
    return gf_exp[(gf_log[x] * power) % field_charac]

def gf_mult_noLUT_slow(x, y, prim=0):
    '''Multiplication in Galois Fields without using a precomputed look-up table (and thus it's slower) by using the standard carry-less multiplication + modular reduction using an irreducible prime polynomial.'''

    ### Define bitwise carry-less operations as inner functions ###
    def cl_mult(x,y):
        '''Bitwise carry-less multiplication on integers'''
        z = 0
        i = 0
        while (y>>i) > 0:
            if y & (1<<i):
                z ^= x<<i
            i += 1
        return z

    def bit_length(n):
        '''Compute the position of the most significant bit (1) of an integer. Equivalent to int.bit_length()'''
        bits = 0
        while n >> bits: bits += 1
        return bits

    def cl_div(dividend, divisor=None):
        '''Bitwise carry-less long division on integers and returns the remainder'''
        # Compute the position of the most significant bit for each integers
        dl1 = bit_length(dividend)
        dl2 = bit_length(divisor)
        # If the dividend is smaller than the divisor, just exit
        if dl1 < dl2:
            return dividend
        # Else, align the most significant 1 of the divisor to the most significant 1 of the dividend (by shifting the divisor)
        for i in xrange(dl1-dl2,-1,-1):
            # Check that the dividend is divisible (useless for the first iteration but important for the next ones)
            if dividend & (1 << i+dl2-1):
                # If divisible, then shift the divisor to align the most significant bits and XOR (carry-less substraction)
                dividend ^= divisor << i
        return dividend

    ### Main GF multiplication routine ###
    # Multiply the gf numbers
    result = cl_mult(x,y)
    # Then do a modular reduction (ie, remainder from the division) with an irreducible primitive polynomial so that it stays inside GF bounds
    if prim > 0: result = cl_div(result, prim)

    return result

def gf_mult_noLUT(x, y, prim=0, field_charac_full=256, carryless=True):
    '''Galois Field integer multiplication using Russian Peasant Multiplication algorithm (faster than the standard multiplication + modular reduction). If prim is 0 and carryless=False, then the function produces the result for a standard integers multiplication (no carry-less arithmetics nor modular reduction).'''
    r = 0
    while y: # while y is above 0
        if y & 1: r = r ^ x if carryless else r + x # y is odd, then add the corresponding x to r (the sum of all x's corresponding to odd y's will give the final product). Note that since we're in GF(2), the addition is in fact an XOR (very important because in GF(2) the multiplication and additions are carry-less, thus it changes the result!).
        y = y >> 1 # equivalent to y // 2
        x = x << 1 # equivalent to x*2
        if prim > 0 and x & field_charac_full: x = x ^ prim # GF modulo: if x >= 256 then apply modular reduction using the primitive polynomial (we just substract, but since the primitive number can be above 256 then we directly XOR).

    return r


################### GALOIS FIELD POLYNOMIALS MATHS ###################

def gf_poly_scale(p, x):
    '''Multiply every coefficient of polynomial p by the scalar x (in GF).'''
    return _bytearray([gf_mul(p[i], x) for i in xrange(len(p))])

def gf_poly_add(p, q):
    '''Add two polynomials in GF(2^p): coefficient-wise XOR, with the shorter polynomial right-aligned (lowest degree terms aligned).'''
    r = _bytearray( max(len(p), len(q)) )
    r[len(r)-len(p):len(r)] = p
    #for i in xrange(len(p)):
        #r[i + len(r) - len(p)] = p[i]
    for i in xrange(len(q)):
        r[i + len(r) - len(q)] ^= q[i]
    return r

def gf_poly_mul(p, q):
    '''Multiply two polynomials, inside Galois Field (but the procedure is generic.
Optimized function by precomputation of log.'''
    # Pre-allocate the result array
    r = _bytearray(len(p) + len(q) - 1)
    # Precompute the logarithm of p
    lp = [gf_log[p[i]] for i in xrange(len(p))]
    # Compute the polynomial multiplication (just like the outer product of two vectors, we multiply each coefficients of p with all coefficients of q)
    for j in xrange(len(q)):
        qj = q[j] # optimization: load the coefficient once
        if qj != 0: # log(0) is undefined, we need to check that
            lq = gf_log[qj] # Optimization: precache the logarithm of the current coefficient of q
            for i in xrange(len(p)):
                if p[i] != 0: # log(0) is undefined, need to check that...
                    r[i + j] ^= gf_exp[lp[i] + lq] # equivalent to: r[i + j] = gf_add(r[i+j], gf_mul(p[i], q[j]))
    return r

def gf_poly_mul_simple(p, q): # simple equivalent way of multiplying two polynomials without precomputation, but thus it's slower
    '''Multiply two polynomials, inside Galois Field'''
    # Pre-allocate the result array
    r = _bytearray(len(p) + len(q) - 1)
    # Compute the polynomial multiplication (just like the outer product of two vectors, we multiply each coefficients of p with all coefficients of q)
    for j in xrange(len(q)):
        for i in xrange(len(p)):
            r[i + j] ^= gf_mul(p[i], q[j]) # equivalent to: r[i + j] = gf_add(r[i+j], gf_mul(p[i], q[j])) -- you can see it's your usual polynomial multiplication
    return r

def gf_poly_neg(poly):
    '''Returns the polynomial with all coefficients negated. In GF(2^p), negation does not change the coefficient, so we return the polynomial as-is.'''
    return poly

def gf_poly_div(dividend, divisor):
    '''Fast polynomial division by using Extended Synthetic Division and optimized for GF(2^p) computations (doesn't work with standard polynomials outside of this galois field).'''
    # CAUTION: this function expects polynomials to follow the opposite convention at decoding: the terms must go from the biggest to lowest degree (while most other functions here expect a list from lowest to biggest degree). eg: 1 + 2x + 5x^2 = [5, 2, 1], NOT [1, 2, 5]

    msg_out = _bytearray(dividend) # Copy the dividend list and pad with 0 where the ecc bytes will be computed
    #normalizer = divisor[0] # precomputing for performance
    for i in xrange(len(dividend) - (len(divisor)-1)):
        #msg_out[i] /= normalizer # for general polynomial division (when polynomials are non-monic), the usual way of using synthetic division is to divide the divisor g(x) with its leading coefficient (call it a). In this implementation, this means we need to compute: coef = msg_out[i] / gen[0]. For more infos, see http://en.wikipedia.org/wiki/Synthetic_division
        coef = msg_out[i] # precaching
        if coef != 0: # log(0) is undefined, so we need to avoid that case explicitly (and it's also a good optimization). In fact if you remove it, it should still work because gf_mul() will take care of the condition. But it's still a good practice to put the condition here.
            for j in xrange(1, len(divisor)): # in synthetic division, we always skip the first coefficient of the divisior, because it's only used to normalize the dividend coefficient
                if divisor[j] != 0: # log(0) is undefined
                    msg_out[i + j] ^= gf_mul(divisor[j], coef) # equivalent to the more mathematically correct (but xoring directly is faster): msg_out[i + j] += -divisor[j] * coef

    # The resulting msg_out contains both the quotient and the remainder, the remainder being the size of the divisor (the remainder has necessarily the same degree as the divisor -- not length but degree == length-1 -- since it's what we couldn't divide from the dividend), so we compute the index where this separation is, and return the quotient and remainder.
    separator = -(len(divisor)-1)
    return msg_out[:separator], msg_out[separator:] # return quotient, remainder.

def gf_poly_square(poly): # pragma: no cover
    '''Linear time implementation of polynomial squaring. For details, see paper: "A fast software implementation for arithmetic operations in GF (2n)".
De Win, E., Bosselaers, A., Vandenberghe, S., De Gersem, P., & Vandewalle, J. (1996, January). In Advances in Cryptology - Asiacrypt'96 (pp. 65-76). Springer Berlin Heidelberg.'''
    length = len(poly)
    out = _bytearray(2*length - 1)
    for i in xrange(length-1):
        p = poly[i]
        k = 2*i
        if p != 0:
            #out[k] = gf_exp[(2*gf_log[p]) % field_charac] # not necessary to modulo (2^r)-1 since gf_exp is duplicated up to 510.
            out[k] = gf_exp[2*gf_log[p]]
        #else: # not necessary since the output is already initialized to an array of 0
            #out[k] = 0
    out[2*length-2] = gf_exp[2*gf_log[poly[length-1]]]
    # NOTE(review): the line below uses plain integer arithmetic (2*poly[1] - 1), not GF table arithmetic like the rest of this routine -- looks suspicious; confirm against the De Win et al. paper before relying on this branch (function is also marked pragma: no cover, ie, untested).
    if out[0] == 0: out[0] = 2*poly[1] - 1
    return out

def gf_poly_eval(poly, x):
    '''Evaluates a polynomial in GF(2^p) given the value for x. This is based on Horner's scheme for maximum efficiency.'''
    y = poly[0]
    for i in xrange(1, len(poly)):
        y = gf_mul(y, x) ^ poly[i]
    return y


################### REED-SOLOMON ENCODING ###################

def rs_generator_poly(nsym, fcr=0, generator=2):
    '''Generate an irreducible generator polynomial (necessary to encode a message into Reed-Solomon)'''
    g = _bytearray([1])
    for i in xrange(nsym):
        g = gf_poly_mul(g, [1, gf_pow(generator, i+fcr)])
    return g

def rs_generator_poly_all(max_nsym, fcr=0, generator=2):
    '''Generate all irreducible generator polynomials up to max_nsym (usually you can use n, the length of the message+ecc).
Very useful to reduce processing time if you want to encode using variable schemes and nsym rates.'''
    g_all = {}
    g_all[0] = g_all[1] = _bytearray([1])
    # NOTE(review): the loop below generates keys 0..max_nsym-1, so g_all[max_nsym] itself is never computed -- callers must pass a max_nsym strictly greater than the largest nsym they will look up (as the RSCodec API does with n); confirm this is intended.
    for nsym in xrange(max_nsym):
        g_all[nsym] = rs_generator_poly(nsym, fcr, generator)
    return g_all

def rs_simple_encode_msg(msg_in, nsym, fcr=0, generator=2, gen=None):
    '''Simple Reed-Solomon encoding (mainly an example for you to understand how it works, because it's slower than the inlined function below)'''
    global field_charac
    if (len(msg_in) + nsym) > field_charac: raise ValueError("Message is too long (%i when max is %i)" % (len(msg_in)+nsym, field_charac))
    if gen is None: gen = rs_generator_poly(nsym, fcr, generator)

    # Pad the message, then divide it by the irreducible generator polynomial
    _, remainder = gf_poly_div(msg_in + _bytearray(len(gen)-1), gen)
    # The remainder is our RS code! Just append it to our original message to get our full codeword (this represents a polynomial of max 256 terms)
    msg_out = msg_in + remainder
    # Return the codeword
    return msg_out

def rs_encode_msg(msg_in, nsym, fcr=0, generator=2, gen=None):
    '''Reed-Solomon main encoding function, using polynomial division (Extended Synthetic Division, the fastest algorithm available to my knowledge), better explained at http://research.swtch.com/field'''
    global field_charac
    if (len(msg_in) + nsym) > field_charac: raise ValueError("Message is too long (%i when max is %i)" % (len(msg_in)+nsym, field_charac))
    if gen is None: gen = rs_generator_poly(nsym, fcr, generator)
    msg_in = _bytearray(msg_in)
    msg_out = _bytearray(msg_in) + _bytearray(len(gen)-1) # init msg_out with the values inside msg_in and pad with len(gen)-1 bytes (which is the number of ecc symbols).
    # Precompute the logarithm of every items in the generator
    lgen = _bytearray([gf_log[gen[j]] for j in xrange(len(gen))])

    # Extended synthetic division main loop
    # Fastest implementation with PyPy (but the Cython version in creedsolo.pyx is about 2x faster)
    for i in xrange(len(msg_in)):
        coef = msg_out[i] # Note that it's msg_out here, not msg_in. Thus, we reuse the updated value at each iteration (this is how Synthetic Division works: instead of storing in a temporary register the intermediate values, we directly commit them to the output).
        # coef = gf_mul(msg_out[i], gf_inverse(gen[0])) # for general polynomial division (when polynomials are non-monic), the usual way of using synthetic division is to divide the divisor g(x) with its leading coefficient (call it a). In this implementation, this means we need to compute: coef = msg_out[i] / gen[0]
        if coef != 0: # log(0) is undefined, so we need to manually check for this case. There's no need to check the divisor here because we know it can't be 0 since we generated it.
            lcoef = gf_log[coef] # precaching

            for j in xrange(1, len(gen)): # in synthetic division, we always skip the first coefficient of the divisior, because it's only used to normalize the dividend coefficient (which is here useless since the divisor, the generator polynomial, is always monic)
                #if gen[j] != 0: # log(0) is undefined so we need to check that, but it slow things down in fact and it's useless in our case (reed-solomon encoding) since we know that all coefficients in the generator are not 0
                msg_out[i + j] ^= gf_exp[lcoef + lgen[j]] # optimization, equivalent to gf_mul(gen[j], msg_out[i]) and we just substract it to msg_out[i+j] (but since we are in GF256, it's equivalent to an addition and to an XOR). In other words, this is simply a "multiply-accumulate operation"

    # Recopy the original message bytes (overwrites the part where the quotient was computed)
    msg_out[:len(msg_in)] = msg_in # equivalent to c = mprime - b, where mprime is msg_in padded with [0]*nsym
    return msg_out


################### REED-SOLOMON DECODING ###################

def rs_calc_syndromes(msg, nsym, fcr=0, generator=2):
    '''Given the received codeword msg and the number of error correcting symbols (nsym), computes the syndromes polynomial.
    Mathematically, it's essentially equivalent to a Fourier Transform (Chien search being the inverse).
    '''
    # Note the "[0] +" : we add a 0 coefficient for the lowest degree (the constant). This effectively shifts the syndrome, and will shift every computations depending on the syndromes (such as the errors locator polynomial, errors evaluator polynomial, etc. but not the errors positions).
    # This is not necessary as anyway syndromes are defined such as there are only non-zero coefficients (the only 0 is the shift of the constant here) and subsequent computations will/must account for the shift by skipping the first iteration (eg, the often seen range(1, n-k+1)), but you can also avoid prepending the 0 coeff and adapt every subsequent computations to start from 0 instead of 1.
    return [0] + [gf_poly_eval(msg, gf_pow(generator, i+fcr)) for i in xrange(nsym)]

def rs_correct_errata(msg_in, synd, err_pos, fcr=0, generator=2): # err_pos is a list of the positions of the errors/erasures/errata
    '''Forney algorithm, computes the values (error magnitude) to correct the input message.'''
    global field_charac
    msg = _bytearray(msg_in)
    # calculate errata locator polynomial to correct both errors and erasures (by combining the errors positions given by the error locator polynomial found by BM with the erasures positions given by caller)
    coef_pos = [len(msg) - 1 - p for p in err_pos] # need to convert the positions to coefficients degrees for the errata locator algo to work (eg: instead of [0, 1, 2] it will become [len(msg)-1, len(msg)-2, len(msg) -3])
    err_loc = rs_find_errata_locator(coef_pos, generator)
    # calculate errata evaluator polynomial (often called Omega or Gamma in academic papers)
    err_eval = rs_find_error_evaluator(synd[::-1], err_loc, len(err_loc)-1)[::-1]

    # Second part of Chien search to get the error location polynomial X from the error positions in err_pos (the roots of the error locator polynomial, ie, where it evaluates to 0)
    X = [] # will store the position of the errors
    for i in xrange(len(coef_pos)):
        l = field_charac - coef_pos[i]
        X.append( gf_pow(generator, -l) )

    # Forney algorithm: compute the magnitudes
    E = _bytearray(len(msg)) # will store the values that need to be corrected (substracted) to the message containing errors. This is sometimes called the error magnitude polynomial.
    Xlength = len(X)
    for i, Xi in enumerate(X):
        Xi_inv = gf_inverse(Xi)

        # Compute the formal derivative of the error locator polynomial (see Blahut, Algebraic codes for data transmission, pp 196-197).
        # the formal derivative of the errata locator is used as the denominator of the Forney Algorithm, which simply says that the ith error value is given by error_evaluator(gf_inverse(Xi)) / error_locator_derivative(gf_inverse(Xi)).
        err_loc_prime_tmp = []
        for j in xrange(Xlength):
            if j != i:
                err_loc_prime_tmp.append( gf_sub(1, gf_mul(Xi_inv, X[j])) )
        # compute the product, which is the denominator of the Forney algorithm (errata locator derivative)
        err_loc_prime = 1
        for coef in err_loc_prime_tmp:
            err_loc_prime = gf_mul(err_loc_prime, coef)
        # equivalent to: err_loc_prime = functools.reduce(gf_mul, err_loc_prime_tmp, 1)

        # Test if we could find the errata locator, else we raise an Exception (because else since we divide y by err_loc_prime to compute the magnitude, we will get a ZeroDivisionError exception otherwise)
        if err_loc_prime == 0:
            raise ReedSolomonError("Decoding failed: Forney algorithm could not properly detect where the errors are located (errata locator prime is 0).")

        # Compute y (evaluation of the errata evaluator polynomial)
        # This is a more faithful translation of the theoretical equation than the old forney method, copied from the included presentation decoding_rs.pdf: Yl = omega(Xl.inverse()) / prod(1 - Xj*Xl.inverse()) for j in len(X) (in the paper it's for j in s, but it's useless when len(X) < s because we compute neutral terms 1 for nothing, and wrong when correcting more than s erasures or erasures+errors since it prevents computing all required terms).
        # Thus here this method works with erasures too, because the product bound on s was removed, which previously prevented computing errors and erasures above the s=(n-k)//2 bound.
y = gf_poly_eval(err_eval[::-1], Xi_inv) # numerator of the Forney algorithm (errata evaluator evaluated) y = gf_mul(gf_pow(Xi, 1-fcr), y) # adjust to fcr parameter # Compute the magnitude magnitude = gf_div(y, err_loc_prime) # magnitude value of the error, calculated by the Forney algorithm (an equation in fact): dividing the errata evaluator with the errata locator derivative gives us the errata magnitude (ie, value to repair) the ith symbol E[err_pos[i]] = magnitude # store the magnitude for this error into the magnitude polynomial # Apply the correction of values to get our message corrected! (note that the ecc bytes also gets corrected!) # (this isn't the Forney algorithm, we just apply the result of decoding here) msg = gf_poly_add(msg, E) # equivalent to Ci = Ri - Ei where Ci is the correct message, Ri the received (senseword) message, and Ei the errata magnitudes (minus is replaced by XOR since it's equivalent in GF(2^p)). So in fact here we substract from the received message the errors magnitude, which logically corrects the value to what it should be. return msg def rs_find_error_locator(synd, nsym, erase_loc=None, erase_count=0): '''Find error/errata locator and evaluator polynomials with Berlekamp-Massey algorithm''' # The idea is that BM will iteratively estimate the error locator polynomial. # To do this, it will compute a Discrepancy term called Delta, which will tell us if the error locator polynomial needs an update or not # (hence why it's called discrepancy: it tells us when we are getting off board from the correct value). # Init the polynomials if erase_loc: # if the erasure locator polynomial is supplied, we init with its value, so that we include erasures in the final locator polynomial err_loc = _bytearray(erase_loc) old_loc = _bytearray(erase_loc) else: err_loc = _bytearray([1]) # This is the main variable we want to fill, also called Sigma in other notations or more formally the errors/errata locator polynomial. 
        old_loc = _bytearray([1]) # BM is an iterative algorithm, and we need the errata locator polynomial of the previous iteration in order to update other necessary variables.
    #L = 0 # update flag variable, not needed here because we use an alternative equivalent way of checking if update is needed (but using the flag could potentially be faster depending on if using length(list) is taking linear time in your language, here in Python it's constant so it's as fast.

    # Fix the syndrome shifting: when computing the syndrome, some implementations may prepend a 0 coefficient for the lowest degree term (the constant). This is a case of syndrome shifting, thus the syndrome will be bigger than the number of ecc symbols. If that's the case, then we need to account for the syndrome shifting when we use the syndrome such as inside BM, by skipping those prepended coefficients.
    # Another way to detect the shifting is to detect the 0 coefficients: by definition, a syndrome does not contain any 0 coefficient (except if there are no errors/erasures, in this case they are all 0). This however doesn't work with the modified Forney syndrome, which set to 0 the coefficients corresponding to erasures, leaving only the coefficients corresponding to errors.
    synd_shift = 0
    if len(synd) > nsym:
        synd_shift = len(synd) - nsym

    for i in xrange(nsym-erase_count): # generally: nsym-erase_count == len(synd), except when you input a partial erase_loc and using the full syndrome instead of the Forney syndrome, in which case nsym-erase_count is more correct (len(synd) will fail badly with IndexError).
        if erase_loc: # if an erasures locator polynomial was provided to init the errors locator polynomial, then we must skip the FIRST erase_count iterations (not the last iterations, this is very important!)
            K = erase_count+i+synd_shift
        else: # if erasures locator is not provided, then either there's no erasures to account or we use the Forney syndromes, so we don't need to use erase_count nor erase_loc (the erasures have been trimmed out of the Forney syndromes).
            K = i+synd_shift

        # Compute the discrepancy Delta
        # Here is the close-to-the-books operation to compute the discrepancy Delta: it's a simple polynomial multiplication of error locator with the syndromes, and then we get the Kth element.
        #delta = gf_poly_mul(err_loc[::-1], synd)[K] # theoretically it should be gf_poly_add(synd[::-1], [1])[::-1] instead of just synd, but it seems it's not absolutely necessary to correctly decode.
        # But this can be optimized: since we only need the Kth element, we don't need to compute the polynomial multiplication for any other element but the Kth. Thus to optimize, we compute the polymul only at the item we need, skipping the rest (avoiding a nested loop, thus we are linear time instead of quadratic). This optimization is described in several figures of the book "Algebraic codes for data transmission", Blahut, Richard E., 2003, Cambridge university press.
        delta = synd[K]
        for j in xrange(1, len(err_loc)):
            delta ^= gf_mul(err_loc[-(j+1)], synd[K - j]) # delta is also called discrepancy. Here we do a partial polynomial multiplication (ie, we compute the polynomial multiplication only for the term of degree K). Should be equivalent to brownanrs.polynomial.mul_at().
        #print "delta", K, delta, list(gf_poly_mul(err_loc[::-1], synd)) # debugline

        # Shift polynomials to compute the next degree
        old_loc = old_loc + _bytearray([0])

        # Iteratively estimate the errata locator and evaluator polynomials
        if delta != 0: # Update only if there's a discrepancy
            if len(old_loc) > len(err_loc): # Rule B (rule A is implicitly defined because rule A just says that we skip any modification for this iteration)
                #if 2*L <= K+erase_count: # equivalent to len(old_loc) > len(err_loc), as long as L is correctly computed
                # Computing errata locator polynomial Sigma
                new_loc = gf_poly_scale(old_loc, delta)
                old_loc = gf_poly_scale(err_loc, gf_inverse(delta)) # effectively we are doing err_loc * 1/delta = err_loc // delta
                err_loc = new_loc
                # Update the update flag
                #L = K - L # the update flag L is tricky: in Blahut's schema, it's mandatory to use `L = K - L - erase_count` (and indeed in a previous draft of this function, if you forgot to do `- erase_count` it would lead to correcting only 2*(errors+erasures) <= (n-k) instead of 2*errors+erasures <= (n-k)), but in this latest draft, this will lead to a wrong decoding in some cases where it should correctly decode! Thus you should try with and without `- erase_count` to update L on your own implementation and see which one works OK without producing wrong decoding failures.
# Update with the discrepancy err_loc = gf_poly_add(err_loc, gf_poly_scale(old_loc, delta)) # Check if the result is correct, that there's not too many errors to correct err_loc = list(itertools.dropwhile(lambda x: x == 0, err_loc)) # drop leading 0s, else errs will not be of the correct size errs = len(err_loc) - 1 if (errs-erase_count) * 2 + erase_count > nsym: raise ReedSolomonError("Too many errors to correct") return err_loc def rs_find_errata_locator(e_pos, generator=2): '''Compute the erasures/errors/errata locator polynomial from the erasures/errors/errata positions (the positions must be relative to the x coefficient, eg: "hello worldxxxxxxxxx" is tampered to "h_ll_ worldxxxxxxxxx" with xxxxxxxxx being the ecc of length n-k=9, here the string positions are [1, 4], but the coefficients are reversed since the ecc characters are placed as the first coefficients of the polynomial, thus the coefficients of the erased characters are n-1 - [1, 4] = [18, 15] = erasures_loc to be specified as an argument.''' # See: http://ocw.usu.edu/Electrical_and_Computer_Engineering/Error_Control_Coding/lecture7.pdf and Blahut, Richard E. "Transform techniques for error control codes." IBM Journal of Research and development 23.3 (1979): 299-315. http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.92.600&rep=rep1&type=pdf and also a MatLab implementation here: http://www.mathworks.com/matlabcentral/fileexchange/23567-reed-solomon-errors-and-erasures-decoder/content//RS_E_E_DEC.m e_loc = [1] # just to init because we will multiply, so it must be 1 so that the multiplication starts correctly without nulling any term # erasures_loc is very simple to compute: erasures_loc = prod(1 - x*alpha**i) for i in erasures_pos and where alpha is the alpha chosen to evaluate polynomials (here in this library it's gf(3)). To generate c*x where c is a constant, we simply generate a Polynomial([c, 0]) where 0 is the constant and c is positionned to be the coefficient for x^1. 
for i in e_pos: e_loc = gf_poly_mul( e_loc, gf_poly_add(_bytearray([1]), [gf_pow(generator, i), 0]) ) return e_loc def rs_find_error_evaluator(synd, err_loc, nsym): '''Compute the error (or erasures if you supply sigma=erasures locator polynomial, or errata) evaluator polynomial Omega from the syndrome and the error/erasures/errata locator Sigma. Omega is already computed at the same time as Sigma inside the Berlekamp-Massey implemented above, but in case you modify Sigma, you can recompute Omega afterwards using this method, or just ensure that Omega computed by BM is correct given Sigma.''' # Omega(x) = [ Synd(x) * Error_loc(x) ] mod x^(n-k+1) _, remainder = gf_poly_div( gf_poly_mul(synd, err_loc), ([1] + [0]*(nsym+1)) ) # first multiply syndromes * errata_locator, then do a polynomial division to truncate the polynomial to the required length # Faster way that is equivalent #remainder = gf_poly_mul(synd, err_loc) # first multiply the syndromes with the errata locator polynomial #remainder = remainder[len(remainder)-(nsym+1):] # then divide by a polynomial of the length we want, which is equivalent to slicing the list (which represents the polynomial) return remainder def rs_find_errors(err_loc, nmess, generator=2): '''Find the roots (ie, where evaluation = zero) of error polynomial by bruteforce trial, this is a sort of Chien's search (but less efficient, Chien's search is a way to evaluate the polynomial such that each evaluation only takes constant time).''' # nmess = length of whole codeword (message + ecc symbols) errs = len(err_loc) - 1 err_pos = [] for i in xrange(nmess): # normally we should try all 2^8 possible values, but here we optimize to just check the interesting symbols if gf_poly_eval(err_loc, gf_pow(generator, i)) == 0: # It's a 0? 
Bingo, it's a root of the error locator polynomial, in other terms this is the location of an error err_pos.append(nmess - 1 - i) # Sanity check: the number of errors/errata positions found should be exactly the same as the length of the errata locator polynomial if len(err_pos) != errs: # TODO: to decode messages+ecc with length n > 255, we may try to use a bruteforce approach: the correct positions ARE in the final array j, but the problem is because we are above the Galois Field's range, there is a wraparound so that for example if j should be [0, 1, 2, 3], we will also get [255, 256, 257, 258] (because 258 % 255 == 3, same for the other values), so we can't discriminate. The issue is that fixing any errs_nb errors among those will always give a correct output message (in the sense that the syndrome will be all 0), so we may not even be able to check if that's correct or not, so I'm not sure the bruteforce approach may even be possible. raise ReedSolomonError("Too many (or few) errors found by Chien Search for the errata locator polynomial!") return err_pos def rs_forney_syndromes(synd, pos, nmess, generator=2): # Compute Forney syndromes, which computes a modified syndromes to compute only errors (erasures are trimmed out). Do not confuse this with Forney algorithm, which allows to correct the message based on the location of errors. erase_pos_reversed = [nmess-1-p for p in pos] # prepare the coefficient degree positions (instead of the erasures positions) # Optimized method, all operations are inlined fsynd = list(synd[1:]) # make a copy and trim the first coefficient which is always 0 by definition for i in xrange(len(pos)): x = gf_pow(generator, erase_pos_reversed[i]) for j in xrange(len(fsynd) - 1): fsynd[j] = gf_mul(fsynd[j], x) ^ fsynd[j + 1] #fsynd.pop() # useless? 
it doesn't change the results of computations to leave it there # Theoretical way of computing the modified Forney syndromes: fsynd = (erase_loc * synd) % x^(n-k) -- although the trimming by using x^(n-k) is maybe not necessary as many books do not even mention it (and it works without trimming) # See Shao, H. M., Truong, T. K., Deutsch, L. J., & Reed, I. S. (1986, April). A single chip VLSI Reed-Solomon decoder. In Acoustics, Speech, and Signal Processing, IEEE International Conference on ICASSP'86. (Vol. 11, pp. 2151-2154). IEEE.ISO 690 #erase_loc = rs_find_errata_locator(erase_pos_reversed, generator=generator) # computing the erasures locator polynomial #fsynd = gf_poly_mul(erase_loc[::-1], synd[1:]) # then multiply with the syndrome to get the untrimmed forney syndrome #fsynd = fsynd[len(pos):] # then trim the first erase_pos coefficients which are useless. Seems to be not necessary, but this reduces the computation time later in BM (thus it's an optimization). return fsynd def rs_correct_msg(msg_in, nsym, fcr=0, generator=2, erase_pos=None, only_erasures=False): '''Reed-Solomon main decoding function''' global field_charac if len(msg_in) > field_charac: # Note that it is in fact possible to encode/decode messages that are longer than field_charac, but because this will be above the field, this will generate more error positions during Chien Search than it should, because this will generate duplicate values, which should normally be prevented thank's to the prime polynomial reduction (eg, because it can't discriminate between error at position 1 or 256, both being exactly equal under galois field 2^8). So it's really not advised to do it, but it's possible (but then you're not guaranted to be able to correct any error/erasure on symbols with a position above the length of field_charac -- if you really need a bigger message without chunking, then you should better enlarge c_exp so that you get a bigger field). 
raise ValueError("Message is too long (%i when max is %i)" % (len(msg_in), field_charac)) msg_out = _bytearray(msg_in) # copy of message # erasures: set them to null bytes for easier decoding (but this is not necessary, they will be corrected anyway, but debugging will be easier with null bytes because the error locator polynomial values will only depend on the errors locations, not their values) if erase_pos is None: erase_pos = [] else: for e_pos in erase_pos: msg_out[e_pos] = 0 # check if there are too many erasures to correct (beyond the Singleton bound) if len(erase_pos) > nsym: raise ReedSolomonError("Too many erasures to correct") # prepare the syndrome polynomial using only errors (ie: errors = characters that were either replaced by null byte or changed to another character, but we don't know their positions) synd = rs_calc_syndromes(msg_out, nsym, fcr, generator) # check if there's any error/erasure in the input codeword. If not (all syndromes coefficients are 0), then just return the codeword as-is. 
if max(synd) == 0: return msg_out[:-nsym], msg_out[-nsym:], erase_pos # no errors # Find errors locations if only_erasures: err_pos = [] else: # compute the Forney syndromes, which hide the erasures from the original syndrome (so that BM will just have to deal with errors, not erasures) fsynd = rs_forney_syndromes(synd, erase_pos, len(msg_out), generator) # compute the error locator polynomial using Berlekamp-Massey err_loc = rs_find_error_locator(fsynd, nsym, erase_count=len(erase_pos)) # locate the message errors using Chien search (or bruteforce search) err_pos = rs_find_errors(err_loc[::-1], len(msg_out), generator) if err_pos is None: raise ReedSolomonError("Could not locate error") # Find errors values and apply them to correct the message # compute errata evaluator and errata magnitude polynomials, then correct errors and erasures msg_out = rs_correct_errata(msg_out, synd, erase_pos + err_pos, fcr, generator) # note that we here use the original syndrome, not the forney syndrome (because we will correct both errors and erasures, so we need the full syndrome) # check if the final message is fully repaired synd = rs_calc_syndromes(msg_out, nsym, fcr, generator) if max(synd) > 0: raise ReedSolomonError("Could not correct message") # return the successfully decoded message return msg_out[:-nsym], msg_out[-nsym:], erase_pos + err_pos # also return the corrected ecc block so that the user can check(), and the position of errors to allow for adaptive bitrate algorithm to check how the number of errors vary def rs_correct_msg_nofsynd(msg_in, nsym, fcr=0, generator=2, erase_pos=None, only_erasures=False): '''Reed-Solomon main decoding function, without using the modified Forney syndromes''' global field_charac if len(msg_in) > field_charac: raise ValueError("Message is too long (%i when max is %i)" % (len(msg_in), field_charac)) msg_out = _bytearray(msg_in) # copy of message # erasures: set them to null bytes for easier decoding (but this is not necessary, they will 
be corrected anyway, but debugging will be easier with null bytes because the error locator polynomial values will only depend on the errors locations, not their values) if erase_pos is None: erase_pos = [] else: for e_pos in erase_pos: msg_out[e_pos] = 0 # check if there are too many erasures if len(erase_pos) > nsym: raise ReedSolomonError("Too many erasures to correct") # prepare the syndrome polynomial using only errors (ie: errors = characters that were either replaced by null byte or changed to another character, but we don't know their positions) synd = rs_calc_syndromes(msg_out, nsym, fcr, generator) # check if there's any error/erasure in the input codeword. If not (all syndromes coefficients are 0), then just return the codeword as-is. if max(synd) == 0: return msg_out[:-nsym], msg_out[-nsym:], [] # no errors # prepare erasures locator and evaluator polynomials erase_loc = None #erase_eval = None erase_count = 0 if erase_pos: erase_count = len(erase_pos) erase_pos_reversed = [len(msg_out)-1-eras for eras in erase_pos] erase_loc = rs_find_errata_locator(erase_pos_reversed, generator=generator) #erase_eval = rs_find_error_evaluator(synd[::-1], erase_loc, len(erase_loc)-1) # prepare errors/errata locator polynomial if only_erasures: err_loc = erase_loc[::-1] #err_eval = erase_eval[::-1] else: err_loc = rs_find_error_locator(synd, nsym, erase_loc=erase_loc, erase_count=erase_count) err_loc = err_loc[::-1] #err_eval = rs_find_error_evaluator(synd[::-1], err_loc[::-1], len(err_loc)-1)[::-1] # find error/errata evaluator polynomial (not really necessary since we already compute it at the same time as the error locator poly in BM) # locate the message errors err_pos = rs_find_errors(err_loc, len(msg_out), generator) # find the roots of the errata locator polynomial (ie: the positions of the errors/errata) if err_pos is None: raise ReedSolomonError("Could not locate error") # compute errata evaluator and errata magnitude polynomials, then correct errors and 
erasures msg_out = rs_correct_errata(msg_out, synd, err_pos, fcr=fcr, generator=generator) # check if the final message is fully repaired synd = rs_calc_syndromes(msg_out, nsym, fcr, generator) if max(synd) > 0: raise ReedSolomonError("Could not correct message") # return the successfully decoded message return msg_out[:-nsym], msg_out[-nsym:], erase_pos + err_pos # also return the corrected ecc block so that the user can check(), and the position of errors to allow for adaptive bitrate algorithm to check how the number of errors vary def rs_check(msg, nsym, fcr=0, generator=2): '''Returns true if the message + ecc has no error of false otherwise (may not always catch a wrong decoding or a wrong message, particularly if there are too many errors -- above the Singleton bound --, but it usually does)''' return ( max(rs_calc_syndromes(msg, nsym, fcr, generator)) == 0 ) #=================================================================================================== # API #=================================================================================================== class RSCodec(object): ''' A Reed Solomon encoder/decoder. After initializing the object, use ``encode`` to encode a (byte)string to include the RS correction code, and pass such an encoded (byte)string to ``decode`` to extract the original message (if the number of errors allows for correct decoding). The ``nsym`` argument is the length of the correction code, and it determines the number of error bytes (if I understand this correctly, half of ``nsym`` is correctable) ''' ''' Modifications by rotorgit 2/3/2015: Added support for US FAA ADSB UAT RS FEC, by allowing user to specify different primitive polynomial and non-zero first consecutive root (fcr). 
For UAT/ADSB use, set fcr=120 and prim=0x187 when instantiating the class; leaving them out will default for previous values (0 and 0x11d) ''' def __init__(self, nsym=10, nsize=255, fcr=0, prim=0x11d, generator=2, c_exp=8, single_gen=True): '''Initialize the Reed-Solomon codec. Note that different parameters change the internal values (the ecc symbols, look-up table values, etc) but not the output result (whether your message can be repaired or not, there is no influence of the parameters). nsym : number of ecc symbols (you can repair nsym/2 errors and nsym erasures. nsize : maximum length of each chunk. If higher than 255, will use a higher Galois Field, but the algorithm's complexity and computational cost will raise quadratically... single_gen : if you want to use the same RSCodec for different nsym parameters (but nsize the same), then set single_gen = False. ''' # Auto-setup if galois field or message length is different than default (exponent 8) if nsize > 255 and c_exp <= 8: # nsize (chunksize) is larger than the galois field, we resize the galois field # Get the next closest power of two c_exp = int(math.log(2 ** (math.floor(math.log(nsize) / math.log(2)) + 1), 2)) if c_exp != 8 and prim == 0x11d: # prim was not correctly defined, find one prim = find_prime_polys(generator=generator, c_exp=c_exp, fast_primes=True, single=True) if nsize == 255: # resize chunk size if not set nsize = int(2**c_exp - 1) if nsym >= nsize: raise ValueError('ECC symbols must be strictly less than the total message length (nsym < nsize).') # Memorize variables self.nsym = nsym # number of ecc symbols (ie, the repairing rate will be r=(nsym/2)/nsize, so for example if you have nsym=5 and nsize=10, you have a rate r=0.25, so you can correct up to 0.25% errors (or exactly 2 symbols out of 10), and 0.5% erasures (5 symbols out of 10). 
self.nsize = nsize # maximum length of one chunk (ie, message + ecc symbols after encoding, for the message alone it's nsize-nsym) self.fcr = fcr # first consecutive root, can be any value between 0 and (2**c_exp)-1 self.prim = prim # prime irreducible polynomial, use find_prime_polys() to find a prime poly self.generator = generator # generator integer, must be prime self.c_exp = c_exp # exponent of the field's characteristic. This both defines the maximum value per symbol and the maximum length of one chunk. By default it's GF(2^8), do not change if you're not sure what it means. # Initialize the look-up tables for easy and quick multiplication/division self.gf_log, self.gf_exp, self.field_charac = init_tables(prim, generator, c_exp) # Precompute the generator polynomials if single_gen: self.gen = {} self.gen[nsym] = rs_generator_poly(nsym, fcr=fcr, generator=generator) else: self.gen = rs_generator_poly_all(nsize, fcr=fcr, generator=generator) def chunk(self, data, chunksize): '''Split a long message into chunks''' for i in xrange(0, len(data), chunksize): # Split the long message in a chunk chunk = data[i:i+chunksize] yield chunk def encode(self, data, nsym=None): '''Encode a message (ie, add the ecc symbols) using Reed-Solomon, whatever the length of the message because we use chunking''' # Restore precomputed tables (allow to use multiple RSCodec in one script) global gf_log, gf_exp, field_charac gf_log, gf_exp, field_charac = self.gf_log, self.gf_exp, self.field_charac if not nsym: nsym = self.nsym if isinstance(data, str): data = _bytearray(data) enc = _bytearray() for chunk in self.chunk(data, self.nsize - self.nsym): enc.extend(rs_encode_msg(chunk, self.nsym, fcr=self.fcr, generator=self.generator, gen=self.gen[nsym])) return enc def decode(self, data, nsym=None, erase_pos=None, only_erasures=False): '''Repair a message, whatever its size is, by using chunking. May return a wrong result if number of errors > nsym. 
Note that it returns a couple of vars: the repaired messages, and the repaired messages+ecc (useful for checking). Usage: rmes, rmesecc = RSCodec.decode(data). ''' # erase_pos is a list of positions where you know (or greatly suspect at least) there is an erasure (ie, wrong character but you know it's at this position). Just input the list of all positions you know there are errors, and this method will automatically split the erasures positions to attach to the corresponding data chunk. # Restore precomputed tables (allow to use multiple RSCodec in one script) global gf_log, gf_exp, field_charac gf_log, gf_exp, field_charac = self.gf_log, self.gf_exp, self.field_charac if not nsym: nsym = self.nsym if isinstance(data, str): data = _bytearray(data) dec = _bytearray() dec_full = _bytearray() errata_pos_all = _bytearray() for chunk in self.chunk(data, self.nsize): # Extract the erasures for this chunk e_pos = [] if erase_pos: # First extract the erasures for this chunk (all erasures below the maximum chunk length) e_pos = [x for x in erase_pos if x < self.nsize] # Then remove the extract erasures from the big list and also decrement all subsequent positions values by nsize (the current chunk's size) so as to prepare the correct alignment for the next iteration erase_pos = [x - self.nsize for x in erase_pos if x >= self.nsize] # Decode/repair this chunk! rmes, recc, errata_pos = rs_correct_msg(chunk, nsym, fcr=self.fcr, generator=self.generator, erase_pos=e_pos, only_erasures=only_erasures) dec.extend(rmes) dec_full.extend(rmes+recc) errata_pos_all.extend(errata_pos) return dec, dec_full, errata_pos_all def check(self, data, nsym=None): '''Check if a message+ecc stream is not corrupted (or fully repaired). 
Note: may return a wrong result if number of errors > nsym.''' if not nsym: nsym = self.nsym if isinstance(data, str): data = _bytearray(data) check = [] for chunk in self.chunk(data, self.nsize): check.append(rs_check(chunk, nsym, fcr=self.fcr, generator=self.generator)) return check def maxerrata(self, errors=None, erasures=None, verbose=False): '''Return the Singleton Bound for the current codec, which is the max number of errata (errors and erasures) that the codec can decode/correct. Beyond the Singleton Bound (too many errors/erasures), the algorithm will try to raise an exception, but it may also not detect any problem with the message and return 0 errors. Hence why you should use checksums if your goal is to detect errors (as opposed to correcting them), as checksums have no bounds on the number of errors, the only limitation being the probability of collisions. By default, return a tuple wth the maximum number of errors (2nd output) OR erasures (2nd output) that can be corrected. If errors or erasures (not both) is specified as argument, computes the remaining **simultaneous** correction capacity (eg, if errors specified, compute the number of erasures that can be simultaneously corrected). 
Set verbose to True to get print a report.''' nsym = self.nsym # Compute the maximum number of errors OR erasures maxerrors = int(nsym/2) # always floor the number, we can't correct half a symbol, it's all or nothing maxerasures = nsym # Compute the maximum of simultaneous errors AND erasures if erasures is not None and erasures >= 0: # We know the erasures count, we want to know how many errors we can correct simultaneously if erasures > maxerasures: raise ReedSolomonError("Specified number of errors or erasures exceeding the Singleton Bound!") maxerrors = int((nsym-erasures)/2) if verbose: print('This codec can correct up to %i errors and %i erasures simultaneously' % (maxerrors, erasures)) # Return a tuple with the maximum number of simultaneously corrected errors and erasures return maxerrors, erasures if errors is not None and errors >= 0: # We know the errors count, we want to know how many erasures we can correct simultaneously if errors > maxerrors: raise ReedSolomonError("Specified number of errors or erasures exceeding the Singleton Bound!") maxerasures = int(nsym-(errors*2)) if verbose: print('This codec can correct up to %i errors and %i erasures simultaneously' % (errors, maxerasures)) # Return a tuple with the maximum number of simultaneously corrected errors and erasures return errors, maxerasures # Return a tuple with the maximum number of errors and erasures (independently corrected) if verbose: print('This codec can correct up to %i errors and %i erasures independently' % (maxerrors, maxerasures)) return maxerrors, maxerasures reedsolomon-1.7.0/requirements.txt000066400000000000000000000000001436142644400173270ustar00rootroot00000000000000reedsolomon-1.7.0/setup.py000066400000000000000000000112541436142644400155720ustar00rootroot00000000000000#!/usr/bin/env python # Authors: # Tomer Filiba # Stephen Larroque # Rotorgit # Angus Gratton # # Licensed under the Public Domain or MIT License at your convenience. 
# See: # https://docs.python.org/2/distutils/setupscript.html # http://docs.cython.org/src/reference/compilation.html # https://docs.python.org/2/extending/building.html # http://docs.cython.org/src/userguide/source_files_and_compilation.html try: from setuptools import setup from setuptools import Extension except ImportError: from distutils.core import setup from distutils.extension import Extension import os, sys if '--cythonize' in sys.argv: # Remove the special argument, otherwise setuptools will raise an exception sys.argv.remove('--cythonize') try: # If Cython is installed, transpile the optimized Cython module to C and compile as a .pyd to be distributed from Cython.Build import cythonize, build_ext # this acts as a check whether Cython is installed, otherwise this will fail print("Cython is installed, building creedsolo module") extensions = cythonize([ Extension('creedsolo', ['creedsolo.pyx']) ], force=True) # this may fail hard if Cython is installed but there is no C compiler for current Python version, and we have no way to know. 
Alternatively, we could supply exclude_failures=True , but then for those who really want the cythonized compiled extension, it would be much harder to debug cmdclass = {'build_ext': build_ext} # avoids the need to call python setup.py build_ext --inplace except ImportError: # Else Cython is not installed (or user explicitly wanted to skip) #if '--native-compile' in sys.argv: # Compile pyd from pre-transpiled creedsolo.c # This is recommended by Cython, but in practice it's too difficult to maintain https://cython.readthedocs.io/en/latest/src/userguide/source_files_and_compilation.html#distributing-cython-modules #print("Cython is not installed, but the creedsolo module will be built from the pre-transpiled creedsolo.c file using the locally installed C compiler") #sys.argv.remove('--native-compile') #extensions = [ Extension('creedsolo', ['creedsolo.c']) ] #else: # Else run in pure python mode (no compilation) print("Cython is not installed or is explicitly skipped using --nocython, no creedsolo module will be built") extensions = None cmdclass = {} else: extensions = None cmdclass = {} setup(name = "reedsolo", version = "1.7.0", description = "Pure-Python Reed Solomon encoder/decoder", author = "Tomer Filiba", author_email = "tomerfiliba@gmail.com", maintainer = "Stephen Karl Larroque", maintainer_email = "lrq3000@gmail.com", license = "Public Domain", # the license field can only store one license, use classifiers below to declare multiple licenses https://github.com/pypi/warehouse/issues/8960 url = "https://github.com/tomerfiliba/reedsolomon", py_modules = ["reedsolo"], platforms = ["any"], long_description = open("README.rst", "r").read(), long_description_content_type = 'text/x-rst', license_files = ('LICENSE',), # force include LICENSE file, requires setuptools >= 42.0.0. Note that this field only support one line text, do not input the full license content here. 
The full LICENSE file is currently forcefully included via MANIFEST.in, but other methods exist, see: https://stackoverflow.com/a/66443941/1121352 classifiers = [ "Development Status :: 6 - Mature", "Intended Audience :: Developers", "Intended Audience :: Information Technology", "License :: OSI Approved :: The Unlicense (Unlicense)", # Unlicense OR MIT-0 at the user preference "License :: OSI Approved :: MIT No Attribution License (MIT-0)", 'Operating System :: Microsoft :: Windows', 'Operating System :: MacOS :: MacOS X', 'Operating System :: POSIX :: Linux', "Programming Language :: Python :: 2.7", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", "Programming Language :: Python :: Implementation :: PyPy", "Programming Language :: Cython", "Topic :: Communications", "Topic :: Scientific/Engineering :: Mathematics", "Topic :: System :: Archiving :: Backup", "Topic :: System :: Recovery Tools", ], ext_modules = extensions, cmdclass = cmdclass, ) reedsolomon-1.7.0/tests/000077500000000000000000000000001436142644400152175ustar00rootroot00000000000000reedsolomon-1.7.0/tests/test_creedsolo.py000066400000000000000000000675071436142644400206260ustar00rootroot00000000000000# To use this script easily (starting with Python 2.7), just cd to the parent folder and type the following command: # python -m unittest discover tests from __future__ import print_function import unittest import sys from random import sample import itertools try: from itertools import izip except ImportError: #python3.x izip = zip try: ModuleNotFoundError except: # python2.x does not have ModuleNotFoundError ModuleNotFoundError = ImportError # Test whether we use Python 2 or 3 if sys.version_info >= (3, 0): PY3 = True else: PY3 = False # Test if we are running inside 
Pypy interpreter (incompatible with Cython) inpypy = True if PY3: # If Python 3, we can't just import __pypy__ to check if there is an ImportError, because it raises a ModuleNotFoundError on Travis CI that is never caught, dunno why # So we test manually without raising any exception import platform, os inpypy = platform.python_implementation().lower().startswith("pypy") # BUT if we are running inside Travis, we skip in all cases, because they put very hard limits on cython so that it's very hard for complex code to run properly # see also https://github.com/cython/cython/issues/2247 # TODO: fix me, maybe we can configure travis to set a higher limit for recursion? Or there is really something wrong with the `from creedsolo import *`, but it works on local computer, so it seems to be specific to Travis is_travis = 'TRAVIS' in os.environ if is_travis: inpypy = True else: # Python 2 way to test try: import __pypy__ except (ImportError, ModuleNotFoundError, Exception) as exc: # TODO: on Travis, ModuleNotFoundError cannot be caught, dunno why, so we catch all Exception and consider this means pypy is not running inpypy = False cython_available = True try: # If Cython is installed, try to import it to see if it works from Cython.Build import cythonize except ImportError: # Otherwise, we skip this whole test cython_available = False # Skip this whole module test if running under PyPy (incompatible with Cython) if inpypy or not cython_available: # Empty test unit to show the reason of skipping class TestMissingDependency(unittest.TestCase): @unittest.skip('Missing dependency - Cython missing or PyPy present (Cython is incompatible with PyPy)') def test_fail(): pass # Else we're not under PyPy, we can run the test else: __pypy__ = None from creedsolo import * try: bytearray except NameError: from creedsolo import bytearray try: # compatibility with Python 3+ xrange except NameError: xrange = range try: # python2.x from StringIO import StringIO except ImportError: from io 
import StringIO class cTestReedSolomon(unittest.TestCase): def test_simple(self): rs = RSCodec(10) msg = bytearray("hello world " * 10, "latin1") enc = rs.encode(msg) dec, dec_enc, errata_pos = rs.decode(enc) self.assertEqual(dec, msg) def test_correction(self): rs = RSCodec(10) msg = bytearray("hello world " * 10, "latin1") enc = rs.encode(msg) dec, _, _ = rs.decode(enc) self.assertEqual(dec, msg) for i in [27, -3, -9, 7, 0]: enc[i] = 99 self.assertEqual(rs.decode(enc)[0], msg) enc[82] = 99 self.assertRaises(ReedSolomonError, rs.decode, enc) def test_long(self): rs = RSCodec(10) msg = bytearray("a" * 10000, "latin1") enc = rs.encode(msg) dec, _, _ = rs.decode(enc) self.assertEqual(dec, msg) enc[177] = 99 enc[2212] = 88 dec2, _, _ = rs.decode(enc) self.assertEqual(dec2, msg) def test_prim_fcr_basic(self): nn = 30 kk = 18 tt = nn - kk rs = RSCodec(tt, fcr=120, prim=0x187) hexencmsg = '00faa123555555c000000354064432c02800fe97c434e1ff5365' \ 'cf8fafe4' strf = str if sys.version_info[0] >= 3 else unicode encmsg = bytearray.fromhex(strf(hexencmsg)) decmsg = encmsg[:kk] tem = rs.encode(decmsg) self.assertEqual(encmsg, tem, msg="encoded does not match expected") tdm, _, _ = rs.decode(tem) self.assertEqual(tdm, decmsg, msg="decoded does not match original") tem1 = bytearray(tem) # clone a copy # encoding and decoding intact message seem OK, so test errors numerrs = tt >> 1 # inject tt/2 errors (expected to recover fully) for i in sample(range(nn), numerrs): # inject errors in random places tem1[i] ^= 0xff # flip all 8 bits tdm, _, _ = rs.decode(tem1) self.assertEqual(tdm, decmsg, msg="decoded with errors does not match original") tem1 = bytearray(tem) # clone another copy numerrs += 1 # inject tt/2 + 1 errors (expected to fail and detect it) for i in sample(range(nn), numerrs): # inject errors in random places tem1[i] ^= 0xff # flip all 8 bits # if this fails, it means excessive errors not detected self.assertRaises(ReedSolomonError, rs.decode, tem1) def 
test_prim_fcr_long(self): nn = 48 kk = 34 tt = nn - kk rs = RSCodec(tt, fcr=120, prim=0x187) hexencmsg = '08faa123555555c000000354064432c0280e1b4d090cfc04887400' \ '000003500000000e1985ff9c6b33066ca9f43d12e8' strf = str if sys.version_info[0] >= 3 else unicode encmsg = bytearray.fromhex(strf(hexencmsg)) decmsg = encmsg[:kk] tem = rs.encode(decmsg) self.assertEqual(encmsg, tem, msg="encoded does not match expected") tdm, _, _ = rs.decode(tem) self.assertEqual(tdm, decmsg, msg="decoded does not match original") tem1 = bytearray(tem) numerrs = tt >> 1 for i in sample(range(nn), numerrs): tem1[i] ^= 0xff tdm, _, _ = rs.decode(tem1) self.assertEqual(tdm, decmsg, msg="decoded with errors does not match original") tem1 = bytearray(tem) numerrs += 1 for i in sample(range(nn), numerrs): tem1[i] ^= 0xff self.assertRaises(ReedSolomonError, rs.decode, tem1) def test_generator_poly(self): '''Test if generator poly finder is working correctly and if the all generators poly finder does output the same result''' n = 11 k = 3 # Base 2 test fcr = 120 generator = 2 prim = 0x11d init_tables(generator=generator, prim=prim) g = rs_generator_poly_all(n, fcr=fcr, generator=generator) self.assertEqual( list(g[n-k]) , list(rs_generator_poly(n-k,fcr=fcr, generator=generator)) ) self.assertEqual( list(g[n-k]) , [1, 106, 9, 105, 86, 5, 166, 76, 9] ) # Base 3 test fcr = 0 generator = 3 prim = 0x11b init_tables(generator=generator, prim=prim) g = rs_generator_poly_all(n, fcr=fcr, generator=generator) self.assertEqual( list(g[n-k]) , list(rs_generator_poly(n-k,fcr=fcr, generator=generator)) ) self.assertEqual( list(g[n-k]) , [1, 128, 13, 69, 36, 145, 199, 165, 30] ) def test_prime_poly_build(self): '''Try if the prime polynomials finder works correctly for different GFs (ie, GF(2^6) to GF(2^10)) and with different generators''' params = {"count": 7, "c_exp": [6, 7, 7, 8, 8, 9, 10], "generator": [2, 2, 3, 2, 3, 2, 2], "expected": [ [67, 91, 97, 103, 109, 115], [131, 137, 143, 145, 157, 167, 171, 
185, 191, 193, 203, 211, 213, 229, 239, 241, 247, 253], [131, 137, 143, 145, 157, 167, 171, 185, 191, 193, 203, 211, 213, 229, 239, 241, 247, 253], [285, 299, 301, 333, 351, 355, 357, 361, 369, 391, 397, 425, 451, 463, 487, 501], [283, 313, 319, 333, 351, 355, 357, 361, 375, 397, 415, 419, 425, 451, 501, 505], [529, 539, 545, 557, 563, 601, 607, 617, 623, 631, 637, 647, 661, 675, 677, 687, 695, 701, 719, 721, 731, 757, 761, 787, 789, 799, 803, 817, 827, 847, 859, 865, 875, 877, 883, 895, 901, 911, 949, 953, 967, 971, 973, 981, 985, 995, 1001, 1019], [1033, 1051, 1063, 1069, 1125, 1135, 1153, 1163, 1221, 1239, 1255, 1267, 1279, 1293, 1305, 1315, 1329, 1341, 1347, 1367, 1387, 1413, 1423, 1431, 1441, 1479, 1509, 1527, 1531, 1555, 1557, 1573, 1591, 1603, 1615, 1627, 1657, 1663, 1673, 1717, 1729, 1747, 1759, 1789, 1815, 1821, 1825, 1849, 1863, 1869, 1877, 1881, 1891, 1917, 1933, 1939, 1969, 2011, 2035, 2041] ] } for i in xrange(params['count']): self.assertEqual( find_prime_polys(generator=params['generator'][i], c_exp=params['c_exp'][i]) , params["expected"][i] ) def test_init_tables(self): '''Try if the look up table generator (galois field generator) works correctly for different parameters''' params = [ [0x11d, 2, 8], [0x11b, 3, 8], [0xfd, 3, 7] ] expected = [[[0, 0, 1, 25, 2, 50, 26, 198, 3, 223, 51, 238, 27, 104, 199, 75, 4, 100, 224, 14, 52, 141, 239, 129, 28, 193, 105, 248, 200, 8, 76, 113, 5, 138, 101, 47, 225, 36, 15, 33, 53, 147, 142, 218, 240, 18, 130, 69, 29, 181, 194, 125, 106, 39, 249, 185, 201, 154, 9, 120, 77, 228, 114, 166, 6, 191, 139, 98, 102, 221, 48, 253, 226, 152, 37, 179, 16, 145, 34, 136, 54, 208, 148, 206, 143, 150, 219, 189, 241, 210, 19, 92, 131, 56, 70, 64, 30, 66, 182, 163, 195, 72, 126, 110, 107, 58, 40, 84, 250, 133, 186, 61, 202, 94, 155, 159, 10, 21, 121, 43, 78, 212, 229, 172, 115, 243, 167, 87, 7, 112, 192, 247, 140, 128, 99, 13, 103, 74, 222, 237, 49, 197, 254, 24, 227, 165, 153, 119, 38, 184, 180, 124, 17, 68, 146, 217, 35, 32, 137, 
46, 55, 63, 209, 91, 149, 188, 207, 205, 144, 135, 151, 178, 220, 252, 190, 97, 242, 86, 211, 171, 20, 42, 93, 158, 132, 60, 57, 83, 71, 109, 65, 162, 31, 45, 67, 216, 183, 123, 164, 118, 196, 23, 73, 236, 127, 12, 111, 246, 108, 161, 59, 82, 41, 157, 85, 170, 251, 96, 134, 177, 187, 204, 62, 90, 203, 89, 95, 176, 156, 169, 160, 81, 11, 245, 22, 235, 122, 117, 44, 215, 79, 174, 213, 233, 230, 231, 173, 232, 116, 214, 244, 234, 168, 80, 88, 175], [1, 2, 4, 8, 16, 32, 64, 128, 29, 58, 116, 232, 205, 135, 19, 38, 76, 152, 45, 90, 180, 117, 234, 201, 143, 3, 6, 12, 24, 48, 96, 192, 157, 39, 78, 156, 37, 74, 148, 53, 106, 212, 181, 119, 238, 193, 159, 35, 70, 140, 5, 10, 20, 40, 80, 160, 93, 186, 105, 210, 185, 111, 222, 161, 95, 190, 97, 194, 153, 47, 94, 188, 101, 202, 137, 15, 30, 60, 120, 240, 253, 231, 211, 187, 107, 214, 177, 127, 254, 225, 223, 163, 91, 182, 113, 226, 217, 175, 67, 134, 17, 34, 68, 136, 13, 26, 52, 104, 208, 189, 103, 206, 129, 31, 62, 124, 248, 237, 199, 147, 59, 118, 236, 197, 151, 51, 102, 204, 133, 23, 46, 92, 184, 109, 218, 169, 79, 158, 33, 66, 132, 21, 42, 84, 168, 77, 154, 41, 82, 164, 85, 170, 73, 146, 57, 114, 228, 213, 183, 115, 230, 209, 191, 99, 198, 145, 63, 126, 252, 229, 215, 179, 123, 246, 241, 255, 227, 219, 171, 75, 150, 49, 98, 196, 149, 55, 110, 220, 165, 87, 174, 65, 130, 25, 50, 100, 200, 141, 7, 14, 28, 56, 112, 224, 221, 167, 83, 166, 81, 162, 89, 178, 121, 242, 249, 239, 195, 155, 43, 86, 172, 69, 138, 9, 18, 36, 72, 144, 61, 122, 244, 245, 247, 243, 251, 235, 203, 139, 11, 22, 44, 88, 176, 125, 250, 233, 207, 131, 27, 54, 108, 216, 173, 71, 142, 1, 2, 4, 8, 16, 32, 64, 128, 29, 58, 116, 232, 205, 135, 19, 38, 76, 152, 45, 90, 180, 117, 234, 201, 143, 3, 6, 12, 24, 48, 96, 192, 157, 39, 78, 156, 37, 74, 148, 53, 106, 212, 181, 119, 238, 193, 159, 35, 70, 140, 5, 10, 20, 40, 80, 160, 93, 186, 105, 210, 185, 111, 222, 161, 95, 190, 97, 194, 153, 47, 94, 188, 101, 202, 137, 15, 30, 60, 120, 240, 253, 231, 211, 187, 107, 
214, 177, 127, 254, 225, 223, 163, 91, 182, 113, 226, 217, 175, 67, 134, 17, 34, 68, 136, 13, 26, 52, 104, 208, 189, 103, 206, 129, 31, 62, 124, 248, 237, 199, 147, 59, 118, 236, 197, 151, 51, 102, 204, 133, 23, 46, 92, 184, 109, 218, 169, 79, 158, 33, 66, 132, 21, 42, 84, 168, 77, 154, 41, 82, 164, 85, 170, 73, 146, 57, 114, 228, 213, 183, 115, 230, 209, 191, 99, 198, 145, 63, 126, 252, 229, 215, 179, 123, 246, 241, 255, 227, 219, 171, 75, 150, 49, 98, 196, 149, 55, 110, 220, 165, 87, 174, 65, 130, 25, 50, 100, 200, 141, 7, 14, 28, 56, 112, 224, 221, 167, 83, 166, 81, 162, 89, 178, 121, 242, 249, 239, 195, 155, 43, 86, 172, 69, 138, 9, 18, 36, 72, 144, 61, 122, 244, 245, 247, 243, 251, 235, 203, 139, 11, 22, 44, 88, 176, 125, 250, 233, 207, 131, 27, 54, 108, 216, 173, 71, 142]], [[0, 0, 25, 1, 50, 2, 26, 198, 75, 199, 27, 104, 51, 238, 223, 3, 100, 4, 224, 14, 52, 141, 129, 239, 76, 113, 8, 200, 248, 105, 28, 193, 125, 194, 29, 181, 249, 185, 39, 106, 77, 228, 166, 114, 154, 201, 9, 120, 101, 47, 138, 5, 33, 15, 225, 36, 18, 240, 130, 69, 53, 147, 218, 142, 150, 143, 219, 189, 54, 208, 206, 148, 19, 92, 210, 241, 64, 70, 131, 56, 102, 221, 253, 48, 191, 6, 139, 98, 179, 37, 226, 152, 34, 136, 145, 16, 126, 110, 72, 195, 163, 182, 30, 66, 58, 107, 40, 84, 250, 133, 61, 186, 43, 121, 10, 21, 155, 159, 94, 202, 78, 212, 172, 229, 243, 115, 167, 87, 175, 88, 168, 80, 244, 234, 214, 116, 79, 174, 233, 213, 231, 230, 173, 232, 44, 215, 117, 122, 235, 22, 11, 245, 89, 203, 95, 176, 156, 169, 81, 160, 127, 12, 246, 111, 23, 196, 73, 236, 216, 67, 31, 45, 164, 118, 123, 183, 204, 187, 62, 90, 251, 96, 177, 134, 59, 82, 161, 108, 170, 85, 41, 157, 151, 178, 135, 144, 97, 190, 220, 252, 188, 149, 207, 205, 55, 63, 91, 209, 83, 57, 132, 60, 65, 162, 109, 71, 20, 42, 158, 93, 86, 242, 211, 171, 68, 17, 146, 217, 35, 32, 46, 137, 180, 124, 184, 38, 119, 153, 227, 165, 103, 74, 237, 222, 197, 49, 254, 24, 13, 99, 140, 128, 192, 247, 112, 7], [1, 3, 5, 15, 17, 51, 85, 255, 26, 
46, 114, 150, 161, 248, 19, 53, 95, 225, 56, 72, 216, 115, 149, 164, 247, 2, 6, 10, 30, 34, 102, 170, 229, 52, 92, 228, 55, 89, 235, 38, 106, 190, 217, 112, 144, 171, 230, 49, 83, 245, 4, 12, 20, 60, 68, 204, 79, 209, 104, 184, 211, 110, 178, 205, 76, 212, 103, 169, 224, 59, 77, 215, 98, 166, 241, 8, 24, 40, 120, 136, 131, 158, 185, 208, 107, 189, 220, 127, 129, 152, 179, 206, 73, 219, 118, 154, 181, 196, 87, 249, 16, 48, 80, 240, 11, 29, 39, 105, 187, 214, 97, 163, 254, 25, 43, 125, 135, 146, 173, 236, 47, 113, 147, 174, 233, 32, 96, 160, 251, 22, 58, 78, 210, 109, 183, 194, 93, 231, 50, 86, 250, 21, 63, 65, 195, 94, 226, 61, 71, 201, 64, 192, 91, 237, 44, 116, 156, 191, 218, 117, 159, 186, 213, 100, 172, 239, 42, 126, 130, 157, 188, 223, 122, 142, 137, 128, 155, 182, 193, 88, 232, 35, 101, 175, 234, 37, 111, 177, 200, 67, 197, 84, 252, 31, 33, 99, 165, 244, 7, 9, 27, 45, 119, 153, 176, 203, 70, 202, 69, 207, 74, 222, 121, 139, 134, 145, 168, 227, 62, 66, 198, 81, 243, 14, 18, 54, 90, 238, 41, 123, 141, 140, 143, 138, 133, 148, 167, 242, 13, 23, 57, 75, 221, 124, 132, 151, 162, 253, 28, 36, 108, 180, 199, 82, 246, 1, 3, 5, 15, 17, 51, 85, 255, 26, 46, 114, 150, 161, 248, 19, 53, 95, 225, 56, 72, 216, 115, 149, 164, 247, 2, 6, 10, 30, 34, 102, 170, 229, 52, 92, 228, 55, 89, 235, 38, 106, 190, 217, 112, 144, 171, 230, 49, 83, 245, 4, 12, 20, 60, 68, 204, 79, 209, 104, 184, 211, 110, 178, 205, 76, 212, 103, 169, 224, 59, 77, 215, 98, 166, 241, 8, 24, 40, 120, 136, 131, 158, 185, 208, 107, 189, 220, 127, 129, 152, 179, 206, 73, 219, 118, 154, 181, 196, 87, 249, 16, 48, 80, 240, 11, 29, 39, 105, 187, 214, 97, 163, 254, 25, 43, 125, 135, 146, 173, 236, 47, 113, 147, 174, 233, 32, 96, 160, 251, 22, 58, 78, 210, 109, 183, 194, 93, 231, 50, 86, 250, 21, 63, 65, 195, 94, 226, 61, 71, 201, 64, 192, 91, 237, 44, 116, 156, 191, 218, 117, 159, 186, 213, 100, 172, 239, 42, 126, 130, 157, 188, 223, 122, 142, 137, 128, 155, 182, 193, 88, 232, 35, 101, 175, 234, 37, 111, 177, 200, 
67, 197, 84, 252, 31, 33, 99, 165, 244, 7, 9, 27, 45, 119, 153, 176, 203, 70, 202, 69, 207, 74, 222, 121, 139, 134, 145, 168, 227, 62, 66, 198, 81, 243, 14, 18, 54, 90, 238, 41, 123, 141, 140, 143, 138, 133, 148, 167, 242, 13, 23, 57, 75, 221, 124, 132, 151, 162, 253, 28, 36, 108, 180, 199, 82, 246]], [[0, 0, 7, 1, 14, 2, 8, 56, 21, 57, 9, 90, 15, 31, 63, 3, 28, 4, 64, 67, 16, 112, 97, 32, 22, 47, 38, 58, 70, 91, 10, 108, 35, 109, 11, 87, 71, 79, 74, 92, 23, 82, 119, 48, 104, 59, 39, 100, 29, 19, 54, 5, 45, 68, 65, 95, 77, 33, 98, 117, 17, 43, 115, 113, 42, 114, 116, 76, 18, 53, 94, 44, 78, 73, 86, 34, 81, 118, 99, 103, 30, 62, 89, 20, 126, 6, 55, 13, 111, 96, 66, 27, 46, 37, 107, 69, 36, 106, 26, 110, 61, 88, 12, 125, 52, 93, 75, 41, 72, 85, 102, 80, 84, 101, 40, 51, 105, 25, 124, 60, 24, 123, 50, 83, 122, 49, 120, 121], [1, 3, 5, 15, 17, 51, 85, 2, 6, 10, 30, 34, 102, 87, 4, 12, 20, 60, 68, 49, 83, 8, 24, 40, 120, 117, 98, 91, 16, 48, 80, 13, 23, 57, 75, 32, 96, 93, 26, 46, 114, 107, 64, 61, 71, 52, 92, 25, 43, 125, 122, 115, 104, 69, 50, 86, 7, 9, 27, 45, 119, 100, 81, 14, 18, 54, 90, 19, 53, 95, 28, 36, 108, 73, 38, 106, 67, 56, 72, 37, 111, 76, 41, 123, 112, 109, 74, 35, 101, 82, 11, 29, 39, 105, 70, 55, 89, 22, 58, 78, 47, 113, 110, 79, 44, 116, 97, 94, 31, 33, 99, 88, 21, 63, 65, 62, 66, 59, 77, 42, 126, 127, 124, 121, 118, 103, 84, 1, 3, 5, 15, 17, 51, 85, 2, 6, 10, 30, 34, 102, 87, 4, 12, 20, 60, 68, 49, 83, 8, 24, 40, 120, 117, 98, 91, 16, 48, 80, 13, 23, 57, 75, 32, 96, 93, 26, 46, 114, 107, 64, 61, 71, 52, 92, 25, 43, 125, 122, 115, 104, 69, 50, 86, 7, 9, 27, 45, 119, 100, 81, 14, 18, 54, 90, 19, 53, 95, 28, 36, 108, 73, 38, 106, 67, 56, 72, 37, 111, 76, 41, 123, 112, 109, 74, 35, 101, 82, 11, 29, 39, 105, 70, 55, 89, 22, 58, 78, 47, 113, 110, 79, 44, 116, 97, 94, 31, 33, 99, 88, 21, 63, 65, 62, 66, 59, 77, 42, 126, 127, 124, 121, 118, 103, 84]]] for i in xrange(len(params)): p = params[i] expected_log_t, expected_exp_t = expected[i] log_t, exp_t = 
init_tables(prim=p[0], generator=p[1], c_exp=p[2]) self.assertEqual( list(log_t) , expected_log_t ) self.assertEqual( list(exp_t) , expected_exp_t ) def test_consistent_erasures_report(self): # Ensure we always at least return the erasures we used as input rs = RSCodec(10) msg = rs.encode(bytearray("hello world ", "latin1")) self.assertEqual(rs.decode(msg, erase_pos=[1])[2], bytearray([1])) self.assertEqual(rs.decode(msg, erase_pos=[1])[2], bytearray([1])) msg[1] = 0xFF self.assertEqual(rs.decode(msg)[2], bytearray([1])) self.assertEqual(rs.decode(msg, erase_pos=[1])[2], bytearray([1])) def test_erasures_chunking(self): # Test whether providing positions for erasures in the 2nd chunk or later is working rs = RSCodec(30) encoded = rs.encode(b'0' * 226) _, _, _ = rs.decode(encoded, erase_pos=[255], only_erasures=True) # If it works, no exception should be raised class cTestGFArithmetics(unittest.TestCase): '''Test Galois Field arithmetics''' def test_multiply_nolut(self): '''Try to multiply without look-up tables (necessary to build the look-up tables!)''' a = 30 b = 19 generator=2 prim=0x11d # Compare the LUT multiplication and noLUT init_tables(prim=prim, generator=generator) self.assertEqual(gf_mul(a, b), gf_mult_noLUT(a, b, prim=prim)) # More Galois Field multiplications self.assertEqual( gf_mult_noLUT(5, 6, prim=0x11b, field_charac_full=256) , 30 ) self.assertEqual( gf_mult_noLUT(3, 125, prim=0x11b, field_charac_full=256) , 135 ) self.assertEqual( gf_mult_noLUT(2, 200, prim=0x11d, field_charac_full=256) , 141 ) self.assertEqual( gf_mult_noLUT_slow(2, 200, prim=0x11d) , 141 ) # Multiplications in GF(2^7) self.assertEqual( gf_mult_noLUT(3, 125, prim=0xfd, field_charac_full=128) , 122 ) # Multiplications outside of the finite field (we revert to standard integer multiplications just to see if it works) self.assertEqual( gf_mult_noLUT(3, 125, prim=0, carryless=False) , 375 ) self.assertEqual( gf_mult_noLUT_slow(4, 125, prim=0) , 500 ) # the second method, just to 
check that everything's alright class cTestRSCodecUniversalCrossValidation(unittest.TestCase): '''Ultimate set of tests of a full set of different parameters for encoding and decoding. If this passes, the codec is universal and can correctly interface with any other RS codec!''' def test_main(self): def cartesian_product_dict_items(dicts): return (dict(izip(dicts, x)) for x in itertools.product(*dicts.values())) debugg = False # if one or more tests don't pass, you can enable this flag to True to get verbose output to debug orig_mes = bytearray("hello world", "latin1") n = len(orig_mes)*2 k = len(orig_mes) nsym = n-k istart = 0 params = {"count": 5, "fcr": [120, 0, 1, 1, 1], "prim": [0x187, 0x11d, 0x11b, 0xfd, 0xfd], "generator": [2, 2, 3, 3, 2], "c_exponent": [8, 8, 8, 7, 7], } cases = { "errmode": [1, 2, 3, 4], "erratasnb_errorsnb_onlyeras": [[8, 3, False], [6, 5, False], [5, 5, False], [11, 0, True], [11, 0, False], [0,0, False]], # errata number (errors+erasures), erasures number and only_erasures: the last item is the value for only_erasures (True/False) } ############################$ results_br = [] results_rs = [] it = 0 for p in xrange(params["count"]): fcr = params["fcr"][p] prim = params["prim"][p] generator = params["generator"][p] c_exponent = params["c_exponent"][p] for case in cartesian_product_dict_items(cases): errmode = case["errmode"] erratanb = case["erratasnb_errorsnb_onlyeras"][0] errnb = case["erratasnb_errorsnb_onlyeras"][1] only_erasures = case["erratasnb_errorsnb_onlyeras"][2] it += 1 if debugg: print("it ", it) print("param", p) print(case) # REEDSOLO # Init the RS codec init_tables(generator=generator, prim=prim, c_exp=c_exponent) g = rs_generator_poly_all(n, fcr=fcr, generator=generator) # Encode the message rmesecc = rs_encode_msg(orig_mes, n-k, gen=g[n-k]) rmesecc_orig = rmesecc[:] # make a copy of the original message to check later if fully corrected (because the syndrome may be wrong sometimes) # Tamper the message if erratanb > 0: 
if errmode == 1: sl = slice(istart, istart+erratanb) elif errmode == 2: sl = slice(-istart-erratanb-(n-k), -(n-k)) elif errmode == 3: sl = slice(-istart-erratanb-1, -1) elif errmode == 4: sl = slice(-istart-erratanb, None) if debugg: print("Removed slice:", list(rmesecc[sl]), rmesecc[sl]) rmesecc[sl] = [0] * erratanb # Generate the erasures positions (if any) erase_pos = [x for x in xrange(len(rmesecc)) if rmesecc[x] == 0] if errnb > 0: erase_pos = erase_pos[:-errnb] # remove the errors positions (must not be known by definition) if debugg: print("erase_pos", erase_pos) print("coef_pos", [len(rmesecc) - 1 - pos for pos in erase_pos]) print("Errata total: ", erratanb-errnb + errnb*2, " -- Correctable? ", (erratanb-errnb + errnb*2 <= nsym)) # Decoding the corrupted codeword # -- Forney syndrome method try: rmes, recc, errata_pos = rs_correct_msg(rmesecc, n-k, fcr=fcr, generator=generator, erase_pos=erase_pos, only_erasures=only_erasures) results_br.append( rs_check(rmes + recc, n-k, fcr=fcr, generator=generator) ) # check if correct by syndrome analysis (can be wrong) results_br.append( rmesecc_orig == (rmes+recc) ) # check if correct by comparing to the original message (always correct) if debugg and not rs_check(rmes + recc, n-k, fcr=fcr, generator=generator) or not (rmesecc_orig == (rmes+recc)): raise ReedSolomonError("False!!!!!") except ReedSolomonError as exc: results_br.append(False) results_br.append(False) if debugg: print("====") print("ERROR! 
Details:") print("param", p) print(case) print(erase_pos) print("original_msg", rmesecc_orig) print("tampered_msg", rmesecc) print("decoded_msg", rmes+recc) print("checks: ", rs_check(rmes + recc, n-k, fcr=fcr, generator=generator), rmesecc_orig == (rmes+recc)) print("====") raise exc # -- Without Forney syndrome method try: mes, ecc, errata_pos = rs_correct_msg_nofsynd(rmesecc, n-k, fcr=fcr, generator=generator, erase_pos=erase_pos, only_erasures=only_erasures) results_br.append( rs_check(rmes + recc, n-k, fcr=fcr, generator=generator) ) results_br.append( rmesecc_orig == (rmes+recc) ) except ReedSolomonError as exc: results_br.append(False) results_br.append(False) if debugg: print("-----") self.assertTrue(results_br.count(True) == len(results_br)) class TestHelperFuncs(unittest.TestCase): '''Test helper functions''' def test_maxerrata(self): rs = RSCodec(10) self.assertEqual(rs.maxerrata(), (5, 10)) self.assertEqual(rs.maxerrata(erasures=8), (1, 8)) self.assertEqual(rs.maxerrata(errors=2), (2, 6)) self.assertRaises(ReedSolomonError, rs.maxerrata, erasures=11) self.assertRaises(ReedSolomonError, rs.maxerrata, errors=6) def test_maxerrata_verbose(self): output = StringIO() sys.stdout = output rs = RSCodec(10) rs.maxerrata(verbose=True) rs.maxerrata(erasures=2, verbose=True) rs.maxerrata(errors=4, verbose=True) sys.stdout = sys.__stdout__ self.assertEqual(output.getvalue(), "This codec can correct up to 5 errors and 10 erasures independently\nThis codec can correct up to 4 errors and 2 erasures simultaneously\nThis codec can correct up to 4 errors and 2 erasures simultaneously\n") if __name__ == "__main__": unittest.main() reedsolomon-1.7.0/tests/test_reedsolo.py000066400000000000000000000754751436142644400204660ustar00rootroot00000000000000# To use this script easily (starting with Python 2.7), just cd to the parent folder and type the following command: # python -m unittest discover tests from __future__ import print_function import unittest import sys from 
random import sample import itertools try: from itertools import izip except ImportError: #python3.x izip = zip from reedsolo import * try: bytearray except NameError: from reedsolo import _bytearray as bytearray try: # compatibility with Python 3+ xrange except NameError: xrange = range try: # python2.x from StringIO import StringIO except ImportError: from io import StringIO class TestReedSolomon(unittest.TestCase): def test_simple(self): rs = RSCodec(10) msg = bytearray("hello world " * 10, "latin1") enc = rs.encode(msg) dec, dec_enc, errata_pos = rs.decode(enc) self.assertEqual(dec, msg) self.assertEqual(dec_enc, enc) def test_correction(self): rs = RSCodec(10) msg = bytearray("hello world " * 10, "latin1") enc = rs.encode(msg) rmsg, renc, errata_pos = rs.decode(enc) self.assertEqual(rmsg, msg) self.assertEqual(renc, enc) for i in [27, -3, -9, 7, 0]: enc[i] = 99 rmsg, renc, errata_pos = rs.decode(enc) self.assertEqual(rmsg, msg) enc[82] = 99 self.assertRaises(ReedSolomonError, rs.decode, enc) def test_check(self): rs = RSCodec(10) msg = bytearray("hello world " * 10, "latin1") enc = rs.encode(msg) rmsg, renc, errata_pos = rs.decode(enc) self.assertEqual(rs.check(enc), [True]) self.assertEqual(rs.check(renc), [True]) for i in [27, -3, -9, 7, 0]: enc[i] = 99 rmsg, renc, errata_pos = rs.decode(enc) self.assertEqual(rs.check(enc), [False]) self.assertEqual(rs.check(renc), [True]) def test_long(self): rs = RSCodec(10) msg = bytearray("a" * 10000, "latin1") enc = rs.encode(msg) dec, dec_enc, errata_pos = rs.decode(enc) self.assertEqual(dec, msg) self.assertEqual(dec_enc, enc) enc2 = list(enc) enc2[177] = 99 enc2[2212] = 88 dec2, dec_enc2, errata_pos = rs.decode(enc2) self.assertEqual(dec2, msg) self.assertEqual(dec_enc2, enc) def test_prim_fcr_basic(self): nn = 30 kk = 18 tt = nn - kk rs = RSCodec(tt, fcr=120, prim=0x187) hexencmsg = '00faa123555555c000000354064432c02800fe97c434e1ff5365' \ 'cf8fafe4' strf = str if sys.version_info[0] >= 3 else unicode encmsg = 
bytearray.fromhex(strf(hexencmsg)) decmsg = encmsg[:kk] tem = rs.encode(decmsg) self.assertEqual(encmsg, tem, msg="encoded does not match expected") tdm, rtem, errata_pos = rs.decode(tem) self.assertEqual(tdm, decmsg, msg="decoded does not match original") self.assertEqual(rtem, tem, msg="decoded mesecc does not match original") tem1 = bytearray(tem) # clone a copy # encoding and decoding intact message seem OK, so test errors numerrs = tt >> 1 # inject tt/2 errors (expected to recover fully) for i in sample(range(nn), numerrs): # inject errors in random places tem1[i] ^= 0xff # flip all 8 bits tdm, _, _ = rs.decode(tem1) self.assertEqual(tdm, decmsg, msg="decoded with errors does not match original") tem1 = bytearray(tem) # clone another copy numerrs += 1 # inject tt/2 + 1 errors (expected to fail and detect it) for i in sample(range(nn), numerrs): # inject errors in random places tem1[i] ^= 0xff # flip all 8 bits # if this fails, it means excessive errors not detected self.assertRaises(ReedSolomonError, rs.decode, tem1) def test_prim_fcr_long(self): nn = 48 kk = 34 tt = nn - kk rs = RSCodec(tt, fcr=120, prim=0x187) hexencmsg = '08faa123555555c000000354064432c0280e1b4d090cfc04887400' \ '000003500000000e1985ff9c6b33066ca9f43d12e8' strf = str if sys.version_info[0] >= 3 else unicode encmsg = bytearray.fromhex(strf(hexencmsg)) decmsg = encmsg[:kk] tem = rs.encode(decmsg) self.assertEqual(encmsg, tem, msg="encoded does not match expected") tdm, rtem, errata_pos = rs.decode(tem) self.assertEqual(tdm, decmsg, msg="decoded does not match original") self.assertEqual(rtem, tem, msg="decoded mesecc does not match original") tem1 = bytearray(tem) numerrs = tt >> 1 for i in sample(range(nn), numerrs): tem1[i] ^= 0xff tdm, rtem, errata_pos = rs.decode(tem1) self.assertEqual(tdm, decmsg, msg="decoded with errors does not match original") self.assertEqual(rtem, tem, msg="decoded mesecc with errors does not match original") tem1 = bytearray(tem) numerrs += 1 for i in 
sample(range(nn), numerrs): tem1[i] ^= 0xff self.assertRaises(ReedSolomonError, rs.decode, tem1) def test_generator_poly(self): '''Test if generator poly finder is working correctly and if the all generators poly finder does output the same result''' n = 11 k = 3 # Base 2 test fcr = 120 generator = 2 prim = 0x11d init_tables(generator=generator, prim=prim) g = rs_generator_poly_all(n, fcr=fcr, generator=generator) self.assertEqual( list(g[n-k]) , list(rs_generator_poly(n-k,fcr=fcr, generator=generator)) ) self.assertEqual( list(g[n-k]) , [1, 106, 9, 105, 86, 5, 166, 76, 9] ) # Base 3 test fcr = 0 generator = 3 prim = 0x11b init_tables(generator=generator, prim=prim) g = rs_generator_poly_all(n, fcr=fcr, generator=generator) self.assertEqual( list(g[n-k]) , list(rs_generator_poly(n-k,fcr=fcr, generator=generator)) ) self.assertEqual( list(g[n-k]) , [1, 128, 13, 69, 36, 145, 199, 165, 30] ) def test_prime_poly_build(self): '''Try if the prime polynomials finder works correctly for different GFs (ie, GF(2^6) to GF(2^10)) and with different generators''' params = {"count": 7, "c_exp": [6, 7, 7, 8, 8, 9, 10], "generator": [2, 2, 3, 2, 3, 2, 2], "expected": [ [67, 91, 97, 103, 109, 115], [131, 137, 143, 145, 157, 167, 171, 185, 191, 193, 203, 211, 213, 229, 239, 241, 247, 253], [131, 137, 143, 145, 157, 167, 171, 185, 191, 193, 203, 211, 213, 229, 239, 241, 247, 253], [285, 299, 301, 333, 351, 355, 357, 361, 369, 391, 397, 425, 451, 463, 487, 501], [283, 313, 319, 333, 351, 355, 357, 361, 375, 397, 415, 419, 425, 451, 501, 505], [529, 539, 545, 557, 563, 601, 607, 617, 623, 631, 637, 647, 661, 675, 677, 687, 695, 701, 719, 721, 731, 757, 761, 787, 789, 799, 803, 817, 827, 847, 859, 865, 875, 877, 883, 895, 901, 911, 949, 953, 967, 971, 973, 981, 985, 995, 1001, 1019], [1033, 1051, 1063, 1069, 1125, 1135, 1153, 1163, 1221, 1239, 1255, 1267, 1279, 1293, 1305, 1315, 1329, 1341, 1347, 1367, 1387, 1413, 1423, 1431, 1441, 1479, 1509, 1527, 1531, 1555, 1557, 1573, 1591, 1603, 
1615, 1627, 1657, 1663, 1673, 1717, 1729, 1747, 1759, 1789, 1815, 1821, 1825, 1849, 1863, 1869, 1877, 1881, 1891, 1917, 1933, 1939, 1969, 2011, 2035, 2041] ] } for i in xrange(params['count']): self.assertEqual( find_prime_polys(generator=params['generator'][i], c_exp=params['c_exp'][i]) , params["expected"][i] ) def test_init_tables(self): '''Try if the look up table generator (galois field generator) works correctly for different parameters''' params = [ [0x11d, 2, 8], [0x11b, 3, 8], [0xfd, 3, 7] ] expected = [ [ [0, 0, 1, 25, 2, 50, 26, 198, 3, 223, 51, 238, 27, 104, 199, 75, 4, 100, 224, 14, 52, 141, 239, 129, 28, 193, 105, 248, 200, 8, 76, 113, 5, 138, 101, 47, 225, 36, 15, 33, 53, 147, 142, 218, 240, 18, 130, 69, 29, 181, 194, 125, 106, 39, 249, 185, 201, 154, 9, 120, 77, 228, 114, 166, 6, 191, 139, 98, 102, 221, 48, 253, 226, 152, 37, 179, 16, 145, 34, 136, 54, 208, 148, 206, 143, 150, 219, 189, 241, 210, 19, 92, 131, 56, 70, 64, 30, 66, 182, 163, 195, 72, 126, 110, 107, 58, 40, 84, 250, 133, 186, 61, 202, 94, 155, 159, 10, 21, 121, 43, 78, 212, 229, 172, 115, 243, 167, 87, 7, 112, 192, 247, 140, 128, 99, 13, 103, 74, 222, 237, 49, 197, 254, 24, 227, 165, 153, 119, 38, 184, 180, 124, 17, 68, 146, 217, 35, 32, 137, 46, 55, 63, 209, 91, 149, 188, 207, 205, 144, 135, 151, 178, 220, 252, 190, 97, 242, 86, 211, 171, 20, 42, 93, 158, 132, 60, 57, 83, 71, 109, 65, 162, 31, 45, 67, 216, 183, 123, 164, 118, 196, 23, 73, 236, 127, 12, 111, 246, 108, 161, 59, 82, 41, 157, 85, 170, 251, 96, 134, 177, 187, 204, 62, 90, 203, 89, 95, 176, 156, 169, 160, 81, 11, 245, 22, 235, 122, 117, 44, 215, 79, 174, 213, 233, 230, 231, 173, 232, 116, 214, 244, 234, 168, 80, 88, 175], [1, 2, 4, 8, 16, 32, 64, 128, 29, 58, 116, 232, 205, 135, 19, 38, 76, 152, 45, 90, 180, 117, 234, 201, 143, 3, 6, 12, 24, 48, 96, 192, 157, 39, 78, 156, 37, 74, 148, 53, 106, 212, 181, 119, 238, 193, 159, 35, 70, 140, 5, 10, 20, 40, 80, 160, 93, 186, 105, 210, 185, 111, 222, 161, 95, 190, 97, 194, 153, 47, 
94, 188, 101, 202, 137, 15, 30, 60, 120, 240, 253, 231, 211, 187, 107, 214, 177, 127, 254, 225, 223, 163, 91, 182, 113, 226, 217, 175, 67, 134, 17, 34, 68, 136, 13, 26, 52, 104, 208, 189, 103, 206, 129, 31, 62, 124, 248, 237, 199, 147, 59, 118, 236, 197, 151, 51, 102, 204, 133, 23, 46, 92, 184, 109, 218, 169, 79, 158, 33, 66, 132, 21, 42, 84, 168, 77, 154, 41, 82, 164, 85, 170, 73, 146, 57, 114, 228, 213, 183, 115, 230, 209, 191, 99, 198, 145, 63, 126, 252, 229, 215, 179, 123, 246, 241, 255, 227, 219, 171, 75, 150, 49, 98, 196, 149, 55, 110, 220, 165, 87, 174, 65, 130, 25, 50, 100, 200, 141, 7, 14, 28, 56, 112, 224, 221, 167, 83, 166, 81, 162, 89, 178, 121, 242, 249, 239, 195, 155, 43, 86, 172, 69, 138, 9, 18, 36, 72, 144, 61, 122, 244, 245, 247, 243, 251, 235, 203, 139, 11, 22, 44, 88, 176, 125, 250, 233, 207, 131, 27, 54, 108, 216, 173, 71, 142, 1, 2, 4, 8, 16, 32, 64, 128, 29, 58, 116, 232, 205, 135, 19, 38, 76, 152, 45, 90, 180, 117, 234, 201, 143, 3, 6, 12, 24, 48, 96, 192, 157, 39, 78, 156, 37, 74, 148, 53, 106, 212, 181, 119, 238, 193, 159, 35, 70, 140, 5, 10, 20, 40, 80, 160, 93, 186, 105, 210, 185, 111, 222, 161, 95, 190, 97, 194, 153, 47, 94, 188, 101, 202, 137, 15, 30, 60, 120, 240, 253, 231, 211, 187, 107, 214, 177, 127, 254, 225, 223, 163, 91, 182, 113, 226, 217, 175, 67, 134, 17, 34, 68, 136, 13, 26, 52, 104, 208, 189, 103, 206, 129, 31, 62, 124, 248, 237, 199, 147, 59, 118, 236, 197, 151, 51, 102, 204, 133, 23, 46, 92, 184, 109, 218, 169, 79, 158, 33, 66, 132, 21, 42, 84, 168, 77, 154, 41, 82, 164, 85, 170, 73, 146, 57, 114, 228, 213, 183, 115, 230, 209, 191, 99, 198, 145, 63, 126, 252, 229, 215, 179, 123, 246, 241, 255, 227, 219, 171, 75, 150, 49, 98, 196, 149, 55, 110, 220, 165, 87, 174, 65, 130, 25, 50, 100, 200, 141, 7, 14, 28, 56, 112, 224, 221, 167, 83, 166, 81, 162, 89, 178, 121, 242, 249, 239, 195, 155, 43, 86, 172, 69, 138, 9, 18, 36, 72, 144, 61, 122, 244, 245, 247, 243, 251, 235, 203, 139, 11, 22, 44, 88, 176, 125, 250, 233, 207, 131, 27, 
54, 108, 216, 173, 71, 142], 255 ], [ [0, 0, 25, 1, 50, 2, 26, 198, 75, 199, 27, 104, 51, 238, 223, 3, 100, 4, 224, 14, 52, 141, 129, 239, 76, 113, 8, 200, 248, 105, 28, 193, 125, 194, 29, 181, 249, 185, 39, 106, 77, 228, 166, 114, 154, 201, 9, 120, 101, 47, 138, 5, 33, 15, 225, 36, 18, 240, 130, 69, 53, 147, 218, 142, 150, 143, 219, 189, 54, 208, 206, 148, 19, 92, 210, 241, 64, 70, 131, 56, 102, 221, 253, 48, 191, 6, 139, 98, 179, 37, 226, 152, 34, 136, 145, 16, 126, 110, 72, 195, 163, 182, 30, 66, 58, 107, 40, 84, 250, 133, 61, 186, 43, 121, 10, 21, 155, 159, 94, 202, 78, 212, 172, 229, 243, 115, 167, 87, 175, 88, 168, 80, 244, 234, 214, 116, 79, 174, 233, 213, 231, 230, 173, 232, 44, 215, 117, 122, 235, 22, 11, 245, 89, 203, 95, 176, 156, 169, 81, 160, 127, 12, 246, 111, 23, 196, 73, 236, 216, 67, 31, 45, 164, 118, 123, 183, 204, 187, 62, 90, 251, 96, 177, 134, 59, 82, 161, 108, 170, 85, 41, 157, 151, 178, 135, 144, 97, 190, 220, 252, 188, 149, 207, 205, 55, 63, 91, 209, 83, 57, 132, 60, 65, 162, 109, 71, 20, 42, 158, 93, 86, 242, 211, 171, 68, 17, 146, 217, 35, 32, 46, 137, 180, 124, 184, 38, 119, 153, 227, 165, 103, 74, 237, 222, 197, 49, 254, 24, 13, 99, 140, 128, 192, 247, 112, 7], [1, 3, 5, 15, 17, 51, 85, 255, 26, 46, 114, 150, 161, 248, 19, 53, 95, 225, 56, 72, 216, 115, 149, 164, 247, 2, 6, 10, 30, 34, 102, 170, 229, 52, 92, 228, 55, 89, 235, 38, 106, 190, 217, 112, 144, 171, 230, 49, 83, 245, 4, 12, 20, 60, 68, 204, 79, 209, 104, 184, 211, 110, 178, 205, 76, 212, 103, 169, 224, 59, 77, 215, 98, 166, 241, 8, 24, 40, 120, 136, 131, 158, 185, 208, 107, 189, 220, 127, 129, 152, 179, 206, 73, 219, 118, 154, 181, 196, 87, 249, 16, 48, 80, 240, 11, 29, 39, 105, 187, 214, 97, 163, 254, 25, 43, 125, 135, 146, 173, 236, 47, 113, 147, 174, 233, 32, 96, 160, 251, 22, 58, 78, 210, 109, 183, 194, 93, 231, 50, 86, 250, 21, 63, 65, 195, 94, 226, 61, 71, 201, 64, 192, 91, 237, 44, 116, 156, 191, 218, 117, 159, 186, 213, 100, 172, 239, 42, 126, 130, 157, 188, 223, 122, 
142, 137, 128, 155, 182, 193, 88, 232, 35, 101, 175, 234, 37, 111, 177, 200, 67, 197, 84, 252, 31, 33, 99, 165, 244, 7, 9, 27, 45, 119, 153, 176, 203, 70, 202, 69, 207, 74, 222, 121, 139, 134, 145, 168, 227, 62, 66, 198, 81, 243, 14, 18, 54, 90, 238, 41, 123, 141, 140, 143, 138, 133, 148, 167, 242, 13, 23, 57, 75, 221, 124, 132, 151, 162, 253, 28, 36, 108, 180, 199, 82, 246, 1, 3, 5, 15, 17, 51, 85, 255, 26, 46, 114, 150, 161, 248, 19, 53, 95, 225, 56, 72, 216, 115, 149, 164, 247, 2, 6, 10, 30, 34, 102, 170, 229, 52, 92, 228, 55, 89, 235, 38, 106, 190, 217, 112, 144, 171, 230, 49, 83, 245, 4, 12, 20, 60, 68, 204, 79, 209, 104, 184, 211, 110, 178, 205, 76, 212, 103, 169, 224, 59, 77, 215, 98, 166, 241, 8, 24, 40, 120, 136, 131, 158, 185, 208, 107, 189, 220, 127, 129, 152, 179, 206, 73, 219, 118, 154, 181, 196, 87, 249, 16, 48, 80, 240, 11, 29, 39, 105, 187, 214, 97, 163, 254, 25, 43, 125, 135, 146, 173, 236, 47, 113, 147, 174, 233, 32, 96, 160, 251, 22, 58, 78, 210, 109, 183, 194, 93, 231, 50, 86, 250, 21, 63, 65, 195, 94, 226, 61, 71, 201, 64, 192, 91, 237, 44, 116, 156, 191, 218, 117, 159, 186, 213, 100, 172, 239, 42, 126, 130, 157, 188, 223, 122, 142, 137, 128, 155, 182, 193, 88, 232, 35, 101, 175, 234, 37, 111, 177, 200, 67, 197, 84, 252, 31, 33, 99, 165, 244, 7, 9, 27, 45, 119, 153, 176, 203, 70, 202, 69, 207, 74, 222, 121, 139, 134, 145, 168, 227, 62, 66, 198, 81, 243, 14, 18, 54, 90, 238, 41, 123, 141, 140, 143, 138, 133, 148, 167, 242, 13, 23, 57, 75, 221, 124, 132, 151, 162, 253, 28, 36, 108, 180, 199, 82, 246], 255 ], [ [0, 0, 7, 1, 14, 2, 8, 56, 21, 57, 9, 90, 15, 31, 63, 3, 28, 4, 64, 67, 16, 112, 97, 32, 22, 47, 38, 58, 70, 91, 10, 108, 35, 109, 11, 87, 71, 79, 74, 92, 23, 82, 119, 48, 104, 59, 39, 100, 29, 19, 54, 5, 45, 68, 65, 95, 77, 33, 98, 117, 17, 43, 115, 113, 42, 114, 116, 76, 18, 53, 94, 44, 78, 73, 86, 34, 81, 118, 99, 103, 30, 62, 89, 20, 126, 6, 55, 13, 111, 96, 66, 27, 46, 37, 107, 69, 36, 106, 26, 110, 61, 88, 12, 125, 52, 93, 75, 41, 72, 
85, 102, 80, 84, 101, 40, 51, 105, 25, 124, 60, 24, 123, 50, 83, 122, 49, 120, 121], [1, 3, 5, 15, 17, 51, 85, 2, 6, 10, 30, 34, 102, 87, 4, 12, 20, 60, 68, 49, 83, 8, 24, 40, 120, 117, 98, 91, 16, 48, 80, 13, 23, 57, 75, 32, 96, 93, 26, 46, 114, 107, 64, 61, 71, 52, 92, 25, 43, 125, 122, 115, 104, 69, 50, 86, 7, 9, 27, 45, 119, 100, 81, 14, 18, 54, 90, 19, 53, 95, 28, 36, 108, 73, 38, 106, 67, 56, 72, 37, 111, 76, 41, 123, 112, 109, 74, 35, 101, 82, 11, 29, 39, 105, 70, 55, 89, 22, 58, 78, 47, 113, 110, 79, 44, 116, 97, 94, 31, 33, 99, 88, 21, 63, 65, 62, 66, 59, 77, 42, 126, 127, 124, 121, 118, 103, 84, 1, 3, 5, 15, 17, 51, 85, 2, 6, 10, 30, 34, 102, 87, 4, 12, 20, 60, 68, 49, 83, 8, 24, 40, 120, 117, 98, 91, 16, 48, 80, 13, 23, 57, 75, 32, 96, 93, 26, 46, 114, 107, 64, 61, 71, 52, 92, 25, 43, 125, 122, 115, 104, 69, 50, 86, 7, 9, 27, 45, 119, 100, 81, 14, 18, 54, 90, 19, 53, 95, 28, 36, 108, 73, 38, 106, 67, 56, 72, 37, 111, 76, 41, 123, 112, 109, 74, 35, 101, 82, 11, 29, 39, 105, 70, 55, 89, 22, 58, 78, 47, 113, 110, 79, 44, 116, 97, 94, 31, 33, 99, 88, 21, 63, 65, 62, 66, 59, 77, 42, 126, 127, 124, 121, 118, 103, 84], 127 ] ] for i in xrange(len(params)): p = params[i] expected_log_t, expected_exp_t, expected_field_charac_t = expected[i] log_t, exp_t, field_charac_t = init_tables(prim=p[0], generator=p[1], c_exp=p[2]) self.assertEqual( field_charac_t, expected_field_charac_t ) self.assertEqual( list(log_t) , expected_log_t ) self.assertEqual( list(exp_t) , expected_exp_t ) def test_consistent_erasures_report(self): # Ensure we always at least return the erasures we used as input _ = init_tables() msg = rs_encode_msg(bytes(range(10)), nsym=4) self.assertEqual(rs_correct_msg(msg, nsym=4, erase_pos=[1])[2], [1]) self.assertEqual(rs_correct_msg(msg, nsym=4, erase_pos=[1])[2], [1]) msg[1] = 0xFF self.assertEqual(rs_correct_msg(msg, nsym=4)[2], [1]) self.assertEqual(rs_correct_msg(msg, nsym=4, erase_pos=[1])[2], [1]) def test_erasures_chunking(self): # Test whether 
providing positions for erasures in the 2nd chunk or later is working rs = RSCodec(30) encoded = rs.encode(b'0' * 226) _, _, _ = rs.decode(encoded, erase_pos=[255], only_erasures=True) # If it works, no exception should be raised def test_too_many_ecc_symbols(self): RSCodec(254).encode(b'a') self.assertRaises(ValueError, RSCodec, 255) # nsym=255 self.assertRaises(ValueError, RSCodec, 2000) # nsym=2000 class TestBigReedSolomon(unittest.TestCase): def test_find_prime_polys(self): self.assertEqual(find_prime_polys(c_exp=4), [19, 25]) self.assertEqual(find_prime_polys(c_exp=8, fast_primes=False), [285, 299, 301, 333, 351, 355, 357, 361, 369, 391, 397, 425, 451, 463, 487, 501]) self.assertEqual(find_prime_polys(c_exp=8, fast_primes=True), [397, 463, 487]) self.assertEqual(find_prime_polys(c_exp=9, fast_primes=True, single=True), 557) def test_c_exp_9(self): rsc = RSCodec(12, c_exp=9) rsc2 = RSCodec(12, nsize=511) self.assertEqual(rsc.c_exp, rsc2.c_exp) self.assertEqual(rsc.nsize, rsc2.nsize) mes = 'a'*((511-12)*2) mesecc = rsc.encode(mes) mesecc[2] = 1 mesecc[-1] = 1 rmes, rmesecc, errata_pos = rsc.decode(mesecc) self.assertEqual(rsc.check(mesecc), [False, False]) self.assertEqual(rsc.check(rmesecc), [True, True]) self.assertEqual([x for x in rmes], [ord(x) for x in mes]) def test_c_exp_12(self): rsc = RSCodec(12, c_exp=12) rsc2 = RSCodec(12, nsize=4095) self.assertEqual(rsc.c_exp, rsc2.c_exp) self.assertEqual(rsc.nsize, rsc2.nsize) mes = 'a'*(4095-12) mesecc = rsc.encode(mes) mesecc[2] = 1 mesecc[-1] = 1 rmes, rmesecc, errata_pos = rsc.decode(mesecc) self.assertEqual(rsc.check(mesecc), [False]) self.assertEqual(rsc.check(rmesecc), [True]) self.assertEqual([x for x in rmes], [ord(x) for x in mes]) def test_multiple_RSCodec(self): '''Test multiple RSCodec instances with different parameters''' mes = 'A' * 30 rs_256 = RSCodec(102) rs_1024 = RSCodec(900, c_exp=10) bytearray(rs_1024.decode(rs_1024.encode(mes))[0]) rs_256.encode(mes) rs_1024.encode(mes) 
bytearray(rs_256.decode(rs_256.encode(mes))[0]) # At this point, there should not have been any exception raised! def test_higher_galois_fields_bytes(self): rs = RSCodec(12, c_exp=12) # same as nsize=4095 str_msg = "This is a message" bytes_msg = b"This is a binary message" result = rs.encode(str_msg) # this always worked result_b = rs.encode(bytes_msg) # this is the dege case that used to fail class TestGFArithmetics(unittest.TestCase): '''Test Galois Field arithmetics''' def test_multiply_nolut(self): '''Try to multiply without look-up tables (necessary to build the look-up tables!)''' a = 30 b = 19 generator=2 prim=0x11d # Compare the LUT multiplication and noLUT init_tables(prim=prim, generator=generator) self.assertEqual(gf_mul(a, b), gf_mult_noLUT(a, b, prim=prim)) # More Galois Field multiplications self.assertEqual( gf_mult_noLUT(5, 6, prim=0x11b, field_charac_full=256) , 30 ) self.assertEqual( gf_mult_noLUT(3, 125, prim=0x11b, field_charac_full=256) , 135 ) self.assertEqual( gf_mult_noLUT(2, 200, prim=0x11d, field_charac_full=256) , 141 ) self.assertEqual( gf_mult_noLUT_slow(2, 200, prim=0x11d) , 141 ) # Multiplications in GF(2^7) self.assertEqual( gf_mult_noLUT(3, 125, prim=0xfd, field_charac_full=128) , 122 ) # Multiplications outside of the finite field (we revert to standard integer multiplications just to see if it works) self.assertEqual( gf_mult_noLUT(3, 125, prim=0, carryless=False) , 375 ) self.assertEqual( gf_mult_noLUT_slow(4, 125, prim=0) , 500 ) # the second method, just to check that everything's alright def test_gf_operations(self): '''Try various Galois Field 2 operations''' init_tables(prim=0x11d, generator=2, c_exp=8) a = 30 b = 19 # Addition and subtraction (they are the same in GF(2^p) self.assertEqual(gf_add(0, 0), 0) self.assertEqual(gf_add(0, 0), gf_sub(0, 0)) self.assertEqual(gf_add(1, 0), 1) self.assertEqual(gf_add(1, 0), gf_sub(1, 0)) self.assertEqual(gf_add(0, 1), 1) self.assertEqual(gf_add(0, 1), gf_sub(0, 1)) 
self.assertEqual(gf_add(1, 1), 0) self.assertEqual(gf_add(1, 1), gf_sub(1, 1)) self.assertEqual(gf_add(a, b), 13) self.assertEqual(gf_add(a, b), gf_sub(a, b)) self.assertEqual(gf_add(0, b), b) self.assertEqual(gf_add(0, b), gf_sub(0, b)) self.assertEqual(gf_add(a, 0), a) self.assertEqual(gf_add(a, 0), gf_sub(a,0)) self.assertEqual(gf_add(a, 1), (a+1)) self.assertEqual(gf_add(a, 1), gf_sub(a, 1)) self.assertEqual(gf_add(1, a), (a+1)) self.assertEqual(gf_add(1, a), gf_sub(1, a)) self.assertEqual(gf_add(255, 1), 254) # Negation self.assertEqual(gf_neg(a), a) self.assertEqual(gf_neg(b), b) # Division self.assertEqual(gf_div(a, 1), a) self.assertEqual(gf_div(12, 3), 4) self.assertEqual(gf_div(a, b), 222) self.assertEqual(gf_div(b, a), 25) self.assertEqual(gf_div(0, a), 0) self.assertRaises(ZeroDivisionError, gf_div, *[a, 0]) class TestSimpleFuncs(unittest.TestCase): '''Test simple functions and see if the results are equivalent with optimized functions''' def test_gf_poly_mul_simple(self): a = [1, 12, 14, 9] b = [0, 23, 2, 15] self.assertEqual(gf_poly_mul(a, b), gf_poly_mul_simple(a, b)) def test_gf_poly_neg(self): a = [1, 12, 14, 9] self.assertEqual(gf_poly_neg(a), a) def test_rs_simple_encode_msg(self): a = bytearray("hello world", "latin1") nsym = 10 self.assertEqual(rs_simple_encode_msg(a, nsym), rs_encode_msg(a, nsym)) class TestRSCodecUniversalCrossValidation(unittest.TestCase): '''Ultimate set of tests of a full set of different parameters for encoding and decoding. 
If this passes, the codec is universal and can correctly interface with any other RS codec!''' def test_main(self): def cartesian_product_dict_items(dicts): return (dict(izip(dicts, x)) for x in itertools.product(*dicts.values())) debugg = False # if one or more tests don't pass, you can enable this flag to True to get verbose output to debug orig_mes = bytearray("hello world", "latin1") n = len(orig_mes)*2 k = len(orig_mes) nsym = n-k istart = 0 params = {"count": 5, "fcr": [120, 0, 1, 1, 1], "prim": [0x187, 0x11d, 0x11b, 0xfd, 0xfd], "generator": [2, 2, 3, 3, 2], "c_exponent": [8, 8, 8, 7, 7], } cases = { "errmode": [1, 2, 3, 4], "erratasnb_errorsnb_onlyeras": [[8, 3, False], [6, 5, False], [5, 5, False], [11, 0, True], [11, 0, False], [0,0, False]], # errata number (errors+erasures), erasures number and only_erasures: the last item is the value for only_erasures (True/False) } ############################$ results_br = [] results_rs = [] it = 0 for p in xrange(params["count"]): fcr = params["fcr"][p] prim = params["prim"][p] generator = params["generator"][p] c_exponent = params["c_exponent"][p] for case in cartesian_product_dict_items(cases): errmode = case["errmode"] erratanb = case["erratasnb_errorsnb_onlyeras"][0] errnb = case["erratasnb_errorsnb_onlyeras"][1] only_erasures = case["erratasnb_errorsnb_onlyeras"][2] it += 1 if debugg: print("it ", it) print("param", p) print(case) # REEDSOLO # Init the RS codec init_tables(generator=generator, prim=prim, c_exp=c_exponent) g = rs_generator_poly_all(n, fcr=fcr, generator=generator) # Encode the message rmesecc = rs_encode_msg(orig_mes, n-k, gen=g[n-k]) rmesecc_orig = rmesecc[:] # make a copy of the original message to check later if fully corrected (because the syndrome may be wrong sometimes) # Tamper the message if erratanb > 0: if errmode == 1: sl = slice(istart, istart+erratanb) elif errmode == 2: sl = slice(-istart-erratanb-(n-k), -(n-k)) elif errmode == 3: sl = slice(-istart-erratanb-1, -1) elif errmode == 
4: sl = slice(-istart-erratanb, None) if debugg: print("Removed slice:", list(rmesecc[sl]), rmesecc[sl]) rmesecc[sl] = [0] * erratanb # replace with null bytes # Generate the erasures positions (if any) erase_pos = [x for x in xrange(len(rmesecc)) if rmesecc[x] == 0] if errnb > 0: erase_pos = erase_pos[:-errnb] # remove the errors positions (must not be known by definition) if debugg: print("erase_pos", erase_pos) print("coef_pos", [len(rmesecc) - 1 - pos for pos in erase_pos]) print("Errata total: ", erratanb-errnb + errnb*2, " -- Correctable? ", (erratanb-errnb + errnb*2 <= nsym)) # Decoding the corrupted codeword # -- Forney syndrome method try: rmes, recc, errata_pos = rs_correct_msg(rmesecc, n-k, fcr=fcr, generator=generator, erase_pos=erase_pos, only_erasures=only_erasures) results_br.append( rs_check(rmes + recc, n-k, fcr=fcr, generator=generator) ) # check if correct by syndrome analysis (can be wrong) results_br.append( rmesecc_orig == (rmes+recc) ) # check if correct by comparing to the original message (always correct) if debugg and not rs_check(rmes + recc, n-k, fcr=fcr, generator=generator) or not (rmesecc_orig == (rmes+recc)): raise ReedSolomonError("False!!!!!") except ReedSolomonError as exc: results_br.append(False) results_br.append(False) if debugg: print("====") print("ERROR! 
Details:") print("param", p) print(case) print(erase_pos) print("original_msg", rmesecc_orig) print("tampered_msg", rmesecc) print("decoded_msg", rmes+recc) print("checks: ", rs_check(rmes + recc, n-k, fcr=fcr, generator=generator), rmesecc_orig == (rmes+recc)) print("====") raise exc # -- Without Forney syndrome method try: mes, ecc, errata_pos = rs_correct_msg_nofsynd(rmesecc, n-k, fcr=fcr, generator=generator, erase_pos=erase_pos, only_erasures=only_erasures) results_br.append( rs_check(rmes + recc, n-k, fcr=fcr, generator=generator) ) results_br.append( rmesecc_orig == (rmes+recc) ) except ReedSolomonError as exc: results_br.append(False) results_br.append(False) if debugg: print("-----") self.assertTrue(results_br.count(True) == len(results_br)) class TestHelperFuncs(unittest.TestCase): '''Test helper functions''' def test_maxerrata(self): rs = RSCodec(10) self.assertEqual(rs.maxerrata(), (5, 10)) self.assertEqual(rs.maxerrata(erasures=8), (1, 8)) self.assertEqual(rs.maxerrata(errors=2), (2, 6)) self.assertRaises(ReedSolomonError, rs.maxerrata, erasures=11) self.assertRaises(ReedSolomonError, rs.maxerrata, errors=6) def test_maxerrata_verbose(self): output = StringIO() sys.stdout = output rs = RSCodec(10) rs.maxerrata(verbose=True) rs.maxerrata(erasures=2, verbose=True) rs.maxerrata(errors=4, verbose=True) sys.stdout = sys.__stdout__ self.assertEqual(output.getvalue(), "This codec can correct up to 5 errors and 10 erasures independently\nThis codec can correct up to 4 errors and 2 erasures simultaneously\nThis codec can correct up to 4 errors and 2 erasures simultaneously\n") if __name__ == "__main__": unittest.main() reedsolomon-1.7.0/tox.ini000066400000000000000000000014311436142644400153670ustar00rootroot00000000000000# Tox (https://tox.testrun.org/) is a tool for running tests # in multiple virtualenvs (and python versions). This configuration file will run the # test suite on all supported python versions. 
To use it, "pip install tox" # and then run "tox" from this directory. [tox] # deprecation warning: py{26,32,33,34} envlist = py{27,33,34,35,36,37,38,39,310,py,py3}, setup.py [testenv] passenv = CI TRAVIS TRAVIS_* TOXENV CODECOV_* deps = nose coverage coveralls cython commands = nosetests --with-coverage -d -v - coveralls # no cython/numpy/pandas for py{py,py3,26,33,34} [testenv:pypy] # remove cython deps = nose coverage coveralls commands = {[testenv]commands} [testenv:pypy3] deps = nose coverage coveralls commands = {[testenv]commands}