././@PaxHeader0000000000000000000000000000003400000000000010212 xustar0028 mtime=1718032361.2306235 gfloat-0.3/0000755000175000017500000000000000000000000010745 5ustar00awfawf././@PaxHeader0000000000000000000000000000003400000000000010212 xustar0028 mtime=1718032361.2306235 gfloat-0.3/.github/0000755000175000017500000000000000000000000012305 5ustar00awfawf././@PaxHeader0000000000000000000000000000003400000000000010212 xustar0028 mtime=1718032361.2306235 gfloat-0.3/.github/workflows/0000755000175000017500000000000000000000000014342 5ustar00awfawf././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1716478626.0 gfloat-0.3/.github/workflows/ci.yaml0000644000175000017500000000163300000000000015624 0ustar00awfawf# Copyright (c) 2024 Graphcore Ltd. All rights reserved. name: CI on: pull_request: push: branches: [main] jobs: pytest-container: runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 - uses: actions/setup-python@v4 with: python-version: "3.9" cache: "pip" - name: Install requirements run: | pip install -U pip pip install .[dev] pip install -r requirements-test.txt - name: Log installed environment run: | python3 -m pip freeze - name: Pre-commit all files run: | pre-commit run --all-files - name: Run unit tests run: | pytest . - name: MyPy run: | mypy --disallow-untyped-defs --enable-error-code redundant-expr src test - name: Ensure that docs build run: | cd docs && make html ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1714650883.0 gfloat-0.3/.gitignore0000644000175000017500000000615200000000000012741 0ustar00awfawf# Copyright (c) 2024 Graphcore Ltd. All rights reserved. 
# Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] *$py.class # C extensions *.so # Distribution / packaging .Python build/ develop-eggs/ dist/ downloads/ eggs/ .eggs/ lib/ lib64/ parts/ sdist/ var/ wheels/ share/python-wheels/ *.egg-info/ .installed.cfg *.egg MANIFEST # PyInstaller # Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. *.manifest *.spec # Installer logs pip-log.txt pip-delete-this-directory.txt # Unit test / coverage reports htmlcov/ .tox/ .nox/ .coverage .coverage.* .cache nosetests.xml coverage.xml *.cover *.py,cover .hypothesis/ .pytest_cache/ cover/ # Translations *.mo *.pot # Django stuff: *.log local_settings.py db.sqlite3 db.sqlite3-journal # Flask stuff: instance/ .webassets-cache # Scrapy stuff: .scrapy # Sphinx documentation docs/build/ # PyBuilder .pybuilder/ target/ # Jupyter Notebook .ipynb_checkpoints # IPython profile_default/ ipython_config.py # pyenv # For a library or package, you might want to ignore these files since the code is # intended to run in multiple environments; otherwise, check them in: # .python-version # pipenv # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. # However, in case of collaboration, if having platform-specific dependencies or dependencies # having no cross-platform support, pipenv may install dependencies that don't work, or not # install all needed dependencies. #Pipfile.lock # poetry # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. # This is especially recommended for binary packages to ensure reproducibility, and is more # commonly ignored for libraries. # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control #poetry.lock # pdm # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 
#pdm.lock # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it # in version control. # https://pdm.fming.dev/#use-with-ide .pdm.toml # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm __pypackages__/ # Celery stuff celerybeat-schedule celerybeat.pid # SageMath parsed files *.sage.py # Environments .env .venv env/ venv/ ENV/ env.bak/ venv.bak/ # Spyder project settings .spyderproject .spyproject # Rope project settings .ropeproject # mkdocs documentation /site # mypy .mypy_cache/ .dmypy.json dmypy.json # Pyre type checker .pyre/ # pytype static type analyzer .pytype/ # Cython debug symbols cython_debug/ # PyCharm # JetBrains specific template is maintained in a separate JetBrains.gitignore that can # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore # and can be added to the global gitignore or merged into this file. For a more nuclear # option (not recommended) you can uncomment the following to ignore the entire idea folder. #.idea/ .vscode/settings.json .vscode/launch.json ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1716115050.0 gfloat-0.3/.pre-commit-config.yaml0000644000175000017500000000116600000000000015232 0ustar00awfawf# Copyright (c) 2024 Graphcore Ltd. All rights reserved. repos: - repo: https://github.com/pre-commit/pre-commit-hooks rev: v4.6.0 hooks: - id: check-yaml - id: end-of-file-fixer - id: trailing-whitespace - repo: https://github.com/psf/black rev: 24.4.0 hooks: - id: black-jupyter - repo: local hooks: - id: etc/check-copyright.sh name: check copyright entry: etc/check-copyright.sh language: script exclude: | (?x)( ^docs/Makefile$| ^docs/make.bat$| (/|)requirements.*\.txt$ ) ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1716115050.0 gfloat-0.3/.readthedocs.yaml0000644000175000017500000000035700000000000014201 0ustar00awfawf# Copyright (c) 2024 Graphcore Ltd. 
All rights reserved. version: 2 build: os: "ubuntu-22.04" tools: python: "3.10" python: install: - requirements: docs/requirements-rtd.txt sphinx: configuration: docs/source/conf.py ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1716115050.0 gfloat-0.3/BUILDING.md0000644000175000017500000000025000000000000012461 0ustar00awfawf ## BUILDING ``` pip install -e . ( cd docs && make html ) ``` #### Pushing ``` sh etc/package.sh ``` ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1718031904.0 gfloat-0.3/ChangeLog0000644000175000017500000000062300000000000012520 0ustar00awfawf 0.3: Jun 10, 2024 - Use python ints throughout, adding float64 to test - Simplify round, fix directed rounding - Rename "ival" to "code" in FloatValue - Shorten format names from "format_info_*" to "*" 0.2: May 21, 2024 - Add MX Formats - Improved CI - Add value table pretty-printing 0.1: May 2, 2024 - First released version Copyright (c) 2024 Graphcore Ltd. All rights reserved. ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1714650883.0 gfloat-0.3/LICENSE0000644000175000017500000000210500000000000011750 0ustar00awfawfMIT License Copyright (c) 2023 Graphcore Ltd. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ././@PaxHeader0000000000000000000000000000003400000000000010212 xustar0028 mtime=1718032361.2306235 gfloat-0.3/PKG-INFO0000644000175000017500000000704000000000000012043 0ustar00awfawfMetadata-Version: 2.1 Name: gfloat Version: 0.3 Summary: Generic floating point handling in Python Author-email: Andrew Fitzgibbon Classifier: Intended Audience :: Developers Classifier: License :: OSI Approved :: MIT License Classifier: Programming Language :: Python :: 3 Classifier: Development Status :: 3 - Alpha Requires-Python: >=3.8.1 Description-Content-Type: text/markdown License-File: LICENSE Requires-Dist: numpy Provides-Extra: dev Requires-Dist: pytest; extra == "dev" Requires-Dist: ml_dtypes; extra == "dev" Requires-Dist: pre-commit; extra == "dev" Requires-Dist: black; extra == "dev" Requires-Dist: mypy; extra == "dev" Requires-Dist: black[jupyter]; extra == "dev" Requires-Dist: isort; extra == "dev" Requires-Dist: sphinx==7.1.2; extra == "dev" Requires-Dist: sphinx-rtd-theme==1.3.0rc1; extra == "dev" Requires-Dist: sphinx_paramlinks; extra == "dev" Requires-Dist: myst_nb; extra == "dev" Requires-Dist: airium; extra == "dev" Requires-Dist: pandas; extra == "dev" # gfloat: Generic floating-point types in Python An implementation of generic floating point encode/decode logic, handling various current and proposed floating point types: - [IEEE 754](https://en.wikipedia.org/wiki/IEEE_754): Binary16, Binary32 - [OCP 
Float8](https://www.opencompute.org/documents/ocp-8-bit-floating-point-specification-ofp8-revision-1-0-2023-06-20-pdf): E5M2, E4M3 - [IEEE WG P3109](https://github.com/awf/P3109-Public/blob/main/Shared%20Reports/P3109%20WG%20Interim%20report.pdf): P{p} for p in 1..7 - [OCP MX Formats](https://www.opencompute.org/documents/ocp-microscaling-formats-mx-v1-0-spec-final-pdf): E2M1, E2M3, E3M2, E8M0, INT8, and the MX block formats. The library favours readability and extensibility over speed - for fast implementations of these datatypes see, for example, [ml_dtypes](https://github.com/jax-ml/ml_dtypes), [bitstring](https://github.com/scott-griffiths/bitstring), [MX PyTorch Emulation Library](https://github.com/microsoft/microxcaling). See https://gfloat.readthedocs.io for documentation, or dive into the notebooks to explore the formats. For example, here's a table from the [02-value-stats](docs/source/02-value-stats.ipynb) notebook: |name|B: Bits in the format|P: Precision in bits|E: Exponent field width in bits|0 # gfloat: Generic floating-point types in Python An implementation of generic floating point encode/decode logic, handling various current and proposed floating point types: - [IEEE 754](https://en.wikipedia.org/wiki/IEEE_754): Binary16, Binary32 - [OCP Float8](https://www.opencompute.org/documents/ocp-8-bit-floating-point-specification-ofp8-revision-1-0-2023-06-20-pdf): E5M2, E4M3 - [IEEE WG P3109](https://github.com/awf/P3109-Public/blob/main/Shared%20Reports/P3109%20WG%20Interim%20report.pdf): P{p} for p in 1..7 - [OCP MX Formats](https://www.opencompute.org/documents/ocp-microscaling-formats-mx-v1-0-spec-final-pdf): E2M1, E2M3, E3M2, E8M0, INT8, and the MX block formats. 
The library favours readability and extensibility over speed - for fast implementations of these datatypes see, for example, [ml_dtypes](https://github.com/jax-ml/ml_dtypes), [bitstring](https://github.com/scott-griffiths/bitstring), [MX PyTorch Emulation Library](https://github.com/microsoft/microxcaling). See https://gfloat.readthedocs.io for documentation, or dive into the notebooks to explore the formats. For example, here's a table from the [02-value-stats](docs/source/02-value-stats.ipynb) notebook: |name|B: Bits in the format|P: Precision in bits|E: Exponent field width in bits|0NUL 2>NUL if errorlevel 9009 ( echo. echo.The 'sphinx-build' command was not found. Make sure you have Sphinx echo.installed, then set the SPHINXBUILD environment variable to point echo.to the full path of the 'sphinx-build' executable. Alternatively you echo.may add the Sphinx directory to PATH. echo. echo.If you don't have Sphinx installed, grab it from echo.http://sphinx-doc.org/ exit /b 1 ) %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% goto end :help %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% :end popd ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1716478626.0 gfloat-0.3/docs/requirements-rtd.txt0000644000175000017500000000000700000000000015745 0ustar00awfawf.[dev] ././@PaxHeader0000000000000000000000000000003400000000000010212 xustar0028 mtime=1718032361.2306235 gfloat-0.3/docs/source/0000755000175000017500000000000000000000000013175 5ustar00awfawf././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1717580751.0 gfloat-0.3/docs/source/01-decode.ipynb0000644000175000017500000003143700000000000015711 0ustar00awfawf{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "\n", "\n", "# GFloat Basics\n", "\n", "This notebook shows the use of `decode_float` to explore properties of some float formats.\n" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, 
"outputs": [], "source": [ "# Install packages\n", "from pandas import DataFrame\n", "import numpy as np\n", "\n", "from gfloat import decode_float\n", "from gfloat.formats import *" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## List all the values in a format\n", "\n", "The first example shows how to list all values in a given format.\n", "We will choose the [OCP](https://www.opencompute.org/documents/ocp-8-bit-floating-point-specification-ofp8-revision-1-0-2023-12-01-pdf-1) E5M2 format.\n", "\n", "The object `format_info_ocp_e5m2` is from the `gfloat.formats` package, and describes the characteristics of that format:" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "FormatInfo(name='ocp_e5m2', k=8, precision=3, emax=15, has_nz=True, has_infs=True, num_high_nans=3, has_subnormals=True, is_signed=True, is_twos_complement=False)" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "format_info_ocp_e5m2" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We shall use the format to decode all values from 0..255, and gather them in a pandas DataFrame.\n", "We see that `decode_float` returns a lot more than just the value - it also splits out the exponent, significand, and sign, and returns the `FloatClass`, which allows us to distinguish normal and subnormal numbers, as well as zero, infinity, and nan." ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
fvalexpexpvalsignificandfsignificandsignbitfclass
code
00.000000e+000-1400.000FloatClass.ZERO
11.525879e-050-1410.250FloatClass.SUBNORMAL
23.051758e-050-1420.500FloatClass.SUBNORMAL
34.577637e-050-1430.750FloatClass.SUBNORMAL
46.103516e-051-1401.000FloatClass.NORMAL
........................
251-5.734400e+04301531.751FloatClass.NORMAL
252-inf311601.001FloatClass.INFINITE
253NaN311611.251FloatClass.NAN
254NaN311621.501FloatClass.NAN
255NaN311631.751FloatClass.NAN
\n", "

256 rows × 7 columns

\n", "
" ], "text/plain": [ " fval exp expval significand fsignificand signbit \\\n", "code \n", "0 0.000000e+00 0 -14 0 0.00 0 \n", "1 1.525879e-05 0 -14 1 0.25 0 \n", "2 3.051758e-05 0 -14 2 0.50 0 \n", "3 4.577637e-05 0 -14 3 0.75 0 \n", "4 6.103516e-05 1 -14 0 1.00 0 \n", "... ... ... ... ... ... ... \n", "251 -5.734400e+04 30 15 3 1.75 1 \n", "252 -inf 31 16 0 1.00 1 \n", "253 NaN 31 16 1 1.25 1 \n", "254 NaN 31 16 2 1.50 1 \n", "255 NaN 31 16 3 1.75 1 \n", "\n", " fclass \n", "code \n", "0 FloatClass.ZERO \n", "1 FloatClass.SUBNORMAL \n", "2 FloatClass.SUBNORMAL \n", "3 FloatClass.SUBNORMAL \n", "4 FloatClass.NORMAL \n", "... ... \n", "251 FloatClass.NORMAL \n", "252 FloatClass.INFINITE \n", "253 FloatClass.NAN \n", "254 FloatClass.NAN \n", "255 FloatClass.NAN \n", "\n", "[256 rows x 7 columns]" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "fmt = format_info_ocp_e5m2\n", "vals = [decode_float(fmt, i) for i in range(256)]\n", "DataFrame(vals).set_index(\"code\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Additional format info: special values, min, max, dynamic range\n", "\n", "In addition, `FormatInfo` can tell us about other characteristics of each format.\n", "To reproduce some of the OCP spec's tables 1 and 2:" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Format ocp_e4m3 ocp_e5m2 p3109_p3\n", "Max exponent (emax) 8 15 15\n", "Exponent bias 7 15 16\n", "Infinities 0 2 2\n", "Number of NaNs 2 6 1\n", "Number of zeros 2 2 1\n", "Max normal number 448.0 57344.0 49152.0\n", "Min normal number 0.015625 6.103515625e-05 3.0517578125e-05\n", "Min subnormal number 0.001953125 1.52587890625e-05 7.62939453125e-06\n", "Dynamic range (binades) 18 32 33\n" ] } ], "source": [ "def compute_dynamic_range(fi):\n", " return np.log2(fi.max / fi.smallest)\n", "\n", "\n", "for prop, probe in (\n", " (\"Format \", lambda fi: 
fi.name.replace(\"format_info_\", \"\")),\n", " (\"Max exponent (emax) \", lambda fi: fi.emax),\n", " (\"Exponent bias \", lambda fi: fi.expBias),\n", " (\"Infinities \", lambda fi: 2 * int(fi.has_infs)),\n", " (\"Number of NaNs \", lambda fi: fi.num_nans),\n", " (\"Number of zeros \", lambda fi: int(fi.has_zero) + int(fi.has_nz)),\n", " (\"Max normal number \", lambda fi: fi.max),\n", " (\"Min normal number \", lambda fi: fi.smallest_normal),\n", " (\"Min subnormal number \", lambda fi: fi.smallest_subnormal),\n", " (\"Dynamic range (binades)\", lambda x: round(compute_dynamic_range(x))),\n", "):\n", " print(\n", " f\"{prop} {probe(format_info_ocp_e4m3):<20} {probe(format_info_ocp_e5m2):<20} {probe(format_info_p3109(3))}\"\n", " )" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## How do subnormals affect dynamic range?\n", "\n", "Most, if not all, low-precision formats include subnormal numbers, as they increase the number of values near zero, and increase dynamic range.\n", "A natural question is \"by how much?\". To answer this, we can create a mythical new format, a copy of `e4m3`, but with `has_subnormals` set to false." 
] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "import copy\n", "\n", "e4m3_no_subnormals = copy.copy(format_info_ocp_e4m3)\n", "e4m3_no_subnormals.has_subnormals = False" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "And now compute the dynamic range with and without:" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Dynamic range with subnormals = 17.807354922057606\n", "Dynamic range without subnormals = 15.637429920615292\n", "Ratio = 4.5\n" ] } ], "source": [ "dr_with = compute_dynamic_range(format_info_ocp_e4m3)\n", "dr_without = compute_dynamic_range(e4m3_no_subnormals)\n", "\n", "print(f\"Dynamic range with subnormals = {dr_with}\")\n", "print(f\"Dynamic range without subnormals = {dr_without}\")\n", "print(f\"Ratio = {2**(dr_with - dr_without):.1f}\")" ] } ], "metadata": { "kernelspec": { "display_name": "ml_dtypes", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.0" } }, "nbformat": 4, "nbformat_minor": 2 } ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1716115050.0 gfloat-0.3/docs/source/02-value-stats.ipynb0000644000175000017500000006074600000000000016744 0ustar00awfawf{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "\n", "\n", "# Collect value statistics for formats\n", "\n", "This notebook computes various statistics for a variety of float formats,\n", "by exhaustively enumerating the values. Naturally, most of these statistics can be computed directly, and indeed many are already supplied on the `FormatInfo` class as methods, for example `max`, `smallest_subnormal`, etc. 
However, this method serves as a useful cross-check against the direct formulae.\n", "\n", "## Statistics collected\n", "\n", " - name: Format\n", " - B: Bits in the format\n", " - P: Precision in bits\n", " - E: Exponent field width in bits\n", " - T: Trailing significand field width in bits\n", " - lt1: Number of values x such that `0 < x < 1`\n", " - gt1: Number of values x such that `1 < x < Inf`\n", " - rt16: True if all values are exactly representable in IEEE binary16\n", " - maxFinite: Largest finite value\n", " - minFinite: Smallest (most negative) finite value\n", " - maxNormal: Largest finite normal value, NaN if all finite values are subnormal\n", " - minNormal: Smallest positive normal value, NaN if all finite values are subnormal\n", " - minSubnormal: Smallest positive subnormal value, NaN if no finite values are subnormal\n", " - maxSubnormal: Largest subnormal value, NaN if no finite values are subnormal\n" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "from gfloat import *\n", "from gfloat.formats import *\n", "\n", "import pandas\n", "import numpy as np\n", "from IPython.display import HTML\n", "\n", "\n", "def collect_stats(fi: FormatInfo):\n", " # Generate all values\n", " values = [decode_float(fi, i) for i in range(2**fi.bits)]\n", " df = pandas.DataFrame(values)\n", "\n", " # Extract format information parameters\n", " E = fi.expBits\n", " S = fi.tSignificandBits\n", "\n", " # Compute statistics: lt1,gt1\n", " fval = df[\"fval\"]\n", " total_01 = fval.between(0, 1, inclusive=\"neither\").sum()\n", " total_1Inf = fval.between(1, np.inf, inclusive=\"neither\").sum()\n", "\n", " # Compute statistics: maxFinite,minFinite\n", " finite_vals = fval[np.isfinite(fval)]\n", " maxFinite = finite_vals.loc[finite_vals.idxmax()]\n", " minFinite = finite_vals.loc[finite_vals.idxmin()]\n", "\n", " # Compute statistics: maxNormal,minNormal\n", " normal_vals = fval[(df[\"fclass\"] == FloatClass.NORMAL) & (fval > 0)]\n", " 
maxNormal = normal_vals.loc[normal_vals.idxmax()] if normal_vals.any() else np.nan\n", " minNormal = normal_vals.loc[normal_vals.idxmin()] if normal_vals.any() else np.nan\n", "\n", " # Compute statistics: minSubnormal\n", " pos_subnormal = fval[(df[\"fclass\"] == FloatClass.SUBNORMAL) & (fval > 0)]\n", " maxSubnormal = (\n", " pos_subnormal.loc[pos_subnormal.idxmax()] if pos_subnormal.any() else np.nan\n", " )\n", " minSubnormal = (\n", " pos_subnormal.loc[pos_subnormal.idxmin()] if pos_subnormal.any() else np.nan\n", " )\n", "\n", " # Compute roundtrips: rt16, rt32\n", " with np.errstate(over=\"ignore\"):\n", " rt16 = (np.float64(np.float16(fval)) == np.float64(fval)) | ~np.isfinite(fval)\n", " rt32 = (np.float64(np.float32(fval)) == np.float64(fval)) | ~np.isfinite(fval)\n", "\n", " rt16 = rt16.all()\n", " rt32 = rt32.all()\n", " assert rt32 # If not, we should include rt32 in the table\n", "\n", " # Assemble tuple\n", " return dict(\n", " name=fi.name,\n", " B=fi.bits,\n", " P=fi.precision,\n", " E=E,\n", " T=S,\n", " lt1=total_01,\n", " gt1=total_1Inf,\n", " rt16=rt16,\n", " maxFinite=maxFinite,\n", " minFinite=minFinite,\n", " maxNormal=maxNormal,\n", " minNormal=minNormal,\n", " minSubnormal=minSubnormal,\n", " maxSubnormal=maxSubnormal,\n", " )\n", "\n", "\n", "formats_to_check = (\n", " tiny_formats\n", " + fp8_formats\n", " + [format_info_bfloat16, format_info_ocp_int8, format_info_ocp_e8m0]\n", ")\n", "stats = [collect_stats(fi) for fi in formats_to_check]\n", "df = pandas.DataFrame(stats)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Emit HTML table" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
nameBPETlt1gt1rt16maxFiniteminFinitemaxNormalminNormalminSubnormalmaxSubnormal
ocp_e2m1422115True 6 -6 6 1 0.5 0.5
ocp_e2m36423723True 7.5 -7.5 7.5 1 0.125 0.875
ocp_e3m263321119True 28 -28 28 0.25 0.0625 0.1875
ocp_e4m384435570True 448 -448 4480.0156251*2^-97/4*2^-7
ocp_e5m283525963True 57344 -57344 573441*2^-141*2^-163/2*2^-15
p3109_p181706263False1*2^63-1*2^631*2^631*2^-62nannan
p3109_p282616362False1*2^31-1*2^311*2^311*2^-311*2^-321*2^-32
p3109_p383526362True 49152 -49152 491521*2^-151*2^-173/2*2^-16
p3109_p484436362True 224 -224 2240.00781251*2^-107/4*2^-8
p3109_p585346362True 15 -15 15 0.1250.007812515/8*2^-4
p3109_p686256362True 3.875 -3.875 3.875 0.50.01562531/16*2^-2
bfloat16168871625516383False255/128*2^127-255/128*2^127255/128*2^1271*2^-1261*2^-133127/64*2^-127
ocp_int888076363True127/64*2^0 -2nannan0.015625127/64*2^0
ocp_e8m08180127127False1*2^1271*2^-1271*2^1271*2^-127nannan
\n" ], "text/plain": [ "" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Special rendering for float values - if they don't render nicely in 8.5g,\n", "# use float_pow2str\n", "def render_float(v):\n", " s = f\"{v:8.5g}\"\n", " if not \"e\" in s and float(s) == v:\n", " return s\n", " else:\n", " return float_pow2str(v)\n", "\n", "\n", "for field in (\n", " \"maxFinite\",\n", " \"minFinite\",\n", " \"maxNormal\",\n", " \"minNormal\",\n", " \"minSubnormal\",\n", " \"maxSubnormal\",\n", "):\n", " df[field] = df[field].map(render_float)\n", "\n", "\n", "HTML(df.style.hide().to_html())" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.3" } }, "nbformat": 4, "nbformat_minor": 2 } ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1718031914.0 gfloat-0.3/docs/source/03-value-tables.ipynb0000644000175000017500000043716600000000000017065 0ustar00awfawf{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "\n", "\n", "# Making value tables\n", "\n", "In this notebook, we generate value tables akin to those at [P3109](https://htmlpreview.github.io/?https://raw.githubusercontent.com/P3109/Public/main/Value%20Tables/html/index.html).\n", "\n", "These tables comprise one-line summaries of each float value in the form\n", "```text\n", "Code Binary = Exact binary E = Float16 equivalent Float16 binary E = Float Value\n", "0x21 0_0100_001 = +0b1.001*2^-4 = 0_01011_0010000000 +0b1.0010000000*2^-4 = ~0.0703\n", "```" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "from gfloat import *\n", "from gfloat.formats import *\n", "import numpy as np\n", "from IPython.display 
import HTML\n", "import airium" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Define some helpers.\n", "\n", "### Render with underscores separating s_e_m\n", "\n", "E.g `0_1011_110`. For formats with zero significand bits or zero exponent bits, we use `0_1011110_` or `0__10111110`." ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "def str_bits_with_underscores(fi, fv):\n", " # 0_1011110_\n", " if fi.tSignificandBits == 0:\n", " return f\"{fv.signbit}_{fv.exp:0{fi.expBits}b}_\"\n", "\n", " # 0__1011110\n", " if fi.expBits == 0:\n", " return f\"{fv.signbit}__{fv.significand:0{fi.tSignificandBits}b}\"\n", "\n", " # 0_101_1110\n", " return (\n", " f\"{fv.signbit}_{fv.exp:0{fi.expBits}b}_{fv.significand:0{fi.tSignificandBits}b}\"\n", " )\n", "\n", "\n", "fi = format_info_p3109(3)\n", "assert str_bits_with_underscores(fi, decode_float(fi, 0x41)) == \"0_10000_01\"\n", "\n", "fi = format_info_p3109(1)\n", "assert str_bits_with_underscores(fi, decode_float(fi, 0x41)) == \"0_1000001_\"\n", "\n", "fi = format_info_p3109(7)\n", "assert str_bits_with_underscores(fi, decode_float(fi, 0x41)) == \"0_1_000001\"" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Render a binary16 value\n", "\n", "Returns two strings, like this:\n", "```\n", "'0_00010_1010000000', '+0b1.1010000000*2^-13'\n", "```" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "import struct\n", "\n", "\n", "def b16_str(val) -> tuple[str, str]:\n", " \"\"\"\n", " Represent VAL in binary16.\n", "\n", " If val does not convert exactly to binary16,\n", " returns \"\"\n", " \"\"\"\n", " with np.errstate(over=\"ignore\"):\n", " b16 = np.float16(val)\n", "\n", " if float(b16) != val and np.isfinite(b16):\n", " # Finite, but not representable in float16\n", " return f\"\", \"\"\n", " b16_int = struct.unpack(\"!H\", struct.pack(\"!e\", b16))[0]\n", "\n", " # bitstr is of the form 0_00000_1100000000\n", 
" s = f\"{b16_int:016b}\"\n", " e_str = s[1:6]\n", " m_str = s[6:]\n", " bitstr = f\"{s[0]}_{e_str}_{m_str}\"\n", "\n", " # pow2str is of the form '+0b0.1100000000*2^-15', or '' for nonfinite values\n", " e = int(e_str, 2) - 15\n", " m = int(m_str, 2)\n", " leading_bit = 0 if e == -15 else 1\n", " signstr = \"-\" if s[0] == \"1\" else \"+\"\n", " if np.isfinite(b16):\n", " pow2str = f\"{signstr}0b{leading_bit}.{m:010b}*2^{e}\"\n", " else:\n", " pow2str = \"\"\n", " return bitstr, pow2str\n", "\n", "\n", "assert b16_str(13 * 2**-16) == (\"0_00010_1010000000\", \"+0b1.1010000000*2^-13\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Print one table row" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "p3109_p3\n", "0x00 0_00000_00 = 0.0\n", "0x01 0_00000_01 = +0b0.01*2^-15 = 0_00000_0010000000 +0b0.0010000000*2^-15 = ~7.629e-06\n", "0x07 0_00001_11 = +0b1.11*2^-15 = 0_00000_1110000000 +0b0.1110000000*2^-15 = ~5.341e-05\n", "0x21 0_01000_01 = +0b1.01*2^-8 = 0_00111_0100000000 +0b1.0100000000*2^-8 = ~0.0049\n", "0x40 0_10000_00 = +0b1.00*2^0 = 0_01111_0000000000 +0b1.0000000000*2^0 = 1.0\n", "0x41 0_10000_01 = +0b1.01*2^0 = 0_01111_0100000000 +0b1.0100000000*2^0 = 1.25\n", "0x7e 0_11111_10 = +0b1.10*2^15 = 0_11110_1000000000 +0b1.1000000000*2^15 = 49152.0\n", "0x7f 0_11111_11 = inf\n", "0x80 1_00000_00 = nan\n", "0x81 1_00000_01 = -0b0.01*2^-15 = 1_00000_0010000000 -0b0.0010000000*2^-15 = ~-7.629e-06\n", "0xe6 1_11001_10 = -0b1.10*2^9 = 1_11000_1000000000 -0b1.1000000000*2^9 = -768.0\n", "0xfe 1_11111_10 = -0b1.10*2^15 = 1_11110_1000000000 -0b1.1000000000*2^15 = -49152.0\n", "0xff 1_11111_11 = -inf\n", "p3109_p1\n", "0x00 0_0000000_ = 0.0\n", "0x01 0_0000001_ = +0b1.0*2^-62 = = ~2.168e-19\n", "0x07 0_0000111_ = +0b1.0*2^-56 = = ~1.388e-17\n", "0x21 0_0100001_ = +0b1.0*2^-30 = = ~9.313e-10\n", "0x40 0_1000000_ = +0b1.0*2^1 = 0_10000_0000000000 +0b1.0000000000*2^1 = 
2.0\n", "0x41 0_1000001_ = +0b1.0*2^2 = 0_10001_0000000000 +0b1.0000000000*2^2 = 4.0\n", "0x7e 0_1111110_ = +0b1.0*2^63 = 0_11111_0000000000 = ~9.223e+18\n", "0x7f 0_1111111_ = inf\n", "0x80 1_0000000_ = nan\n", "0x81 1_0000001_ = -0b1.0*2^-62 = = ~-2.168e-19\n", "0xe6 1_1100110_ = -0b1.0*2^39 = 1_11111_0000000000 = ~-5.498e+11\n", "0xfe 1_1111110_ = -0b1.0*2^63 = 1_11111_0000000000 = ~-9.223e+18\n", "0xff 1_1111111_ = -inf\n" ] } ], "source": [ "def str_tablerow(fi, fv: FloatValue, show_b16_info=True, vs_width=14, vs_d=8):\n", " \"\"\"\n", " Create a string of the form\n", " 0x41 0_10000_01 = +0b1.01*2^0 = 1.25\n", " optionally adding binary16 info\n", " 0x41 0_10000_01 = +0b1.01*2^0 = 0_01111_0100000000 +0b1.0100000000*2^0 = 1.25\n", " \"\"\"\n", " text = []\n", "\n", " # 0x45 0_1000_101\n", " text.append(f\"0x{fv.code:02x} {str_bits_with_underscores(fi, fv)}\")\n", "\n", " finite_nonzero = np.isfinite(fv.fval) and fv.fval != 0\n", "\n", " # = +0b1.101*2^-7 =\n", " if finite_nonzero:\n", "\n", " def signstr(fv):\n", " return \"-\" if fv.signbit else \"+\"\n", "\n", " b = \"0\" if fv.fclass == FloatClass.SUBNORMAL else \"1\"\n", " binary_pow2 = f\"{signstr(fv)}0b{b}.{fv.significand:0{fi.tSignificandBits}b}*2^{fv.expval:<3}\"\n", " text.append(binary_pow2)\n", "\n", " if show_b16_info and finite_nonzero:\n", " b16_binary_str, b16_bscistr = b16_str(fv.fval)\n", " text.append(f\"{b16_binary_str} {b16_bscistr}\")\n", "\n", " # 1.125\n", " text.append(float_tilde_unless_roundtrip_str(fv.fval, width=vs_width, d=vs_d))\n", "\n", " # Return tuple\n", " return \" = \".join(text)\n", "\n", "\n", "for fi in (format_info_p3109(3), format_info_p3109(1)):\n", " print(fi.name)\n", " for i in (\n", " 0x00,\n", " 0x01,\n", " 0x07,\n", " 0x21,\n", " 0x40,\n", " 0x41,\n", " 0x7E,\n", " 0x7F,\n", " 0x80,\n", " 0x81,\n", " 0xE6,\n", " 0xFE,\n", " 0xFF,\n", " ):\n", " print(\n", " str_tablerow(\n", " fi, decode_float(fi, i), show_b16_info=True, vs_width=8, vs_d=4\n", " )\n", " )" ] }, 
{ "cell_type": "markdown", "metadata": {}, "source": [ "## Make HTML table" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "

FP8 Value Table, ocp_e2m1

\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
\n", "
0x00 0_00_0 = 0.0
\n", "
\n", "
0x08 1_00_0 = -0.0
\n", "
\n", "
0x01 0_00_1 = +0b0.1*2^0   = 0.5
\n", "
\n", "
0x09 1_00_1 = -0b0.1*2^0   = -0.5
\n", "
\n", "
0x02 0_01_0 = +0b1.0*2^0   = 1.0
\n", "
\n", "
0x0a 1_01_0 = -0b1.0*2^0   = -1.0
\n", "
\n", "
0x03 0_01_1 = +0b1.1*2^0   = 1.5
\n", "
\n", "
0x0b 1_01_1 = -0b1.1*2^0   = -1.5
\n", "
\n", "
0x04 0_10_0 = +0b1.0*2^1   = 2.0
\n", "
\n", "
0x0c 1_10_0 = -0b1.0*2^1   = -2.0
\n", "
\n", "
0x05 0_10_1 = +0b1.1*2^1   = 3.0
\n", "
\n", "
0x0d 1_10_1 = -0b1.1*2^1   = -3.0
\n", "
\n", "
0x06 0_11_0 = +0b1.0*2^2   = 4.0
\n", "
\n", "
0x0e 1_11_0 = -0b1.0*2^2   = -4.0
\n", "
\n", "
0x07 0_11_1 = +0b1.1*2^2   = 6.0
\n", "
\n", "
0x0f 1_11_1 = -0b1.1*2^2   = -6.0
\n", "
" ], "text/plain": [ "" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "def mktbl(fi: FormatInfo, cols=4, skip_rows=None, **kw):\n", " # Make tables\n", " nvals = 2**fi.bits\n", " rows = nvals // cols\n", "\n", " style = f\"\"\"\n", " div.cell_output td {{\n", " margin: 0pt;\n", " text-align: left;\n", " }}\n", "\n", " div.cell_output table {{\n", " margin: 0pt;\n", " text-align: left;\n", " font-family: monospace;\n", " font-size: xx-small;\n", " font-weight: bold;\n", " border-collapse: collapse;\n", " }}\n", "\n", " \n", " table {{\n", " margin: 0pt;\n", " font-family: monospace;\n", " font-size: xx-small;\n", " font-weight: bold;\n", " border-collapse: collapse;\n", " }}\n", "\n", " tr.blankrow {{\n", " height: 4ex;\n", " vertical-align: top;\n", " }}\n", " \n", " td {{\n", " text-align: left;\n", " border: solid 2px #ccc;\n", " width: {98/cols}%;\n", " }}\n", " \n", " .special {{\n", " color: #874723;\n", " }}\n", " \n", " .subnormal {{\n", " color: #0121a7;\n", " }}\n", " \n", " .normal {{\n", " }}\n", " \n", " @media (prefers-color-scheme: dark) {{\n", " .special {{\n", " color: orange;\n", " }}\n", "\n", " .subnormal {{\n", " color: cyan;\n", " }}\n", " \n", " .normal {{\n", " }}\n", " }}\n", "\n", " pre {{\n", " margin: 1pt 1pt 1pt 13pt;\n", " display: inline;\n", " }}\n", "\"\"\"\n", "\n", " def table_style(fv):\n", " \"\"\"\n", " Select from the table entry styles defined in CSS above.\n", " \"\"\"\n", " if fv.fclass == FloatClass.SUBNORMAL:\n", " return \"subnormal\"\n", "\n", " if fv.fclass == FloatClass.NORMAL:\n", " return \"normal\"\n", "\n", " if fv.fclass == FloatClass.ZERO and not fv.signbit:\n", " return \"normal\"\n", "\n", " # Everyting else is special\n", " return \"special\"\n", "\n", " title = f\"FP8 Value Table, {fi.name}\"\n", " a = airium.Airium()\n", " a.style(_t=style)\n", " a.h3(_t=title)\n", "\n", " with a.table():\n", " for i in range(0, rows):\n", " if skip_rows and (skip_rows[0] <= i < 
skip_rows[1]):\n", " if i == skip_rows[0]:\n", " a.tr(klass=\"blankrow\").td(\"...\")\n", " continue\n", " trklass = \"blankrow\" if i > 0 and i % 16 == 0 else \"\"\n", " with a.tr(klass=trklass):\n", " for n in range(i, nvals, rows):\n", " fv = decode_float(fi, n)\n", " text = str_tablerow(fi, fv, show_b16_info=False, **kw)\n", " a.td(klass=table_style(fv)).pre(_t=text)\n", "\n", " return str(a)\n", "\n", "\n", "HTML(mktbl(format_info_ocp_e2m1, cols=2, vs_width=8, vs_d=3))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### OCP E2M3\n", "\n", "This 6-bit format has 32 values, with no `NaN` or `Inf`, but does have `-0`.\n", "The positive subnormals are the linear ramp of eighths: [n/8 for n in 1:7].\n", "\n", "One might describe the format in text as:\n", "\n", "> zero to one by eighths, two to four by quarters, four to eight by halves\n", "\n", "where \"to\" is open-ended, or \"to\" is not \"thru\"." ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "

FP8 Value Table, ocp_e2m3

\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
\n", "
0x00 0_00_000 = 0.0
\n", "
\n", "
0x20 1_00_000 = -0.0
\n", "
\n", "
0x01 0_00_001 = +0b0.001*2^0   = 0.125
\n", "
\n", "
0x21 1_00_001 = -0b0.001*2^0   = -0.125
\n", "
\n", "
0x02 0_00_010 = +0b0.010*2^0   = 0.25
\n", "
\n", "
0x22 1_00_010 = -0b0.010*2^0   = -0.25
\n", "
\n", "
0x03 0_00_011 = +0b0.011*2^0   = 0.375
\n", "
\n", "
0x23 1_00_011 = -0b0.011*2^0   = -0.375
\n", "
\n", "
0x04 0_00_100 = +0b0.100*2^0   = 0.5
\n", "
\n", "
0x24 1_00_100 = -0b0.100*2^0   = -0.5
\n", "
\n", "
0x05 0_00_101 = +0b0.101*2^0   = 0.625
\n", "
\n", "
0x25 1_00_101 = -0b0.101*2^0   = -0.625
\n", "
\n", "
0x06 0_00_110 = +0b0.110*2^0   = 0.75
\n", "
\n", "
0x26 1_00_110 = -0b0.110*2^0   = -0.75
\n", "
\n", "
0x07 0_00_111 = +0b0.111*2^0   = 0.875
\n", "
\n", "
0x27 1_00_111 = -0b0.111*2^0   = -0.875
\n", "
\n", "
0x08 0_01_000 = +0b1.000*2^0   = 1.0
\n", "
\n", "
0x28 1_01_000 = -0b1.000*2^0   = -1.0
\n", "
\n", "
0x09 0_01_001 = +0b1.001*2^0   = 1.125
\n", "
\n", "
0x29 1_01_001 = -0b1.001*2^0   = -1.125
\n", "
\n", "
0x0a 0_01_010 = +0b1.010*2^0   = 1.25
\n", "
\n", "
0x2a 1_01_010 = -0b1.010*2^0   = -1.25
\n", "
\n", "
0x0b 0_01_011 = +0b1.011*2^0   = 1.375
\n", "
\n", "
0x2b 1_01_011 = -0b1.011*2^0   = -1.375
\n", "
\n", "
0x0c 0_01_100 = +0b1.100*2^0   = 1.5
\n", "
\n", "
0x2c 1_01_100 = -0b1.100*2^0   = -1.5
\n", "
\n", "
0x0d 0_01_101 = +0b1.101*2^0   = 1.625
\n", "
\n", "
0x2d 1_01_101 = -0b1.101*2^0   = -1.625
\n", "
\n", "
0x0e 0_01_110 = +0b1.110*2^0   = 1.75
\n", "
\n", "
0x2e 1_01_110 = -0b1.110*2^0   = -1.75
\n", "
\n", "
0x0f 0_01_111 = +0b1.111*2^0   = 1.875
\n", "
\n", "
0x2f 1_01_111 = -0b1.111*2^0   = -1.875
\n", "
\n", "
0x10 0_10_000 = +0b1.000*2^1   = 2.0
\n", "
\n", "
0x30 1_10_000 = -0b1.000*2^1   = -2.0
\n", "
\n", "
0x11 0_10_001 = +0b1.001*2^1   = 2.25
\n", "
\n", "
0x31 1_10_001 = -0b1.001*2^1   = -2.25
\n", "
\n", "
0x12 0_10_010 = +0b1.010*2^1   = 2.5
\n", "
\n", "
0x32 1_10_010 = -0b1.010*2^1   = -2.5
\n", "
\n", "
0x13 0_10_011 = +0b1.011*2^1   = 2.75
\n", "
\n", "
0x33 1_10_011 = -0b1.011*2^1   = -2.75
\n", "
\n", "
0x14 0_10_100 = +0b1.100*2^1   = 3.0
\n", "
\n", "
0x34 1_10_100 = -0b1.100*2^1   = -3.0
\n", "
\n", "
0x15 0_10_101 = +0b1.101*2^1   = 3.25
\n", "
\n", "
0x35 1_10_101 = -0b1.101*2^1   = -3.25
\n", "
\n", "
0x16 0_10_110 = +0b1.110*2^1   = 3.5
\n", "
\n", "
0x36 1_10_110 = -0b1.110*2^1   = -3.5
\n", "
\n", "
0x17 0_10_111 = +0b1.111*2^1   = 3.75
\n", "
\n", "
0x37 1_10_111 = -0b1.111*2^1   = -3.75
\n", "
\n", "
0x18 0_11_000 = +0b1.000*2^2   = 4.0
\n", "
\n", "
0x38 1_11_000 = -0b1.000*2^2   = -4.0
\n", "
\n", "
0x19 0_11_001 = +0b1.001*2^2   = 4.5
\n", "
\n", "
0x39 1_11_001 = -0b1.001*2^2   = -4.5
\n", "
\n", "
0x1a 0_11_010 = +0b1.010*2^2   = 5.0
\n", "
\n", "
0x3a 1_11_010 = -0b1.010*2^2   = -5.0
\n", "
\n", "
0x1b 0_11_011 = +0b1.011*2^2   = 5.5
\n", "
\n", "
0x3b 1_11_011 = -0b1.011*2^2   = -5.5
\n", "
\n", "
0x1c 0_11_100 = +0b1.100*2^2   = 6.0
\n", "
\n", "
0x3c 1_11_100 = -0b1.100*2^2   = -6.0
\n", "
\n", "
0x1d 0_11_101 = +0b1.101*2^2   = 6.5
\n", "
\n", "
0x3d 1_11_101 = -0b1.101*2^2   = -6.5
\n", "
\n", "
0x1e 0_11_110 = +0b1.110*2^2   = 7.0
\n", "
\n", "
0x3e 1_11_110 = -0b1.110*2^2   = -7.0
\n", "
\n", "
0x1f 0_11_111 = +0b1.111*2^2   = 7.5
\n", "
\n", "
0x3f 1_11_111 = -0b1.111*2^2   = -7.5
\n", "
" ], "text/plain": [ "" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "HTML(mktbl(format_info_ocp_e2m3, cols=2, vs_width=8, vs_d=3))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# And here's a 6-bit \"IEEE-754\" float:\n" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "

FP8 Value Table, 754-fp6

\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
\n", "
0x00 0_000_00 = 0.0
\n", "
\n", "
0x20 1_000_00 = -0.0
\n", "
\n", "
0x01 0_000_01 = +0b0.01*2^-2  = 0.0625
\n", "
\n", "
0x21 1_000_01 = -0b0.01*2^-2  = -0.0625
\n", "
\n", "
0x02 0_000_10 = +0b0.10*2^-2  = 0.125
\n", "
\n", "
0x22 1_000_10 = -0b0.10*2^-2  = -0.125
\n", "
\n", "
0x03 0_000_11 = +0b0.11*2^-2  = 0.1875
\n", "
\n", "
0x23 1_000_11 = -0b0.11*2^-2  = -0.1875
\n", "
\n", "
0x04 0_001_00 = +0b1.00*2^-2  = 0.25
\n", "
\n", "
0x24 1_001_00 = -0b1.00*2^-2  = -0.25
\n", "
\n", "
0x05 0_001_01 = +0b1.01*2^-2  = 0.3125
\n", "
\n", "
0x25 1_001_01 = -0b1.01*2^-2  = -0.3125
\n", "
\n", "
0x06 0_001_10 = +0b1.10*2^-2  = 0.375
\n", "
\n", "
0x26 1_001_10 = -0b1.10*2^-2  = -0.375
\n", "
\n", "
0x07 0_001_11 = +0b1.11*2^-2  = 0.4375
\n", "
\n", "
0x27 1_001_11 = -0b1.11*2^-2  = -0.4375
\n", "
\n", "
0x08 0_010_00 = +0b1.00*2^-1  = 0.5
\n", "
\n", "
0x28 1_010_00 = -0b1.00*2^-1  = -0.5
\n", "
\n", "
0x09 0_010_01 = +0b1.01*2^-1  = 0.625
\n", "
\n", "
0x29 1_010_01 = -0b1.01*2^-1  = -0.625
\n", "
\n", "
0x0a 0_010_10 = +0b1.10*2^-1  = 0.75
\n", "
\n", "
0x2a 1_010_10 = -0b1.10*2^-1  = -0.75
\n", "
\n", "
0x0b 0_010_11 = +0b1.11*2^-1  = 0.875
\n", "
\n", "
0x2b 1_010_11 = -0b1.11*2^-1  = -0.875
\n", "
\n", "
0x0c 0_011_00 = +0b1.00*2^0   = 1.0
\n", "
\n", "
0x2c 1_011_00 = -0b1.00*2^0   = -1.0
\n", "
\n", "
0x0d 0_011_01 = +0b1.01*2^0   = 1.25
\n", "
\n", "
0x2d 1_011_01 = -0b1.01*2^0   = -1.25
\n", "
\n", "
0x0e 0_011_10 = +0b1.10*2^0   = 1.5
\n", "
\n", "
0x2e 1_011_10 = -0b1.10*2^0   = -1.5
\n", "
\n", "
0x0f 0_011_11 = +0b1.11*2^0   = 1.75
\n", "
\n", "
0x2f 1_011_11 = -0b1.11*2^0   = -1.75
\n", "
\n", "
0x10 0_100_00 = +0b1.00*2^1   = 2.0
\n", "
\n", "
0x30 1_100_00 = -0b1.00*2^1   = -2.0
\n", "
\n", "
0x11 0_100_01 = +0b1.01*2^1   = 2.5
\n", "
\n", "
0x31 1_100_01 = -0b1.01*2^1   = -2.5
\n", "
\n", "
0x12 0_100_10 = +0b1.10*2^1   = 3.0
\n", "
\n", "
0x32 1_100_10 = -0b1.10*2^1   = -3.0
\n", "
\n", "
0x13 0_100_11 = +0b1.11*2^1   = 3.5
\n", "
\n", "
0x33 1_100_11 = -0b1.11*2^1   = -3.5
\n", "
\n", "
0x14 0_101_00 = +0b1.00*2^2   = 4.0
\n", "
\n", "
0x34 1_101_00 = -0b1.00*2^2   = -4.0
\n", "
\n", "
0x15 0_101_01 = +0b1.01*2^2   = 5.0
\n", "
\n", "
0x35 1_101_01 = -0b1.01*2^2   = -5.0
\n", "
\n", "
0x16 0_101_10 = +0b1.10*2^2   = 6.0
\n", "
\n", "
0x36 1_101_10 = -0b1.10*2^2   = -6.0
\n", "
\n", "
0x17 0_101_11 = +0b1.11*2^2   = 7.0
\n", "
\n", "
0x37 1_101_11 = -0b1.11*2^2   = -7.0
\n", "
\n", "
0x18 0_110_00 = +0b1.00*2^3   = 8.0
\n", "
\n", "
0x38 1_110_00 = -0b1.00*2^3   = -8.0
\n", "
\n", "
0x19 0_110_01 = +0b1.01*2^3   = 10.0
\n", "
\n", "
0x39 1_110_01 = -0b1.01*2^3   = -10.0
\n", "
\n", "
0x1a 0_110_10 = +0b1.10*2^3   = 12.0
\n", "
\n", "
0x3a 1_110_10 = -0b1.10*2^3   = -12.0
\n", "
\n", "
0x1b 0_110_11 = +0b1.11*2^3   = 14.0
\n", "
\n", "
0x3b 1_110_11 = -0b1.11*2^3   = -14.0
\n", "
\n", "
0x1c 0_111_00 = inf
\n", "
\n", "
0x3c 1_111_00 = -inf
\n", "
\n", "
0x1d 0_111_01 = nan
\n", "
\n", "
0x3d 1_111_01 = nan
\n", "
\n", "
0x1e 0_111_10 = nan
\n", "
\n", "
0x3e 1_111_10 = nan
\n", "
\n", "
0x1f 0_111_11 = nan
\n", "
\n", "
0x3f 1_111_11 = nan
\n", "
" ], "text/plain": [ "" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from gfloat import FormatInfo\n", "\n", "fi = FormatInfo(\"754-fp6\", 6, 3, 3, True, True, 3, True, True, False)\n", "HTML(mktbl(fi, cols=2, vs_width=8, vs_d=3))" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "

FP8 Value Table, P3109-fp6

\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
\n", "
0x00 0_000_00 = 0.0
\n", "
\n", "
0x20 1_000_00 = nan
\n", "
\n", "
0x01 0_000_01 = +0b0.01*2^-3  = 0.03125
\n", "
\n", "
0x21 1_000_01 = -0b0.01*2^-3  = -0.03125
\n", "
\n", "
0x02 0_000_10 = +0b0.10*2^-3  = 0.0625
\n", "
\n", "
0x22 1_000_10 = -0b0.10*2^-3  = -0.0625
\n", "
\n", "
0x03 0_000_11 = +0b0.11*2^-3  = 0.09375
\n", "
\n", "
0x23 1_000_11 = -0b0.11*2^-3  = -0.09375
\n", "
\n", "
0x04 0_001_00 = +0b1.00*2^-3  = 0.125
\n", "
\n", "
0x24 1_001_00 = -0b1.00*2^-3  = -0.125
\n", "
\n", "
0x05 0_001_01 = +0b1.01*2^-3  = 0.15625
\n", "
\n", "
0x25 1_001_01 = -0b1.01*2^-3  = -0.15625
\n", "
\n", "
0x06 0_001_10 = +0b1.10*2^-3  = 0.1875
\n", "
\n", "
0x26 1_001_10 = -0b1.10*2^-3  = -0.1875
\n", "
\n", "
0x07 0_001_11 = +0b1.11*2^-3  = 0.21875
\n", "
\n", "
0x27 1_001_11 = -0b1.11*2^-3  = -0.21875
\n", "
\n", "
0x08 0_010_00 = +0b1.00*2^-2  = 0.25
\n", "
\n", "
0x28 1_010_00 = -0b1.00*2^-2  = -0.25
\n", "
\n", "
0x09 0_010_01 = +0b1.01*2^-2  = 0.3125
\n", "
\n", "
0x29 1_010_01 = -0b1.01*2^-2  = -0.3125
\n", "
\n", "
0x0a 0_010_10 = +0b1.10*2^-2  = 0.375
\n", "
\n", "
0x2a 1_010_10 = -0b1.10*2^-2  = -0.375
\n", "
\n", "
0x0b 0_010_11 = +0b1.11*2^-2  = 0.4375
\n", "
\n", "
0x2b 1_010_11 = -0b1.11*2^-2  = -0.4375
\n", "
\n", "
0x0c 0_011_00 = +0b1.00*2^-1  = 0.5
\n", "
\n", "
0x2c 1_011_00 = -0b1.00*2^-1  = -0.5
\n", "
\n", "
0x0d 0_011_01 = +0b1.01*2^-1  = 0.625
\n", "
\n", "
0x2d 1_011_01 = -0b1.01*2^-1  = -0.625
\n", "
\n", "
0x0e 0_011_10 = +0b1.10*2^-1  = 0.75
\n", "
\n", "
0x2e 1_011_10 = -0b1.10*2^-1  = -0.75
\n", "
\n", "
0x0f 0_011_11 = +0b1.11*2^-1  = 0.875
\n", "
\n", "
0x2f 1_011_11 = -0b1.11*2^-1  = -0.875
\n", "
\n", "
0x10 0_100_00 = +0b1.00*2^0   = 1.0
\n", "
\n", "
0x30 1_100_00 = -0b1.00*2^0   = -1.0
\n", "
\n", "
0x11 0_100_01 = +0b1.01*2^0   = 1.25
\n", "
\n", "
0x31 1_100_01 = -0b1.01*2^0   = -1.25
\n", "
\n", "
0x12 0_100_10 = +0b1.10*2^0   = 1.5
\n", "
\n", "
0x32 1_100_10 = -0b1.10*2^0   = -1.5
\n", "
\n", "
0x13 0_100_11 = +0b1.11*2^0   = 1.75
\n", "
\n", "
0x33 1_100_11 = -0b1.11*2^0   = -1.75
\n", "
\n", "
0x14 0_101_00 = +0b1.00*2^1   = 2.0
\n", "
\n", "
0x34 1_101_00 = -0b1.00*2^1   = -2.0
\n", "
\n", "
0x15 0_101_01 = +0b1.01*2^1   = 2.5
\n", "
\n", "
0x35 1_101_01 = -0b1.01*2^1   = -2.5
\n", "
\n", "
0x16 0_101_10 = +0b1.10*2^1   = 3.0
\n", "
\n", "
0x36 1_101_10 = -0b1.10*2^1   = -3.0
\n", "
\n", "
0x17 0_101_11 = +0b1.11*2^1   = 3.5
\n", "
\n", "
0x37 1_101_11 = -0b1.11*2^1   = -3.5
\n", "
\n", "
0x18 0_110_00 = +0b1.00*2^2   = 4.0
\n", "
\n", "
0x38 1_110_00 = -0b1.00*2^2   = -4.0
\n", "
\n", "
0x19 0_110_01 = +0b1.01*2^2   = 5.0
\n", "
\n", "
0x39 1_110_01 = -0b1.01*2^2   = -5.0
\n", "
\n", "
0x1a 0_110_10 = +0b1.10*2^2   = 6.0
\n", "
\n", "
0x3a 1_110_10 = -0b1.10*2^2   = -6.0
\n", "
\n", "
0x1b 0_110_11 = +0b1.11*2^2   = 7.0
\n", "
\n", "
0x3b 1_110_11 = -0b1.11*2^2   = -7.0
\n", "
\n", "
0x1c 0_111_00 = +0b1.00*2^3   = 8.0
\n", "
\n", "
0x3c 1_111_00 = -0b1.00*2^3   = -8.0
\n", "
\n", "
0x1d 0_111_01 = +0b1.01*2^3   = 10.0
\n", "
\n", "
0x3d 1_111_01 = -0b1.01*2^3   = -10.0
\n", "
\n", "
0x1e 0_111_10 = +0b1.10*2^3   = 12.0
\n", "
\n", "
0x3e 1_111_10 = -0b1.10*2^3   = -12.0
\n", "
\n", "
0x1f 0_111_11 = inf
\n", "
\n", "
0x3f 1_111_11 = -inf
\n", "
" ], "text/plain": [ "" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# P3109\n", "fi = FormatInfo(\"P3109-fp6\", 6, 3, 3, False, True, 0, True, True, False)\n", "HTML(mktbl(fi, cols=2, vs_width=8, vs_d=3))" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "

FP8 Value Table, P3109-fp6

\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
\n", "
0x00 0_000_00 = 0.0
\n", "
\n", "
0x20 1_000_00 = nan
\n", "
\n", "
0x01 0_000_01 = +0b0.01*2^-3  = 0.03125
\n", "
\n", "
0x21 1_000_01 = -0b0.01*2^-3  = -0.03125
\n", "
\n", "
0x02 0_000_10 = +0b0.10*2^-3  = 0.0625
\n", "
\n", "
0x22 1_000_10 = -0b0.10*2^-3  = -0.0625
\n", "
\n", "
0x03 0_000_11 = +0b0.11*2^-3  = 0.09375
\n", "
\n", "
0x23 1_000_11 = -0b0.11*2^-3  = -0.09375
\n", "
\n", "
0x04 0_001_00 = +0b1.00*2^-3  = 0.125
\n", "
\n", "
0x24 1_001_00 = -0b1.00*2^-3  = -0.125
\n", "
\n", "
0x05 0_001_01 = +0b1.01*2^-3  = 0.15625
\n", "
\n", "
0x25 1_001_01 = -0b1.01*2^-3  = -0.15625
\n", "
\n", "
0x06 0_001_10 = +0b1.10*2^-3  = 0.1875
\n", "
\n", "
0x26 1_001_10 = -0b1.10*2^-3  = -0.1875
\n", "
\n", "
0x07 0_001_11 = +0b1.11*2^-3  = 0.21875
\n", "
\n", "
0x27 1_001_11 = -0b1.11*2^-3  = -0.21875
\n", "
\n", "
0x08 0_010_00 = +0b1.00*2^-2  = 0.25
\n", "
\n", "
0x28 1_010_00 = -0b1.00*2^-2  = -0.25
\n", "
\n", "
0x09 0_010_01 = +0b1.01*2^-2  = 0.3125
\n", "
\n", "
0x29 1_010_01 = -0b1.01*2^-2  = -0.3125
\n", "
\n", "
0x0a 0_010_10 = +0b1.10*2^-2  = 0.375
\n", "
\n", "
0x2a 1_010_10 = -0b1.10*2^-2  = -0.375
\n", "
\n", "
0x0b 0_010_11 = +0b1.11*2^-2  = 0.4375
\n", "
\n", "
0x2b 1_010_11 = -0b1.11*2^-2  = -0.4375
\n", "
\n", "
0x0c 0_011_00 = +0b1.00*2^-1  = 0.5
\n", "
\n", "
0x2c 1_011_00 = -0b1.00*2^-1  = -0.5
\n", "
\n", "
0x0d 0_011_01 = +0b1.01*2^-1  = 0.625
\n", "
\n", "
0x2d 1_011_01 = -0b1.01*2^-1  = -0.625
\n", "
\n", "
0x0e 0_011_10 = +0b1.10*2^-1  = 0.75
\n", "
\n", "
0x2e 1_011_10 = -0b1.10*2^-1  = -0.75
\n", "
\n", "
0x0f 0_011_11 = +0b1.11*2^-1  = 0.875
\n", "
\n", "
0x2f 1_011_11 = -0b1.11*2^-1  = -0.875
\n", "
\n", "
0x10 0_100_00 = +0b1.00*2^0   = 1.0
\n", "
\n", "
0x30 1_100_00 = -0b1.00*2^0   = -1.0
\n", "
\n", "
0x11 0_100_01 = +0b1.01*2^0   = 1.25
\n", "
\n", "
0x31 1_100_01 = -0b1.01*2^0   = -1.25
\n", "
\n", "
0x12 0_100_10 = +0b1.10*2^0   = 1.5
\n", "
\n", "
0x32 1_100_10 = -0b1.10*2^0   = -1.5
\n", "
\n", "
0x13 0_100_11 = +0b1.11*2^0   = 1.75
\n", "
\n", "
0x33 1_100_11 = -0b1.11*2^0   = -1.75
\n", "
\n", "
0x14 0_101_00 = +0b1.00*2^1   = 2.0
\n", "
\n", "
0x34 1_101_00 = -0b1.00*2^1   = -2.0
\n", "
\n", "
0x15 0_101_01 = +0b1.01*2^1   = 2.5
\n", "
\n", "
0x35 1_101_01 = -0b1.01*2^1   = -2.5
\n", "
\n", "
0x16 0_101_10 = +0b1.10*2^1   = 3.0
\n", "
\n", "
0x36 1_101_10 = -0b1.10*2^1   = -3.0
\n", "
\n", "
0x17 0_101_11 = +0b1.11*2^1   = 3.5
\n", "
\n", "
0x37 1_101_11 = -0b1.11*2^1   = -3.5
\n", "
\n", "
0x18 0_110_00 = +0b1.00*2^2   = 4.0
\n", "
\n", "
0x38 1_110_00 = -0b1.00*2^2   = -4.0
\n", "
\n", "
0x19 0_110_01 = +0b1.01*2^2   = 5.0
\n", "
\n", "
0x39 1_110_01 = -0b1.01*2^2   = -5.0
\n", "
\n", "
0x1a 0_110_10 = +0b1.10*2^2   = 6.0
\n", "
\n", "
0x3a 1_110_10 = -0b1.10*2^2   = -6.0
\n", "
\n", "
0x1b 0_110_11 = +0b1.11*2^2   = 7.0
\n", "
\n", "
0x3b 1_110_11 = -0b1.11*2^2   = -7.0
\n", "
\n", "
0x1c 0_111_00 = +0b1.00*2^3   = 8.0
\n", "
\n", "
0x3c 1_111_00 = -0b1.00*2^3   = -8.0
\n", "
\n", "
0x1d 0_111_01 = +0b1.01*2^3   = 10.0
\n", "
\n", "
0x3d 1_111_01 = -0b1.01*2^3   = -10.0
\n", "
\n", "
0x1e 0_111_10 = +0b1.10*2^3   = 12.0
\n", "
\n", "
0x3e 1_111_10 = -0b1.10*2^3   = -12.0
\n", "
\n", "
0x1f 0_111_11 = +0b1.11*2^3   = 14.0
\n", "
\n", "
0x3f 1_111_11 = -0b1.11*2^3   = -14.0
\n", "
" ], "text/plain": [ "" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# P3109\n", "fi = FormatInfo(\"P3109-fp6\", 6, 3, 3, False, False, 0, True, True, False)\n", "HTML(mktbl(fi, cols=2, vs_width=8, vs_d=3))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### OCP Formats: E5M2, E4M3" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "

FP8 Value Table, ocp_e5m2

\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
\n", "
0x00 0_00000_00 = 0.0
\n", "
\n", "
0x40 0_10000_00 = +0b1.00*2^1   = 2.0
\n", "
\n", "
0x80 1_00000_00 = -0.0
\n", "
\n", "
0xc0 1_10000_00 = -0b1.00*2^1   = -2.0
\n", "
\n", "
0x01 0_00000_01 = +0b0.01*2^-14 = ~1.5259e-05
\n", "
\n", "
0x41 0_10000_01 = +0b1.01*2^1   = 2.5
\n", "
\n", "
0x81 1_00000_01 = -0b0.01*2^-14 = ~-1.5259e-05
\n", "
\n", "
0xc1 1_10000_01 = -0b1.01*2^1   = -2.5
\n", "
\n", "
0x02 0_00000_10 = +0b0.10*2^-14 = ~3.0518e-05
\n", "
\n", "
0x42 0_10000_10 = +0b1.10*2^1   = 3.0
\n", "
\n", "
0x82 1_00000_10 = -0b0.10*2^-14 = ~-3.0518e-05
\n", "
\n", "
0xc2 1_10000_10 = -0b1.10*2^1   = -3.0
\n", "
\n", "
0x03 0_00000_11 = +0b0.11*2^-14 = ~4.5776e-05
\n", "
\n", "
0x43 0_10000_11 = +0b1.11*2^1   = 3.5
\n", "
\n", "
0x83 1_00000_11 = -0b0.11*2^-14 = ~-4.5776e-05
\n", "
\n", "
0xc3 1_10000_11 = -0b1.11*2^1   = -3.5
\n", "
\n", "
0x04 0_00001_00 = +0b1.00*2^-14 = ~6.1035e-05
\n", "
\n", "
0x44 0_10001_00 = +0b1.00*2^2   = 4.0
\n", "
\n", "
0x84 1_00001_00 = -0b1.00*2^-14 = ~-6.1035e-05
\n", "
\n", "
0xc4 1_10001_00 = -0b1.00*2^2   = -4.0
\n", "
\n", "
0x05 0_00001_01 = +0b1.01*2^-14 = ~7.6294e-05
\n", "
\n", "
0x45 0_10001_01 = +0b1.01*2^2   = 5.0
\n", "
\n", "
0x85 1_00001_01 = -0b1.01*2^-14 = ~-7.6294e-05
\n", "
\n", "
0xc5 1_10001_01 = -0b1.01*2^2   = -5.0
\n", "
\n", "
0x06 0_00001_10 = +0b1.10*2^-14 = ~9.1553e-05
\n", "
\n", "
0x46 0_10001_10 = +0b1.10*2^2   = 6.0
\n", "
\n", "
0x86 1_00001_10 = -0b1.10*2^-14 = ~-9.1553e-05
\n", "
\n", "
0xc6 1_10001_10 = -0b1.10*2^2   = -6.0
\n", "
\n", "
0x07 0_00001_11 = +0b1.11*2^-14 = ~0.00011
\n", "
\n", "
0x47 0_10001_11 = +0b1.11*2^2   = 7.0
\n", "
\n", "
0x87 1_00001_11 = -0b1.11*2^-14 = ~-0.00011
\n", "
\n", "
0xc7 1_10001_11 = -0b1.11*2^2   = -7.0
\n", "
\n", "
0x08 0_00010_00 = +0b1.00*2^-13 = ~0.00012
\n", "
\n", "
0x48 0_10010_00 = +0b1.00*2^3   = 8.0
\n", "
\n", "
0x88 1_00010_00 = -0b1.00*2^-13 = ~-0.00012
\n", "
\n", "
0xc8 1_10010_00 = -0b1.00*2^3   = -8.0
\n", "
\n", "
0x09 0_00010_01 = +0b1.01*2^-13 = ~0.00015
\n", "
\n", "
0x49 0_10010_01 = +0b1.01*2^3   = 10.0
\n", "
\n", "
0x89 1_00010_01 = -0b1.01*2^-13 = ~-0.00015
\n", "
\n", "
0xc9 1_10010_01 = -0b1.01*2^3   = -10.0
\n", "
\n", "
0x0a 0_00010_10 = +0b1.10*2^-13 = ~0.00018
\n", "
\n", "
0x4a 0_10010_10 = +0b1.10*2^3   = 12.0
\n", "
\n", "
0x8a 1_00010_10 = -0b1.10*2^-13 = ~-0.00018
\n", "
\n", "
0xca 1_10010_10 = -0b1.10*2^3   = -12.0
\n", "
\n", "
0x0b 0_00010_11 = +0b1.11*2^-13 = ~0.00021
\n", "
\n", "
0x4b 0_10010_11 = +0b1.11*2^3   = 14.0
\n", "
\n", "
0x8b 1_00010_11 = -0b1.11*2^-13 = ~-0.00021
\n", "
\n", "
0xcb 1_10010_11 = -0b1.11*2^3   = -14.0
\n", "
\n", "
0x0c 0_00011_00 = +0b1.00*2^-12 = ~0.00024
\n", "
\n", "
0x4c 0_10011_00 = +0b1.00*2^4   = 16.0
\n", "
\n", "
0x8c 1_00011_00 = -0b1.00*2^-12 = ~-0.00024
\n", "
\n", "
0xcc 1_10011_00 = -0b1.00*2^4   = -16.0
\n", "
\n", "
0x0d 0_00011_01 = +0b1.01*2^-12 = ~0.00031
\n", "
\n", "
0x4d 0_10011_01 = +0b1.01*2^4   = 20.0
\n", "
\n", "
0x8d 1_00011_01 = -0b1.01*2^-12 = ~-0.00031
\n", "
\n", "
0xcd 1_10011_01 = -0b1.01*2^4   = -20.0
\n", "
\n", "
0x0e 0_00011_10 = +0b1.10*2^-12 = ~0.00037
\n", "
\n", "
0x4e 0_10011_10 = +0b1.10*2^4   = 24.0
\n", "
\n", "
0x8e 1_00011_10 = -0b1.10*2^-12 = ~-0.00037
\n", "
\n", "
0xce 1_10011_10 = -0b1.10*2^4   = -24.0
\n", "
\n", "
0x0f 0_00011_11 = +0b1.11*2^-12 = ~0.00043
\n", "
\n", "
0x4f 0_10011_11 = +0b1.11*2^4   = 28.0
\n", "
\n", "
0x8f 1_00011_11 = -0b1.11*2^-12 = ~-0.00043
\n", "
\n", "
0xcf 1_10011_11 = -0b1.11*2^4   = -28.0
\n", "
\n", "
0x30 0_01100_00 = +0b1.00*2^-3  = 0.125
\n", "
\n", "
0x70 0_11100_00 = +0b1.00*2^13  = 8192.0
\n", "
\n", "
0xb0 1_01100_00 = -0b1.00*2^-3  = -0.125
\n", "
\n", "
0xf0 1_11100_00 = -0b1.00*2^13  = -8192.0
\n", "
\n", "
0x31 0_01100_01 = +0b1.01*2^-3  = 0.15625
\n", "
\n", "
0x71 0_11100_01 = +0b1.01*2^13  = 10240.0
\n", "
\n", "
0xb1 1_01100_01 = -0b1.01*2^-3  = -0.15625
\n", "
\n", "
0xf1 1_11100_01 = -0b1.01*2^13  = -10240.0
\n", "
\n", "
0x32 0_01100_10 = +0b1.10*2^-3  = 0.1875
\n", "
\n", "
0x72 0_11100_10 = +0b1.10*2^13  = 12288.0
\n", "
\n", "
0xb2 1_01100_10 = -0b1.10*2^-3  = -0.1875
\n", "
\n", "
0xf2 1_11100_10 = -0b1.10*2^13  = -12288.0
\n", "
\n", "
0x33 0_01100_11 = +0b1.11*2^-3  = 0.21875
\n", "
\n", "
0x73 0_11100_11 = +0b1.11*2^13  = 14336.0
\n", "
\n", "
0xb3 1_01100_11 = -0b1.11*2^-3  = -0.21875
\n", "
\n", "
0xf3 1_11100_11 = -0b1.11*2^13  = -14336.0
\n", "
\n", "
0x34 0_01101_00 = +0b1.00*2^-2  = 0.25
\n", "
\n", "
0x74 0_11101_00 = +0b1.00*2^14  = 16384.0
\n", "
\n", "
0xb4 1_01101_00 = -0b1.00*2^-2  = -0.25
\n", "
\n", "
0xf4 1_11101_00 = -0b1.00*2^14  = -16384.0
\n", "
\n", "
0x35 0_01101_01 = +0b1.01*2^-2  = 0.3125
\n", "
\n", "
0x75 0_11101_01 = +0b1.01*2^14  = 20480.0
\n", "
\n", "
0xb5 1_01101_01 = -0b1.01*2^-2  = -0.3125
\n", "
\n", "
0xf5 1_11101_01 = -0b1.01*2^14  = -20480.0
\n", "
\n", "
0x36 0_01101_10 = +0b1.10*2^-2  = 0.375
\n", "
\n", "
0x76 0_11101_10 = +0b1.10*2^14  = 24576.0
\n", "
\n", "
0xb6 1_01101_10 = -0b1.10*2^-2  = -0.375
\n", "
\n", "
0xf6 1_11101_10 = -0b1.10*2^14  = -24576.0
\n", "
\n", "
0x37 0_01101_11 = +0b1.11*2^-2  = 0.4375
\n", "
\n", "
0x77 0_11101_11 = +0b1.11*2^14  = 28672.0
\n", "
\n", "
0xb7 1_01101_11 = -0b1.11*2^-2  = -0.4375
\n", "
\n", "
0xf7 1_11101_11 = -0b1.11*2^14  = -28672.0
\n", "
\n", "
0x38 0_01110_00 = +0b1.00*2^-1  = 0.5
\n", "
\n", "
0x78 0_11110_00 = +0b1.00*2^15  = 32768.0
\n", "
\n", "
0xb8 1_01110_00 = -0b1.00*2^-1  = -0.5
\n", "
\n", "
0xf8 1_11110_00 = -0b1.00*2^15  = -32768.0
\n", "
\n", "
0x39 0_01110_01 = +0b1.01*2^-1  = 0.625
\n", "
\n", "
0x79 0_11110_01 = +0b1.01*2^15  = 40960.0
\n", "
\n", "
0xb9 1_01110_01 = -0b1.01*2^-1  = -0.625
\n", "
\n", "
0xf9 1_11110_01 = -0b1.01*2^15  = -40960.0
\n", "
\n", "
0x3a 0_01110_10 = +0b1.10*2^-1  = 0.75
\n", "
\n", "
0x7a 0_11110_10 = +0b1.10*2^15  = 49152.0
\n", "
\n", "
0xba 1_01110_10 = -0b1.10*2^-1  = -0.75
\n", "
\n", "
0xfa 1_11110_10 = -0b1.10*2^15  = -49152.0
\n", "
\n", "
0x3b 0_01110_11 = +0b1.11*2^-1  = 0.875
\n", "
\n", "
0x7b 0_11110_11 = +0b1.11*2^15  = 57344.0
\n", "
\n", "
0xbb 1_01110_11 = -0b1.11*2^-1  = -0.875
\n", "
\n", "
0xfb 1_11110_11 = -0b1.11*2^15  = -57344.0
\n", "
\n", "
0x3c 0_01111_00 = +0b1.00*2^0   = 1.0
\n", "
\n", "
0x7c 0_11111_00 = inf
\n", "
\n", "
0xbc 1_01111_00 = -0b1.00*2^0   = -1.0
\n", "
\n", "
0xfc 1_11111_00 = -inf
\n", "
\n", "
0x3d 0_01111_01 = +0b1.01*2^0   = 1.25
\n", "
\n", "
0x7d 0_11111_01 = nan
\n", "
\n", "
0xbd 1_01111_01 = -0b1.01*2^0   = -1.25
\n", "
\n", "
0xfd 1_11111_01 = nan
\n", "
\n", "
0x3e 0_01111_10 = +0b1.10*2^0   = 1.5
\n", "
\n", "
0x7e 0_11111_10 = nan
\n", "
\n", "
0xbe 1_01111_10 = -0b1.10*2^0   = -1.5
\n", "
\n", "
0xfe 1_11111_10 = nan
\n", "
\n", "
0x3f 0_01111_11 = +0b1.11*2^0   = 1.75
\n", "
\n", "
0x7f 0_11111_11 = nan
\n", "
\n", "
0xbf 1_01111_11 = -0b1.11*2^0   = -1.75
\n", "
\n", "
0xff 1_11111_11 = nan
\n", "
" ], "text/plain": [ "" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "HTML(mktbl(format_info_ocp_e5m2, cols=4, skip_rows=(0x10, 0x30), vs_width=8, vs_d=5))" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "

FP8 Value Table, ocp_e4m3

\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
\n", "
0x00 0_0000_000 = 0.0
\n", "
\n", "
0x40 0_1000_000 = +0b1.000*2^1   = 2.0
\n", "
\n", "
0x80 1_0000_000 = -0.0
\n", "
\n", "
0xc0 1_1000_000 = -0b1.000*2^1   = -2.0
\n", "
\n", "
0x01 0_0000_001 = +0b0.001*2^-6  = ~0.00195
\n", "
\n", "
0x41 0_1000_001 = +0b1.001*2^1   = 2.25
\n", "
\n", "
0x81 1_0000_001 = -0b0.001*2^-6  = ~-0.00195
\n", "
\n", "
0xc1 1_1000_001 = -0b1.001*2^1   = -2.25
\n", "
\n", "
0x02 0_0000_010 = +0b0.010*2^-6  = ~0.00391
\n", "
\n", "
0x42 0_1000_010 = +0b1.010*2^1   = 2.5
\n", "
\n", "
0x82 1_0000_010 = -0b0.010*2^-6  = ~-0.00391
\n", "
\n", "
0xc2 1_1000_010 = -0b1.010*2^1   = -2.5
\n", "
\n", "
0x03 0_0000_011 = +0b0.011*2^-6  = ~0.00586
\n", "
\n", "
0x43 0_1000_011 = +0b1.011*2^1   = 2.75
\n", "
\n", "
0x83 1_0000_011 = -0b0.011*2^-6  = ~-0.00586
\n", "
\n", "
0xc3 1_1000_011 = -0b1.011*2^1   = -2.75
\n", "
\n", "
0x04 0_0000_100 = +0b0.100*2^-6  = ~0.00781
\n", "
\n", "
0x44 0_1000_100 = +0b1.100*2^1   = 3.0
\n", "
\n", "
0x84 1_0000_100 = -0b0.100*2^-6  = ~-0.00781
\n", "
\n", "
0xc4 1_1000_100 = -0b1.100*2^1   = -3.0
\n", "
\n", "
0x05 0_0000_101 = +0b0.101*2^-6  = ~0.00977
\n", "
\n", "
0x45 0_1000_101 = +0b1.101*2^1   = 3.25
\n", "
\n", "
0x85 1_0000_101 = -0b0.101*2^-6  = ~-0.00977
\n", "
\n", "
0xc5 1_1000_101 = -0b1.101*2^1   = -3.25
\n", "
\n", "
0x06 0_0000_110 = +0b0.110*2^-6  = ~0.01172
\n", "
\n", "
0x46 0_1000_110 = +0b1.110*2^1   = 3.5
\n", "
\n", "
0x86 1_0000_110 = -0b0.110*2^-6  = ~-0.01172
\n", "
\n", "
0xc6 1_1000_110 = -0b1.110*2^1   = -3.5
\n", "
\n", "
0x07 0_0000_111 = +0b0.111*2^-6  = ~0.01367
\n", "
\n", "
0x47 0_1000_111 = +0b1.111*2^1   = 3.75
\n", "
\n", "
0x87 1_0000_111 = -0b0.111*2^-6  = ~-0.01367
\n", "
\n", "
0xc7 1_1000_111 = -0b1.111*2^1   = -3.75
\n", "
\n", "
0x08 0_0001_000 = +0b1.000*2^-6  = 0.015625
\n", "
\n", "
0x48 0_1001_000 = +0b1.000*2^2   = 4.0
\n", "
\n", "
0x88 1_0001_000 = -0b1.000*2^-6  = ~-0.01562
\n", "
\n", "
0xc8 1_1001_000 = -0b1.000*2^2   = -4.0
\n", "
\n", "
0x09 0_0001_001 = +0b1.001*2^-6  = ~0.01758
\n", "
\n", "
0x49 0_1001_001 = +0b1.001*2^2   = 4.5
\n", "
\n", "
0x89 1_0001_001 = -0b1.001*2^-6  = ~-0.01758
\n", "
\n", "
0xc9 1_1001_001 = -0b1.001*2^2   = -4.5
\n", "
\n", "
0x0a 0_0001_010 = +0b1.010*2^-6  = ~0.01953
\n", "
\n", "
0x4a 0_1001_010 = +0b1.010*2^2   = 5.0
\n", "
\n", "
0x8a 1_0001_010 = -0b1.010*2^-6  = ~-0.01953
\n", "
\n", "
0xca 1_1001_010 = -0b1.010*2^2   = -5.0
\n", "
\n", "
0x0b 0_0001_011 = +0b1.011*2^-6  = ~0.02148
\n", "
\n", "
0x4b 0_1001_011 = +0b1.011*2^2   = 5.5
\n", "
\n", "
0x8b 1_0001_011 = -0b1.011*2^-6  = ~-0.02148
\n", "
\n", "
0xcb 1_1001_011 = -0b1.011*2^2   = -5.5
\n", "
\n", "
0x0c 0_0001_100 = +0b1.100*2^-6  = ~0.02344
\n", "
\n", "
0x4c 0_1001_100 = +0b1.100*2^2   = 6.0
\n", "
\n", "
0x8c 1_0001_100 = -0b1.100*2^-6  = ~-0.02344
\n", "
\n", "
0xcc 1_1001_100 = -0b1.100*2^2   = -6.0
\n", "
\n", "
0x0d 0_0001_101 = +0b1.101*2^-6  = ~0.02539
\n", "
\n", "
0x4d 0_1001_101 = +0b1.101*2^2   = 6.5
\n", "
\n", "
0x8d 1_0001_101 = -0b1.101*2^-6  = ~-0.02539
\n", "
\n", "
0xcd 1_1001_101 = -0b1.101*2^2   = -6.5
\n", "
\n", "
0x0e 0_0001_110 = +0b1.110*2^-6  = ~0.02734
\n", "
\n", "
0x4e 0_1001_110 = +0b1.110*2^2   = 7.0
\n", "
\n", "
0x8e 1_0001_110 = -0b1.110*2^-6  = ~-0.02734
\n", "
\n", "
0xce 1_1001_110 = -0b1.110*2^2   = -7.0
\n", "
\n", "
0x0f 0_0001_111 = +0b1.111*2^-6  = ~0.02930
\n", "
\n", "
0x4f 0_1001_111 = +0b1.111*2^2   = 7.5
\n", "
\n", "
0x8f 1_0001_111 = -0b1.111*2^-6  = ~-0.02930
\n", "
\n", "
0xcf 1_1001_111 = -0b1.111*2^2   = -7.5
\n", "
\n", "
0x30 0_0110_000 = +0b1.000*2^-1  = 0.5
\n", "
\n", "
0x70 0_1110_000 = +0b1.000*2^7   = 128.0
\n", "
\n", "
0xb0 1_0110_000 = -0b1.000*2^-1  = -0.5
\n", "
\n", "
0xf0 1_1110_000 = -0b1.000*2^7   = -128.0
\n", "
\n", "
0x31 0_0110_001 = +0b1.001*2^-1  = 0.5625
\n", "
\n", "
0x71 0_1110_001 = +0b1.001*2^7   = 144.0
\n", "
\n", "
0xb1 1_0110_001 = -0b1.001*2^-1  = -0.5625
\n", "
\n", "
0xf1 1_1110_001 = -0b1.001*2^7   = -144.0
\n", "
\n", "
0x32 0_0110_010 = +0b1.010*2^-1  = 0.625
\n", "
\n", "
0x72 0_1110_010 = +0b1.010*2^7   = 160.0
\n", "
\n", "
0xb2 1_0110_010 = -0b1.010*2^-1  = -0.625
\n", "
\n", "
0xf2 1_1110_010 = -0b1.010*2^7   = -160.0
\n", "
\n", "
0x33 0_0110_011 = +0b1.011*2^-1  = 0.6875
\n", "
\n", "
0x73 0_1110_011 = +0b1.011*2^7   = 176.0
\n", "
\n", "
0xb3 1_0110_011 = -0b1.011*2^-1  = -0.6875
\n", "
\n", "
0xf3 1_1110_011 = -0b1.011*2^7   = -176.0
\n", "
\n", "
0x34 0_0110_100 = +0b1.100*2^-1  = 0.75
\n", "
\n", "
0x74 0_1110_100 = +0b1.100*2^7   = 192.0
\n", "
\n", "
0xb4 1_0110_100 = -0b1.100*2^-1  = -0.75
\n", "
\n", "
0xf4 1_1110_100 = -0b1.100*2^7   = -192.0
\n", "
\n", "
0x35 0_0110_101 = +0b1.101*2^-1  = 0.8125
\n", "
\n", "
0x75 0_1110_101 = +0b1.101*2^7   = 208.0
\n", "
\n", "
0xb5 1_0110_101 = -0b1.101*2^-1  = -0.8125
\n", "
\n", "
0xf5 1_1110_101 = -0b1.101*2^7   = -208.0
\n", "
\n", "
0x36 0_0110_110 = +0b1.110*2^-1  = 0.875
\n", "
\n", "
0x76 0_1110_110 = +0b1.110*2^7   = 224.0
\n", "
\n", "
0xb6 1_0110_110 = -0b1.110*2^-1  = -0.875
\n", "
\n", "
0xf6 1_1110_110 = -0b1.110*2^7   = -224.0
\n", "
\n", "
0x37 0_0110_111 = +0b1.111*2^-1  = 0.9375
\n", "
\n", "
0x77 0_1110_111 = +0b1.111*2^7   = 240.0
\n", "
\n", "
0xb7 1_0110_111 = -0b1.111*2^-1  = -0.9375
\n", "
\n", "
0xf7 1_1110_111 = -0b1.111*2^7   = -240.0
\n", "
\n", "
0x38 0_0111_000 = +0b1.000*2^0   = 1.0
\n", "
\n", "
0x78 0_1111_000 = +0b1.000*2^8   = 256.0
\n", "
\n", "
0xb8 1_0111_000 = -0b1.000*2^0   = -1.0
\n", "
\n", "
0xf8 1_1111_000 = -0b1.000*2^8   = -256.0
\n", "
\n", "
0x39 0_0111_001 = +0b1.001*2^0   = 1.125
\n", "
\n", "
0x79 0_1111_001 = +0b1.001*2^8   = 288.0
\n", "
\n", "
0xb9 1_0111_001 = -0b1.001*2^0   = -1.125
\n", "
\n", "
0xf9 1_1111_001 = -0b1.001*2^8   = -288.0
\n", "
\n", "
0x3a 0_0111_010 = +0b1.010*2^0   = 1.25
\n", "
\n", "
0x7a 0_1111_010 = +0b1.010*2^8   = 320.0
\n", "
\n", "
0xba 1_0111_010 = -0b1.010*2^0   = -1.25
\n", "
\n", "
0xfa 1_1111_010 = -0b1.010*2^8   = -320.0
\n", "
\n", "
0x3b 0_0111_011 = +0b1.011*2^0   = 1.375
\n", "
\n", "
0x7b 0_1111_011 = +0b1.011*2^8   = 352.0
\n", "
\n", "
0xbb 1_0111_011 = -0b1.011*2^0   = -1.375
\n", "
\n", "
0xfb 1_1111_011 = -0b1.011*2^8   = -352.0
\n", "
\n", "
0x3c 0_0111_100 = +0b1.100*2^0   = 1.5
\n", "
\n", "
0x7c 0_1111_100 = +0b1.100*2^8   = 384.0
\n", "
\n", "
0xbc 1_0111_100 = -0b1.100*2^0   = -1.5
\n", "
\n", "
0xfc 1_1111_100 = -0b1.100*2^8   = -384.0
\n", "
\n", "
0x3d 0_0111_101 = +0b1.101*2^0   = 1.625
\n", "
\n", "
0x7d 0_1111_101 = +0b1.101*2^8   = 416.0
\n", "
\n", "
0xbd 1_0111_101 = -0b1.101*2^0   = -1.625
\n", "
\n", "
0xfd 1_1111_101 = -0b1.101*2^8   = -416.0
\n", "
\n", "
0x3e 0_0111_110 = +0b1.110*2^0   = 1.75
\n", "
\n", "
0x7e 0_1111_110 = +0b1.110*2^8   = 448.0
\n", "
\n", "
0xbe 1_0111_110 = -0b1.110*2^0   = -1.75
\n", "
\n", "
0xfe 1_1111_110 = -0b1.110*2^8   = -448.0
\n", "
\n", "
0x3f 0_0111_111 = +0b1.111*2^0   = 1.875
\n", "
\n", "
0x7f 0_1111_111 = nan
\n", "
\n", "
0xbf 1_0111_111 = -0b1.111*2^0   = -1.875
\n", "
\n", "
0xff 1_1111_111 = nan
\n", "
" ], "text/plain": [ "" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "HTML(mktbl(format_info_ocp_e4m3, cols=4, skip_rows=(0x10, 0x30), vs_width=8, vs_d=5))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### IEEE WG P3109 {P} formats\n", "\n", "We choose just one example: `p3109(p=3)`, which has the same number of exponent bits as OCP E5 " ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "

FP8 Value Table, p3109_p3

\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
\n", "
0x00 0_00000_00 = 0.0
\n", "
\n", "
0x40 0_10000_00 = +0b1.00*2^0   = 1.0
\n", "
\n", "
0x80 1_00000_00 = nan
\n", "
\n", "
0xc0 1_10000_00 = -0b1.00*2^0   = -1.0
\n", "
\n", "
0x01 0_00000_01 = +0b0.01*2^-15 = ~7.6294e-06
\n", "
\n", "
0x41 0_10000_01 = +0b1.01*2^0   = 1.25
\n", "
\n", "
0x81 1_00000_01 = -0b0.01*2^-15 = ~-7.6294e-06
\n", "
\n", "
0xc1 1_10000_01 = -0b1.01*2^0   = -1.25
\n", "
\n", "
0x02 0_00000_10 = +0b0.10*2^-15 = ~1.5259e-05
\n", "
\n", "
0x42 0_10000_10 = +0b1.10*2^0   = 1.5
\n", "
\n", "
0x82 1_00000_10 = -0b0.10*2^-15 = ~-1.5259e-05
\n", "
\n", "
0xc2 1_10000_10 = -0b1.10*2^0   = -1.5
\n", "
\n", "
0x03 0_00000_11 = +0b0.11*2^-15 = ~2.2888e-05
\n", "
\n", "
0x43 0_10000_11 = +0b1.11*2^0   = 1.75
\n", "
\n", "
0x83 1_00000_11 = -0b0.11*2^-15 = ~-2.2888e-05
\n", "
\n", "
0xc3 1_10000_11 = -0b1.11*2^0   = -1.75
\n", "
\n", "
0x04 0_00001_00 = +0b1.00*2^-15 = ~3.0518e-05
\n", "
\n", "
0x44 0_10001_00 = +0b1.00*2^1   = 2.0
\n", "
\n", "
0x84 1_00001_00 = -0b1.00*2^-15 = ~-3.0518e-05
\n", "
\n", "
0xc4 1_10001_00 = -0b1.00*2^1   = -2.0
\n", "
\n", "
0x05 0_00001_01 = +0b1.01*2^-15 = ~3.8147e-05
\n", "
\n", "
0x45 0_10001_01 = +0b1.01*2^1   = 2.5
\n", "
\n", "
0x85 1_00001_01 = -0b1.01*2^-15 = ~-3.8147e-05
\n", "
\n", "
0xc5 1_10001_01 = -0b1.01*2^1   = -2.5
\n", "
\n", "
0x06 0_00001_10 = +0b1.10*2^-15 = ~4.5776e-05
\n", "
\n", "
0x46 0_10001_10 = +0b1.10*2^1   = 3.0
\n", "
\n", "
0x86 1_00001_10 = -0b1.10*2^-15 = ~-4.5776e-05
\n", "
\n", "
0xc6 1_10001_10 = -0b1.10*2^1   = -3.0
\n", "
\n", "
0x07 0_00001_11 = +0b1.11*2^-15 = ~5.3406e-05
\n", "
\n", "
0x47 0_10001_11 = +0b1.11*2^1   = 3.5
\n", "
\n", "
0x87 1_00001_11 = -0b1.11*2^-15 = ~-5.3406e-05
\n", "
\n", "
0xc7 1_10001_11 = -0b1.11*2^1   = -3.5
\n", "
\n", "
0x08 0_00010_00 = +0b1.00*2^-14 = ~6.1035e-05
\n", "
\n", "
0x48 0_10010_00 = +0b1.00*2^2   = 4.0
\n", "
\n", "
0x88 1_00010_00 = -0b1.00*2^-14 = ~-6.1035e-05
\n", "
\n", "
0xc8 1_10010_00 = -0b1.00*2^2   = -4.0
\n", "
\n", "
0x09 0_00010_01 = +0b1.01*2^-14 = ~7.6294e-05
\n", "
\n", "
0x49 0_10010_01 = +0b1.01*2^2   = 5.0
\n", "
\n", "
0x89 1_00010_01 = -0b1.01*2^-14 = ~-7.6294e-05
\n", "
\n", "
0xc9 1_10010_01 = -0b1.01*2^2   = -5.0
\n", "
\n", "
0x0a 0_00010_10 = +0b1.10*2^-14 = ~9.1553e-05
\n", "
\n", "
0x4a 0_10010_10 = +0b1.10*2^2   = 6.0
\n", "
\n", "
0x8a 1_00010_10 = -0b1.10*2^-14 = ~-9.1553e-05
\n", "
\n", "
0xca 1_10010_10 = -0b1.10*2^2   = -6.0
\n", "
\n", "
0x0b 0_00010_11 = +0b1.11*2^-14 = ~0.00011
\n", "
\n", "
0x4b 0_10010_11 = +0b1.11*2^2   = 7.0
\n", "
\n", "
0x8b 1_00010_11 = -0b1.11*2^-14 = ~-0.00011
\n", "
\n", "
0xcb 1_10010_11 = -0b1.11*2^2   = -7.0
\n", "
\n", "
0x0c 0_00011_00 = +0b1.00*2^-13 = ~0.00012
\n", "
\n", "
0x4c 0_10011_00 = +0b1.00*2^3   = 8.0
\n", "
\n", "
0x8c 1_00011_00 = -0b1.00*2^-13 = ~-0.00012
\n", "
\n", "
0xcc 1_10011_00 = -0b1.00*2^3   = -8.0
\n", "
\n", "
0x0d 0_00011_01 = +0b1.01*2^-13 = ~0.00015
\n", "
\n", "
0x4d 0_10011_01 = +0b1.01*2^3   = 10.0
\n", "
\n", "
0x8d 1_00011_01 = -0b1.01*2^-13 = ~-0.00015
\n", "
\n", "
0xcd 1_10011_01 = -0b1.01*2^3   = -10.0
\n", "
\n", "
0x0e 0_00011_10 = +0b1.10*2^-13 = ~0.00018
\n", "
\n", "
0x4e 0_10011_10 = +0b1.10*2^3   = 12.0
\n", "
\n", "
0x8e 1_00011_10 = -0b1.10*2^-13 = ~-0.00018
\n", "
\n", "
0xce 1_10011_10 = -0b1.10*2^3   = -12.0
\n", "
\n", "
0x0f 0_00011_11 = +0b1.11*2^-13 = ~0.00021
\n", "
\n", "
0x4f 0_10011_11 = +0b1.11*2^3   = 14.0
\n", "
\n", "
0x8f 1_00011_11 = -0b1.11*2^-13 = ~-0.00021
\n", "
\n", "
0xcf 1_10011_11 = -0b1.11*2^3   = -14.0
\n", "
\n", "
0x30 0_01100_00 = +0b1.00*2^-4  = 0.0625
\n", "
\n", "
0x70 0_11100_00 = +0b1.00*2^12  = 4096.0
\n", "
\n", "
0xb0 1_01100_00 = -0b1.00*2^-4  = -0.0625
\n", "
\n", "
0xf0 1_11100_00 = -0b1.00*2^12  = -4096.0
\n", "
\n", "
0x31 0_01100_01 = +0b1.01*2^-4  = 0.078125
\n", "
\n", "
0x71 0_11100_01 = +0b1.01*2^12  = 5120.0
\n", "
\n", "
0xb1 1_01100_01 = -0b1.01*2^-4  = ~-0.07812
\n", "
\n", "
0xf1 1_11100_01 = -0b1.01*2^12  = -5120.0
\n", "
\n", "
0x32 0_01100_10 = +0b1.10*2^-4  = 0.09375
\n", "
\n", "
0x72 0_11100_10 = +0b1.10*2^12  = 6144.0
\n", "
\n", "
0xb2 1_01100_10 = -0b1.10*2^-4  = -0.09375
\n", "
\n", "
0xf2 1_11100_10 = -0b1.10*2^12  = -6144.0
\n", "
\n", "
0x33 0_01100_11 = +0b1.11*2^-4  = 0.109375
\n", "
\n", "
0x73 0_11100_11 = +0b1.11*2^12  = 7168.0
\n", "
\n", "
0xb3 1_01100_11 = -0b1.11*2^-4  = ~-0.10938
\n", "
\n", "
0xf3 1_11100_11 = -0b1.11*2^12  = -7168.0
\n", "
\n", "
0x34 0_01101_00 = +0b1.00*2^-3  = 0.125
\n", "
\n", "
0x74 0_11101_00 = +0b1.00*2^13  = 8192.0
\n", "
\n", "
0xb4 1_01101_00 = -0b1.00*2^-3  = -0.125
\n", "
\n", "
0xf4 1_11101_00 = -0b1.00*2^13  = -8192.0
\n", "
\n", "
0x35 0_01101_01 = +0b1.01*2^-3  = 0.15625
\n", "
\n", "
0x75 0_11101_01 = +0b1.01*2^13  = 10240.0
\n", "
\n", "
0xb5 1_01101_01 = -0b1.01*2^-3  = -0.15625
\n", "
\n", "
0xf5 1_11101_01 = -0b1.01*2^13  = -10240.0
\n", "
\n", "
0x36 0_01101_10 = +0b1.10*2^-3  = 0.1875
\n", "
\n", "
0x76 0_11101_10 = +0b1.10*2^13  = 12288.0
\n", "
\n", "
0xb6 1_01101_10 = -0b1.10*2^-3  = -0.1875
\n", "
\n", "
0xf6 1_11101_10 = -0b1.10*2^13  = -12288.0
\n", "
\n", "
0x37 0_01101_11 = +0b1.11*2^-3  = 0.21875
\n", "
\n", "
0x77 0_11101_11 = +0b1.11*2^13  = 14336.0
\n", "
\n", "
0xb7 1_01101_11 = -0b1.11*2^-3  = -0.21875
\n", "
\n", "
0xf7 1_11101_11 = -0b1.11*2^13  = -14336.0
\n", "
\n", "
0x38 0_01110_00 = +0b1.00*2^-2  = 0.25
\n", "
\n", "
0x78 0_11110_00 = +0b1.00*2^14  = 16384.0
\n", "
\n", "
0xb8 1_01110_00 = -0b1.00*2^-2  = -0.25
\n", "
\n", "
0xf8 1_11110_00 = -0b1.00*2^14  = -16384.0
\n", "
\n", "
0x39 0_01110_01 = +0b1.01*2^-2  = 0.3125
\n", "
\n", "
0x79 0_11110_01 = +0b1.01*2^14  = 20480.0
\n", "
\n", "
0xb9 1_01110_01 = -0b1.01*2^-2  = -0.3125
\n", "
\n", "
0xf9 1_11110_01 = -0b1.01*2^14  = -20480.0
\n", "
\n", "
0x3a 0_01110_10 = +0b1.10*2^-2  = 0.375
\n", "
\n", "
0x7a 0_11110_10 = +0b1.10*2^14  = 24576.0
\n", "
\n", "
0xba 1_01110_10 = -0b1.10*2^-2  = -0.375
\n", "
\n", "
0xfa 1_11110_10 = -0b1.10*2^14  = -24576.0
\n", "
\n", "
0x3b 0_01110_11 = +0b1.11*2^-2  = 0.4375
\n", "
\n", "
0x7b 0_11110_11 = +0b1.11*2^14  = 28672.0
\n", "
\n", "
0xbb 1_01110_11 = -0b1.11*2^-2  = -0.4375
\n", "
\n", "
0xfb 1_11110_11 = -0b1.11*2^14  = -28672.0
\n", "
\n", "
0x3c 0_01111_00 = +0b1.00*2^-1  = 0.5
\n", "
\n", "
0x7c 0_11111_00 = +0b1.00*2^15  = 32768.0
\n", "
\n", "
0xbc 1_01111_00 = -0b1.00*2^-1  = -0.5
\n", "
\n", "
0xfc 1_11111_00 = -0b1.00*2^15  = -32768.0
\n", "
\n", "
0x3d 0_01111_01 = +0b1.01*2^-1  = 0.625
\n", "
\n", "
0x7d 0_11111_01 = +0b1.01*2^15  = 40960.0
\n", "
\n", "
0xbd 1_01111_01 = -0b1.01*2^-1  = -0.625
\n", "
\n", "
0xfd 1_11111_01 = -0b1.01*2^15  = -40960.0
\n", "
\n", "
0x3e 0_01111_10 = +0b1.10*2^-1  = 0.75
\n", "
\n", "
0x7e 0_11111_10 = +0b1.10*2^15  = 49152.0
\n", "
\n", "
0xbe 1_01111_10 = -0b1.10*2^-1  = -0.75
\n", "
\n", "
0xfe 1_11111_10 = -0b1.10*2^15  = -49152.0
\n", "
\n", "
0x3f 0_01111_11 = +0b1.11*2^-1  = 0.875
\n", "
\n", "
0x7f 0_11111_11 = inf
\n", "
\n", "
0xbf 1_01111_11 = -0b1.11*2^-1  = -0.875
\n", "
\n", "
0xff 1_11111_11 = -inf
\n", "
" ], "text/plain": [ "" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "HTML(mktbl(format_info_p3109(3), cols=4, skip_rows=(0x10, 0x30), vs_width=8, vs_d=5))" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "

FP8 Value Table, p3109_p4

\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
\n", "
0x00 0_0000_000 = 0.0
\n", "
\n", "
0x40 0_1000_000 = +0b1.000*2^0   = 1.0
\n", "
\n", "
0x80 1_0000_000 = nan
\n", "
\n", "
0xc0 1_1000_000 = -0b1.000*2^0   = -1.0
\n", "
\n", "
0x01 0_0000_001 = +0b0.001*2^-7  = ~0.00098
\n", "
\n", "
0x41 0_1000_001 = +0b1.001*2^0   = 1.125
\n", "
\n", "
0x81 1_0000_001 = -0b0.001*2^-7  = ~-0.00098
\n", "
\n", "
0xc1 1_1000_001 = -0b1.001*2^0   = -1.125
\n", "
\n", "
0x02 0_0000_010 = +0b0.010*2^-7  = ~0.00195
\n", "
\n", "
0x42 0_1000_010 = +0b1.010*2^0   = 1.25
\n", "
\n", "
0x82 1_0000_010 = -0b0.010*2^-7  = ~-0.00195
\n", "
\n", "
0xc2 1_1000_010 = -0b1.010*2^0   = -1.25
\n", "
\n", "
0x03 0_0000_011 = +0b0.011*2^-7  = ~0.00293
\n", "
\n", "
0x43 0_1000_011 = +0b1.011*2^0   = 1.375
\n", "
\n", "
0x83 1_0000_011 = -0b0.011*2^-7  = ~-0.00293
\n", "
\n", "
0xc3 1_1000_011 = -0b1.011*2^0   = -1.375
\n", "
\n", "
0x04 0_0000_100 = +0b0.100*2^-7  = ~0.00391
\n", "
\n", "
0x44 0_1000_100 = +0b1.100*2^0   = 1.5
\n", "
\n", "
0x84 1_0000_100 = -0b0.100*2^-7  = ~-0.00391
\n", "
\n", "
0xc4 1_1000_100 = -0b1.100*2^0   = -1.5
\n", "
\n", "
0x05 0_0000_101 = +0b0.101*2^-7  = ~0.00488
\n", "
\n", "
0x45 0_1000_101 = +0b1.101*2^0   = 1.625
\n", "
\n", "
0x85 1_0000_101 = -0b0.101*2^-7  = ~-0.00488
\n", "
\n", "
0xc5 1_1000_101 = -0b1.101*2^0   = -1.625
\n", "
\n", "
0x06 0_0000_110 = +0b0.110*2^-7  = ~0.00586
\n", "
\n", "
0x46 0_1000_110 = +0b1.110*2^0   = 1.75
\n", "
\n", "
0x86 1_0000_110 = -0b0.110*2^-7  = ~-0.00586
\n", "
\n", "
0xc6 1_1000_110 = -0b1.110*2^0   = -1.75
\n", "
\n", "
0x07 0_0000_111 = +0b0.111*2^-7  = ~0.00684
\n", "
\n", "
0x47 0_1000_111 = +0b1.111*2^0   = 1.875
\n", "
\n", "
0x87 1_0000_111 = -0b0.111*2^-7  = ~-0.00684
\n", "
\n", "
0xc7 1_1000_111 = -0b1.111*2^0   = -1.875
\n", "
\n", "
0x08 0_0001_000 = +0b1.000*2^-7  = ~0.00781
\n", "
\n", "
0x48 0_1001_000 = +0b1.000*2^1   = 2.0
\n", "
\n", "
0x88 1_0001_000 = -0b1.000*2^-7  = ~-0.00781
\n", "
\n", "
0xc8 1_1001_000 = -0b1.000*2^1   = -2.0
\n", "
\n", "
0x09 0_0001_001 = +0b1.001*2^-7  = ~0.00879
\n", "
\n", "
0x49 0_1001_001 = +0b1.001*2^1   = 2.25
\n", "
\n", "
0x89 1_0001_001 = -0b1.001*2^-7  = ~-0.00879
\n", "
\n", "
0xc9 1_1001_001 = -0b1.001*2^1   = -2.25
\n", "
\n", "
0x0a 0_0001_010 = +0b1.010*2^-7  = ~0.00977
\n", "
\n", "
0x4a 0_1001_010 = +0b1.010*2^1   = 2.5
\n", "
\n", "
0x8a 1_0001_010 = -0b1.010*2^-7  = ~-0.00977
\n", "
\n", "
0xca 1_1001_010 = -0b1.010*2^1   = -2.5
\n", "
\n", "
0x0b 0_0001_011 = +0b1.011*2^-7  = ~0.01074
\n", "
\n", "
0x4b 0_1001_011 = +0b1.011*2^1   = 2.75
\n", "
\n", "
0x8b 1_0001_011 = -0b1.011*2^-7  = ~-0.01074
\n", "
\n", "
0xcb 1_1001_011 = -0b1.011*2^1   = -2.75
\n", "
\n", "
0x0c 0_0001_100 = +0b1.100*2^-7  = ~0.01172
\n", "
\n", "
0x4c 0_1001_100 = +0b1.100*2^1   = 3.0
\n", "
\n", "
0x8c 1_0001_100 = -0b1.100*2^-7  = ~-0.01172
\n", "
\n", "
0xcc 1_1001_100 = -0b1.100*2^1   = -3.0
\n", "
\n", "
0x0d 0_0001_101 = +0b1.101*2^-7  = ~0.01270
\n", "
\n", "
0x4d 0_1001_101 = +0b1.101*2^1   = 3.25
\n", "
\n", "
0x8d 1_0001_101 = -0b1.101*2^-7  = ~-0.01270
\n", "
\n", "
0xcd 1_1001_101 = -0b1.101*2^1   = -3.25
\n", "
\n", "
0x0e 0_0001_110 = +0b1.110*2^-7  = ~0.01367
\n", "
\n", "
0x4e 0_1001_110 = +0b1.110*2^1   = 3.5
\n", "
\n", "
0x8e 1_0001_110 = -0b1.110*2^-7  = ~-0.01367
\n", "
\n", "
0xce 1_1001_110 = -0b1.110*2^1   = -3.5
\n", "
\n", "
0x0f 0_0001_111 = +0b1.111*2^-7  = ~0.01465
\n", "
\n", "
0x4f 0_1001_111 = +0b1.111*2^1   = 3.75
\n", "
\n", "
0x8f 1_0001_111 = -0b1.111*2^-7  = ~-0.01465
\n", "
\n", "
0xcf 1_1001_111 = -0b1.111*2^1   = -3.75
\n", "
\n", "
0x30 0_0110_000 = +0b1.000*2^-2  = 0.25
\n", "
\n", "
0x70 0_1110_000 = +0b1.000*2^6   = 64.0
\n", "
\n", "
0xb0 1_0110_000 = -0b1.000*2^-2  = -0.25
\n", "
\n", "
0xf0 1_1110_000 = -0b1.000*2^6   = -64.0
\n", "
\n", "
0x31 0_0110_001 = +0b1.001*2^-2  = 0.28125
\n", "
\n", "
0x71 0_1110_001 = +0b1.001*2^6   = 72.0
\n", "
\n", "
0xb1 1_0110_001 = -0b1.001*2^-2  = -0.28125
\n", "
\n", "
0xf1 1_1110_001 = -0b1.001*2^6   = -72.0
\n", "
\n", "
0x32 0_0110_010 = +0b1.010*2^-2  = 0.3125
\n", "
\n", "
0x72 0_1110_010 = +0b1.010*2^6   = 80.0
\n", "
\n", "
0xb2 1_0110_010 = -0b1.010*2^-2  = -0.3125
\n", "
\n", "
0xf2 1_1110_010 = -0b1.010*2^6   = -80.0
\n", "
\n", "
0x33 0_0110_011 = +0b1.011*2^-2  = 0.34375
\n", "
\n", "
0x73 0_1110_011 = +0b1.011*2^6   = 88.0
\n", "
\n", "
0xb3 1_0110_011 = -0b1.011*2^-2  = -0.34375
\n", "
\n", "
0xf3 1_1110_011 = -0b1.011*2^6   = -88.0
\n", "
\n", "
0x34 0_0110_100 = +0b1.100*2^-2  = 0.375
\n", "
\n", "
0x74 0_1110_100 = +0b1.100*2^6   = 96.0
\n", "
\n", "
0xb4 1_0110_100 = -0b1.100*2^-2  = -0.375
\n", "
\n", "
0xf4 1_1110_100 = -0b1.100*2^6   = -96.0
\n", "
\n", "
0x35 0_0110_101 = +0b1.101*2^-2  = 0.40625
\n", "
\n", "
0x75 0_1110_101 = +0b1.101*2^6   = 104.0
\n", "
\n", "
0xb5 1_0110_101 = -0b1.101*2^-2  = -0.40625
\n", "
\n", "
0xf5 1_1110_101 = -0b1.101*2^6   = -104.0
\n", "
\n", "
0x36 0_0110_110 = +0b1.110*2^-2  = 0.4375
\n", "
\n", "
0x76 0_1110_110 = +0b1.110*2^6   = 112.0
\n", "
\n", "
0xb6 1_0110_110 = -0b1.110*2^-2  = -0.4375
\n", "
\n", "
0xf6 1_1110_110 = -0b1.110*2^6   = -112.0
\n", "
\n", "
0x37 0_0110_111 = +0b1.111*2^-2  = 0.46875
\n", "
\n", "
0x77 0_1110_111 = +0b1.111*2^6   = 120.0
\n", "
\n", "
0xb7 1_0110_111 = -0b1.111*2^-2  = -0.46875
\n", "
\n", "
0xf7 1_1110_111 = -0b1.111*2^6   = -120.0
\n", "
\n", "
0x38 0_0111_000 = +0b1.000*2^-1  = 0.5
\n", "
\n", "
0x78 0_1111_000 = +0b1.000*2^7   = 128.0
\n", "
\n", "
0xb8 1_0111_000 = -0b1.000*2^-1  = -0.5
\n", "
\n", "
0xf8 1_1111_000 = -0b1.000*2^7   = -128.0
\n", "
\n", "
0x39 0_0111_001 = +0b1.001*2^-1  = 0.5625
\n", "
\n", "
0x79 0_1111_001 = +0b1.001*2^7   = 144.0
\n", "
\n", "
0xb9 1_0111_001 = -0b1.001*2^-1  = -0.5625
\n", "
\n", "
0xf9 1_1111_001 = -0b1.001*2^7   = -144.0
\n", "
\n", "
0x3a 0_0111_010 = +0b1.010*2^-1  = 0.625
\n", "
\n", "
0x7a 0_1111_010 = +0b1.010*2^7   = 160.0
\n", "
\n", "
0xba 1_0111_010 = -0b1.010*2^-1  = -0.625
\n", "
\n", "
0xfa 1_1111_010 = -0b1.010*2^7   = -160.0
\n", "
\n", "
0x3b 0_0111_011 = +0b1.011*2^-1  = 0.6875
\n", "
\n", "
0x7b 0_1111_011 = +0b1.011*2^7   = 176.0
\n", "
\n", "
0xbb 1_0111_011 = -0b1.011*2^-1  = -0.6875
\n", "
\n", "
0xfb 1_1111_011 = -0b1.011*2^7   = -176.0
\n", "
\n", "
0x3c 0_0111_100 = +0b1.100*2^-1  = 0.75
\n", "
\n", "
0x7c 0_1111_100 = +0b1.100*2^7   = 192.0
\n", "
\n", "
0xbc 1_0111_100 = -0b1.100*2^-1  = -0.75
\n", "
\n", "
0xfc 1_1111_100 = -0b1.100*2^7   = -192.0
\n", "
\n", "
0x3d 0_0111_101 = +0b1.101*2^-1  = 0.8125
\n", "
\n", "
0x7d 0_1111_101 = +0b1.101*2^7   = 208.0
\n", "
\n", "
0xbd 1_0111_101 = -0b1.101*2^-1  = -0.8125
\n", "
\n", "
0xfd 1_1111_101 = -0b1.101*2^7   = -208.0
\n", "
\n", "
0x3e 0_0111_110 = +0b1.110*2^-1  = 0.875
\n", "
\n", "
0x7e 0_1111_110 = +0b1.110*2^7   = 224.0
\n", "
\n", "
0xbe 1_0111_110 = -0b1.110*2^-1  = -0.875
\n", "
\n", "
0xfe 1_1111_110 = -0b1.110*2^7   = -224.0
\n", "
\n", "
0x3f 0_0111_111 = +0b1.111*2^-1  = 0.9375
\n", "
\n", "
0x7f 0_1111_111 = inf
\n", "
\n", "
0xbf 1_0111_111 = -0b1.111*2^-1  = -0.9375
\n", "
\n", "
0xff 1_1111_111 = -inf
\n", "
" ], "text/plain": [ "" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "HTML(mktbl(format_info_p3109(4), cols=4, skip_rows=(0x10, 0x30), vs_width=8, vs_d=5))" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.0" } }, "nbformat": 4, "nbformat_minor": 2 } ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1716304225.0 gfloat-0.3/docs/source/api.rst0000644000175000017500000000134500000000000014503 0ustar00awfawf.. Copyright (c) 2024 Graphcore Ltd. All rights reserved. API === .. module:: gfloat Scalar Functions ---------------- .. autofunction:: decode_float .. autofunction:: round_float .. autofunction:: encode_float Block format functions ---------------------- .. autofunction:: decode_block .. autofunction:: encode_block .. autofunction:: quantize_block .. autofunction:: compute_scale_amax Classes ------- .. autoclass:: FormatInfo() :members: .. autoclass:: FloatClass() :members: .. autoclass:: RoundMode() :members: .. autoclass:: FloatValue() :members: .. autoclass:: BlockFormatInfo() :members: Pretty printers --------------- .. autofunction:: float_pow2str .. autofunction:: float_tilde_unless_roundtrip_str ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1718032353.0 gfloat-0.3/docs/source/conf.py0000644000175000017500000000235600000000000014502 0ustar00awfawf# Copyright (c) 2024 Graphcore Ltd. All rights reserved. # Configuration file for the Sphinx documentation builder. 
# -- Project information project = "GFloat" copyright = "2024, Graphcore Ltd" author = "Andrew Fitzgibbon" release = "0.3" # Set version in package.sh version = "0.3" # Set version in package.sh # -- General configuration extensions = [ "sphinx.ext.duration", "sphinx.ext.doctest", "sphinx.ext.autodoc", "sphinx.ext.autosummary", "sphinx.ext.intersphinx", "sphinx.ext.viewcode", "sphinx.ext.napoleon", "sphinx_paramlinks", "myst_nb", ] autodoc_typehints = "none" # We have them in the parameter descriptors autodoc_typehints_format = "short" python_use_unqualified_type_names = True autodoc_type_aliases = { "Iterable": "Iterable", "npt.ArrayLike": "ArrayLike", "npt.NDArray": "NDArray", } autodoc_default_options = { "member-order": "bysource", } intersphinx_mapping = { "python": ("https://docs.python.org/3/", None), "sphinx": ("https://www.sphinx-doc.org/en/master/", None), } intersphinx_disabled_domains = ["std"] templates_path = ["_templates"] # -- Options for HTML output html_theme = "sphinx_rtd_theme" # -- Options for EPUB output epub_show_urls = "footnote" ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1718031312.0 gfloat-0.3/docs/source/formats.rst0000644000175000017500000000172400000000000015406 0ustar00awfawf.. Copyright (c) 2024 Graphcore Ltd. All rights reserved. Defined Formats =============== .. module:: gfloat.formats IEEE 754 Formats ---------------- .. autodata:: format_info_binary16 .. autodata:: format_info_binary32 .. autodata:: format_info_binary64 BFloat16 ---------------- .. autodata:: format_info_bfloat16 Open Compute Platform (OCP) Formats ----------------------------------- .. autodata:: format_info_ocp_e5m2 .. autodata:: format_info_ocp_e4m3 .. autodata:: format_info_ocp_e3m2 .. autodata:: format_info_ocp_e2m3 .. autodata:: format_info_ocp_e2m1 .. autodata:: format_info_ocp_e8m0 .. autodata:: format_info_ocp_int8 IEEE WG P3109 Formats --------------------- .. 
autofunction:: format_info_p3109 Block Formats --------------------- .. autodata:: format_info_mxfp8_e5m2 .. autodata:: format_info_mxfp8_e4m3 .. autodata:: format_info_mxfp6_e3m2 .. autodata:: format_info_mxfp6_e2m3 .. autodata:: format_info_mxfp4_e2m1 .. autodata:: format_info_mxint8 ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1714650883.0 gfloat-0.3/docs/source/index.rst0000644000175000017500000000360300000000000015040 0ustar00awfawf.. Copyright (c) 2024 Graphcore Ltd. All rights reserved. .. note:: Check the version number of this documentation against the `gfloat` version you are using. "Latest" refers to the head on https://github.com/graphcore-research/gfloat, while pypi versions installed using `pip install` will have corresponding `vX.Y.Z` tags. GFloat: Generic floating point formats in Python ================================================ GFloat is designed to allow experimentation with a variety of floating-point formats in Python. Formats are parameterized by the primary IEEE-754 parameters of: * Width in bits (k) * Precision (p) * Maximum exponent (emax) with additional fields defining the encoding of infinities, Not-a-number (NaN) values, and negative zero, among others (see :class:`gfloat.FormatInfo`.) This allows an implementation of generic floating point encode/decode logic, handling various current and proposed floating point types: - `IEEE 754 `_: Binary16, Binary32 - `OCP Float8 `_: E5M2, E4M3, and MX formats - `IEEE WG P3109 `_: P{p} for p in 1..7 The library favours readability and extensibility over speed - for fast implementations of these datatypes see, for example, `ml_dtypes `_, `bitstring `_, `MX PyTorch Emulation Library `_. To get started with the library, we recommend perusing the notebooks, otherwise you may wish to jump straight into the API. .. toctree:: :hidden: self .. 
toctree:: notebooks api formats Index and Search ================ * :ref:`genindex` * :ref:`search` ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1714650883.0 gfloat-0.3/docs/source/notebooks.rst0000644000175000017500000000034400000000000015733 0ustar00awfawf.. Copyright (c) 2024 Graphcore Ltd. All rights reserved. Notebooks ========= Some notebooks to illustrate uses of the library .. toctree:: :maxdepth: 1 01-decode.ipynb 02-value-stats.ipynb 03-value-tables.ipynb ././@PaxHeader0000000000000000000000000000003400000000000010212 xustar0028 mtime=1718032361.2306235 gfloat-0.3/etc/0000755000175000017500000000000000000000000011520 5ustar00awfawf././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1714650883.0 gfloat-0.3/etc/check-copyright.sh0000755000175000017500000000054500000000000015146 0ustar00awfawf#!/usr/bin/bash # Copyright (c) 2024 Graphcore Ltd. All rights reserved. PATTERN='Copyright \(c\) 202[0-9] Graphcore Ltd\. +All rights reserved\.' # We "grep ." so the exit code signals that the first grep generated output if grep -L -E "$PATTERN" "$@" | grep . then # There was output, signal unsuccessful exit 1 fi # Normal exit, signalling success ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1718030340.0 gfloat-0.3/etc/package.sh0000644000175000017500000000117700000000000013455 0ustar00awfawf# Copyright (c) 2024 Graphcore Ltd. All rights reserved. 
# Set version numbers, make package, and publish set -o errexit # This is the master location at which to change version number VERSION="0.3" # Run the script to change the version elsewhere perl -pi -e 's/^(release|version) = "([\d.]+)"/$1 = "'$VERSION'"/' docs/source/conf.py perl -pi -e 's/^version = "([\d.]+)"/version = "'$VERSION'"/' pyproject.toml # Build docs to embed version ( cd docs && make html ) # Build distribution rm -rf dist pip install build twine python -m build echo "Enter PyPI API Token" echo __token__ | twine upload --repository pypi dist/* --verbose ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1714650883.0 gfloat-0.3/etc/test-check-copyright.sh0000644000175000017500000000117500000000000016120 0ustar00awfawf# Copyright (c) 2024 Graphcore Ltd. All rights reserved. tmpdir=$(mktemp -d) test -d $tmpdir || exit -1 cleanup () { echo "Removing $tmpdir" rm $tmpdir/t.sh rmdir $tmpdir } trap cleanup EXIT # Passing case echo "Copyright (c) 2024 Graphcore Ltd. All rights reserved." > $tmpdir/t.sh if sh etc/check-copyright.sh $tmpdir/t.sh then echo Pass: Should have passed else echo FAIL: Should have passed fi # Failing case echo "Copyright (c) 2024 Graphcore Ltd. All rights xreserved." > $tmpdir/t.sh if sh etc/check-copyright.sh $tmpdir/t.sh then echo FAIL: Should have failed, but passed else echo Pass: Should have failed fi ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1718032353.0 gfloat-0.3/pyproject.toml0000644000175000017500000000206200000000000013661 0ustar00awfawf# Copyright (c) 2024 Graphcore Ltd. All rights reserved. 
[build-system] requires = ["setuptools", "setuptools-scm"] build-backend = "setuptools.build_meta" [tool.setuptools] packages = ['gfloat'] package-dir = {"" = "src"} [project] name = "gfloat" version = "0.3" # Set version in package.sh authors = [ {name = "Andrew Fitzgibbon", email = "awf@fitzgibbon.ie"}, ] description = "Generic floating point handling in Python" readme = "README.md" classifiers = [ "Intended Audience :: Developers", "License :: OSI Approved :: MIT License", "Programming Language :: Python :: 3", "Development Status :: 3 - Alpha", ] requires-python = ">=3.8.1" dynamic = ["dependencies", "optional-dependencies"] [tool.setuptools.dynamic] # version = {attr = "gfloat.VERSION"} # Wow: https://github.com/pypa/setuptools/issues/1724 dependencies = {file = ["requirements.txt"]} optional-dependencies = {dev = {file = ["requirements-dev.txt"]}} [tool.black] line-length = 88 fast = true [tool.mypy] [[tool.mypy.overrides]] module = "mx.*" ignore_missing_imports = true ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1716478626.0 gfloat-0.3/requirements-dev.txt0000644000175000017500000000046300000000000015010 0ustar00awfawf# Requirements for tests pytest ml_dtypes # See requirements-tests also for direct dependencies # Requirements for development pre-commit black mypy black[jupyter] isort # Requirements for docs sphinx==7.1.2 sphinx-rtd-theme==1.3.0rc1 sphinx_paramlinks myst_nb # Requirements for notebooks airium pandas ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1716478626.0 gfloat-0.3/requirements-test.txt0000644000175000017500000000022600000000000015206 0ustar00awfawf# PyPI doesn't like direct dependencies - see https://github.com/microsoft/microxcaling/issues/22 mx @ git+https://github.com/microsoft/microxcaling ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1711120742.0 gfloat-0.3/requirements.txt0000644000175000017500000000000600000000000014225 
0ustar00awfawfnumpy ././@PaxHeader0000000000000000000000000000003400000000000010212 xustar0028 mtime=1718032361.2306235 gfloat-0.3/setup.cfg0000644000175000017500000000004600000000000012566 0ustar00awfawf[egg_info] tag_build = tag_date = 0 ././@PaxHeader0000000000000000000000000000003400000000000010212 xustar0028 mtime=1718032361.2306235 gfloat-0.3/src/0000755000175000017500000000000000000000000011534 5ustar00awfawf././@PaxHeader0000000000000000000000000000003400000000000010212 xustar0028 mtime=1718032361.2306235 gfloat-0.3/src/gfloat/0000755000175000017500000000000000000000000013010 5ustar00awfawf././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1716324465.0 gfloat-0.3/src/gfloat/__init__.py0000644000175000017500000000103700000000000015122 0ustar00awfawf# Copyright (c) 2024 Graphcore Ltd. All rights reserved. from .block import ( BlockFormatInfo, compute_scale_amax, decode_block, encode_block, quantize_block, ) from .decode import decode_float from .printing import float_pow2str, float_tilde_unless_roundtrip_str from .round import encode_float, round_float from .types import FloatClass, FloatValue, FormatInfo, RoundMode # Don't automatically import from .formats. # If the user wants them in their namespace, they can explicitly import # from gfloat.formats import * ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1717588744.0 gfloat-0.3/src/gfloat/block.py0000644000175000017500000001276000000000000014462 0ustar00awfawf# Copyright (c) 2024 Graphcore Ltd. All rights reserved. # Block floating point formats # https://en.wikipedia.org/wiki/Block_floating_point from dataclasses import dataclass from typing import Callable, Iterable import numpy as np import numpy.typing as npt from .decode import decode_float from .round import RoundMode, encode_float, round_float from .types import FormatInfo @dataclass class BlockFormatInfo: #: Short name for the format, e.g. 
BlockFP8 name: str #: Element data type etype: FormatInfo #: Scaling block size k: int #: Scale datatype stype: FormatInfo #: ## Derived values @property def element_bits(self) -> int: """The number of bits in each element, d""" return self.etype.k @property def scale_bits(self) -> int: """The number of bits in the scale, w""" return self.stype.k @property def block_size_bytes(self) -> int: """The number of bytes in a block""" bits = self.element_bits * self.k + self.scale_bits assert bits % 8 == 0 return bits // 8 @property def __name__(self) -> str: return self.name def __str__(self) -> str: return f"BlockFormatInfo:{self.name})" def decode_block(fi: BlockFormatInfo, block: Iterable[int]) -> Iterable[float]: """ Decode a :paramref:`block` of integer codepoints in Block Format :paramref:`fi` The scale is encoded in the first value of :paramref:`block`, with the remaining values encoding the block elements. The size of the iterable is not checked against the format descriptor. Args: fi (BlockFormatInfo): Describes the block format block (Iterable[int]): Input block Returns: A sequence of floats representing the encoded values. """ it = iter(block) scale_encoding = next(it) scale = decode_float(fi.stype, scale_encoding).fval for val_encoding in it: val = scale * decode_float(fi.etype, val_encoding).fval yield val # TODO: Assert length of block was k+1? Messy unless block is len()able def encode_block( fi: BlockFormatInfo, scale: float, vals: Iterable[float], round: RoundMode = RoundMode.TiesToEven, ) -> Iterable[int]: """ Encode float :paramref:`vals` into block Format described by :paramref:`fi` The :paramref:`scale` is explicitly passed, and the :paramref:`vals` are assumed to already be multiplied by `1/scale`. That is, this is pure encoding, scaling is computed and applied elsewhere (see e.g. :func:`quantize_block`). It is checked for overflow in the target format, and will raise an exception if it does. 
Args: fi (BlockFormatInfo): Describes the target block format scale (float): Scale to be recorded in the block vals (Iterable[float]): Input block round (RoundMode): Rounding mode to use, defaults to `TiesToEven` Returns: A sequence of ints representing the encoded values. Raises: ValueError: The scale overflows the target scale encoding format. """ if scale > fi.stype.max or scale < fi.stype.min: raise ValueError(f"Scaled {scale} out of range for {fi.stype}") sat = True # Saturate elements if out of range def enc(ty: FormatInfo, x: float) -> int: return encode_float(ty, round_float(ty, x, round, sat)) yield enc(fi.stype, scale) for val in vals: yield enc(fi.etype, val) ComputeScaleCallable = Callable[[float, npt.ArrayLike], float] def compute_scale_amax(emax: float, vals: npt.ArrayLike) -> float: """ Compute a scale factor such that :paramref:`vals` can be scaled to the range [0, 2**emax]. That is, `scale` is computed such that the largest exponent in the array `vals * scale` will be `emax`. The scale is clipped to the range 2**[-127, 127]. If all values are zero, any scale value smaller than emax would be accurate, but returning the smallest possible means that quick checks on the magnitude to identify near-zero blocks will also find the all-zero blocks. Args: emax (float): Maximum exponent to appear in `vals * scale` vals (ArrayLike): Input block Returns: A float such that `vals * scale` has exponents less than or equal to `emax`. Note: If all vals are zero, 1.0 is returned. 
""" amax = np.max(np.abs(vals)) if amax == 0.0: q_log2scale = -127.0 else: q_log2scale = np.floor(np.log2(amax)) - emax q_log2scale = np.clip(q_log2scale, -127.0, 127.0) return 2.0**q_log2scale def quantize_block( fi: BlockFormatInfo, vals: npt.NDArray[np.float64], compute_scale: ComputeScaleCallable, round: RoundMode = RoundMode.TiesToEven, ) -> npt.NDArray[np.float64]: """ Encode and decode a block of :paramref:`vals` of bytes into block format described by :paramref:`fi` Args: fi (BlockFormatInfo): Describes the target block format vals (numpy.array): Input block compute_scale ((float, ArrayLike) -> float): Callable to compute the scale, defaults to :func:`compute_scale_amax` round (RoundMode): Rounding mode to use, defaults to `TiesToEven` Returns: An array of floats representing the quantized values. Raises: ValueError: The scale overflows the target scale encoding format. """ q_scale = compute_scale(fi.etype.emax, vals) scaled_vals = vals / q_scale enc = encode_block(fi, q_scale, scaled_vals, round) return np.fromiter(decode_block(fi, enc), float) ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1718030269.0 gfloat-0.3/src/gfloat/decode.py0000644000175000017500000000542200000000000014610 0ustar00awfawf# Copyright (c) 2024 Graphcore Ltd. All rights reserved. import numpy as np from .types import FloatClass, FloatValue, FormatInfo def decode_float(fi: FormatInfo, i: int) -> FloatValue: r""" Given :py:class:`FormatInfo` and integer code point, decode to a :py:class:`FloatValue` Args: fi (FormatInfo): Floating point format descriptor. i (int): Integer code point, in the range :math:`0 \le i < 2^{k}`, where :math:`k` = ``fi.k`` Returns: Decoded float value Raises: ValueError: If :paramref:`i` is outside the range of valid code points in :paramref:`fi`. 
""" assert isinstance(i, int) k = fi.k p = fi.precision t = p - 1 # Trailing significand field width num_signbits = 1 if fi.is_signed else 0 w = k - t - num_signbits # Exponent field width if i < 0 or i >= 2**k: raise ValueError(f"Code point {i} not in range [0, 2**{k})") if fi.is_signed: signmask = 1 << (k - 1) signbit = 1 if i & signmask else 0 sign = -1 if signbit else 1 else: signmask = None signbit = 0 sign = 1 exp = (i >> t) & ((1 << w) - 1) significand = i & ((1 << t) - 1) if fi.is_twos_complement and signbit: significand = (1 << t) - significand expBias = fi.expBias iszero = exp == 0 and significand == 0 and fi.has_zero issubnormal = fi.has_subnormals and (exp == 0) and (significand != 0) isnormal = not iszero and not issubnormal if iszero or issubnormal: expval = 1 - expBias fsignificand = significand * 2**-t else: expval = exp - expBias fsignificand = 1.0 + significand * 2**-t # Handle specials: Infs, NaN, -0, NaN_0 signed_infinity = -np.inf if signbit else np.inf fval = None # All-bits-special exponent (ABSE) if w > 0 and exp == 2**w - 1: min_i_with_nan = 2 ** (p - 1) - fi.num_high_nans if significand >= min_i_with_nan: fval = np.nan if fi.has_infs and significand == min_i_with_nan - 1: fval = signed_infinity # Negative zero or NaN if iszero and i == signmask and not fi.is_twos_complement: if fi.has_nz: fval = -0.0 else: fval = np.nan # In range - compute value if fval is None: fval = sign * fsignificand * 2.0**expval # Compute FloatClass fclass = None if fval == 0: fclass = FloatClass.ZERO elif np.isnan(fval): fclass = FloatClass.NAN elif np.isfinite(fval): if isnormal: fclass = FloatClass.NORMAL else: fclass = FloatClass.SUBNORMAL else: fclass = FloatClass.INFINITE return FloatValue(i, fval, exp, expval, significand, fsignificand, signbit, fclass) ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1718030269.0 gfloat-0.3/src/gfloat/formats.py0000644000175000017500000001234400000000000015041 0ustar00awfawf# Copyright (c) 
2024 Graphcore Ltd. All rights reserved. from .block import BlockFormatInfo from .types import FormatInfo #: FormatInfo for IEEE-754 Binary64 format format_info_binary64 = FormatInfo( name="binary64", k=64, precision=53, emax=1023, has_nz=True, has_infs=True, num_high_nans=2**52 - 1, has_subnormals=True, is_signed=True, is_twos_complement=False, ) #: FormatInfo for IEEE-754 Binary32 format format_info_binary32 = FormatInfo( name="binary32", k=32, precision=24, emax=127, has_nz=True, has_infs=True, num_high_nans=2**23 - 1, has_subnormals=True, is_signed=True, is_twos_complement=False, ) #: FormatInfo for IEEE-754 Binary16 format format_info_binary16 = FormatInfo( name="binary16", k=16, precision=11, emax=15, has_nz=True, has_infs=True, num_high_nans=2**10 - 1, has_subnormals=True, is_signed=True, is_twos_complement=False, ) #: FormatInfo for Google BFloat16 format format_info_bfloat16 = FormatInfo( name="bfloat16", k=16, precision=8, emax=127, has_nz=True, has_infs=True, num_high_nans=2**7 - 1, has_subnormals=True, is_signed=True, is_twos_complement=False, ) #: FormatInfo for OCP E5M2 format format_info_ocp_e5m2 = FormatInfo( name="ocp_e5m2", k=8, precision=3, emax=15, has_nz=True, has_infs=True, num_high_nans=2**2 - 1, has_subnormals=True, is_signed=True, is_twos_complement=False, ) #: FormatInfo for OCP E4M3 format format_info_ocp_e4m3 = FormatInfo( name="ocp_e4m3", k=8, precision=4, emax=8, has_nz=True, has_infs=False, num_high_nans=1, has_subnormals=True, is_signed=True, is_twos_complement=False, ) #: FormatInfo for OCP MX E2M3 format format_info_ocp_e2m3 = FormatInfo( name="ocp_e2m3", k=6, precision=4, emax=2, has_nz=True, has_infs=False, num_high_nans=0, has_subnormals=True, is_signed=True, is_twos_complement=False, ) #: FormatInfo for OCP MX E3M2 format format_info_ocp_e3m2 = FormatInfo( name="ocp_e3m2", k=6, precision=3, emax=4, has_nz=True, has_infs=False, num_high_nans=0, has_subnormals=True, is_signed=True, is_twos_complement=False, ) #: FormatInfo for 
OCP MX E2M1 format format_info_ocp_e2m1 = FormatInfo( name="ocp_e2m1", k=4, precision=2, emax=2, has_nz=True, has_infs=False, num_high_nans=0, has_subnormals=True, is_signed=True, is_twos_complement=False, ) #: FormatInfo for OCP MX E8M0 format format_info_ocp_e8m0 = FormatInfo( name="ocp_e8m0", k=8, precision=1, emax=127, has_nz=False, has_infs=False, num_high_nans=1, has_subnormals=False, is_signed=False, is_twos_complement=False, ) #: FormatInfo for OCP MX INT8 format format_info_ocp_int8 = FormatInfo( name="ocp_int8", k=8, precision=8, emax=0, has_nz=False, has_infs=False, num_high_nans=0, has_subnormals=True, is_signed=True, is_twos_complement=True, ) def format_info_p3109(precision: int) -> FormatInfo: """ FormatInfo for P3109 P{p} formats Args: p (int): Precision in bits Returns: FormatInfo class describing the format Raises: ValueError: If p is not in 1..7 """ if precision < 1 or precision > 7: raise ValueError(f"P3109 format not defined for p={precision}") name = f"p3109_p{precision}" emax = 2 ** (7 - precision) - 1 return FormatInfo( name, k=8, precision=precision, emax=emax, has_nz=False, has_infs=True, num_high_nans=0, has_subnormals=True, is_signed=True, is_twos_complement=False, ) # Collections of formats _tiny_formats = [ format_info_ocp_e2m1, format_info_ocp_e2m3, format_info_ocp_e3m2, ] p3109_formats = [format_info_p3109(p) for p in range(1, 7)] _fp8_formats = [ format_info_ocp_e4m3, format_info_ocp_e5m2, *p3109_formats, ] _fp16_formats = [ format_info_binary16, format_info_bfloat16, ] all_formats = [ format_info_ocp_e8m0, format_info_ocp_int8, *_tiny_formats, *_fp8_formats, *_fp16_formats, format_info_binary32, format_info_binary64, ] # ------ # Block formats format_info_mxfp8_e5m2 = BlockFormatInfo( "mxfp8_e5m2", format_info_ocp_e5m2, 32, format_info_ocp_e8m0 ) format_info_mxfp8_e4m3 = BlockFormatInfo( "mxfp8_e4m3", format_info_ocp_e4m3, 32, format_info_ocp_e8m0 ) format_info_mxfp6_e3m2 = BlockFormatInfo( "mxfp6_e3m2", format_info_ocp_e3m2, 32, 
format_info_ocp_e8m0 ) format_info_mxfp6_e2m3 = BlockFormatInfo( "mxfp6_e2m3", format_info_ocp_e2m3, 32, format_info_ocp_e8m0 ) format_info_mxfp4_e2m1 = BlockFormatInfo( "mxfp4_e2m1", format_info_ocp_e2m1, 32, format_info_ocp_e8m0 ) format_info_mxfp4_e2m1 = BlockFormatInfo( "mxfp4_e2m1", format_info_ocp_e2m1, 32, format_info_ocp_e8m0 ) format_info_mxint8 = BlockFormatInfo( "mxint8", format_info_ocp_int8, 32, format_info_ocp_e8m0 ) all_block_formats = [ format_info_mxfp8_e5m2, format_info_mxfp8_e4m3, format_info_mxfp6_e3m2, format_info_mxfp6_e2m3, format_info_mxfp4_e2m1, format_info_mxint8, ] ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1714650883.0 gfloat-0.3/src/gfloat/printing.py0000644000175000017500000000306200000000000015215 0ustar00awfawf# Copyright (c) 2024 Graphcore Ltd. All rights reserved. import fractions import numpy as np def float_pow2str(v: float, min_exponent: float = -np.inf) -> str: """ Render floating point values as exact fractions times a power of two. Example: float_pow2str(127.0) is "127/64*2^6", That is (a significand between 1 and 2) times (a power of two). If `min_exponent` is supplied, then values with exponent below `min_exponent`, are printed as fractions less than 1, with exponent set to `min_exponent`. This is typically used to represent subnormal values. 
""" if not np.isfinite(v): return str(v) s = np.sign(v) x = np.abs(v) e = np.floor(np.log2(x)) sig = x * 2.0**-e if e < min_exponent: sig *= 2.0 ** (e - min_exponent) e = min_exponent significand = fractions.Fraction(sig) return ("-" if s < 0 else "") + f"{significand}*2^{int(e):d}" def float_tilde_unless_roundtrip_str(v: float, width: int = 14, d: int = 8) -> str: """ Return a string representation of :paramref:`v`, in base 10, with maximum width :paramref:`width` and decimal digits :paramref:`d` """ # valstr: string representation of value in base 10 # If the representation does not roundtrip to the value, # it is preceded by a "~" to indicate "approximately equal to" s = f"{v}" if len(s) > width: if abs(v) < 1 and not "e" in s: s = f"{v:.{d}f}" else: s = f"{v:.{d}}" if np.isfinite(v) and float(s) != v: s = "~" + s return s ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1718030269.0 gfloat-0.3/src/gfloat/round.py0000644000175000017500000001520100000000000014510 0ustar00awfawf# Copyright (c) 2024 Graphcore Ltd. All rights reserved. import math import numpy as np from .types import FormatInfo, RoundMode def _isodd(v: int) -> bool: return v & 0x1 == 1 def round_float( fi: FormatInfo, v: float, rnd: RoundMode = RoundMode.TiesToEven, sat: bool = False ) -> float: """ Round input to the given :py:class:`FormatInfo`, given rounding mode and saturation flag An input NaN will convert to a NaN in the target. An input Infinity will convert to the largest float if :paramref:`sat`, otherwise to an Inf, if present, otherwise to a NaN. Negative zero will be returned if the format has negative zero, otherwise zero. Args: fi (FormatInfo): Describes the target format v (float): Input value to be rounded rnd (RoundMode): Rounding mode to use sat (bool): Saturation flag: if True, round overflowed values to `fi.max` Returns: A float which is one of the values in the format. Raises: ValueError: The target format cannot represent the input (e.g. 
converting a `NaN`, or an `Inf` when the target has no `NaN` or `Inf`, and :paramref:`sat` is false) """ # Constants p = fi.precision bias = fi.expBias if np.isnan(v): if fi.num_nans == 0: raise ValueError(f"No NaN in format {fi}") # Note that this does not preserve the NaN payload return np.nan # Extract sign sign = np.signbit(v) and fi.is_signed vpos = -v if sign else v if np.isinf(vpos): result = np.inf elif vpos == 0: result = 0 else: # Extract exponent expval = int(np.floor(np.log2(vpos))) # Effective precision, accounting for right shift for subnormal values if fi.has_subnormals: expval = max(expval, 1 - bias) # Lift to "integer * 2^e" expval = expval - p + 1 # use ldexp instead of vpos*2**-expval to avoid overflow fsignificand = math.ldexp(vpos, -expval) # Round isignificand = math.floor(fsignificand) delta = fsignificand - isignificand if ( (rnd == RoundMode.TowardPositive and not sign and delta > 0) or (rnd == RoundMode.TowardNegative and sign and delta > 0) or (rnd == RoundMode.TiesToAway and delta >= 0.5) or (rnd == RoundMode.TiesToEven and delta > 0.5) or (rnd == RoundMode.TiesToEven and delta == 0.5 and _isodd(isignificand)) ): isignificand += 1 ## Special case for Precision=1, all-log format with zero. # The logic is simply duplicated (and isignificand overwritten) for clarity. if fi.precision == 1: isignificand = math.floor(fsignificand) code_is_odd = isignificand != 0 and _isodd(expval + bias) if ( (rnd == RoundMode.TowardPositive and not sign and delta > 0) or (rnd == RoundMode.TowardNegative and sign and delta > 0) or (rnd == RoundMode.TiesToAway and delta >= 0.5) or (rnd == RoundMode.TiesToEven and delta > 0.5) or (rnd == RoundMode.TiesToEven and delta == 0.5 and code_is_odd) ): # Go to nextUp. # Increment isignificand if zero, # else increment exponent if isignificand == 0: isignificand = 1 else: assert isignificand == 1 expval += 1 ## End special case for Precision=1. 
# Reconstruct rounded result to float result = isignificand * (2.0**expval) if result == 0: if sign and fi.has_nz: return -0.0 else: return 0.0 # Overflow amax = -fi.min if sign else fi.max if result > amax: if ( sat or (rnd == RoundMode.TowardNegative and not sign and np.isfinite(v)) or (rnd == RoundMode.TowardPositive and sign and np.isfinite(v)) or (rnd == RoundMode.TowardZero and np.isfinite(v)) ): result = amax else: if fi.has_infs: result = np.inf elif fi.num_nans > 0: result = np.nan else: raise ValueError(f"No Infs or NaNs in format {fi}, and sat=False") # Set sign if sign: result = -result return result def encode_float(fi: FormatInfo, v: float) -> int: """ Encode input to the given :py:class:`FormatInfo`. Will round toward zero if :paramref:`v` is not in the value set. Will saturate to `Inf`, `NaN`, `fi.max` in order of precedence. Encode -0 to 0 if not `fi.has_nz` For other roundings and saturations, call :func:`round_float` first. Args: fi (FormatInfo): Describes the target format v (float): The value to be encoded. 
# Copyright (c) 2024 Graphcore Ltd. All rights reserved.

from dataclasses import dataclass
from enum import Enum


class RoundMode(Enum):
    """
    Enum for IEEE-754 rounding modes.

    Result r is obtained from input v depending on rounding mode as follows
    """

    TowardZero = 1  #: :math:`\max \{ r ~ s.t. ~ |r| \le |v| \}`
    TowardNegative = 2  #: :math:`\max \{ r ~ s.t. ~ r \le v \}`
    TowardPositive = 3  #: :math:`\min \{ r ~ s.t. ~ r \ge v \}`
    TiesToEven = 4  #: Round to nearest, ties to even
    TiesToAway = 5  #: Round to nearest, ties away from zero


class FloatClass(Enum):
    """
    Enum for the classification of a FloatValue.
    """

    NORMAL = 1  #: A positive or negative normalized non-zero value
    SUBNORMAL = 2  #: A positive or negative subnormal value
    ZERO = 3  #: A positive or negative zero value
    INFINITE = 4  #: A positive or negative infinity (+/-Inf)
    NAN = 5  #: Not a Number (NaN)


@dataclass
class FloatValue:
    """
    A floating-point value decoded in great detail.
    """

    code: int  #: Integer code point

    #: Value. Assumed to be exactly round-trippable to python float.
    #: This is true for all <64bit formats known in 2023.
    fval: float

    exp: int  #: Raw exponent without bias
    expval: int  #: Exponent, bias subtracted
    significand: int  #: Significand as an integer
    fsignificand: float  #: Significand as a float in the range [0,2)
    signbit: int  #: Sign bit: 1 => negative, 0 => positive
    fclass: FloatClass  #: See FloatClass


@dataclass
class FormatInfo:
    """
    Class describing a floating-point format, parametrized
    by width, precision, and special value encoding rules.

    The dataclass fields are the primary parameters; everything else
    (exponent width, bias, extreme values, special code points) is derived
    from them by the properties below.
    """

    #: Short name for the format, e.g. binary32, bfloat16
    name: str

    #: Number of bits in the format
    k: int

    #: Number of significand bits (including implicit leading bit)
    precision: int

    #: Largest exponent, emax, which shall equal floor(log_2(maxFinite))
    emax: int

    #: Set if format encodes -0 at (sgn=1,exp=0,significand=0).
    #: If False, that encoding decodes to a NaN labelled NaN_0
    has_nz: bool

    #: Set if format includes +/- Infinity.
    #: If set, the non-nan value with the highest encoding for each sign (s)
    #: is replaced by (s)Inf.
    has_infs: bool

    #: Number of NaNs that are encoded in the highest encodings for each sign
    num_high_nans: int

    #: Set if format encodes subnormals
    has_subnormals: bool

    #: Set if the format has a sign bit
    is_signed: bool

    #: Set if the format uses two's complement encoding for the significand
    is_twos_complement: bool

    #: ## Derived values

    @property
    def tSignificandBits(self) -> int:
        """The number of trailing significand bits, t"""
        return self.precision - 1

    @property
    def expBits(self) -> int:
        """The number of exponent bits, w"""
        # An unsigned format gives the bit that would hold the sign
        # to the exponent field.
        return self.k - self.precision + (0 if self.is_signed else 1)

    @property
    def signBits(self) -> int:
        """The number of sign bits, s"""
        return 1 if self.is_signed else 0

    @property
    def expBias(self) -> int:
        """
        The exponent bias derived from (p,emax)

        This is the bias that should be applied so that
           :math:`floor(log_2(maxFinite)) = emax`
        """
        # Calculate whether all of the all-bits-one-exponent values contain
        # specials. If so, emax will be obtained for exponent value 2^w-2,
        # otherwise it is 2^w-1
        t = self.tSignificandBits
        num_posinfs = 1 if self.has_infs else 0
        all_bits_one_full = (self.num_high_nans + num_posinfs == 2**t) or (
            self.expBits == 0 and self.has_infs
        )

        # Compute exponent bias.
        exp_for_emax = 2**self.expBits - (2 if all_bits_one_full else 1)
        return exp_for_emax - self.emax

    # numpy finfo properties
    #
    # The following numpy ``finfo`` attributes are not provided by this class:
    # dtype, minexp, negep, nexp, nmant, precision (decimal digits; the name
    # is taken by the ``precision`` field above), resolution, tiny.

    @property
    def bits(self) -> int:
        """
        The number of bits occupied by the type.
        """
        return self.k

    @property
    def eps(self) -> float:
        """
        The difference between 1.0 and the smallest representable float
        larger than 1.0. For example, for 64-bit binary floats in the IEEE-754
        standard, ``eps = 2**-52``, approximately 2.22e-16.
        """
        # TODO: Check if 1.0 is subnormal for any reasonable format, e.g. p3109(7)?
        # float(): with machep == 0 (precision 1) the integer power would
        # otherwise yield the int 1 rather than 1.0.
        return float(2**self.machep)

    @property
    def epsneg(self) -> float:
        """
        The difference between 1.0 and the largest representable float
        less than 1.0. For example, for 64-bit binary floats in the IEEE-754
        standard, ``epsneg = 2**-53``, approximately 1.11e-16.
        """
        return self.eps / 2

    @property
    def iexp(self) -> int:
        """
        The number of bits in the exponent portion of the floating point
        representation.
        """
        return self.expBits

    @property
    def machep(self) -> int:
        """
        The exponent that yields `eps`.
        """
        return -self.tSignificandBits

    @property
    def max(self) -> float:
        """
        The largest representable number.
        """
        t = self.tSignificandBits
        num_posinfs = 1 if self.has_infs else 0
        num_non_finites = self.num_high_nans + num_posinfs
        if num_non_finites == 2**t:
            # All-bits-one exponent field is full, value is in the
            # binade below, so significand is 0xFFF..F
            isig = 2**t - 1
        else:
            # All-bits-one exponent field is not full, value is in the
            # final binade, so significand is 0xFFF..F - num_non_finites
            isig = 2**t - 1 - num_non_finites

        if self.is_all_subnormal:
            # No implicit leading one: the significand alone carries the value.
            return 2**self.emax * (isig * 2 ** (1 - t))
        return 2**self.emax * (1.0 + isig * 2**-t)

    @property
    def maxexp(self) -> int:
        """
        The smallest positive power of the base (2) that causes overflow.
        """
        return self.emax + 1

    @property
    def min(self) -> float:
        """
        The smallest representable number, typically ``-max``.

        For two's complement formats this is ``-2**(emax + 1)``; for unsigned
        formats it is 0.0 when the format has a zero, else ``2**-expBias``.
        """
        if self.is_signed:
            if not self.is_twos_complement:
                return -self.max
            assert not self.has_infs and self.num_high_nans == 0 and not self.has_nz
            # 2.0 (not 2) so that the declared float return type is honoured.
            return -(2.0 ** (self.emax + 1))
        if self.has_zero:
            return 0.0
        return 2**-self.expBias

    @property
    def num_nans(self) -> int:
        """
        The number of code points which decode to NaN
        """
        if not self.is_signed:
            return self.num_high_nans

        # Signed
        if self.is_twos_complement:
            assert not self.has_infs and self.num_high_nans == 0 and not self.has_nz
            return 0

        # Sign-magnitude: the would-be negative zero is a NaN when has_nz is
        # False, and the high NaNs exist once per sign.
        return (0 if self.has_nz else 1) + 2 * self.num_high_nans

    @property
    def code_of_nan(self) -> int:
        """
        Return a codepoint for a NaN

        Raises:
            ValueError: If the format has no NaN code point, i.e. exactly
                when :attr:`num_nans` is zero.
        """
        if self.num_high_nans > 0:
            return 2**self.k - 1
        # The (sgn=1,exp=0,significand=0) code is a NaN only in signed,
        # sign-magnitude formats without negative zero. In two's complement
        # formats that code is code_of_min, and in unsigned formats it is an
        # ordinary value, so neither may be reported as NaN.
        if self.is_signed and not self.is_twos_complement and not self.has_nz:
            return 2 ** (self.k - 1)
        raise ValueError(f"No NaN in {self}")

    @property
    def code_of_posinf(self) -> int:
        """
        Return a codepoint for positive infinity

        Raises:
            ValueError: If the format has no infinities.
        """
        if not self.has_infs:
            raise ValueError(f"No Inf in {self}")

        return 2 ** (self.k - 1) - 1 - self.num_high_nans

    @property
    def code_of_neginf(self) -> int:
        """
        Return a codepoint for negative infinity

        Raises:
            ValueError: If the format has no infinities.
        """
        if not self.has_infs:
            raise ValueError(f"No Inf in {self}")

        return 2**self.k - 1 - self.num_high_nans

    @property
    def code_of_zero(self) -> int:
        """
        Return a codepoint for (non-negative) zero
        """
        assert self.has_zero
        return 0

    @property
    def has_zero(self) -> bool:
        """
        Does the format have zero?

        This is false if the mantissa is 0 width and we don't have subnormals -
        essentially the mantissa is always decoded as 1.
        If we have subnormals, the only subnormal is zero, and the mantissa is
        always decoded as 0.
        """
        return self.precision > 1 or self.has_subnormals

    @property
    def code_of_negzero(self) -> int:
        """
        Return a codepoint for negative zero

        Raises:
            ValueError: If the format has no negative zero.
        """
        if not self.has_nz:
            raise ValueError(f"No negative zero in {self}")

        return 2 ** (self.k - 1)

    @property
    def code_of_max(self) -> int:
        """
        Return a codepoint for fi.max
        """
        # Highest non-negative code, stepping down past the high NaNs and Inf.
        return (
            2 ** (self.k - self.signBits)
            - self.num_high_nans
            - int(self.has_infs)
            - 1
        )

    @property
    def code_of_min(self) -> int:
        """
        Return a codepoint for fi.min
        """
        if self.is_signed and not self.is_twos_complement:
            return 2**self.k - self.num_high_nans - int(self.has_infs) - 1
        if self.is_signed and self.is_twos_complement:
            return 2 ** (self.k - 1)
        return 0  # codepoint of smallest value, whether 0 or 2^-expBias

    @property
    def smallest_normal(self) -> float:
        """
        The smallest positive floating point number with 1 as leading bit in
        the significand following IEEE-754.
        """
        if self.has_subnormals:
            return 2 ** (1 - self.expBias)
        if self.has_zero:
            # Exponent field 0 holds normals, but code 0 itself is zero, so
            # the smallest normal is one significand step above 2^-expBias.
            return 2**-self.expBias + 2 ** (-self.expBias - self.tSignificandBits)
        return 2**-self.expBias

    @property
    def smallest_subnormal(self) -> float:
        """
        The smallest positive floating point number with 0 as leading bit in
        the significand following IEEE-754.
        """
        assert self.has_subnormals, "not implemented"
        return 2 ** -(self.expBias + self.tSignificandBits - 1)

    @property
    def smallest(self) -> float:
        """
        The smallest positive floating point number.
        """
        if self.has_subnormals:
            return self.smallest_subnormal
        return self.smallest_normal

    @property
    def is_all_subnormal(self) -> bool:
        """
        Are all encoded values subnormal?
        """
        return (self.expBits == 0) and self.has_subnormals

    @property
    def __name__(self) -> str:
        """The format's short name, exposed under the conventional attribute."""
        return self.name

    def __str__(self) -> str:
        """Return the format's short name."""
        return f"{self.name}"
sphinx==7.1.2; extra == "dev" Requires-Dist: sphinx-rtd-theme==1.3.0rc1; extra == "dev" Requires-Dist: sphinx_paramlinks; extra == "dev" Requires-Dist: myst_nb; extra == "dev" Requires-Dist: airium; extra == "dev" Requires-Dist: pandas; extra == "dev" # gfloat: Generic floating-point types in Python An implementation of generic floating point encode/decode logic, handling various current and proposed floating point types: - [IEEE 754](https://en.wikipedia.org/wiki/IEEE_754): Binary16, Binary32 - [OCP Float8](https://www.opencompute.org/documents/ocp-8-bit-floating-point-specification-ofp8-revision-1-0-2023-06-20-pdf): E5M2, E4M3 - [IEEE WG P3109](https://github.com/awf/P3109-Public/blob/main/Shared%20Reports/P3109%20WG%20Interim%20report.pdf): P{p} for p in 1..7 - [OCP MX Formats](https://www.opencompute.org/documents/ocp-microscaling-formats-mx-v1-0-spec-final-pdf): E2M1, E2M3, E3M2, E8M0, INT8, and the MX block formats. The library favours readability and extensibility over speed - for fast implementations of these datatypes see, for example, [ml_dtypes](https://github.com/jax-ml/ml_dtypes), [bitstring](https://github.com/scott-griffiths/bitstring), [MX PyTorch Emulation Library](https://github.com/microsoft/microxcaling). See https://gfloat.readthedocs.io for documentation, or dive into the notebooks to explore the formats.
import ml_dtypes
import numpy as np
import pytest

from gfloat import FloatClass, decode_float
from gfloat.formats import *


def _fval(fi: FormatInfo, code: int) -> float:
    """Decode `code` in format `fi` and return only its float value."""
    return decode_float(fi, code).fval


def _fclass(fi: FormatInfo, code: int) -> FloatClass:
    """Decode `code` in format `fi` and return only its classification."""
    return decode_float(fi, code).fclass


def _isnegzero(x: float) -> bool:
    """True when `x` compares equal to zero and has its sign bit set."""
    if x != 0:
        return False
    return bool(np.signbit(x))


def test_spot_check_ocp_e5m2() -> None:
    """Hand-picked code points of OCP E5M2."""
    fi = format_info_ocp_e5m2
    largest_finite = _fval(fi, 0x7B)

    assert _fval(fi, 0x01) == 2.0**-16
    assert _fval(fi, 0x40) == 2.0
    assert _isnegzero(_fval(fi, 0x80))
    assert largest_finite == 57344.0
    assert _fval(fi, 0x7C) == np.inf
    assert np.floor(np.log2(largest_finite)) == fi.emax
    assert _fval(fi, 0xFC) == -np.inf
    assert np.isnan(_fval(fi, 0x7F))
    assert _fclass(fi, 0x80) == FloatClass.ZERO
    assert _fclass(fi, 0x00) == FloatClass.ZERO


def test_spot_check_ocp_e4m3() -> None:
    """Hand-picked code points of OCP E4M3."""
    fi = format_info_ocp_e4m3
    largest_finite = _fval(fi, 0x7E)

    assert _fval(fi, 0x40) == 2.0
    assert _fval(fi, 0x01) == 2.0**-9
    assert _isnegzero(_fval(fi, 0x80))
    assert np.isnan(_fval(fi, 0x7F))
    assert largest_finite == 448.0
    assert np.floor(np.log2(largest_finite)) == fi.emax


def test_spot_check_p3109_p3() -> None:
    """Hand-picked code points of P3109 with precision 3."""
    fi = format_info_p3109(3)

    assert _fval(fi, 0x01) == 2.0**-17
    assert _fval(fi, 0x40) == 1.0
    assert np.isnan(_fval(fi, 0x80))
    assert _fval(fi, 0xFF) == -np.inf
    assert np.floor(np.log2(_fval(fi, 0x7E))) == fi.emax


def test_spot_check_p3109_p1() -> None:
    """Hand-picked code points of P3109 with precision 1."""
    fi = format_info_p3109(1)

    assert _fval(fi, 0x01) == 2.0**-62
    assert _fval(fi, 0x40) == 2.0
    assert np.isnan(_fval(fi, 0x80))
    assert _fval(fi, 0xFF) == -np.inf
    assert np.floor(np.log2(_fval(fi, 0x7E))) == fi.emax


def test_spot_check_binary16() -> None:
    """Hand-picked code points of IEEE binary16."""
    fi = format_info_binary16

    assert _fval(fi, 0x3C00) == 1.0
    assert _fval(fi, 0x3C01) == 1.0 + 2**-10
    assert _fval(fi, 0x4000) == 2.0
    assert _fval(fi, 0x0001) == 2**-24
    assert _fval(fi, 0x7BFF) == 65504.0
    assert np.isinf(_fval(fi, 0x7C00))
    assert np.isnan(_fval(fi, 0x7C01))
    assert np.isnan(_fval(fi, 0x7FFF))


def test_spot_check_bfloat16() -> None:
    """Hand-picked code points of bfloat16."""
    fi = format_info_bfloat16

    assert _fval(fi, 0x3F80) == 1
    assert _fval(fi, 0x4000) == 2
    assert _fval(fi, 0x0001) == 2**-133
    assert _fval(fi, 0x4780) == 65536.0
    assert np.isinf(_fval(fi, 0x7F80))
    assert np.isnan(_fval(fi, 0x7F81))
    assert np.isnan(_fval(fi, 0x7FFF))


def test_spot_check_ocp_e2m3() -> None:
    """Check against Table 4 in "OCP Microscaling Formats (MX) v1.0 Spec"."""
    fi = format_info_ocp_e2m3

    assert fi.max == 7.5
    assert fi.smallest_subnormal == 0.125
    assert fi.smallest_normal == 1.0
    assert not fi.has_infs
    assert fi.num_nans == 0
    assert fi.has_nz

    assert _fval(fi, 0b000000) == 0
    assert _fval(fi, 0b011111) == 7.5
    assert _isnegzero(_fval(fi, 0b100000))


def test_spot_check_ocp_e3m2() -> None:
    """Check against Table 4 in "OCP Microscaling Formats (MX) v1.0 Spec"."""
    fi = format_info_ocp_e3m2

    assert fi.max == 28.0
    assert fi.smallest_subnormal == 0.0625
    assert fi.smallest_normal == 0.25
    assert not fi.has_infs
    assert fi.num_nans == 0
    assert fi.has_nz

    assert _fval(fi, 0b000000) == 0
    assert _fval(fi, 0b011111) == 28.0
    assert _isnegzero(_fval(fi, 0b100000))


def test_spot_check_ocp_e2m1() -> None:
    """Check against Table 5 in "OCP Microscaling Formats (MX) v1.0 Spec"."""
    fi = format_info_ocp_e2m1

    assert fi.max == 6.0
    assert fi.smallest_subnormal == 0.5
    assert fi.smallest_normal == 1.0
    assert not fi.has_infs
    assert fi.num_nans == 0
    assert fi.has_nz

    # Codes 0b0000 .. 0b0111 in order: the complete non-negative half.
    non_negative_values = [0, 0.5, 1.0, 1.5, 2.0, 3.0, 4.0, 6.0]
    for code, expected in enumerate(non_negative_values):
        assert _fval(fi, code) == expected
    assert _isnegzero(_fval(fi, 0b1000))


def test_spot_check_ocp_e8m0() -> None:
    """Check against Table 7 in "OCP Microscaling Formats (MX) v1.0 Spec"."""
    fi = format_info_ocp_e8m0

    assert fi.expBias == 127
    assert fi.max == 2.0**127
    assert fi.smallest == 2.0**-127
    assert not fi.has_infs
    assert fi.num_nans == 1

    assert _fval(fi, 0x00) == 2.0**-127
    assert _fval(fi, 0x01) == 2.0**-126
    assert _fval(fi, 0x7F) == 1.0
    assert np.isnan(_fval(fi, 0xFF))
    assert _fclass(fi, 0x80) == FloatClass.NORMAL
    assert _fclass(fi, 0x00) == FloatClass.NORMAL


def test_spot_check_ocp_int8() -> None:
    """Check against Table TODO in "OCP Microscaling Formats (MX) v1.0 Spec"."""
    fi = format_info_ocp_int8

    assert fi.max == 1.0 + 63.0 / 64
    assert fi.smallest == 2.0**-6
    assert not fi.has_infs
    assert fi.num_nans == 0

    assert _fval(fi, 0x00) == 0.0
    assert _fval(fi, 0x01) == fi.smallest
    assert _fval(fi, 0x7F) == fi.max
    assert _fval(fi, 0x80) == -2.0
    assert _fval(fi, 0x80) == fi.min
    assert _fval(fi, 0xFF) == -fi.smallest


@pytest.mark.parametrize("fi", p3109_formats)
def test_specials(fi: FormatInfo) -> None:
    """Every P3109 format places its special values at the same code points."""
    assert fi.code_of_nan == 0x80
    assert fi.code_of_zero == 0x00
    assert fi.code_of_posinf == 0x7F
    assert fi.code_of_neginf == 0xFF


@pytest.mark.parametrize("fi", all_formats)
def test_specials_decode(fi: FormatInfo) -> None:
    """The code_of_* properties decode to the values they advertise."""
    if fi.has_zero:
        assert _fval(fi, fi.code_of_zero) == 0

    if fi.num_nans > 0:
        assert np.isnan(_fval(fi, fi.code_of_nan))

    if fi.has_infs:
        assert _fval(fi, fi.code_of_posinf) == np.inf
        assert _fval(fi, fi.code_of_neginf) == -np.inf

    assert _fval(fi, fi.code_of_max) == fi.max
    assert _fval(fi, fi.code_of_min) == fi.min

    # With a zero at code 0 the smallest positive value sits at code 1,
    # otherwise code 0 itself is the smallest value.
    code_of_smallest = 1 if fi.has_zero else 0
    assert _fval(fi, code_of_smallest) == fi.smallest


@pytest.mark.parametrize(
    "fmt,npfmt,int_dtype",
    [
        (format_info_binary16, np.float16, np.uint16),
        (format_info_bfloat16, ml_dtypes.bfloat16, np.uint16),
        (format_info_ocp_e4m3, ml_dtypes.float8_e4m3fn, np.uint8),
    ],
)
def test_consistent_decodes_all_values(
    fmt: FormatInfo, npfmt: np.dtype, int_dtype: np.dtype
) -> None:
    """decode_float agrees with the numpy/ml_dtypes view of every code point."""
    limits = np.iinfo(int_dtype)
    all_codes = np.arange(limits.min, int(limits.max) + 1, dtype=int_dtype)
    reference_values = all_codes.view(dtype=npfmt)

    for code, reference in zip(all_codes, reference_values):
        decoded = decode_float(fmt, int(code))
        np.testing.assert_equal(decoded.fval, reference)


@pytest.mark.parametrize("v", [-1, 0x10000])
def test_except(v: int) -> None:
    """Code points outside the format's range are rejected."""
    with pytest.raises(ValueError):
        decode_float(format_info_binary16, v)


@pytest.mark.parametrize("fi", [fi for fi in all_formats if fi.bits <= 8])
def test_dense(fi: FormatInfo) -> None:
    """Exhaustively decode small formats and compare extremes to FormatInfo."""
    decoded = [decode_float(fi, code) for code in range(0, 2**fi.bits)]
    vals = np.array([fv.fval for fv in decoded])
    finite = np.isfinite(vals)

    assert np.min(vals[finite]) == fi.min
    assert np.max(vals[finite]) == fi.max
    assert np.min(vals[finite & (vals > 0)]) == fi.smallest

    if fi.has_subnormals:
        positive_subnormals = np.array(
            [
                fv.fval
                for fv in decoded
                if fv.fclass == FloatClass.SUBNORMAL and fv.fval > 0
            ]
        )
        # In some formats, zero is the only "subnormal"
        if len(positive_subnormals):
            assert np.min(positive_subnormals) == fi.smallest_subnormal
import ml_dtypes
import numpy as np
import pytest

from gfloat import decode_float, encode_float
from gfloat.formats import *


@pytest.mark.parametrize("fi", all_formats)
def test_encode(fi: FormatInfo) -> None:
    """Decoding a code point and re-encoding its value round-trips.

    Small formats are checked exhaustively; wider ones are sampled with a
    stride that grows with the format width.
    """
    if fi.bits <= 8:
        stride = 1
    elif fi.bits <= 16:
        stride = 13
    elif fi.bits <= 32:
        stride = 73013
    elif fi.bits <= 64:
        stride = (73013 << 32) + 39

    for original_code in range(0, 2**fi.bits, stride):
        decoded = decode_float(fi, original_code)
        recoded = encode_float(fi, decoded.fval)
        # NaNs may legitimately re-encode to a different (canonical) code.
        assert (original_code == recoded) or np.isnan(decoded.fval)
        redecoded = decode_float(fi, recoded)
        np.testing.assert_equal(redecoded.fval, decoded.fval)


@pytest.mark.parametrize("fi", all_formats)
def test_encode_edges(fi: FormatInfo) -> None:
    """Values beyond the finite range encode to Inf, else NaN, else saturate."""
    assert encode_float(fi, fi.max) == fi.code_of_max

    if fi.has_infs:
        expected_above = fi.code_of_posinf
    elif fi.num_nans > 0:
        expected_above = fi.code_of_nan
    else:
        expected_above = fi.code_of_max
    assert encode_float(fi, fi.max * 1.25) == expected_above

    if not fi.is_signed:
        return

    if fi.has_infs:
        expected_below = fi.code_of_neginf
    elif fi.num_nans > 0:
        expected_below = fi.code_of_nan
    else:
        expected_below = fi.code_of_min
    assert encode_float(fi, fi.min * 1.25) == expected_below
# Test that the finfo-style properties on FormatInfo agree with numpy/ml_dtypes

import ml_dtypes
import numpy as np
import pytest

from gfloat import decode_float, round_float
from gfloat.formats import *


@pytest.mark.parametrize(
    "fmt,npfmt",
    [
        (format_info_ocp_e5m2, ml_dtypes.float8_e5m2),
        (format_info_ocp_e4m3, ml_dtypes.float8_e4m3fn),
        (format_info_binary16, np.float16),
        (format_info_bfloat16, ml_dtypes.bfloat16),
    ],
)
def test_finfo(fmt: FormatInfo, npfmt: np.dtype) -> None:
    """eps, epsneg, max and maxexp match ml_dtypes.finfo for the same type."""
    reference = ml_dtypes.finfo(npfmt)
    for attribute in ("eps", "epsneg", "max", "maxexp"):
        assert getattr(fmt, attribute) == getattr(reference, attribute)


def test_constants() -> None:
    """Smallest subnormal of selected P3109 precisions."""
    expected_by_precision = {
        1: 2.0**-62,
        4: 2.0**-10,
        7: 2.0**-6,
    }
    for precision, expected in expected_by_precision.items():
        assert format_info_p3109(precision).smallest_subnormal == expected
import pytest
import numpy as np
from numpy.typing import NDArray
import torch

from mx.mx_ops import quantize_mx_op
from mx.formats import ElemFormat

from gfloat import (
    BlockFormatInfo,
    RoundMode,
    quantize_block,
    compute_scale_amax,
    encode_block,
)
from gfloat.formats import *


@pytest.mark.parametrize(
    "mx_round,gf_round",
    [("even", RoundMode.TiesToEven), ("nearest", RoundMode.TiesToAway)],
)
@pytest.mark.parametrize(
    "mx_etype,gf_etype",
    [
        (ElemFormat.int8, format_info_ocp_int8),
        (ElemFormat.fp6_e3m2, format_info_ocp_e3m2),
        (ElemFormat.fp4_e2m1, format_info_ocp_e2m1),
    ],
)
@pytest.mark.parametrize(
    "A",
    [
        np.arange(32) / 2 - 5,
        np.zeros(32),
    ],
    ids=[
        "tennish",
        "zeros",
    ],
)
def test_mx(
    mx_etype: ElemFormat,
    gf_etype: FormatInfo,
    mx_round: str,
    gf_round: RoundMode,
    A: NDArray[np.float64],
) -> None:
    """gfloat block quantization matches the microxcaling library's result."""
    # Reference side: microxcaling block specification and quantization
    reference_specs = {
        "block_size": 32,
        "scale_bits": 8,
        "shared_exp_method": "max",
        "mx_flush_fp32_subnorms": False,
        "custom_cuda": False,
    }
    reference = quantize_mx_op(
        torch.tensor(A), reference_specs, mx_etype, axes=0, round=mx_round
    )

    # gfloat side: same element type, block size 32, E8M0 scale
    block_format = BlockFormatInfo("test", gf_etype, 32, format_info_ocp_e8m0)
    actual = quantize_block(block_format, A, compute_scale_amax, gf_round)

    np.testing.assert_allclose(actual, reference)


def test_mx_exceptions() -> None:
    """Scales outside the scale type's range are rejected by encode_block."""
    block_format = BlockFormatInfo(
        "test", format_info_ocp_e2m1, 32, format_info_ocp_e8m0
    )
    tiny_values = np.ones(32) * 2.0**-139

    computed_scale = compute_scale_amax(block_format.etype.emax, tiny_values)
    assert computed_scale == 2.0**-127

    # Scale above the largest representable scale
    too_large = block_format.stype.max * 2
    with pytest.raises(ValueError, match="out of range"):
        list(encode_block(block_format, too_large, tiny_values))

    # Scale below the smallest representable (non-zero) scale
    assert not block_format.stype.is_signed
    too_small = block_format.stype.min / 2
    assert too_small != 0
    with pytest.raises(ValueError, match="out of range"):
        list(encode_block(block_format, too_small, tiny_values))
import numpy as np

from gfloat import float_pow2str, float_tilde_unless_roundtrip_str


def test_pow2str() -> None:
    """float_pow2str renders values as fraction-times-power-of-two strings."""
    default_cases = [
        (127, "127/64*2^6"),
        (1.0625 * 2.0**-12, "17/16*2^-12"),
        (3.0 * 2.0**-12, "3/2*2^-11"),
        (3.0 / 16 * 2.0**-8, "3/2*2^-11"),
    ]
    for value, expected in default_cases:
        assert float_pow2str(value) == expected

    # With a floor on the exponent the fraction absorbs the remaining scale.
    assert float_pow2str(3.0 / 16 * 2.0**-8, min_exponent=-8) == "3/16*2^-8"


def test_tilde_unless_roundtrip() -> None:
    """A leading tilde appears only when the printed text is not exact."""
    default_cases = [
        (1.52587892525e-05, "~1.5258789e-05"),
        (28672.0, "28672.0"),
        (0.0009765625, "0.0009765625"),
        (120.0, "120.0"),
    ]
    for value, expected in default_cases:
        assert float_tilde_unless_roundtrip_str(value) == expected

    narrow_cases = [
        (0.0010001, "~0.0010"),
        (np.inf, "inf"),
        (np.nan, "nan"),
    ]
    for value, expected in narrow_cases:
        assert float_tilde_unless_roundtrip_str(value, width=7, d=4) == expected
from typing import Type

import ml_dtypes
import numpy as np
import pytest

from gfloat import RoundMode, decode_float, round_float
from gfloat.formats import *


def test_round_p3109() -> None:
    """Spot-check `round_float` on `format_info_p3109(4)` near both ends of its range."""
    fi = format_info_p3109(4)

    # Values expected to round to themselves
    assert round_float(fi, 0.0068359375) == 0.0068359375
    assert round_float(fi, 0.0029296875) == 0.0029296875
    assert round_float(fi, 0.0078125) == 0.0078125
    assert round_float(fi, 0.017578125) == 0.017578125
    assert round_float(fi, 224.0) == 224.0
    assert round_float(fi, 240.0) == np.inf

    assert round_float(fi, 224.1, RoundMode.TowardPositive) == np.inf

    # 232.0 under the default mode and each explicit rounding mode
    assert round_float(fi, 232.0) == 224.0
    assert round_float(fi, 232.0, RoundMode.TiesToAway) == np.inf
    assert round_float(fi, 232.0, RoundMode.TowardZero) == 224.0
    assert round_float(fi, 232.0, RoundMode.TowardNegative) == 224.0
    assert round_float(fi, 232.0, RoundMode.TowardPositive) == np.inf

    # The mirrored negative cases
    assert round_float(fi, -232.0) == -224.0
    assert round_float(fi, -232.0, RoundMode.TiesToAway) == -np.inf
    assert round_float(fi, -232.0, RoundMode.TowardZero) == -224.0
    assert round_float(fi, -232.0, RoundMode.TowardNegative) == -np.inf
    assert round_float(fi, -232.0, RoundMode.TowardPositive) == -224.0

    assert round_float(fi, 232.1) == np.inf


p4min = 2**-10  # smallest subnormal in p4


@pytest.mark.parametrize(
    "mode, vals",
    (
        (
            RoundMode.TowardZero,
            (
                (p4min, p4min),
                (p4min / 4, 0.0),
                (p4min / 2, 0.0),
                (-p4min, -p4min),
                (-p4min / 4, 0.0),
                (-p4min / 2, 0.0),
                (64.0, 64.0),
                (63.0, 60.0),
                (62.0, 60.0),
                (-64.0, -64.0),
                (-63.0, -60.0),
                (-62.0, -60.0),
            ),
        ),
        (
            RoundMode.TowardPositive,
            (
                (p4min, p4min),
                (p4min / 4, p4min),
                (p4min / 2, p4min),
                (-p4min, -p4min),
                (-p4min / 4, 0.0),
                (-p4min / 2, 0.0),
                (64.0, 64.0),
                (63.0, 64.0),
                (62.0, 64.0),
                (-64.0, -64.0),
                (-63.0, -60.0),
                (-62.0, -60.0),
            ),
        ),
        (
            RoundMode.TowardNegative,
            (
                (p4min, p4min),
                (p4min / 4, 0.0),
                (p4min / 2, 0.0),
                (-p4min, -p4min),
                (-p4min / 4, -p4min),
                (-p4min / 2, -p4min),
                (64.0, 64.0),
                (63.0, 60.0),
                (62.0, 60.0),
                (-64.0, -64.0),
                (-63.0, -64.0),
                (-62.0, -64.0),
            ),
        ),
        (
            RoundMode.TiesToEven,
            (
                (p4min, p4min),
                (p4min / 4, 0.0),
                (p4min / 2, 0.0),
                (-p4min, -p4min),
                (-p4min / 4, 0.0),
                (-p4min / 2, 0.0),
                (64.0, 64.0),
                (63.0, 64.0),
                (62.0, 64.0),
                (61.0, 60.0),
                (-64.0, -64.0),
                (-63.0, -64.0),
                (-62.0, -64.0),
                (-61.0, -60.0),
                (-58.0, -56.0),
            ),
        ),
        (
            RoundMode.TiesToAway,
            (
                (p4min, p4min),
                (p4min / 4, 0.0),
                (p4min / 2, p4min),
                (-p4min, -p4min),
                (-p4min / 4, 0.0),
                (-p4min / 2, -p4min),
                (64.0, 64.0),
                (63.0, 64.0),
                (62.0, 64.0),
                (61.0, 60.0),
                (-64.0, -64.0),
                (-63.0, -64.0),
                (-62.0, -64.0),
                (-61.0, -60.0),
                (-58.0, -60.0),
            ),
        ),
    ),
)
def test_round_p3109b(mode: RoundMode, vals: list) -> None:
    """
    Check (input, expected) pairs for one rounding mode on `format_info_p3109(4)`.

    Every pair is checked with saturation both on and off; the expected
    values in these tables are the same either way.
    """
    fi = format_info_p3109(4)

    for sat in (True, False):
        for val, expected in vals:
            assert round_float(fi, val, mode, sat) == expected


# Boundary probes for the saturation tests below;
# p4maxhalfup is the midpoint of p4max and p4maxup.
p4max = 224.0
p4maxup = 240.0
p4maxhalfup = (p4max + p4maxup) / 2


@pytest.mark.parametrize(
    "modesat, vals",
    (
        (
            (RoundMode.TowardZero, True),
            (
                (p4max, p4max),
                (p4maxhalfup, p4max),
                (p4maxup, p4max),
                (np.inf, p4max),
                (-p4max, -p4max),
                (-p4maxhalfup, -p4max),
                (-p4maxup, -p4max),
                (-np.inf, -p4max),
            ),
        ),
        (
            (RoundMode.TowardZero, False),
            (
                (p4max, p4max),
                (p4maxhalfup, p4max),
                (p4maxup, p4max),
                (np.inf, np.inf),
                (-p4max, -p4max),
                (-p4maxhalfup, -p4max),
                (-p4maxup, -p4max),
                (-np.inf, -np.inf),
            ),
        ),
        (
            (RoundMode.TowardPositive, True),
            (
                (p4max, p4max),
                (p4maxhalfup, p4max),
                (p4maxup, p4max),
                (np.inf, p4max),
                (-p4max, -p4max),
                (-p4maxhalfup, -p4max),
                (-p4maxup, -p4max),
                (-np.inf, -p4max),
            ),
        ),
        (
            (RoundMode.TowardPositive, False),
            (
                (p4max, p4max),
                (p4maxhalfup, np.inf),
                (p4maxup, np.inf),
                (np.inf, np.inf),
                (-p4max, -p4max),
                (-p4maxhalfup, -p4max),
                (-p4maxup, -p4max),
                (-np.inf, -np.inf),
            ),
        ),
        (
            (RoundMode.TowardNegative, True),
            (
                (p4max, p4max),
                (p4maxhalfup, p4max),
                (p4maxup, p4max),
                (np.inf, p4max),
                (-p4max, -p4max),
                (-p4maxhalfup, -p4max),
                (-p4maxup, -p4max),
                (-np.inf, -p4max),
            ),
        ),
        (
            (RoundMode.TowardNegative, False),
            (
                (p4max, p4max),
                (p4maxhalfup, p4max),
                (p4maxup, p4max),
                (np.inf, np.inf),
                (-p4max, -p4max),
                (-p4maxhalfup, -np.inf),
                (-p4maxup, -np.inf),
                (-np.inf, -np.inf),
            ),
        ),
        (
            (RoundMode.TiesToEven, True),
            (
                (p4max, p4max),
                (p4maxhalfup, p4max),
                (p4maxup, p4max),
                (np.inf, p4max),
                (-p4max, -p4max),
                (-p4maxhalfup, -p4max),
                (-p4maxup, -p4max),
                (-np.inf, -p4max),
            ),
        ),
        (
            (RoundMode.TiesToEven, False),
            (
                (p4max, p4max),
                (p4maxhalfup, p4max),
                (p4maxup, np.inf),
                (np.inf, np.inf),
                (-p4max, -p4max),
                (-p4maxhalfup, -p4max),
                (-p4maxup, -np.inf),
                (-np.inf, -np.inf),
            ),
        ),
        (
            (RoundMode.TiesToAway, True),
            (
                (p4max, p4max),
                (p4maxhalfup, p4max),
                (p4maxup, p4max),
                (np.inf, p4max),
                (-p4max, -p4max),
                (-p4maxhalfup, -p4max),
                (-p4maxup, -p4max),
                (-np.inf, -p4max),
            ),
        ),
        (
            (RoundMode.TiesToAway, False),
            (
                (p4max, p4max),
                (p4maxhalfup, np.inf),
                (p4maxup, np.inf),
                (np.inf, np.inf),
                (-p4max, -p4max),
                (-p4maxhalfup, -np.inf),
                (-p4maxup, -np.inf),
                (-np.inf, -np.inf),
            ),
        ),
    ),
    # Only the 2-element (mode, sat) argument gets a custom id; for the
    # 8-element value table the lambda returns None.
    ids=lambda x: f"{str(x[0])}-{'Sat' if x[1] else 'Inf'}" if len(x) == 2 else None,
)
def test_round_p3109_sat(modesat: tuple[RoundMode, bool], vals: list) -> None:
    """
    Check rounding at and beyond `p4max` for each (rounding mode, sat) pair.

    `modesat` is unpacked into the mode and sat arguments of `round_float`.
    """
    fi = format_info_p3109(4)

    for val, expected in vals:
        assert round_float(fi, val, *modesat) == expected


def test_round_e5m2() -> None:
    """Check `round_float` on `format_info_ocp_e5m2` around its maximum value."""
    fi = format_info_ocp_e5m2

    assert fi.max == 57344

    assert round_float(fi, 1.5258789e-05) == 2**-16

    # Default NONSAT rounding
    assert round_float(fi, 57344.0) == 57344
    assert round_float(fi, 57344.1) == 57344
    assert round_float(fi, 61439.9) == 57344
    assert round_float(fi, 61440.0) == np.inf
    assert round_float(fi, np.inf, sat=False) == np.inf
    assert round_float(fi, -np.inf, sat=False) == -np.inf
    assert np.isnan(round_float(fi, np.nan, sat=False))

    # SAT rounding
    assert round_float(fi, 57344.0, sat=True) == 57344
    assert round_float(fi, 57344.1, sat=True) == 57344
    assert round_float(fi, 61439.9, sat=True) == 57344
    assert round_float(fi, 61440.0, sat=True) == 57344
    assert round_float(fi, np.inf, sat=True) == 57344
    assert round_float(fi, -np.inf, sat=True) == -57344
    assert np.isnan(round_float(fi, np.nan, sat=True))


def test_round_e4m3() -> None:
    """Check `round_float` on `format_info_ocp_e4m3` around its maximum value."""
    fi = format_info_ocp_e4m3

    assert fi.max == 448

    # Default NONSAT rounding
    assert round_float(fi, 448.0) == 448
    assert round_float(fi, 448.1) == 448
    assert round_float(fi, 464.0) == 448
    assert np.isnan(round_float(fi, 464.01))
    assert np.isnan(round_float(fi, np.inf, sat=False))
    assert np.isnan(round_float(fi, -np.inf, sat=False))
    assert np.isnan(round_float(fi, np.nan, sat=False))

    # SAT rounding
    assert round_float(fi, 448.0, sat=True) == 448
    assert round_float(fi, 448.1, sat=True) == 448
    assert round_float(fi, 464.0, sat=True) == 448
    assert round_float(fi, 464.01, sat=True) == 448
    assert round_float(fi, np.inf, sat=True) == 448
    assert round_float(fi, -np.inf, sat=True) == -448
    assert np.isnan(round_float(fi, np.nan, sat=True))


some_positive_codepoints = (
    0x00,
    0x01,
    0x02,
    0x03,
    0x07,
    0x0F,
    0x17,
    0x21,
    0x33,
    0x40,
    0x53,
    0x65,
    0x70,
)


@pytest.mark.parametrize(
    "fi",
    [
        format_info_ocp_e5m2,
        format_info_ocp_e4m3,
        *p3109_formats,
    ],
)
def test_round(fi: FormatInfo) -> None:
    """
    Test rounding from values between exact binary8 values

    For each integer code point i in `some_positive_codepoints`, let
      v0 = the float value at i
      v1 = the float value at i+1, i.e. nextUp(v0)
      dv = v1 - v0

    Then, when v0 and v1 are both finite, check that:
        round(v0) == v0
        round(v0 + 0.3*dv) == v0
        round(v0 + 0.49*dv) == v0
        round(v0 + 0.51*dv) == v1
        round(v0 + 0.99*dv) == v1
        round(v0 + 0.5*dv) == v0 if i is even, else v1
    """

    for i in some_positive_codepoints:
        v0 = decode_float(fi, i + 0).fval
        v1 = decode_float(fi, i + 1).fval
        if np.isfinite([v0, v1]).all():
            dv = v1 - v0
            np.testing.assert_equal(round_float(fi, v0), v0)
            np.testing.assert_equal(round_float(fi, v0 + 0.3 * dv), v0)
            np.testing.assert_equal(round_float(fi, v0 + 0.49 * dv), v0)
            np.testing.assert_equal(round_float(fi, v0 + 0.51 * dv), v1)
            np.testing.assert_equal(round_float(fi, v0 + 0.99 * dv), v1)
            # Exact midpoint: the neighbour with the even code point is expected
            nearest_even = v0 if (i & 1 == 0) else v1
            np.testing.assert_equal(round_float(fi, v0 + 0.50 * dv), nearest_even)


# (gfloat format, ml_dtypes dtype) pairs compared against each other below
test_formats = [
    (format_info_ocp_e5m2, ml_dtypes.float8_e5m2),
    (format_info_ocp_e4m3, ml_dtypes.float8_e4m3fn),
]


def _linterp(a: float, b: float, t: float) -> float:
    """
    Return `a * (1 - t) + b * t`
    """
    return a * (1 - t) + b * t


def _mlround(v: float, dty: Type) -> float:
    """
    Round `v` using ml_dtypes library
    """
    return np.array([v]).astype(dty).astype(float).item()


@pytest.mark.parametrize("fi,mldtype", test_formats)
def test_ml_dtype_compatible(fi: FormatInfo, mldtype: Type) -> None:
    """
    Test that rounding is compatible with ml_dtypes
    """
    for i in range(255):
        # For each float v, check values at various interpolations
        # between v and nextUp(v)
        v0 = decode_float(fi, i + 0).fval
        v1 = decode_float(fi, i + 1).fval

        # alpha=1.25 deliberately lands beyond v1
        for alpha in (0, 0.3, 0.5, 0.6, 0.9, 1.25):
            v = _linterp(v0, v1, alpha)
            if np.isfinite(v):
                val = round_float(fi, v, RoundMode.TiesToEven)

                mlval = _mlround(v, mldtype)
                np.testing.assert_equal(val, mlval)


@pytest.mark.parametrize("fi,mldtype", test_formats)
def test_round_ints(fi: FormatInfo, mldtype: Type) -> None:
    """
    Test that rounding the integers 0..288 matches ml_dtypes
    """
    for v in np.arange(289).astype(float):
        val = round_float(fi, v)

        mlval = _mlround(v, mldtype)
        np.testing.assert_equal(val, mlval)


@pytest.mark.parametrize("fi", all_formats)
def test_round_roundtrip(fi: FormatInfo) -> None:
    """
    Test that decoding a code point and rounding the result is the identity

    Formats of up to 8 bits are checked at every code point; wider formats
    are sampled with a fixed stride chosen by bit width.
    """
    if fi.bits <= 8:
        step = 1
    elif fi.bits <= 16:
        step = 13
    elif fi.bits <= 32:
        step = 73013
    elif fi.bits <= 64:
        step = (73013 << 32) + 39
    else:
        # Without this branch `step` would be unbound and the loop below
        # would die with UnboundLocalError instead of a readable failure.
        pytest.fail(f"No sampling stride defined for {fi.bits}-bit formats")

    for i in range(0, 2**fi.bits, step):
        fv = decode_float(fi, i)
        fval2 = round_float(fi, fv.fval)
        np.testing.assert_equal(fval2, fv.fval)