pax_global_header00006660000000000000000000000064145121704340014513gustar00rootroot0000000000000052 comment=bb6f038c405acd84ca79b15a226105a612808a65 bytecode-0.15.1/000077500000000000000000000000001451217043400133755ustar00rootroot00000000000000bytecode-0.15.1/.coveragerc000066400000000000000000000006071451217043400155210ustar00rootroot00000000000000[run] branch = True omit = setup.py [paths] source = src */site-packages [report] # Regexes for lines to exclude from consideration exclude_lines = # Have to re-enable the standard pragma pragma: no cover # Don't complain if tests don't hit defensive assertion code: raise NotImplementedError pass # Don't complain about ellipsis in overload \.\.\. bytecode-0.15.1/.github/000077500000000000000000000000001451217043400147355ustar00rootroot00000000000000bytecode-0.15.1/.github/FUNDING.yml000066400000000000000000000012151451217043400165510ustar00rootroot00000000000000# These are supported funding model platforms github: [MatthieuDartiailh] patreon: # Replace with a single Patreon username open_collective: # Replace with a single Open Collective username ko_fi: # Replace with a single Ko-fi username tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry liberapay: # Replace with a single Liberapay username issuehunt: # Replace with a single IssueHunt username otechie: # Replace with a single Otechie username custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2'] bytecode-0.15.1/.github/dependabot.yml000066400000000000000000000002421451217043400175630ustar00rootroot00000000000000version: 2 updates: # Maintain dependencies for GitHub Actions - package-ecosystem: "github-actions" directory: "/" schedule: interval: 
"weekly"bytecode-0.15.1/.github/workflows/000077500000000000000000000000001451217043400167725ustar00rootroot00000000000000bytecode-0.15.1/.github/workflows/cis.yml000066400000000000000000000037411451217043400203000ustar00rootroot00000000000000name: Continuous Integration on: schedule: - cron: "0 0 * * 3" push: branches: - main pull_request: branches: - main paths: - .github/workflows/cis.yml - "src/**" - "tests/*" - pyproject.toml - setup.py - tox.ini jobs: lint: name: Lint code runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - name: Set up Python uses: actions/setup-python@v4 with: python-version: "3.10" - name: Install tools run: | python -m pip install --upgrade pip python -m pip install tox - name: Linting env: TOXENV: lint run: | tox tests: name: Unit tests runs-on: ubuntu-latest strategy: fail-fast: false matrix: include: - python-version: "3.8" toxenv: py38 - python-version: "3.9" toxenv: py39 - python-version: "3.10" toxenv: py310 - python-version: "3.11" toxenv: py311 - python-version: "3.12-dev" toxenv: py312 steps: - uses: actions/checkout@v4 - name: Get history and tags for SCM versioning to work run: | git fetch --prune --unshallow git fetch --depth=1 origin +refs/tags/*:refs/tags/* - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} - name: Install dependencies run: | python -m pip install --upgrade pip python -m pip install tox - name: Test env: TOXENV: ${{ matrix.toxenv }} run: | tox - name: Upload coverage to Codecov uses: codecov/codecov-action@v3 if: github.event_name != 'schedule' with: token: ${{ secrets.CODECOV_TOKEN }} name: codecov-umbrella fail_ci_if_error: true bytecode-0.15.1/.github/workflows/docs.yml000066400000000000000000000016041451217043400204460ustar00rootroot00000000000000name: Documentation building on: schedule: - cron: "0 0 * * 3" push: branches: - main pull_request: branches: - main paths: - .github/workflows/docs.yml - "src/**" - 
"doc/**" - pyproject.toml - setup.py jobs: docs: name: Docs building runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - name: Get history and tags for SCM versioning to work run: | git fetch --prune --unshallow git fetch --depth=1 origin +refs/tags/*:refs/tags/* - name: Set up Python uses: actions/setup-python@v4 with: python-version: '3.x' - name: Install dependencies run: | python -m pip install --upgrade pip python -m pip install tox - name: Build documentation env: TOXENV: docs run: | tox bytecode-0.15.1/.github/workflows/frameworks.yml000066400000000000000000000017371451217043400217050ustar00rootroot00000000000000name: Frameworks tests on: push: branches: - main pull_request: branches: - main paths: - .github/workflows/cis.yml - "src/**" - "tests/frameworks/*" - pyproject.toml - setup.py - tox.ini jobs: boto3: name: boto3 with Python ${{ matrix.python-version }} runs-on: ubuntu-latest strategy: fail-fast: false matrix: python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] steps: - uses: actions/checkout@v4 with: fetch-depth: 0 - name: Set up Python uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }}-dev - name: Setup run: bash scripts/frameworks/boto3/setup.sh /tmp/boto3 ${{ matrix.python-version }} - name: Run env: PYTHONPATH: ${{ github.workspace }}/tests/frameworks/ run: bash scripts/frameworks/boto3/run.sh /tmp/boto3 ${{ matrix.python-version }} bytecode-0.15.1/.github/workflows/release.yml000066400000000000000000000062611451217043400211420ustar00rootroot00000000000000name: Build and upload wheels on: workflow_dispatch: schedule: - cron: '0 0 * * 3' push: tags: - '*' jobs: build_sdist: name: Build sdist runs-on: ubuntu-latest steps: - name: Checkout uses: actions/checkout@v4 - name: Get history and tags for SCM versioning to work run: | git fetch --prune --unshallow git fetch --depth=1 origin +refs/tags/*:refs/tags/* - name: Setup Python uses: actions/setup-python@v4 with: python-version: '3.x' - name: Build sdist 
run: | pip install --upgrade pip pip install wheel build python -m build . -s - name: Test sdist run: | pip install pytest pip install dist/*.tar.gz python -X dev -m pytest tests - name: Store artifacts uses: actions/upload-artifact@v3 with: name: artifact path: dist/* build_wheel: name: Build wheel runs-on: ubuntu-latest steps: - name: Checkout uses: actions/checkout@v4 - name: Get history and tags for SCM versioning to work run: | git fetch --prune --unshallow git fetch --depth=1 origin +refs/tags/*:refs/tags/* - name: Setup Python uses: actions/setup-python@v4 with: python-version: '3.x' - name: Build wheels run: | pip install --upgrade pip pip install wheel build python -m build . -w - name: Test wheel run: | pip install pytest pip install dist/*.whl python -X dev -m pytest tests - name: Store artifacts uses: actions/upload-artifact@v3 with: name: artifact path: dist/*.whl release_upload: name: Create Release and Upload Release Asset runs-on: ubuntu-latest if: github.event_name == 'push' needs: [build_wheel, build_sdist] steps: - name: Create Release id: create_release uses: actions/create-release@v1 env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} with: tag_name: ${{ github.ref }} release_name: Release ${{ github.ref }} draft: false prerelease: ${{ contains(github.ref, 'rc') || contains(github.ref, 'a') || contains(github.ref, 'b')}} - uses: actions/download-artifact@v3 with: name: artifact path: dist - name: Upload Release Asset id: upload-release-asset uses: shogo82148/actions-upload-release-asset@v1 env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} with: upload_url: ${{ steps.create_release.outputs.upload_url }} asset_path: dist/* upload_pypi: if: github.event_name == 'push' needs: [build_wheel, build_sdist] runs-on: ubuntu-latest steps: - uses: actions/download-artifact@v3 with: name: artifact path: dist - uses: pypa/gh-action-pypi-publish@master with: user: __token__ password: ${{ secrets.pypi_password }} # To test: # repository_url: 
https://test.pypi.org/legacy/ bytecode-0.15.1/.gitignore000066400000000000000000000003571451217043400153720ustar00rootroot00000000000000*.py[cod] *.swp MANIFEST build dist # generated by setuptools-scm src/bytecode/version.py # generated by tox .tox/ bytecode.egg-info/ .mypy_cache .dmypy.json .spyproject .idea/ .vscode/ .coverage coverage.xml .pytest_cache .cache .venv bytecode-0.15.1/.pre-commit-config.yaml000066400000000000000000000005701451217043400176600ustar00rootroot00000000000000repos: - repo: https://github.com/pre-commit/mirrors-isort rev: v5.10.1 hooks: - id: isort - repo: https://github.com/psf/black rev: 22.10.0 hooks: - id: black - repo: https://github.com/pycqa/flake8 rev: 6.0.0 hooks: - id: flake8 - repo: https://github.com/pre-commit/mirrors-mypy rev: v0.991 hooks: - id: mypy bytecode-0.15.1/.readthedocs.yaml000066400000000000000000000011031451217043400166170ustar00rootroot00000000000000# .readthedocs.yaml # Read the Docs configuration file # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details # Required version: 2 # Set the version of Python and other tools you might need build: os: ubuntu-20.04 tools: python: "3.9" # Build documentation in the docs/source directory with Sphinx sphinx: configuration: doc/conf.py # Enable epub output formats: - epub # Optionally declare the Python requirements required to build your docs python: install: - requirements: doc/requirements.txt - method: pip path: . bytecode-0.15.1/COPYING000066400000000000000000000021061451217043400144270ustar00rootroot00000000000000The MIT License (MIT) Copyright Contributors to the bytecode project. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. bytecode-0.15.1/MANIFEST.in000066400000000000000000000002101451217043400151240ustar00rootroot00000000000000include COPYING include MANIFEST.in include README.rst include tox.ini include doc/conf.py doc/make.bat doc/Makefile include doc/*.rst bytecode-0.15.1/README.rst000066400000000000000000000040331451217043400150640ustar00rootroot00000000000000******** bytecode ******** .. image:: https://img.shields.io/pypi/v/bytecode.svg :alt: Latest release on the Python Cheeseshop (PyPI) :target: https://pypi.python.org/pypi/bytecode .. image:: https://github.com/MatthieuDartiailh/bytecode/workflows/Continuous%20Integration/badge.svg :target: https://github.com/MatthieuDartiailh/bytecode/actions :alt: Continuous integration .. image:: https://github.com/MatthieuDartiailh/bytecode/workflows/Documentation%20building/badge.svg :target: https://github.com/MatthieuDartiailh/bytecode/actions :alt: Documentation building .. 
image:: https://img.shields.io/codecov/c/github/MatthieuDartiailh/bytecode/master.svg :alt: Code coverage of bytecode on codecov.io :target: https://codecov.io/github/MatthieuDartiailh/bytecode .. image:: https://img.shields.io/badge/code%20style-black-000000.svg :alt: Code formatted using Black :target: https://github.com/psf/black ``bytecode`` is a Python module to generate and modify bytecode. * `bytecode project homepage at GitHub `_ (code, bugs) * `bytecode documentation `_ * `Download latest bytecode release at the Python Cheeseshop (PyPI) `_ Install bytecode: ``python3 -m pip install bytecode``. It requires Python 3.8 or newer. The latest release that supports Python 3.7 and 3.6 is 0.13.0. The latest release that supports Python 3.5 is 0.12.0. For Python 2.7 support, have a look at `dead-bytecode `_ instead. Example executing ``print('Hello World!')``: .. code:: python from bytecode import Instr, Bytecode bytecode = Bytecode([Instr("LOAD_NAME", 'print'), Instr("LOAD_CONST", 'Hello World!'), Instr("CALL_FUNCTION", 1), Instr("POP_TOP"), Instr("LOAD_CONST", None), Instr("RETURN_VALUE")]) code = bytecode.to_code() exec(code) bytecode-0.15.1/TODO.rst000066400000000000000000000002711451217043400146740ustar00rootroot00000000000000Python 3.12 support =================== - LOAD_ATTR changes follow changes made to LOAD_GLOBAL - update tests * ConcreteBytecode.to_code(): better error reporting on bugs in the code bytecode-0.15.1/codecov.yml000066400000000000000000000001151451217043400155370ustar00rootroot00000000000000# .codecov.yml: coverage: fixes: - "__init__.py::bytecode/__init__.py" bytecode-0.15.1/doc/000077500000000000000000000000001451217043400141425ustar00rootroot00000000000000bytecode-0.15.1/doc/Makefile000066400000000000000000000151611451217043400156060ustar00rootroot00000000000000# Makefile for Sphinx documentation # # You can set these variables from the command line. 
SPHINXOPTS = SPHINXBUILD = sphinx-build PAPER = BUILDDIR = build # User-friendly check for sphinx-build ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) endif # Internal variables. PAPEROPT_a4 = -D latex_paper_size=a4 PAPEROPT_letter = -D latex_paper_size=letter ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . # the i18n builder cannot share the environment and doctrees with the others I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext help: @echo "Please use \`make ' where is one of" @echo " html to make standalone HTML files" @echo " dirhtml to make HTML files named index.html in directories" @echo " singlehtml to make a single large HTML file" @echo " pickle to make pickle files" @echo " json to make JSON files" @echo " htmlhelp to make HTML files and a HTML help project" @echo " qthelp to make HTML files and a qthelp project" @echo " devhelp to make HTML files and a Devhelp project" @echo " epub to make an epub" @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" @echo " latexpdf to make LaTeX files and run them through pdflatex" @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" @echo " text to make text files" @echo " man to make manual pages" @echo " texinfo to make Texinfo files" @echo " info to make Texinfo files and run them through makeinfo" @echo " gettext to make PO message catalogs" @echo " changes to make an overview of all changed/added/deprecated items" @echo " xml to make 
Docutils-native XML files" @echo " pseudoxml to make pseudoxml-XML files for display purposes" @echo " linkcheck to check all external links for integrity" @echo " doctest to run all doctests embedded in the documentation (if enabled)" clean: rm -rf $(BUILDDIR)/* html: $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html @echo @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." dirhtml: $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml @echo @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." singlehtml: $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml @echo @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." pickle: $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle @echo @echo "Build finished; now you can process the pickle files." json: $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json @echo @echo "Build finished; now you can process the JSON files." htmlhelp: $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp @echo @echo "Build finished; now you can run HTML Help Workshop with the" \ ".hhp project file in $(BUILDDIR)/htmlhelp." qthelp: $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp @echo @echo "Build finished; now you can run "qcollectiongenerator" with the" \ ".qhcp project file in $(BUILDDIR)/qthelp, like this:" @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/bytecode.qhcp" @echo "To view the help file:" @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/bytecode.qhc" devhelp: $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp @echo @echo "Build finished." @echo "To view the help file:" @echo "# mkdir -p $$HOME/.local/share/devhelp/bytecode" @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/bytecode" @echo "# devhelp" epub: $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub @echo @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 
latex: $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex @echo @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." @echo "Run \`make' in that directory to run these through (pdf)latex" \ "(use \`make latexpdf' here to do that automatically)." latexpdf: $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex @echo "Running LaTeX files through pdflatex..." $(MAKE) -C $(BUILDDIR)/latex all-pdf @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." latexpdfja: $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex @echo "Running LaTeX files through platex and dvipdfmx..." $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." text: $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text @echo @echo "Build finished. The text files are in $(BUILDDIR)/text." man: $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man @echo @echo "Build finished. The manual pages are in $(BUILDDIR)/man." texinfo: $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo @echo @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." @echo "Run \`make' in that directory to run these through makeinfo" \ "(use \`make info' here to do that automatically)." info: $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo @echo "Running Texinfo files through makeinfo..." make -C $(BUILDDIR)/texinfo info @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." gettext: $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale @echo @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." changes: $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes @echo @echo "The overview file is in $(BUILDDIR)/changes." linkcheck: $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck @echo @echo "Link check complete; look for any errors in the above output " \ "or in $(BUILDDIR)/linkcheck/output.txt." 
doctest: $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest @echo "Testing of doctests in the sources finished, look at the " \ "results in $(BUILDDIR)/doctest/output.txt." xml: $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml @echo @echo "Build finished. The XML files are in $(BUILDDIR)/xml." pseudoxml: $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml @echo @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." bytecode-0.15.1/doc/api.rst000066400000000000000000000761051451217043400154560ustar00rootroot00000000000000************ Bytecode API ************ * Constants: :data:`__version__`, :data:`UNSET` * Abstract bytecode: :class:`Label`, :class:`Instr`, :class:`Bytecode` * Line number: :class:`SetLineno` * Arguments: :class:`CellVar`, :class:`Compare`, :class:`FreeVar` * Concrete bytecode: :class:`ConcreteInstr`, :class:`ConcreteBytecode` * Control Flow Graph (CFG): :class:`BasicBlock`, :class:`ControlFlowGraph` * Base class: :class:`BaseBytecode` Constants ========= .. data:: __version__ Module version string (ex: ``'0.1'``). .. data:: UNSET Singleton used to mark the lack of value. It is different than ``None``. Functions ========= .. function:: format_bytecode(bytecode, \*, lineno: bool = False) -> str: Format a bytecode to a str representation. :class:`ConcreteBytecode`, :class:`Bytecode` and :class:`ControlFlowGraph` are accepted for *bytecode*. If *lineno* is true, show also line numbers and instruction index/offset. This function is written for debug purpose. .. function:: dump_bytecode(bytecode, \*, lineno=False) Dump a bytecode to the standard output. :class:`ConcreteBytecode`, :class:`Bytecode` and :class:`ControlFlowGraph` are accepted for *bytecode*. If *lineno* is true, show also line numbers and instruction index/offset. This function is written for debug purpose. Instruction classes =================== Instr ----- .. 
class:: Instr(name: str, arg=UNSET, \*, lineno: Union[int, None, UNSET] = UNSET, location: Optional[InstrLocation] = None) Abstract instruction. The type of the *arg* parameter (and the :attr:`arg` attribute) depends on the operation: * If the operation has a jump argument (:meth:`has_jump`, ex: ``JUMP_ABSOLUTE``): *arg* must be a :class:`Label` (if the instruction is used in :class:`Bytecode`) or a :class:`BasicBlock` (used in :class:`ControlFlowGraph`). * If the operation has a cell or free argument (ex: ``LOAD_DEREF``): *arg* must be a :class:`CellVar` or :class:`FreeVar` instance. * If the operation has a local variable (ex: ``LOAD_FAST``): *arg* must be a variable name, type ``str``. * If the operation has a constant argument (``LOAD_CONST``): *arg* must not be a :class:`Label` or :class:`BasicBlock` instance. * If the operation has a compare argument (``COMPARE_OP``): *arg* must be a :class:`Compare` enum. * If the operation has no argument (ex: ``DUP_TOP``), *arg* must not be set. * Otherwise (the operation has an argument, ex: ``CALL_FUNCTION``), *arg* must be an integer (``int``) in the range ``0``..\ ``2,147,483,647``. To replace the operation name and the argument, the :meth:`set` method must be used instead of modifying the :attr:`name` attribute and then the :attr:`arg` attribute. Otherwise, an exception is raised if the previous operation requires an argument and the new operation has no argument (or the opposite). Attributes: .. attribute:: arg Argument value. It can be :data:`UNSET` if the instruction has no argument. .. attribute:: lineno Line number (``int >= 1``), or ``None``. .. attribute:: name Operation name (``str``). Setting the name updates the :attr:`opcode` attribute. .. attribute:: opcode Operation code (``int``). Setting the operation code updates the :attr:`name` attribute. .. versionchanged:: 0.3 The ``op`` attribute was renamed to :attr:`opcode`. .. attribute:: location Detailed location (:class:`InstrLocation`) Methods: .. 
method:: require_arg() -> bool Does the instruction require an argument? .. method:: copy() Create a copy of the instruction. .. method:: is_final() -> bool Is the operation a final operation? Final operations: * RETURN_VALUE * RAISE_VARARGS * BREAK_LOOP * CONTINUE_LOOP * unconditional jumps: :meth:`is_uncond_jump` .. method:: has_jump() -> bool Does the operation have a jump argument? More general than :meth:`is_cond_jump` and :meth:`is_uncond_jump`, it includes other operations. Examples: * FOR_ITER * SETUP_EXCEPT * CONTINUE_LOOP .. method:: is_cond_jump() -> bool Is the operation a conditional jump? Conditional jumps: * JUMP_IF_FALSE_OR_POP * JUMP_IF_TRUE_OR_POP * JUMP_FORWARD_IF_FALSE_OR_POP * JUMP_BACKWARD_IF_FALSE_OR_POP * JUMP_FORWARD_IF_TRUE_OR_POP * JUMP_BACKWARD_IF_TRUE_OR_POP * POP_JUMP_IF_FALSE * POP_JUMP_IF_TRUE .. method:: is_uncond_jump() -> bool Is the operation an unconditional jump? Unconditional jumps: * JUMP_FORWARD * JUMP_ABSOLUTE * JUMP_BACKWARD * JUMP_BACKWARD_NO_INTERRUPT .. method:: is_abs_jump() -> bool Is the operation an absolute jump? .. method:: is_forward_rel_jump() -> bool Is the operation a forward relative jump? .. method:: is_backward_rel_jump() -> bool Is the operation a backward relative jump? .. method:: set(name: str, arg=UNSET) Modify the instruction in-place: replace :attr:`name` and :attr:`arg` attributes, and update the :attr:`opcode` attribute. .. versionchanged:: 0.3 The *lineno* parameter has been removed. .. method:: stack_effect(jump: bool = None) -> int Operation effect on the stack size as computed by :func:`dis.stack_effect`. The *jump* argument takes one of three values. None (the default) requests the largest stack effect. This works fine with most instructions. True returns the stack effect for taken branches. False returns the stack effect for non-taken branches. .. versionchanged:: 0.8 ``stack_effect`` was changed from a property to a method in order to add the keyword argument *jump*. .. 
method:: pre_and_post_stack_effect(jump: Optional[bool] = None) -> Tuple[int, int] Effect of the instruction on the stack before and after its execution. The impact on the stack before the instruction reflects how many values from the stack are used/popped. The impact on the stack after the instruction execution reflects how many values are pushed back on the stack. Those are deduced from :func:`dis.stack_effect` and manual analysis. The *jump* argument has the same meaning as in :py:meth:`Instr.stack_effect`. .. versionadded:: 0.12 ConcreteInstr ------------- .. class:: ConcreteInstr(name: str, arg=UNSET, \*, lineno: int=None) Concrete instruction Inherit from :class:`Instr`. If the operation requires an argument, *arg* must be an integer (``int``) in the range ``0``..\ ``2,147,483,647``. Otherwise, *arg* must not be set. Concrete instructions should only be used in :class:`ConcreteBytecode`. Attributes: .. attribute:: arg Argument value: an integer (``int``) in the range ``0``..\ ``2,147,483,647``, or :data:`UNSET`. Setting the argument value can change the instruction size (:attr:`size`). .. attribute:: size Read-only size of the instruction in bytes (``int``): between ``1`` byte (no argument) and ``6`` bytes (extended argument). Static method: .. staticmethod:: disassemble(code: bytes, offset: int) -> ConcreteInstr Create a concrete instruction from a bytecode string. Methods: .. method:: get_jump_target(instr_offset: int) -> int or None Get the absolute target offset of a jump. Return ``None`` if the instruction is not a jump. The *instr_offset* parameter is the offset of the instruction. It is required by relative jumps. .. note:: Starting with Python 3.10, this quantity is expressed in terms of instruction offset rather than byte offset, and is hence half as large as in 3.9 for identical code. .. method:: assemble() -> bytes Assemble the instruction to a bytecode string. .. 
method:: use_cache_opcodes() -> int Number of cache opcodes that should follow the instruction. Compare ------- .. class:: Compare Enum for the argument of the ``COMPARE_OP`` instruction. Equality test: * ``Compare.EQ`` (``2``): ``x == y`` * ``Compare.NE`` (``3``): ``x != y`` * ``Compare.IS`` (``8``): ``x is y`` removed in Python 3.9+ * ``Compare.IS_NOT`` (``9``): ``x is not y`` removed in Python 3.9+ Inequality test: * ``Compare.LT`` (``0``): ``x < y`` * ``Compare.LE`` (``1``): ``x <= y`` * ``Compare.GT`` (``4``): ``x > y`` * ``Compare.GE`` (``5``): ``x >= y`` Other tests: * ``Compare.IN`` (``6``): ``x in y`` removed in Python 3.9+ * ``Compare.NOT_IN`` (``7``): ``x not in y`` removed in Python 3.9+ * ``Compare.EXC_MATCH`` (``10``): used to compare exceptions in ``except:`` blocks. Removed in Python 3.9+ Binary operation ---------------- .. class:: BinaryOp Enum for the argument of the ``BINARY_OP`` instruction (3.11+). Arithmetic operations ``BinaryOp.ADD`` (``0``): ``x + y`` ``BinaryOp.SUBTRACT`` (``10``): ``x - y`` ``BinaryOp.MULTIPLY`` (``5``): ``x * y`` ``BinaryOp.TRUE_DIVIDE`` (``11``): ``x / y`` ``BinaryOp.FLOOR_DIVIDE`` (``2``): ``x // y`` ``BinaryOp.REMAINDER`` (``6``): ``x % y`` ``BinaryOp.MATRIX_MULTIPLY`` (``4``): ``x @ y`` ``BinaryOp.POWER`` (``8``): ``x ** y`` Logical and binary operations ``BinaryOp.LSHIFT`` (``3``): ``x << y`` ``BinaryOp.RSHIFT`` (``9``): ``x >> y`` ``BinaryOp.AND`` (``1``): ``x & y`` ``BinaryOp.OR`` (``7``): ``x | y`` ``BinaryOp.XOR`` (``12``): ``x ^ y`` Inplace operations: ``BinaryOp.INPLACE_ADD`` (``13``): ``x += y`` ``BinaryOp.INPLACE_SUBTRACT`` (``23``): ``x -= y`` ``BinaryOp.INPLACE_MULTIPLY`` (``18``): ``x *= y`` ``BinaryOp.INPLACE_TRUE_DIVIDE`` (``24``): ``x /= y`` ``BinaryOp.INPLACE_FLOOR_DIVIDE`` (``15``): ``x //= y`` ``BinaryOp.INPLACE_REMAINDER`` (``19``): ``x %= y`` ``BinaryOp.INPLACE_MATRIX_MULTIPLY`` (``17``): ``x @= y`` ``BinaryOp.INPLACE_POWER`` (``21``): ``x **= y`` ``BinaryOp.INPLACE_LSHIFT`` (``16``): ``x <<= y`` 
``BinaryOp.INPLACE_RSHIFT`` (``22``): ``x >>= y`` ``BinaryOp.INPLACE_AND`` (``14``): ``x &= y`` ``BinaryOp.INPLACE_OR`` (``20``): ``x |= y`` ``BinaryOp.INPLACE_XOR`` (``25``): ``x ^= y`` Intrinsic operations -------------------- .. class:: Intrinsic1Op Enum for the argument of the ``CALL_INTRINSIC_1`` instruction (3.12+). ``INTRINSIC_1_INVALID`` ``INTRINSIC_PRINT`` ``INTRINSIC_IMPORT_STAR`` ``INTRINSIC_STOPITERATION_ERROR`` ``INTRINSIC_ASYNC_GEN_WRAP`` ``INTRINSIC_UNARY_POSITIVE`` ``INTRINSIC_LIST_TO_TUPLE`` ``INTRINSIC_TYPEVAR`` ``INTRINSIC_PARAMSPEC`` ``INTRINSIC_TYPEVARTUPLE`` ``INTRINSIC_SUBSCRIPT_GENERIC`` ``INTRINSIC_TYPEALIAS`` .. class:: Intrinsic2Op Enum for the argument of the ``CALL_INTRINSIC_2`` instruction (3.12+). ``INTRINSIC_2_INVALID`` ``INTRINSIC_PREP_RERAISE_STAR`` ``INTRINSIC_TYPEVAR_WITH_BOUND`` ``INTRINSIC_TYPEVAR_WITH_CONSTRAINTS`` ``INTRINSIC_SET_FUNCTION_TYPE_PARAMS`` CellVar and FreeVar ------------------- The following classes are used to represent the argument of opcode listed in ``opcode.hasfree`` which includes: - MAKE_CELL - LOAD_CLOSURE - LOAD_DEREF - STORE_DEREF - DELETE_DEREF - LOAD_CLASSDEREF - LOAD_FROM_DICT_OR_DEREF .. class:: CellVar Argument of an opcode referring to a variable held in a cell. Cell variables cannot always be inferred only from the instructions (``__class__`` used by super() is implicit) and as a consequence cellvars are explicitly listed on all bytecode objects. Attributes: .. attribute:: name Name of the cell variable (``str``). .. class:: FreeVar Argument of opcode referring to a free variable. Free variables cannot always be inferred only from the instructions (``__class__`` used by super() is implicit) and as a consequence freevars are explicitly listed on all bytecode objects. Attributes: .. attribute:: name Name of the free variable (``str``). Label ----- .. class:: Label Pseudo-instruction used as targets of jump instructions. Label targets are "resolved" by :class:`Bytecode.to_concrete_bytecode`. 
Labels must only be used in :class:`Bytecode`. SetLineno --------- .. class:: SetLineno(lineno: int) Pseudo-instruction to set the line number of following instructions. *lineno* must be greater than or equal to ``1``. .. attribute:: lineno Line number (``int``), read-only attribute. InstrLocation ------------- .. class:: InstrLocation(lineno: Optional[int], end_lineno: Optional[int], col_offset: Optional[int], end_col_offset: Optional[int]) Detailed location for an instruction. .. attribute:: lineno Line number on which the instruction starts. .. attribute:: end_lineno Line number on which the instruction ends. .. attribute:: col_offset Column offset within the start line at which the instruction starts. .. attribute:: end_col_offset Column offset within the end line at which the instruction ends. .. classmethod:: from_positions(cls, position: dis.Positions) -> InstrLocation Build an InstrLocation from a dis.Positions object. TryBegin -------- .. class:: TryBegin(target: Union[Label, BasicBlock], push_lasti: bool, stack_depth: Union[int, UNSET] = UNSET) Pseudo instruction marking the beginning of an exception table entry. TryBegin can never be nested. Used in Python 3.11+ in :class:`Bytecode` and :class:`BasicBlock`. .. attribute:: target Target :class:`Label` or :class:`BasicBlock` to which to jump if an exception occurs on an instruction sitting between this :class:`TryBegin` and the matching :class:`TryEnd`. .. attribute:: push_lasti Is the instruction offset at which an exception occurred pushed on the stack before the exception itself when handling an exception. .. attribute:: stack_depth Stack depth that will be restored by the interpreter by popping from the stack when handling an exception, before pushing the exception possibly preceded by the instruction offset depending on :attr:`TryBegin.push_lasti`. .. method:: copy() -> TryBegin Create a copy of the TryBegin. TryEnd ------ .. 
class:: TryEnd(entry: TryBegin) Pseudo instruction marking the end of an exception table entry. .. note:: In a :class:`BasicBlock`, one may find a :class:`TryEnd` instance after a final instruction. This results from the exception enclosing the final instruction. Since :class:`TryEnd` is only a pseudo-instruction this does not violate the guarantee made by a :class:`BasicBlock` which only applies to instructions. .. note:: A jump may cause an exit from an exception table entry. If the jump is unconditional the instruction is final and the above applies. For conditional jumps, within a :class:`ControlFlowGraph`, we insert a :class:`TryEnd` at the beginning of the target block to explicitly signal that we left the exception table entry region. As a consequence, multiple :class:`TryEnd` corresponding to a single :class:`TryBegin` can exist. :class:`TryEnd` corresponding to exiting an exception table entry through a conditional jump always appear before the first instruction of the target block. However, care needs to be taken since that block may be reached through a different path in which no :class:`TryBegin` was encountered. In such cases, the :class:`TryEnd` should be ignored. Bytecode classes ================ BaseBytecode ------------ .. class:: BaseBytecode Base class of bytecode classes. Attributes: .. attribute:: argcount Argument count (``int``), default: ``0``. .. attribute:: cellvars Names of the cell variables (``list`` of ``str``), default: empty list. .. attribute:: docstring Documentation string aka "docstring" (``str``), ``None``, or :data:`UNSET`. Default: :data:`UNSET`. If set, it is used by :meth:`ConcreteBytecode.to_code` as the first constant of the created Python code object. .. attribute:: filename Code filename (``str``), default: ``''``. .. attribute:: first_lineno First line number (``int``), default: ``1``. .. attribute:: flags Flags (``int``). .. attribute:: freevars List of free variable names (``list`` of ``str``), default: empty list. ..
attribute:: posonlyargcount Positional-only argument count (``int``), default: ``0``. New in Python 3.8 .. attribute:: kwonlyargcount Keyword-only argument count (``int``), default: ``0``. .. attribute:: name Code name (``str``), default: ``''``. .. attribute:: qualname Qualified code name (``str``). New in Python 3.11 .. versionchanged:: 0.3 Attribute ``kw_only_argcount`` renamed to :attr:`kwonlyargcount`. Bytecode -------- .. class:: Bytecode Abstract bytecode: list of abstract instructions (:class:`Instr`). Inherit from :class:`BaseBytecode` and :class:`list`. A bytecode must only contain objects of the 5 following types: * :class:`Label` * :class:`SetLineno` * :class:`Instr` * :class:`TryBegin` * :class:`TryEnd` .. versionchanged:: 0.14.0 It is not possible anymore to use concrete instructions (:class:`ConcreteInstr`) in :class:`Bytecode`. Attributes: .. attribute:: argnames List of the argument names (``list`` of ``str``), default: empty list. Static methods: .. staticmethod:: from_code(code) -> Bytecode Create an abstract bytecode from a Python code object. Methods: .. method:: legalize() Check the validity of all the instructions and remove the :class:`SetLineno` instances after updating the instructions. .. method:: to_concrete_bytecode(compute_jumps_passes: int = None, compute_exception_stack_depths: bool = True) -> ConcreteBytecode Convert to concrete bytecode with concrete instructions. Resolve jump targets: replace abstract labels (:class:`Label`) with concrete instruction offsets (relative or absolute, depending on the jump operation). It will also add EXTENDED_ARG prefixes to jump instructions to ensure that the target instructions can be reached. If *compute_jumps_passes* is not None, it sets the upper limit for the number of passes that can be made to generate EXTENDED_ARG prefixes for jump instructions. If None then an internal default is used.
The number of passes is, in theory, limited only by the number of input instructions, however a much smaller default is used because the algorithm converges quickly on most code. For example, running CPython 3.6.5 unittests on OS X 11.13 results in 264996 compiled methods, only one of which requires 5 passes, and none requiring more. If *compute_exception_stack_depths* is True, the stack depth for each exception table entry will be computed (which requires converting the bytecode to a :class:`ControlFlowGraph`) .. method:: to_code(compute_jumps_passes: int = None, stacksize: int = None, *, check_pre_and_post: bool = True, compute_exception_stack_depths: bool = True) -> types.CodeType Convert to a Python code object. It is based on :meth:`to_concrete_bytecode` and so resolves jump targets. *compute_jumps_passes*: see :meth:`to_concrete_bytecode` *stacksize*: see :meth:`ConcreteBytecode.to_code` *check_pre_and_post*: see :meth:`ConcreteBytecode.to_code` *compute_exception_stack_depths*: see :meth:`to_concrete_bytecode` .. method:: compute_stacksize(*, check_pre_and_post: bool = True) -> int Compute the stacksize needed to execute the code. Will raise an exception if the bytecode is invalid. This computation requires building the control flow graph associated with the code. *check_pre_and_post* Allows caller to disable checking for stack underflow .. method:: update_flags(is_async: bool = None) -> None Update the object flags by calling :py:func:`infer_flags` on itself. ConcreteBytecode ---------------- .. class:: ExceptionTableEntry Entry for a given line in the exception table. All offsets are expressed in instructions not in bytes. Attributes: .. attribute:: start_offset Offset (``int``) in instruction between the beginning of the bytecode and the beginning of this entry. .. attribute:: stop_offset Offset (``int``) in instruction between the beginning of the bytecode and the end of this entry.
This offset is inclusive meaning that the instruction it points to is included in the try/except handling. .. attribute:: target Offset (``int``) in instruction to the first instruction of the exception handling block. .. attribute:: stack_depth Minimal stack depth (``int``) in the block delineated by start and stop offset of the exception table entry. Used to restore the stack (by popping items) when entering the exception handling block. .. attribute:: push_lasti ``bool`` indicating if the offset, at which an exception was raised, should be pushed on the stack before the exception itself (which is pushed as a single value). .. class:: ConcreteBytecode List of concrete instructions (:class:`ConcreteInstr`). Inherit from :class:`BaseBytecode`. A concrete bytecode must only contain objects of the 2 following types: * :class:`SetLineno` * :class:`ConcreteInstr` :class:`Label`, :class:`TryBegin`, :class:`TryEnd` and :class:`Instr` must not be used in concrete bytecode. Attributes: .. attribute:: consts List of constants (``list``), default: empty list. .. attribute:: names List of names (``list`` of ``str``), default: empty list. .. attribute:: varnames List of variable names (``list`` of ``str``), default: empty list. .. attribute:: exception_table List of :class:`ExceptionTableEntry` describing the portions of the bytecode in which exceptions are caught and where they are handled. Used only in Python 3.11+ Static methods: .. staticmethod:: from_code(code, \*, extended_arg=False) -> ConcreteBytecode Create a concrete bytecode from a Python code object. If *extended_arg* is true, create ``EXTENDED_ARG`` instructions. Otherwise, concrete instructions use extended argument (size of ``6`` bytes rather than ``3`` bytes). Methods: .. method:: legalize() Check the validity of all the instructions and remove the :class:`SetLineno` instances after updating the instructions. ..
method:: to_code(stacksize: int = None, *, check_pre_and_post: bool = True, compute_exception_stack_depths: bool = True) -> types.CodeType Convert to a Python code object. *stacksize* Allows caller to explicitly specify a stacksize. If not specified a :class:`ControlFlowGraph` is created internally in order to call :meth:`ControlFlowGraph.compute_stacksize`. It's cheaper to pass a value if the value is known. *check_pre_and_post* Allows caller to disable checking for stack underflow If *compute_exception_stack_depths* is True, the stack depth for each exception table entry will be computed (which requires to convert the the bytecode to a :class:`ControlFlowGraph`) .. method:: to_bytecode() -> Bytecode Convert to abstract bytecode with abstract instructions. .. method:: compute_stacksize(*, check_pre_and_post: bool = True) -> int Compute the stacksize needed to execute the code. Will raise an exception if the bytecode is invalid. This computation requires to build the control flow graph associated with the code. *check_pre_and_post* Allows caller to disable checking for stack underflow .. method:: update_flags(is_async: bool = None) Update the object flags by calling :py:func:infer_flags on itself. BasicBlock ---------- .. class:: BasicBlock `Basic block `_. Inherit from :class:`list`. A basic block is a straight-line code sequence of abstract instructions (:class:`Instr`) with no branches in except to the entry and no branches out except at the exit. A block must only contain objects of the 4 following types: * :class:`SetLineno` * :class:`Instr` * :class:`TryBegin` * :class:`TryEnd` .. versionchanged:: 0.14.0 It is not possible anymore to use concrete instructions (:class:`ConcreteInstr`) in :class:`BasicBlock`. Only the last instruction can have a jump argument, and the jump argument must be a basic block (:class:`BasicBlock`). Labels (:class:`Label`) must not be used in blocks. Attributes: .. 
attribute:: next_block Next basic block (:class:`BasicBlock`), or ``None``. Methods: .. method:: legalize(first_lineno: int) -> None Check the validity of all the instruction and remove the :class:`SetLineno` instances after updating the instructions. `first_lineno` specifies the line number to use for instruction without a set line number encountered before the first :class:`SetLineno` instance. .. method:: get_jump() --> BasicBlock | None Get the target block (:class:`BasicBlock`) of the jump if the basic block ends with an instruction with a jump argument. Otherwise, return ``None``. .. method:: get_trailing_end(index: int) -> TryEnd | None Get the first TryEnd found after the position ``index`` in the block if any. ControlFlowGraph ---------------- .. class:: ControlFlowGraph `Control flow graph (CFG) `_: list of basic blocks (:class:`BasicBlock`). A basic block is a straight-line code sequence of abstract instructions (:class:`Instr`) with no branches in except to the entry and no branches out except at the exit. Inherit from :class:`BaseBytecode`. Labels (:class:`Label`) must not be used in blocks. This class is not designed to emit code, but to analyze and modify existing code. Use :class:`Bytecode` to emit code. Attributes: .. attribute:: argnames List of the argument names (``list`` of ``str``), default: empty list. Methods: .. staticmethod:: from_bytecode(bytecode: Bytecode) -> ControlFlowGraph Convert a :class:`Bytecode` object to a :class:`ControlFlowGraph` object: convert labels to blocks. Splits blocks after final instructions (:meth:`Instr.is_final`) and after conditional jumps (:meth:`Instr.is_cond_jump`). .. method:: legalize(first_lineno: int) Legalize all the blocks of the CFG. .. method:: add_block(instructions=None) -> BasicBlock Add a new basic block. Return the newly created basic block. .. method:: get_block_index(block: BasicBlock) -> int Get the index of a block in the bytecode. 
Raise a :exc:`ValueError` if the block is not part of the bytecode. .. versionadded:: 0.3 .. method:: split_block(block: BasicBlock, index: int) -> BasicBlock Split a block into two blocks at the specific instruction. Return the newly created block, or *block* if index equals ``0``. .. method:: get_dead_blocks() -> List[BasicBlock] Retrieve all the blocks of the CFG that are unreachable. .. method:: to_bytecode() -> Bytecode Convert to a bytecode object using labels. .. method:: compute_stacksize(*, check_pre_and_post: bool = True, compute_exception_stack_depths: bool = True) -> int Compute the stack size required by a bytecode object. Will raise an exception if the bytecode is invalid. *check_pre_and_post* Allows caller to disable checking for stack underflow *compute_exception_stack_depths* Allows caller to disable the computation of the stack depth required by exception table entries. NOTE: The computation will only consider block that can be reached from the entry block. In particular, stack size for TryBegin/TryEnd in dead blocks is not updated. In some cases, stack usage may be slightly overestimated compared to CPython. This occurs when CPython duplicated the code for a finally clause but computed stack size before the duplication in which case one could infer a smaller stack usage for a TryBegin/TryEnd pair than can be done with the final bytecode form. .. method:: update_flags(is_async: bool = None) Update the object flags by calling :py:func:infer_flags on itself. .. method:: to_code(stacksize: int = None, *, check_pre_and_post: bool = True, compute_exception_stack_depths: bool = True) Convert to a Python code object. Refer to descriptions of :meth:`Bytecode.to_code` and :meth:`ConcreteBytecode.to_code`. *check_pre_and_post* Allows caller to disable checking for stack underflow *compute_exception_stack_depths* Allows caller to disable the computation of the stack depth required by exception table entries. 
Line Numbers ============ The line number can be set directly on an instruction using the ``lineno`` parameter of the constructor. Otherwise, the line number is inherited from the previous instruction, starting at ``first_lineno`` of the bytecode. :class:`SetLineno` pseudo-instruction can be used to set the line number of following instructions. Starting with Python 3.11, instructions now have a starting lineno, an end lineno along with a starting column offset and an end column offset. :class:`InstrLocation` is used to store this new detailed information. Compiler Flags ============== .. class:: CompilerFlags() .. attribute:: OPTIMIZED Set if a code object only uses fast locals .. attribute:: NEWLOCALS Set if the code execution should be done with a new local scope .. attribute:: VARARGS Set if a code object expects a variable number of positional arguments .. attribute:: VARKEYWORDS Set if a code object expects a variable number of keyword arguments .. attribute:: NESTED Set if a code object corresponds to a function defined in another function .. attribute:: GENERATOR Set if a code object is a generator (contains yield instructions) .. attribute:: NOFREE Set if a code object does not use free variables .. attribute:: COROUTINE Set if a code object is a coroutine. New in Python 3.5 .. attribute:: ITERABLE_COROUTINE Set if a code object is an iterable coroutine. New in Python 3.5 .. attribute:: ASYNC_GENERATOR Set if a code object is an asynchronous generator. New in Python 3.6 .. attribute:: FUTURE_GENERATOR_STOP Set if a code object is defined in a context in which generator_stop has been imported from \_\_future\_\_ .. function:: infer_flags(bytecode, is_async: bool = None) -> CompilerFlags Infer the correct values for the compiler flags for a given bytecode based on the instructions.
The flags that can be inferred are : - OPTIMIZED - GENERATOR - NOFREE - COROUTINE - ASYNC_GENERATOR Force the code to be marked as asynchronous if True, prevent it from being marked as asynchronous if False and simply infer the best solution based on the opcode and the existing flag if None. bytecode-0.15.1/doc/byteplay_codetransformer.rst000066400000000000000000000103001451217043400217740ustar00rootroot00000000000000++++++++++++++++++++++++++++++++++++++++++++ Comparison with byteplay and codetransformer ++++++++++++++++++++++++++++++++++++++++++++ History of the bytecode API design ================================== The design of the bytecode module started with a single use case: reimplement the CPython peephole optimizer (implemented in C) in pure Python. The design of the API required many iterations to get the current API. bytecode now has a clear separation between concrete instructions using integer arguments and abstract instructions which use Python objects for arguments. Jump targets are labels or basic blocks. And the control flow graph abstraction is now an API well separated from the regular abstract bytecode which is a simple list of instructions. byteplay and codetransformer ============================ The `byteplay `_ and `codetransformer `_ are clear inspiration for the design of the bytecode API. Sadly, byteplay and codetransformer API have design issues (at least for my specific use cases). Free and cell variables ----------------------- Converting a code object to bytecode and then back to code must not modify the code object. It is an important requirement. The LOAD_DEREF instruction supports free variables and cell variables. byteplay and codetransformer use a simple string for the variable name. When the bytecode is converted to a code object, they check if the variable is a free variable, or fallback to a cell variable. The CPython code base contains a corner case: code having a free variable and a cell variable with the same name. 
The heuristic produces invalid code which can lead to a crash. bytecode uses :class:`FreeVar` and :class:`CellVar` classes to tag the type of the variable. Trying to use a simple string raises a :exc:`TypeError` in the :class:`Instr` constructor. .. note:: It's possible to fix this issue in byteplay and codetransformer, maybe even while keeping support for simple strings for free/cell variables for backward compatibility. Line numbers ------------ codetransformer uses internally a dictionary mapping offsets to line numbers. It is updated when the ``.steal()`` method is used. byteplay uses a pseudo-instruction ``SetLineno`` to set the current line number of the following instructions. It requires handling these pseudo-instructions when you modify the bytecode, especially when instructions are moved. In FAT Python, some optimizations move instructions but their line numbers must be kept. That's also why Python 3.6 was modified to support negative line number delta in ``code.co_lnotab``. bytecode has a different design: line numbers are stored directly inside instructions (:attr:`Instr.lineno` attribute). Moving an instruction keeps the line number information by design. bytecode also supports the pseudo-instruction :class:`SetLineno`. It was added to simplify functions emitting bytecode. It's not used when an existing code object is converted to bytecode. Jump targets ------------ In codetransformer, a jump target is an instruction. Jump targets are computed when the bytecode is converted to a code object. byteplay and bytecode use labels. Jump targets are computed when the abstract bytecode is converted to a code object. .. note:: A loop is needed in the conversion from bytecode to code: if the jump target is larger than 2**16, the size of the jump instruction changes (from 3 to 6 bytes). So other jump targets must be recomputed. bytecode handles this corner case. byteplay and codetransformer don't, but it should be easy to fix them.
Control flow graph ------------------ The peephole optimizer has strong requirements on the control flow: an optimization must not modify two instructions which are part of two different basic blocks. Otherwise, the optimizer produces invalid code. bytecode provides a control flow graph API for this use case. byteplay and codetransformer don't. Functions or methods -------------------- This point is a matter of taste. In bytecode, instructions are objects with methods like :meth:`~Instr.is_final`, :meth:`~Instr.has_cond_jump`, etc. The byteplay project uses functions taking an instruction as parameter. bytecode-0.15.1/doc/cfg.rst000066400000000000000000000156411451217043400154420ustar00rootroot00000000000000************************ Control Flow Graph (CFG) ************************ To analyze or optimize existing code, ``bytecode`` provides a :class:`ControlFlowGraph` class which is a `control flow graph (CFG) `_. The control flow graph is used to perform the stack depth analysis when converting to code. Because it is better at identifying dead code than CPython it can lead to reduced stack size. 
Example ======= Dump the control flow graph of the :ref:`conditional jump example `:: from bytecode import Label, Instr, Bytecode, ControlFlowGraph, dump_bytecode label_else = Label() label_print = Label() bytecode = Bytecode([Instr('LOAD_NAME', 'print'), Instr('LOAD_NAME', 'test'), Instr('POP_JUMP_IF_FALSE', label_else), Instr('LOAD_CONST', 'yes'), Instr('JUMP_FORWARD', label_print), label_else, Instr('LOAD_CONST', 'no'), label_print, Instr('CALL_FUNCTION', 1), Instr('LOAD_CONST', None), Instr('RETURN_VALUE')]) blocks = ControlFlowGraph.from_bytecode(bytecode) dump_bytecode(blocks) Output:: block1: LOAD_NAME 'print' LOAD_NAME 'test' POP_JUMP_IF_FALSE -> block2 block2: LOAD_CONST 'yes' JUMP_FORWARD block3: LOAD_CONST 'no' -> block4 block4: CALL_FUNCTION 1 LOAD_CONST None RETURN_VALUE We get 4 blocks: * block #1 is the start block and ends with ``POP_JUMP_IF_FALSE`` conditional jump and is followed by the block #2 * block #2 ends with ``JUMP_FORWARD`` unconditional jump * block #3 does not contain jump and is followed by the block #4 * block #4 is the final block The start block is always the first block. Analyze the control flow graph ============================== The ``bytecode`` module provides two ways to iterate on blocks: * iterate on the basic block as a sequential list * browse the graph by following jumps and links to next blocks Iterate on basic blocks ----------------------- Iterating on basic blocks is a simple as this loop:: for block in blocks: ... 
Example of a ``display_blocks()`` function:: from bytecode import UNSET, Label, Instr, Bytecode, BasicBlock, ControlFlowGraph def display_blocks(blocks): for block in blocks: print("Block #%s" % (1 + blocks.get_block_index(block))) for instr in block: if isinstance(instr.arg, BasicBlock): arg = "" % (1 + blocks.get_block_index(instr.arg)) elif instr.arg is not UNSET: arg = repr(instr.arg) else: arg = '' print(" %s %s" % (instr.name, arg)) if block.next_block is not None: print(" => " % (1 + blocks.get_block_index(block.next_block))) print() label_else = Label() label_print = Label() bytecode = Bytecode([Instr('LOAD_NAME', 'print'), Instr('LOAD_NAME', 'test'), Instr('POP_JUMP_IF_FALSE', label_else), Instr('LOAD_CONST', 'yes'), Instr('JUMP_FORWARD', label_print), label_else, Instr('LOAD_CONST', 'no'), label_print, Instr('CALL_FUNCTION', 1), Instr('LOAD_CONST', None), Instr('RETURN_VALUE')]) blocks = ControlFlowGraph.from_bytecode(bytecode) display_blocks(blocks) Output:: Block #1 LOAD_NAME 'print' LOAD_NAME 'test' POP_JUMP_IF_FALSE => Block #2 LOAD_CONST 'yes' JUMP_FORWARD Block #3 LOAD_CONST 'no' => Block #4 CALL_FUNCTION 1 LOAD_CONST None RETURN_VALUE .. note:: :class:`SetLineno` is not handled in the example to keep it simple. Browse the graph ---------------- Recursive function is a simple solution to browse the control flow graph. 
Example to a recursive ``display_block()`` function:: from bytecode import UNSET, Label, Instr, Bytecode, BasicBlock, ControlFlowGraph def display_block(blocks, block, seen=None): # avoid loop: remember which blocks were already seen if seen is None: seen = set() if id(block) in seen: return seen.add(id(block)) # display instructions of the block print("Block #%s" % (1 + blocks.get_block_index(block))) for instr in block: if isinstance(instr.arg, BasicBlock): arg = "" % (1 + blocks.get_block_index(instr.arg)) elif instr.arg is not UNSET: arg = repr(instr.arg) else: arg = '' print(" %s %s" % (instr.name, arg)) # is the block followed directly by another block? if block.next_block is not None: print(" => " % (1 + blocks.get_block_index(block.next_block))) print() # display the next block if block.next_block is not None: display_block(blocks, block.next_block, seen) # display the block linked by jump (if any) target_block = block.get_jump() if target_block is not None: display_block(blocks, target_block, seen) label_else = Label() label_print = Label() bytecode = Bytecode([Instr('LOAD_NAME', 'print'), Instr('LOAD_NAME', 'test'), Instr('POP_JUMP_IF_FALSE', label_else), Instr('LOAD_CONST', 'yes'), Instr('JUMP_FORWARD', label_print), label_else, Instr('LOAD_CONST', 'no'), label_print, Instr('CALL_FUNCTION', 1), Instr('LOAD_CONST', None), Instr('RETURN_VALUE')]) blocks = ControlFlowGraph.from_bytecode(bytecode) display_block(blocks, blocks[0]) Output:: Block #1 LOAD_NAME 'print' LOAD_NAME 'test' POP_JUMP_IF_FALSE => Block #2 LOAD_CONST 'yes' JUMP_FORWARD Block #4 CALL_FUNCTION 1 LOAD_CONST None RETURN_VALUE Block #3 LOAD_CONST 'no' => Block numbers are no displayed in the sequential order: block #4 is displayed before block #3. .. note:: Dead code (unreachable blocks) is not displayed by ``display_block``. 
bytecode-0.15.1/doc/changelog.rst000066400000000000000000000276461451217043400166420ustar00rootroot00000000000000ChangeLog ========= 2023-10-13: Version 0.15.1 -------------------------- Bugfixes: - Disallow creating an instruction targeting a pseudo/instrumented opcode PR #133 - Fixes encoding of 0 as a varint PR #132 - Correct spelling of "INTRINSIC" in several places; this affected some ops in Python 3.12. PR #131 2023-09-01: Version 0.15.0 -------------------------- New features: - Add support for Python 3.12 PR #122 Support for Python 3.12, comes with a number of changes reflecting changes in CPython bytecode itself: - handle the ability of ``LOAD_ATTR`` to replace ``LOAD_METHOD`` As a consequence the argument is now a ``tuple[bool, str]`` - similarly ``LOAD_SUPER_ATTR`` which uses the 2 lowest bits as flag takes a ``tuple[bool, bool, str]`` as argument - ``POP_JUMP_IF_*`` instructions are undirected in Python 3.12 - ``YIELD_VALUE`` now takes an argument - Support for ``CALL_INTRINSIC_1/2`` led to the addition of 2 new enums to represent the argument 2023-05-24: Version 0.14.2 -------------------------- Bugfixes: - allow to convert a CFG, for which stack sizes have not been computed, to Bytecode even in the presence of mergeable TryBegin/TryEnd PR #120 - remove spurious TryEnd leftover when going from CFG to Bytecode PR #120 2023-04-04: Version 0.14.1 -------------------------- Bugfixes: - allow to disassemble code containing ``EXTENDED_ARG`` targeting a ``NOP`` PR #117 2022-11-30: Version 0.14.0 -------------------------- New features: - Removed the peephole optimizer PR #107 Basically changes in Python 3.11 made it hard to port and the maintenance cost exceeded the perceived use. It could be re-added if there is a demand for it. 
- Add support for Python 3.11 PR #107 Support for Python 3.11, comes with a number of changes reflecting changes in CPython bytecode itself: - support for the exception table in ``ConcreteBytecode`` - support for pseudo-instruction ``TryBegin`` and ``TryEnd`` describing the exception table in ``Bytecode`` and ``ControlflowGraph`` - new keyword arguments in conversion method related to computations required for the exception table - handling of CACHE opcode at the ``ConcreteBytecode`` level - handling of the ability of ``LOAD_GLOBAL`` to push NULL (the argument is now a ``tuple[bool, str]``) - support for end_lineno and column offsets in instructions - support for ``co_qualname`` (as ``qualname`` on bytecode objects) and a number of internal changes related to changes in the internal bytecode representation. - Add type annotations and make types stricter PR # 105 In particular, ConcreteInstr does not inherit from Instr anymore and one cannot use ConcreteInstr in Bytecode object. This is saner than before. Bugfixes: - Removed ``EXC_MATCH`` from the ``Compare`` enumeration starting with Python 3.9. The new ``JUMP_IF_NOT_EXC_MATCH`` opcode should be used instead. - Removed ``IN``, ``NOT_IN``, ``IS``, ``NOT_IS`` from the ``Compare`` enumeration starting with Python 3.9. The new ``CONTAINS_OP`` and ``IS_OP`` opcodes should be used instead. - Add proper pre and post stack effects to all opcodes (up to Python 3.11) PR #106 #107 Maintenance: - Make the install process PEP517 compliant PR #97 - Drop support for Python 3.6 and 3.7 PR #100 2021-10-04: Version 0.13.0 -------------------------- New features: - Add support for Python 3.10 new encoding of line number. This support is minimal in the sense that we still systematically assign a line number while the new format allow bytecode with absolutely no line number. PR #72 Bugfixes: - Fix handling of RERAISE (introduced in 3.9) when creating a ControlFlowGraph, previously it was not considered final. 
PR #72 - Fix line table assembly in Python 3.10. PR #85 2021-02-02: Version 0.12.0 -------------------------- New features: - All calculations of stacksize now check for stack underflow to avoid segfault at runtime PR #69 Bugfixes: - Fix recursion limitations when compiling bytecode with numerous basic blocks. PR #57 - Fix handling of line offsets. Issue #67, PR #71 API changes: - Forbid an :class:`Instr` to hold an EXTENDED_ARG op_code PR #65 - Forbid the use of :class:`ConcreteInstr` in :class:`Bytecode` and :class:`ControlFlowGraph` PR #65 This is motivated by the extra complexity that handling possible EXTENDED_ARG instruction in those representation would bring (stack computation, etc) - Always remove EXTENDED_ARG when converting :class:`ConcreteBytecode` to :class:`Bytecode` PR #65 This is equivalent to say that the :class:`ConcreteBytecode` converted to :class:`Bytecode` was generated by :meth:`ConcreteBytecode.from_code` with extended_args=False - :class:`Instr` now has a new method :meth:`Instr.pre_and_post_stack_effect` for checking the prerequisite stack size of an operation PR #69 - :meth:`_compute_stack_size` now uses :meth:`Instr.pre_and_post_stack_effect` to compute the stack size to reject code that will lead to runtime segfault caused by stack underflow PR #69 2020-03-02: Version 0.11.0 -------------------------- New features: - The :func:`infer_flags` can now be used to forcibly mark a function as asynchronous or not. Bugfixes: - Fix a design flaw in the flag inference mechanism that could very easily lead to invalid flags configuration PR #56 2020-02-02: Version 0.10.0 -------------------------- New features: - Slices and copy of :class:`Bytecode`, :class:`ConcreteBytecode` and :class:`BasicBlock` are now of the same type as the original container. PR #52 - :class:`Bytecode`, :class:`ConcreteBytecode`, :class:`BasicBlock` and :class:`ControlFlowGraph` have a new :meth:`legalize` method validating their content and removing SetLineno. 
PR #52 - Modify the implementation of :code:`const_key` to avoid manual synchronizations with :code:`_PyCode_ConstantKey` in CPython codebase and allow the use of arbitrary Python objects as constants of nested code objects. #54 API changes: - Add :class:`Compare` enum to public API. PR #53 2019-12-01: Version 0.9.0 ------------------------- New features: - Add support for released version of Python 3.8 and update documentation. 2019-02-18: Version 0.8.0 ------------------------- New features: - Add support for Python 3.7 PR #29 - Add preliminary support for Python 3.8-dev PR #41 - Allow to use any Python object as constants to enable aggressive optimizations PR #34 API changes: - `stack_effect` is now a method of :class:`Instr` and not as property anymore. PR #29 Bugfixes: - Avoid throwing `OverflowError` when applying `stack_effect` on valid :class:`Instr` objects. PR #43, PR #44 2018-04-15: Version 0.7.0 ------------------------- New features: - Add `compute_jumps_passes` optional argument to :meth:`Bytecode.to_code` and to :meth:`Bytecode.to_concrete_bytecode` to control the number of passes performed to compute jump targets. In theory the required number is only bounded by the size of the code, but usually the algorithm converges quickly (< 10 iterations). Bugfixes: - proper handling of `EXTENDED_ARG` without arguments PR #28: `EXTENDED_ARG` are once again removed but their presence is recorded to avoid having issues with offsets in jumps. Similarly when round tripping code through :class:`ConcreteBytecode` the `EXTENDED_ARG` without args are preserved while if going through :class:`Bytecode` they are removed. 
2018-03-24: Version 0.6 ----------------------- * Add stack depth computation based on control flow graph analysis * Add higher level flags handling using IntFlag enum and inference function * Add an instructions argument to ConcreteBytecode, and validate its value * Do not delete `EXTENDED_ARG` instructions that have no arg 2017-01-05: Version 0.5 ----------------------- * Add the new bytecode format of Python 3.6. * Remove the ``BaseInstr`` class which became useless. It was replaced with the :class:`Instr` class. * Documentation: Add a comparison with byteplay and codetransformer. * Remove the ``BaseInstr`` class: Instr becomes the new base class. * Fix PEP 8 issues and check PEP 8 on Travis CI. 2016-04-12: Version 0.4 ----------------------- Peephole optimizer: * Reenable optimization on ``JUMP_IF_TRUE_OR_POP`` jumping to ``POP_JUMP_IF_FALSE ``. 2016-03-02: Version 0.3 ----------------------- New features: - Add :meth:`ControlFlowGraph.get_block_index` method API changes: - Rename ``Block`` class to :class:`BasicBlock` - Rename ``BytecodeBlocks`` class to :class:`ControlFlowGraph` - Rename ``BaseInstr.op`` to :attr:`BaseInstr.opcode` - Rename ``BaseBytecode.kw_only_argcount`` attribute to :attr:`BaseBytecode.kwonlyargcount`, name closer to the Python code object attribute (``co_kwonlyargcount``) - :class:`Instr` constructor and its :meth:`~BaseInstr.set` method now validate the argument type - Add :class:`Compare` enum, used for ``COMPARE_OP`` argument of :class:`Instr` - Remove *lineno* parameter from the :meth:`BaseInstr.set` method - Add :class:`CellVar` and :class:`FreeVar` classes: instructions having a cell or free variable now require a :class:`CellVar` or :class:`FreeVar` instance rather than a simple string (``str``). This change is required to correctly handle code with duplicated variable names in cell and free variables.
- :class:`ControlFlowGraph`: remove undocumented ``to_concrete_bytecode()`` and ``to_code()`` methods Bugfixes: - Fix support of :class:`SetLineno` Peephole optimizer: - Better code for LOAD_CONST x n + BUILD_LIST + UNPACK_SEQUENCE: rewrite LOAD_CONST in the reverse order instead of using ROT_TWO and ROT_THREE. This optimization supports more than 3 items. - Remove JUMP_ABSOLUTE pointing to the following code. It can occur after dead code was removed. - Remove NOP instructions - Bugfix: catch IndexError when trying to get the next instruction. 2016-02-29: Version 0.2 ----------------------- - Again, the API is deeply reworked. - The project now has documentation: `bytecode documentation <https://bytecode.readthedocs.io/en/latest/>`_ - Fix bug #1: support jumps larger than 2^16. - Add a new bytecode.peephole_opt module: a peephole optimizer, code based on peephole optimizer of CPython 3.6 which is implemented in C - Add :func:`dump_bytecode` function to ease debugging. - :class:`Instr`: * Add :meth:`Instr.is_final` method * Add :meth:`Instr.copy` and :meth:`ConcreteInstr.copy` methods * :class:`Instr` now uses variable name instead of integer for cell and free variables. * Rename ``Instr.is_jump`` to :meth:`Instr.has_jump` - :class:`ConcreteInstr` is now mutable - Redesign the :class:`BytecodeBlocks` class: - :class:`Block` no longer has a label attribute: jump targets are now the blocks themselves - Rename ``BytecodeBlocks.add_label()`` method to :meth:`BytecodeBlocks.split_block` - Labels are no longer allowed in blocks - :meth:`BytecodeBlocks.from_bytecode` now splits blocks after final instructions (:meth:`Instr.is_final`) and after conditional jumps (:meth:`Instr.is_cond_jump`). It helps the peephole optimizer to respect the control flow and to remove dead code. - Rework API to convert bytecode classes: - BytecodeBlocks: Remove ``to_concrete_bytecode()`` and ``to_code()`` methods. Now you first have to convert blocks to bytecode using :meth:`~BytecodeBlocks.to_bytecode`.
- Remove ``Bytecode.to_bytecode_blocks()`` method, replaced with :meth:`BytecodeBlocks.from_bytecode` - Remove ``ConcreteBytecode.to_concrete_bytecode()`` and ``Bytecode.to_bytecode()`` methods which did nothing (return ``self``) - Fix :class:`ConcreteBytecode` for code with no constant (empty list of constants) - Fix argnames in :meth:`ConcreteBytecode.to_bytecode`: use CO_VARARGS and CO_VARKEYWORDS flags to count the number of arguments - Fix const_key() to compare correctly constants equal but of different types and special cases like ``-0.0`` and ``+0.0`` 2016-02-26: Version 0.1 ----------------------- - Rewrite completely the API! 2016-02-23: Release 0.0 ----------------------- - First public release bytecode-0.15.1/doc/conf.py000066400000000000000000000203621451217043400154440ustar00rootroot00000000000000# -*- coding: utf-8 -*- # # bytecode documentation build configuration file, created by # sphinx-quickstart on Mon Feb 29 00:54:53 2016. # # This file is execfile()d with the current directory set to its # containing dir. # # Note that not all possible configuration values are present in this # autogenerated file. # # All configuration values have a default; values that are commented out # serve to show the default. import os import sys # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. sys.path.insert(0, os.path.abspath("../src")) from bytecode import __version__ # -- General configuration ------------------------------------------------ # If your documentation needs a minimal Sphinx version, state it here. # needs_sphinx = '1.0' # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. 
extensions = ["sphinx_tabs.tabs"] # Add any paths that contain templates here, relative to this directory. templates_path = ["_templates"] # The suffix of source filenames. source_suffix = ".rst" # The encoding of source files. # source_encoding = 'utf-8-sig' # The master toctree document. master_doc = "index" # General information about the project. project = "bytecode" copyright = "Contributors to the bytecode project" # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the # built documents. # # The short X.Y version. # The full version, including alpha/beta/rc tags. version = release = __version__ # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. # language = None # There are two options for replacing |today|: either, you set today to some # non-false value, then it is used: # today = '' # Else, today_fmt is used as the format for a strftime call. # today_fmt = '%B %d, %Y' # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. exclude_patterns = ["_build"] # The reST default role (used for this markup: `text`) to use for all # documents. # default_role = None # If true, '()' will be appended to :func: etc. cross-reference text. # add_function_parentheses = True # If true, the current module name will be prepended to all description # unit titles (such as .. function::). # add_module_names = True # If true, sectionauthor and moduleauthor directives will be shown in the # output. They are ignored by default. # show_authors = False # The name of the Pygments (syntax highlighting) style to use. pygments_style = "sphinx" # A list of ignored prefixes for module index sorting. # modindex_common_prefix = [] # If true, keep warnings as "system message" paragraphs in the built documents. 
# keep_warnings = False # -- Options for HTML output ---------------------------------------------- # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. html_theme = "sphinx_rtd_theme" # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. # html_theme_options = {} # Add any paths that contain custom themes here, relative to this directory. # html_theme_path = [] # The name for this set of Sphinx documents. If None, it defaults to # " v documentation". # html_title = None # A shorter title for the navigation bar. Default is the same as html_title. # html_short_title = None # The name of an image file (relative to this directory) to place at the top # of the sidebar. # html_logo = None # The name of an image file (within the static path) to use as favicon of the # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 # pixels large. # html_favicon = None # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". # html_static_path = ["_static"] # Add any extra paths that contain custom files (such as robots.txt or # .htaccess) here, relative to this directory. These files are copied # directly to the root of the documentation. # html_extra_path = [] # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, # using the given strftime format. # html_last_updated_fmt = '%b %d, %Y' # If true, SmartyPants will be used to convert quotes and dashes to # typographically correct entities. # html_use_smartypants = True # Custom sidebar templates, maps document names to template names. # html_sidebars = {} # Additional templates that should be rendered to pages, maps page names to # template names. 
# html_additional_pages = {} # If false, no module index is generated. # html_domain_indices = True # If false, no index is generated. # html_use_index = True # If true, the index is split into individual pages for each letter. # html_split_index = False # If true, links to the reST sources are added to the pages. # html_show_sourcelink = True # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. # html_show_sphinx = True # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. # html_show_copyright = True # If true, an OpenSearch description file will be output, and all pages will # contain a tag referring to it. The value of this option must be the # base URL from which the finished HTML is served. # html_use_opensearch = '' # This is the file name suffix for HTML files (e.g. ".xhtml"). # html_file_suffix = None # Output file base name for HTML help builder. htmlhelp_basename = "bytecodedoc" # -- Options for LaTeX output --------------------------------------------- latex_elements = { # The paper size ('letterpaper' or 'a4paper'). #'papersize': 'letterpaper', # The font size ('10pt', '11pt' or '12pt'). #'pointsize': '10pt', # Additional stuff for the LaTeX preamble. #'preamble': '', } # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, # author, documentclass [howto, manual, or own class]). latex_documents = [ ("index", "bytecode.tex", "bytecode Documentation", "Victor Stinner", "manual"), ] # The name of an image file (relative to this directory) to place at the top of # the title page. # latex_logo = None # For "manual" documents, if this is true, then toplevel headings are parts, # not chapters. # latex_use_parts = False # If true, show page references after internal links. # latex_show_pagerefs = False # If true, show URL addresses after external links. # latex_show_urls = False # Documents to append as an appendix to all manuals. 
# latex_appendices = [] # If false, no module index is generated. # latex_domain_indices = True # -- Options for manual page output --------------------------------------- # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). man_pages = [ ( "index", "bytecode", "bytecode Documentation", ["Victor Stinner", "Matthieu C. Dartiailh"], 1, ) ] # If true, show URL addresses after external links. # man_show_urls = False # -- Options for Texinfo output ------------------------------------------- # Grouping the document tree into Texinfo files. List of tuples # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ ( "index", "bytecode", "bytecode Documentation", "Victor Stinner", "bytecode", "Python module to generate and modify bytecode", "Miscellaneous", ), ] # Documents to append as an appendix to all manuals. # texinfo_appendices = [] # If false, no module index is generated. # texinfo_domain_indices = True # How to display URL addresses: 'footnote', 'no', or 'inline'. # texinfo_show_urls = 'footnote' # If true, do not generate a @detailmenu in the "Top" node's menu. # texinfo_no_detailmenu = False bytecode-0.15.1/doc/index.rst000066400000000000000000000016571451217043400160140ustar00rootroot00000000000000******** bytecode ******** ``bytecode`` is a Python module to generate and modify bytecode. * `bytecode project homepage at GitHub `_ (code, bugs) * `bytecode documentation `_ (this documentation) * `Download latest bytecode release at the Python Cheeseshop (PyPI) `_ Table Of Contents ================= .. toctree:: :maxdepth: 3 usage cfg api byteplay_codetransformer changelog todo See also ======== * `codetransformer `_ * `byteplay `_ * `byteasm `_: an "assembler" for Python 3 bytecodes. 
* `BytecodeAssembler `_ * `PEP 511 -- API for code transformers `_ bytecode-0.15.1/doc/make.bat000066400000000000000000000150601451217043400155510ustar00rootroot00000000000000@ECHO OFF REM Command file for Sphinx documentation if "%SPHINXBUILD%" == "" ( set SPHINXBUILD=sphinx-build ) set BUILDDIR=build set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% . set I18NSPHINXOPTS=%SPHINXOPTS% . if NOT "%PAPER%" == "" ( set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS% ) if "%1" == "" goto help if "%1" == "help" ( :help echo.Please use `make ^` where ^ is one of echo. html to make standalone HTML files echo. dirhtml to make HTML files named index.html in directories echo. singlehtml to make a single large HTML file echo. pickle to make pickle files echo. json to make JSON files echo. htmlhelp to make HTML files and a HTML help project echo. qthelp to make HTML files and a qthelp project echo. devhelp to make HTML files and a Devhelp project echo. epub to make an epub echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter echo. text to make text files echo. man to make manual pages echo. texinfo to make Texinfo files echo. gettext to make PO message catalogs echo. changes to make an overview over all changed/added/deprecated items echo. xml to make Docutils-native XML files echo. pseudoxml to make pseudoxml-XML files for display purposes echo. linkcheck to check all external links for integrity echo. doctest to run all doctests embedded in the documentation if enabled goto end ) if "%1" == "clean" ( for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i del /q /s %BUILDDIR%\* goto end ) %SPHINXBUILD% 2> nul if errorlevel 9009 ( echo. echo.The 'sphinx-build' command was not found. Make sure you have Sphinx echo.installed, then set the SPHINXBUILD environment variable to point echo.to the full path of the 'sphinx-build' executable. 
Alternatively you echo.may add the Sphinx directory to PATH. echo. echo.If you don't have Sphinx installed, grab it from echo.http://sphinx-doc.org/ exit /b 1 ) if "%1" == "html" ( %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html if errorlevel 1 exit /b 1 echo. echo.Build finished. The HTML pages are in %BUILDDIR%/html. goto end ) if "%1" == "dirhtml" ( %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml if errorlevel 1 exit /b 1 echo. echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. goto end ) if "%1" == "singlehtml" ( %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml if errorlevel 1 exit /b 1 echo. echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml. goto end ) if "%1" == "pickle" ( %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle if errorlevel 1 exit /b 1 echo. echo.Build finished; now you can process the pickle files. goto end ) if "%1" == "json" ( %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json if errorlevel 1 exit /b 1 echo. echo.Build finished; now you can process the JSON files. goto end ) if "%1" == "htmlhelp" ( %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp if errorlevel 1 exit /b 1 echo. echo.Build finished; now you can run HTML Help Workshop with the ^ .hhp project file in %BUILDDIR%/htmlhelp. goto end ) if "%1" == "qthelp" ( %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp if errorlevel 1 exit /b 1 echo. echo.Build finished; now you can run "qcollectiongenerator" with the ^ .qhcp project file in %BUILDDIR%/qthelp, like this: echo.^> qcollectiongenerator %BUILDDIR%\qthelp\bytecode.qhcp echo.To view the help file: echo.^> assistant -collectionFile %BUILDDIR%\qthelp\bytecode.ghc goto end ) if "%1" == "devhelp" ( %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp if errorlevel 1 exit /b 1 echo. echo.Build finished. goto end ) if "%1" == "epub" ( %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub if errorlevel 1 exit /b 1 echo. echo.Build finished. 
The epub file is in %BUILDDIR%/epub. goto end ) if "%1" == "latex" ( %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex if errorlevel 1 exit /b 1 echo. echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. goto end ) if "%1" == "latexpdf" ( %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex cd %BUILDDIR%/latex make all-pdf cd %BUILDDIR%/.. echo. echo.Build finished; the PDF files are in %BUILDDIR%/latex. goto end ) if "%1" == "latexpdfja" ( %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex cd %BUILDDIR%/latex make all-pdf-ja cd %BUILDDIR%/.. echo. echo.Build finished; the PDF files are in %BUILDDIR%/latex. goto end ) if "%1" == "text" ( %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text if errorlevel 1 exit /b 1 echo. echo.Build finished. The text files are in %BUILDDIR%/text. goto end ) if "%1" == "man" ( %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man if errorlevel 1 exit /b 1 echo. echo.Build finished. The manual pages are in %BUILDDIR%/man. goto end ) if "%1" == "texinfo" ( %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo if errorlevel 1 exit /b 1 echo. echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo. goto end ) if "%1" == "gettext" ( %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale if errorlevel 1 exit /b 1 echo. echo.Build finished. The message catalogs are in %BUILDDIR%/locale. goto end ) if "%1" == "changes" ( %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes if errorlevel 1 exit /b 1 echo. echo.The overview file is in %BUILDDIR%/changes. goto end ) if "%1" == "linkcheck" ( %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck if errorlevel 1 exit /b 1 echo. echo.Link check complete; look for any errors in the above output ^ or in %BUILDDIR%/linkcheck/output.txt. goto end ) if "%1" == "doctest" ( %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest if errorlevel 1 exit /b 1 echo. 
echo.Testing of doctests in the sources finished, look at the ^ results in %BUILDDIR%/doctest/output.txt. goto end ) if "%1" == "xml" ( %SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml if errorlevel 1 exit /b 1 echo. echo.Build finished. The XML files are in %BUILDDIR%/xml. goto end ) if "%1" == "pseudoxml" ( %SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml if errorlevel 1 exit /b 1 echo. echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml. goto end ) :end bytecode-0.15.1/doc/requirements.txt000066400000000000000000000000511451217043400174220ustar00rootroot00000000000000sphinx>=4 sphinx-rtd-theme>=1 sphinx-tabsbytecode-0.15.1/doc/todo.rst000066400000000000000000000022021451217043400156350ustar00rootroot00000000000000TODO list ========= * Remove Bytecode.cellvars and Bytecode.freevars? * Remove Bytecode.first_lineno? Compute it on conversions. * Add instruction constants/enums? Example:: from bytecode import instructions as i bytecode = Bytecode([i.LOAD_NAME('print'), i.LOAD_CONST('Hello World!'), i.CALL_FUNCTION(1), i.POP_TOP(), i.LOAD_CONST(None), i.RETURN_VALUE()]) Should we support instructions without parenthesis for instruction with no parameter? Example with POP_TOP and RETURN_VALUE:: from bytecode import instructions as i bytecode = Bytecode([i.LOAD_NAME('print'), i.LOAD_CONST('Hello World!'), i.CALL_FUNCTION(1), i.POP_TOP, i.LOAD_CONST(None), i.RETURN_VALUE]) * Nicer API for function arguments in bytecode object? Bytecode has argcount, kwonlyargcount and argnames. 4 types of parameters: indexed, ``*args``, ``**kwargs`` and ``*, kwonly=3``. See inspect.signature() bytecode-0.15.1/doc/usage.rst000066400000000000000000000105601451217043400160020ustar00rootroot00000000000000************** Bytecode Usage ************** Installation ============ Install bytecode:: python3 -m pip install bytecode ``bytecode`` requires Python 3.8 or newer. 
Hello World =========== Abstract bytecode ----------------- Example using abstract bytecode to execute ``print('Hello World!')``:: from bytecode import Instr, Bytecode bytecode = Bytecode([Instr("LOAD_NAME", 'print'), Instr("LOAD_CONST", 'Hello World!'), Instr("CALL_FUNCTION", 1), Instr("POP_TOP"), Instr("LOAD_CONST", None), Instr("RETURN_VALUE")]) code = bytecode.to_code() exec(code) Output:: Hello World! Concrete bytecode ----------------- Example using concrete bytecode to execute ``print('Hello World!')``:: from bytecode import ConcreteInstr, ConcreteBytecode bytecode = ConcreteBytecode() bytecode.names = ['print'] bytecode.consts = ['Hello World!', None] bytecode.extend([ConcreteInstr("LOAD_NAME", 0), ConcreteInstr("LOAD_CONST", 0), ConcreteInstr("CALL_FUNCTION", 1), ConcreteInstr("POP_TOP"), ConcreteInstr("LOAD_CONST", 1), ConcreteInstr("RETURN_VALUE")]) code = bytecode.to_code() exec(code) Output:: Hello World! Setting the compiler flags -------------------------- Bytecode, ConcreteBytecode and ControlFlowGraph instances all have a flags attribute which is an instance of the CompilerFlags enum. The value can be manipulated like any binary flag. Setting the OPTIMIZED flag:: from bytecode import Bytecode, CompilerFlags bytecode = Bytecode() bytecode.flags |= CompilerFlags.OPTIMIZED Clearing the OPTIMIZED flag:: from bytecode import Bytecode, CompilerFlags bytecode = Bytecode() bytecode.flags &= ~CompilerFlags.OPTIMIZED The flags can be updated based on the instructions stored in the code object using the ``update_flags`` method. Simple loop =========== Bytecode of ``for x in (1, 2, 3): print(x)``: .. tabs:: .. group-tab:: Python >= 3.8 .. 
code:: python from bytecode import Label, Instr, Bytecode loop_start = Label() loop_done = Label() loop_exit = Label() code = Bytecode( [ # Python 3.8 removed SETUP_LOOP Instr("LOAD_CONST", (1, 2, 3)), Instr("GET_ITER"), loop_start, Instr("FOR_ITER", loop_exit), Instr("STORE_NAME", "x"), Instr("LOAD_NAME", "print"), Instr("LOAD_NAME", "x"), Instr("CALL_FUNCTION", 1), Instr("POP_TOP"), Instr("JUMP_ABSOLUTE", loop_start), # Python 3.8 removed the need to manually manage blocks in loops # This is now handled internally by the interpreter loop_exit, Instr("LOAD_CONST", None), Instr("RETURN_VALUE"), ] ) # The conversion to Python code object resolve jump targets: # abstract labels are replaced with concrete offsets code = code.to_code() exec(code) Output:: 1 2 3 .. _ex-cond-jump: Conditional jump ================ Bytecode of the Python code ``print('yes' if test else 'no')``:: from bytecode import Label, Instr, Bytecode label_else = Label() label_print = Label() bytecode = Bytecode([Instr('LOAD_NAME', 'print'), Instr('LOAD_NAME', 'test'), Instr('POP_JUMP_IF_FALSE', label_else), Instr('LOAD_CONST', 'yes'), Instr('JUMP_FORWARD', label_print), label_else, Instr('LOAD_CONST', 'no'), label_print, Instr('CALL_FUNCTION', 1), Instr('LOAD_CONST', None), Instr('RETURN_VALUE')]) code = bytecode.to_code() test = 0 exec(code) test = 1 exec(code) Output:: no yes .. note:: Instructions are only indented for readability. bytecode-0.15.1/pyproject.toml000066400000000000000000000042671451217043400163220ustar00rootroot00000000000000[project] name = "bytecode" description = "Python module to generate and modify bytecode" readme = "README.rst" requires-python = ">=3.8" license = {file = "COPYING"} authors = [ {name = "Victor Stinner", email = "victor.stinner@gmail.com"} ] maintainers = [ {name = "Matthieu C. 
Dartiailh", email = "m.dartiailh@gmail.com"} ] classifiers = [ "Development Status :: 4 - Beta", "Intended Audience :: Developers", "License :: OSI Approved :: MIT License", "Natural Language :: English", "Operating System :: OS Independent", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", "Topic :: Software Development :: Libraries :: Python Modules", ] dependencies = ["typing_extensions;python_version<'3.10'"] dynamic=["version"] [project.urls] homepage = "https://github.com/MatthieuDartiailh/bytecode" documentation = "https://bytecode.readthedocs.io/en/latest/" repository = "https://github.com/MatthieuDartiailh/bytecode" changelog = "https://github.com/MatthieuDartiailh/bytecode/blob/main/doc/changelog.rst" [build-system] requires = ["setuptools>=61.2", "wheel", "setuptools_scm[toml]>=3.4.3"] build-backend = "setuptools.build_meta" [tool.setuptools_scm] write_to = "src/bytecode/version.py" write_to_template = """ # This file is auto-generated by setuptools-scm do NOT edit it. from collections import namedtuple #: A namedtuple of the version info for the current release. _version_info = namedtuple("_version_info", "major minor micro status") parts = "{version}".split(".", 3) version_info = _version_info( int(parts[0]), int(parts[1]), int(parts[2]), parts[3] if len(parts) == 4 else "", ) # Remove everything but the 'version_info' from this module. 
del namedtuple, _version_info, parts __version__ = "{version}" """ [tool.black] line-length = 88 # Enforce the default value [tool.pytest.ini_options] minversion = "6.0" [tool.mypy] follow_imports = "normal" strict_optional = true [tool.isort] profile = "black" extra_standard_library = ["opcode"] bytecode-0.15.1/scripts/000077500000000000000000000000001451217043400150645ustar00rootroot00000000000000bytecode-0.15.1/scripts/frameworks/000077500000000000000000000000001451217043400172445ustar00rootroot00000000000000bytecode-0.15.1/scripts/frameworks/boto3/000077500000000000000000000000001451217043400202725ustar00rootroot00000000000000bytecode-0.15.1/scripts/frameworks/boto3/run.sh000066400000000000000000000002511451217043400214300ustar00rootroot00000000000000#!/bin/bash -eu set -e set -u PREFIX=${1}-${2} PY=${2} cd ${PREFIX}/boto3 source ${PREFIX}/.venv/bin/activate python scripts/ci/run-tests deactivate cd - bytecode-0.15.1/scripts/frameworks/boto3/setup.sh000066400000000000000000000006321451217043400217670ustar00rootroot00000000000000#!/bin/bash -eu set -e set -u PREFIX=${1}-${2} PY=${2} # Clone boto3 test -d ${PREFIX}/boto3 || git clone --depth=1 https://github.com/boto/boto3.git ${PREFIX}/boto3 # Create venv python$PY -m venv ${PREFIX}/.venv source ${PREFIX}/.venv/bin/activate # Install bytecode pip install setuptools wheel pip install -e . 
def format_bytecode(
    bytecode: Union[Bytecode, ConcreteBytecode, ControlFlowGraph],
    *,
    lineno: bool = False,
) -> str:
    """Return a human-readable, multi-line listing of *bytecode*.

    The layout depends on the container type:

    * ``ConcreteBytecode``: one line per instruction, prefixed with its byte
      offset, followed by an ``Exception table:`` section when the bytecode
      has a non-empty ``exception_table``.
    * ``Bytecode``: instructions are indented; each ``Label`` is printed on
      its own line as ``label_instr<index>:`` and jump arguments refer to
      those names.
    * ``ControlFlowGraph``: each block is printed under a ``block<N>:``
      header (numbered from 1), followed by ``-> block<M>`` when the block
      has a ``next_block``.

    ``TryBegin`` / ``TryEnd`` pseudo-instructions increase / decrease the
    indentation of the instructions they enclose.

    Parameters
    ----------
    bytecode:
        The bytecode container to format.
    lineno:
        When true, each line also shows the instruction index (or offset)
        and, whenever the current line number changes, that line number.

    Returns
    -------
    str
        The listing, with the last character of the internal buffer (a
        newline whenever anything was written) stripped.

    Raises
    ------
    TypeError
        If *bytecode* is not one of the three supported container types.
    """
    # TryBegin pseudo-instructions seen so far; a TryEnd is displayed with
    # the position of its matching TryBegin in this list.
    try_begins: List[TryBegin] = []

    # Extra indentation applied inside a try region. The same constant is
    # used to indent on TryBegin and to dedent on TryEnd so that both
    # operations always cancel out.
    try_indent = "  "

    def format_line(index, line):
        """Prefix *line* with line-number/index columns when requested."""
        nonlocal cur_lineno, prev_lineno
        if not lineno:
            return line
        if cur_lineno != prev_lineno:
            # Only show the line number when it changes.
            line = "L.% 3s % 3s: %s" % (cur_lineno, index, line)
            prev_lineno = cur_lineno
        else:
            line = " % 3s: %s" % (index, line)
        return line

    def format_instr(instr, labels=None):
        """Return ``NAME`` or ``NAME ARG`` for a single instruction."""
        text = instr.name
        arg = instr._arg
        if arg is not UNSET:
            if isinstance(arg, Label):
                try:
                    arg = "<%s>" % labels[arg]
                except KeyError:
                    arg = ""
            elif isinstance(arg, BasicBlock):
                # Blocks are keyed by identity in the label mapping.
                try:
                    arg = "<%s>" % labels[id(arg)]
                except KeyError:
                    arg = ""
            else:
                arg = repr(arg)
            text = "%s %s" % (text, arg)
        return text

    def format_try_begin(instr: TryBegin, labels: dict) -> str:
        """Format a TryBegin and record it so its TryEnd can refer to it."""
        if isinstance(instr.target, Label):
            try:
                arg = "<%s>" % labels[instr.target]
            except KeyError:
                arg = ""
        else:
            try:
                arg = "<%s>" % labels[id(instr.target)]
            except KeyError:
                arg = ""
        line = "TryBegin %s -> %s [%s]" % (
            len(try_begins),
            arg,
            instr.stack_depth,
        ) + (" last_i" if instr.push_lasti else "")

        # Track the seen try begin
        try_begins.append(instr)

        return line

    def format_try_end(instr: TryEnd) -> str:
        """Format a TryEnd using the index of its recorded TryBegin."""
        try:
            i = try_begins.index(instr.entry)
        except ValueError:
            # The matching TryBegin was never formatted.
            i = ""
        return "TryEnd (%s)" % i

    buffer = StringIO()
    indent = " " * 4

    cur_lineno = bytecode.first_lineno
    prev_lineno = None

    if isinstance(bytecode, ConcreteBytecode):
        offset = 0
        for c_instr in bytecode:
            if c_instr.lineno is not None:
                cur_lineno = c_instr.lineno
            if lineno:
                line = format_line(offset, format_instr(c_instr))
            else:
                line = "% 3s %s" % (offset, format_instr(c_instr))
            buffer.write(line + "\n")

            # Only real instructions occupy space in the code string.
            if isinstance(c_instr, ConcreteInstr):
                offset += c_instr.size

        if bytecode.exception_table:
            buffer.write("\n")
            buffer.write("Exception table:\n")
            for entry in bytecode.exception_table:
                buffer.write(
                    f"{entry.start_offset} to {entry.stop_offset} -> "
                    f"{entry.target} [{entry.stack_depth}]"
                    + (" lasti" if entry.push_lasti else "")
                    + "\n"
                )

    elif isinstance(bytecode, Bytecode):
        # First pass: name every label so that forward jumps can be resolved.
        labels: dict[Label, str] = {}
        for index, instr in enumerate(bytecode):
            if isinstance(instr, Label):
                labels[instr] = "label_instr%s" % index

        for index, instr in enumerate(bytecode):
            if isinstance(instr, Label):
                label = labels[instr]
                line = "%s:" % label
                if index != 0:
                    buffer.write("\n")
            elif isinstance(instr, TryBegin):
                line = indent + format_line(index, format_try_begin(instr, labels))
                indent += try_indent
            elif isinstance(instr, TryEnd):
                indent = indent[: -len(try_indent)]
                line = indent + format_line(index, format_try_end(instr))
            else:
                if instr.lineno is not None:
                    cur_lineno = instr.lineno
                line = format_instr(instr, labels)
                line = indent + format_line(index, line)
            buffer.write(line + "\n")
        buffer.write("\n")

    elif isinstance(bytecode, ControlFlowGraph):
        cfg_labels = {}
        for block_index, block in enumerate(bytecode, 1):
            cfg_labels[id(block)] = "block%s" % block_index

        for block_index, block in enumerate(bytecode, 1):
            buffer.write("%s:\n" % cfg_labels[id(block)])
            seen_instr = False
            for index, instr in enumerate(block):
                if isinstance(instr, TryBegin):
                    line = indent + format_line(
                        index, format_try_begin(instr, cfg_labels)
                    )
                    indent += try_indent
                elif isinstance(instr, TryEnd):
                    # Dedent only once a real instruction has been seen in
                    # this block.
                    if seen_instr:
                        indent = indent[: -len(try_indent)]
                    line = indent + format_line(index, format_try_end(instr))
                else:
                    if isinstance(instr, Instr):
                        seen_instr = True
                    if instr.lineno is not None:
                        cur_lineno = instr.lineno
                    line = format_instr(instr, cfg_labels)
                    line = indent + format_line(index, line)
                buffer.write(line + "\n")
            if block.next_block is not None:
                buffer.write(indent + "-> %s\n" % cfg_labels[id(block.next_block)])
            buffer.write("\n")
    else:
        raise TypeError("unknown bytecode class")

    # Drop the final character of the buffer (the last newline written).
    return buffer.getvalue()[:-1]


def dump_bytecode(
    bytecode: Union[Bytecode, ConcreteBytecode, ControlFlowGraph],
    *,
    lineno: bool = False,
) -> None:
    """Print the listing produced by :func:`format_bytecode` to stdout."""
    print(format_bytecode(bytecode, lineno=lineno))
# the 'bytecode' variable free
import sys
import types
from abc import abstractmethod
from typing import (
    Any,
    Dict,
    Generic,
    Iterator,
    List,
    Optional,
    Sequence,
    SupportsIndex,
    TypeVar,
    Union,
    overload,
)

import bytecode as _bytecode
from bytecode.flags import CompilerFlags, infer_flags
from bytecode.instr import (
    _UNSET,
    UNSET,
    BaseInstr,
    Instr,
    Label,
    SetLineno,
    TryBegin,
    TryEnd,
)


class BaseBytecode:
    """Code-level metadata shared by the bytecode containers of this module.

    Holds the argument counts, names, flags and variable lists; subclasses
    supply the instructions and must implement ``compute_stacksize``.
    """

    def __init__(self) -> None:
        self.argcount = self.posonlyargcount = self.kwonlyargcount = 0
        self.first_lineno = 1
        # NOTE(review): both defaults below are empty strings in this copy of
        # the file; confirm that no placeholder text was lost from them.
        self.name = ""
        self.qualname = self.name
        self.filename = ""
        self.docstring: Union[str, None, _UNSET] = UNSET
        # We cannot recreate cellvars/freevars from instructions because of super()
        # special-case, which involves an implicit __class__ cell/free variable
        # We could try to detect it.
        # CPython itself breaks if one aliases super so we could maybe make it work
        # but it will require careful design and will be done later in the future.
        self.cellvars: List[str] = []
        self.freevars: List[str] = []
        self._flags: CompilerFlags = CompilerFlags(0)

    def _copy_attr_from(self, bytecode: "BaseBytecode") -> None:
        """Copy every metadata attribute of *bytecode* onto this object.

        ``cellvars`` and ``freevars`` are copied into new lists so that the
        two objects do not share them.
        """
        # Plain attributes, copied in declaration order; ``flags`` goes through
        # the property setter.
        for attr in (
            "argcount",
            "posonlyargcount",
            "kwonlyargcount",
            "flags",
            "first_lineno",
            "name",
            "qualname",
            "filename",
            "docstring",
        ):
            setattr(self, attr, getattr(bytecode, attr))
        self.cellvars = list(bytecode.cellvars)
        self.freevars = list(bytecode.freevars)

    def __eq__(self, other: Any) -> bool:
        """Return True when *other* has the same type, metadata and stack size."""
        if type(self) is not type(other):
            return False

        # The cheap attribute comparisons run first, the stack size computation
        # only when all of them agree.
        compared = (
            "argcount",
            "posonlyargcount",
            "kwonlyargcount",
            "flags",
            "first_lineno",
            "filename",
            "name",
            "qualname",
            "docstring",
            "cellvars",
            "freevars",
        )
        for attr in compared:
            if getattr(self, attr) != getattr(other, attr):
                return False

        return self.compute_stacksize() == other.compute_stacksize()

    @property
    def flags(self) -> CompilerFlags:
        """Compiler flags of the code, always stored as ``CompilerFlags``."""
        return self._flags

    @flags.setter
    def flags(self, value: CompilerFlags) -> None:
        # Anything that is not already a CompilerFlags is converted.
        self._flags = (
            value if isinstance(value, CompilerFlags) else CompilerFlags(value)
        )

    def update_flags(self, *, is_async: Optional[bool] = None) -> None:
        """Replace ``flags`` with the value returned by ``infer_flags``."""
        # infer_flags reasonably only accept concrete subclasses
        self.flags = infer_flags(self, is_async)  # type: ignore

    @abstractmethod
    def compute_stacksize(self, *, check_pre_and_post: bool = True) -> int:
        """Return the stack size required by the code (subclass hook)."""
        raise NotImplementedError


T = TypeVar("T", bound="_BaseBytecodeList")
U = TypeVar("U")


class _BaseBytecodeList(BaseBytecode, list, Generic[U]):
    """List subclass providing type stable slicing and copying."""

    @overload
    def __getitem__(self, index: SupportsIndex) -> U:
        ...

    @overload
    def __getitem__(self: T, index: slice) -> T:
        ...
def __getitem__(self, index): value = super().__getitem__(index) if isinstance(index, slice): value = type(self)(value) value._copy_attr_from(self) return value def copy(self: T) -> T: # This is a list subclass and works new = type(self)(super().copy()) # type: ignore new._copy_attr_from(self) return new def legalize(self) -> None: """Check that all the element of the list are valid and remove SetLineno.""" lineno_pos = [] set_lineno = None current_lineno = self.first_lineno for pos, instr in enumerate(self): if isinstance(instr, SetLineno): set_lineno = instr.lineno lineno_pos.append(pos) continue # Filter out other pseudo instructions if not isinstance(instr, BaseInstr): continue if set_lineno is not None: instr.lineno = set_lineno elif instr.lineno is UNSET: instr.lineno = current_lineno elif instr.lineno is not None: current_lineno = instr.lineno for i in reversed(lineno_pos): del self[i] def __iter__(self) -> Iterator[U]: instructions = super().__iter__() for instr in instructions: self._check_instr(instr) yield instr def _check_instr(self, instr): raise NotImplementedError() V = TypeVar("V") class _InstrList(List[V]): # Providing a stricter typing for this helper whose use is limited to the __eq__ # implementation is more effort than it is worth. 
def _flat(self) -> List: instructions: List = [] labels = {} jumps = [] try_begins: Dict[TryBegin, int] = {} try_jumps = [] offset = 0 instr: Any for index, instr in enumerate(self): if isinstance(instr, Label): instructions.append("label_instr%s" % index) labels[instr] = offset elif isinstance(instr, TryBegin): try_begins.setdefault(instr, len(try_begins)) assert isinstance(instr.target, Label) try_jumps.append((instr.target, len(instructions))) instructions.append(instr) elif isinstance(instr, TryEnd): instructions.append(("TryEnd", try_begins[instr.entry])) else: if isinstance(instr, Instr) and isinstance(instr.arg, Label): target_label = instr.arg instr = _bytecode.ConcreteInstr( instr.name, 0, location=instr.location ) jumps.append((target_label, instr)) instructions.append(instr) offset += 1 for target_label, instr in jumps: instr.arg = labels[target_label] for target_label, index in try_jumps: instr = instructions[index] assert isinstance(instr, TryBegin) instructions[index] = ( "TryBegin", try_begins[instr], labels[target_label], instr.push_lasti, ) return instructions def __eq__(self, other: Any) -> bool: if not isinstance(other, _InstrList): other = _InstrList(other) return self._flat() == other._flat() class Bytecode( _InstrList[Union[Instr, Label, TryBegin, TryEnd, SetLineno]], _BaseBytecodeList[Union[Instr, Label, TryBegin, TryEnd, SetLineno]], ): def __init__( self, instructions: Sequence[Union[Instr, Label, TryBegin, TryEnd, SetLineno]] = (), ) -> None: BaseBytecode.__init__(self) self.argnames: List[str] = [] for instr in instructions: self._check_instr(instr) self.extend(instructions) def __iter__(self) -> Iterator[Union[Instr, Label, TryBegin, TryEnd, SetLineno]]: instructions = super().__iter__() seen_try_begin = False for instr in instructions: self._check_instr(instr) if isinstance(instr, TryBegin): if seen_try_begin: raise RuntimeError("TryBegin pseudo instructions cannot be nested.") seen_try_begin = True elif isinstance(instr, TryEnd): 
seen_try_begin = False yield instr def _check_instr(self, instr: Any) -> None: if not isinstance(instr, (Label, SetLineno, Instr, TryBegin, TryEnd)): raise ValueError( "Bytecode must only contain Label, " "SetLineno, and Instr objects, " "but %s was found" % type(instr).__name__ ) def _copy_attr_from(self, bytecode: BaseBytecode) -> None: super()._copy_attr_from(bytecode) if isinstance(bytecode, Bytecode): self.argnames = bytecode.argnames @staticmethod def from_code( code: types.CodeType, prune_caches: bool = True, conserve_exception_block_stackdepth: bool = False, ) -> "Bytecode": concrete = _bytecode.ConcreteBytecode.from_code(code) return concrete.to_bytecode( prune_caches=prune_caches, conserve_exception_block_stackdepth=conserve_exception_block_stackdepth, ) def compute_stacksize(self, *, check_pre_and_post: bool = True) -> int: cfg = _bytecode.ControlFlowGraph.from_bytecode(self) return cfg.compute_stacksize(check_pre_and_post=check_pre_and_post) def to_code( self, compute_jumps_passes: Optional[int] = None, stacksize: Optional[int] = None, *, check_pre_and_post: bool = True, compute_exception_stack_depths: bool = True, ) -> types.CodeType: # Prevent reconverting the concrete bytecode to bytecode and cfg to do the # calculation if we need to do it. 
if stacksize is None or ( sys.version_info >= (3, 11) and compute_exception_stack_depths ): cfg = _bytecode.ControlFlowGraph.from_bytecode(self) stacksize = cfg.compute_stacksize( check_pre_and_post=check_pre_and_post, compute_exception_stack_depths=compute_exception_stack_depths, ) self = cfg.to_bytecode() compute_exception_stack_depths = False # avoid redoing everything bc = self.to_concrete_bytecode( compute_jumps_passes=compute_jumps_passes, compute_exception_stack_depths=compute_exception_stack_depths, ) return bc.to_code( stacksize=stacksize, compute_exception_stack_depths=compute_exception_stack_depths, ) def to_concrete_bytecode( self, compute_jumps_passes: Optional[int] = None, compute_exception_stack_depths: bool = True, ) -> "_bytecode.ConcreteBytecode": converter = _bytecode._ConvertBytecodeToConcrete(self) return converter.to_concrete_bytecode( compute_jumps_passes=compute_jumps_passes, compute_exception_stack_depths=compute_exception_stack_depths, ) bytecode-0.15.1/src/bytecode/cfg.py000066400000000000000000001214711451217043400171010ustar00rootroot00000000000000import sys import types from collections import defaultdict from dataclasses import dataclass from typing import ( Any, Dict, Generator, Iterable, Iterator, List, Optional, Set, SupportsIndex, Tuple, TypeVar, Union, overload, ) # alias to keep the 'bytecode' variable free import bytecode as _bytecode from bytecode.concrete import ConcreteInstr from bytecode.flags import CompilerFlags from bytecode.instr import UNSET, Instr, Label, SetLineno, TryBegin, TryEnd T = TypeVar("T", bound="BasicBlock") U = TypeVar("U", bound="ControlFlowGraph") class BasicBlock(_bytecode._InstrList[Union[Instr, SetLineno, TryBegin, TryEnd]]): def __init__( self, instructions: Optional[ Iterable[Union[Instr, SetLineno, TryBegin, TryEnd]] ] = None, ) -> None: # a BasicBlock object, or None self.next_block: Optional["BasicBlock"] = None if instructions: super().__init__(instructions) def __iter__(self) -> 
Iterator[Union[Instr, SetLineno, TryBegin, TryEnd]]: index = 0 while index < len(self): instr = self[index] index += 1 if not isinstance(instr, (SetLineno, Instr, TryBegin, TryEnd)): raise ValueError( "BasicBlock must only contain SetLineno and Instr objects, " "but %s was found" % instr.__class__.__name__ ) if isinstance(instr, Instr) and instr.has_jump(): if index < len(self) and any( isinstance(self[i], Instr) for i in range(index, len(self)) ): raise ValueError( "Only the last instruction of a basic " "block can be a jump" ) if not isinstance(instr.arg, BasicBlock): raise ValueError( "Jump target must a BasicBlock, got %s", type(instr.arg).__name__, ) if isinstance(instr, TryBegin): if not isinstance(instr.target, BasicBlock): raise ValueError( "TryBegin target must a BasicBlock, got %s", type(instr.target).__name__, ) yield instr @overload def __getitem__( self, index: SupportsIndex ) -> Union[Instr, SetLineno, TryBegin, TryEnd]: ... @overload def __getitem__(self: T, index: slice) -> T: ... 
def __getitem__(self, index): value = super().__getitem__(index) if isinstance(index, slice): value = type(self)(value) value.next_block = self.next_block return value def get_last_non_artificial_instruction(self) -> Optional[Instr]: for instr in reversed(self): if isinstance(instr, Instr): return instr return None def copy(self: T) -> T: new = type(self)(super().copy()) new.next_block = self.next_block return new def legalize(self, first_lineno: int) -> int: """Check that all the element of the list are valid and remove SetLineno.""" lineno_pos = [] set_lineno = None current_lineno = first_lineno for pos, instr in enumerate(self): if isinstance(instr, SetLineno): set_lineno = current_lineno = instr.lineno lineno_pos.append(pos) continue if isinstance(instr, (TryBegin, TryEnd)): continue if set_lineno is not None: instr.lineno = set_lineno elif instr.lineno is UNSET: instr.lineno = current_lineno elif instr.lineno is not None: current_lineno = instr.lineno for i in reversed(lineno_pos): del self[i] return current_lineno def get_jump(self) -> Optional["BasicBlock"]: if not self: return None last_instr = self.get_last_non_artificial_instruction() if last_instr is None or not last_instr.has_jump(): return None target_block = last_instr.arg assert isinstance(target_block, BasicBlock) return target_block def get_trailing_try_end(self, index: int): while index + 1 < len(self): if isinstance(b := self[index + 1], TryEnd): return b index += 1 return None def _update_size(pre_delta, post_delta, size, maxsize, minsize): size += pre_delta if size < 0: msg = "Failed to compute stacksize, got negative size" raise RuntimeError(msg) size += post_delta maxsize = max(maxsize, size) minsize = min(minsize, size) return size, maxsize, minsize # We can never have nested TryBegin, so we can simply update the min stack size # when we encounter one and use the number we have when we encounter the TryEnd @dataclass class _StackSizeComputationStorage: """Common storage shared by the 
computers involved in computing CFG stack usage.""" #: Should we check that all stack operation are "safe" i.e. occurs while there #: is a sufficient number of items on the stack. check_pre_and_post: bool #: Id the blocks for which an analysis is under progress to avoid getting stuck #: in recursions. seen_blocks: Set[int] #: Sizes and exception handling status with which the analysis of the block #: has been performed. Used to avoid running multiple times equivalent analysis. blocks_startsizes: Dict[int, Set[Tuple[int, Optional[bool]]]] #: Track the encountered TryBegin pseudo-instruction to update their target #: depth at the end of the calculation. try_begins: List[TryBegin] #: Stacksize that should be used for exception blocks. This is the smallest size #: with which this block was reached which is the only size that can be safely #: restored. exception_block_startsize: Dict[int, int] #: Largest stack size used in an exception block. We record the size corresponding #: to the smallest start size for the block since the interpreter enforces that #: we start with this size. exception_block_maxsize: Dict[int, int] class _StackSizeComputer: """Helper computing the stack usage for a single block.""" #: Common storage shared by all helpers involved in the stack size computation common: _StackSizeComputationStorage #: Block this helper is running the computation for. block: BasicBlock #: Current stack usage. size: int #: Maximal stack usage. maxsize: int #: Minimal stack usage. This value is only relevant in between a TryBegin/TryEnd #: pair and determine the startsize for the exception handling block associated #: with the try begin. minsize: int #: Flag indicating if the block analyzed is an exception handler (i.e. a target #: of a TryBegin). exception_handler: Optional[bool] #: TryBegin that was encountered before jumping to this block and for which #: no try end was met yet. 
pending_try_begin: Optional[TryBegin] def __init__( self, common: _StackSizeComputationStorage, block: BasicBlock, size: int, maxsize: int, minsize: int, exception_handler: Optional[bool], pending_try_begin: Optional[TryBegin], ) -> None: self.common = common self.block = block self.size = size self.maxsize = maxsize self.minsize = minsize self.exception_handler = exception_handler self.pending_try_begin = pending_try_begin self._current_try_begin = pending_try_begin def run(self) -> Generator[Union["_StackSizeComputer", int], int, None]: """Iterate over the block instructions to compute stack usage.""" # Blocks are not hashable but in this particular context we know we won't be # modifying blocks in place so we can safely use their id as hash rather than # making them generally hashable which would be weird since they are list # subclasses block_id = id(self.block) # If the block is currently being visited (seen = True) or # it was visited previously with parameters that makes the computation # irrelevant return the maxsize. fingerprint = (self.size, self.exception_handler) if id(self.block) in self.common.seen_blocks or ( not self._is_stacksize_computation_relevant(block_id, fingerprint) ): yield self.maxsize # Prevent recursive visit of block if two blocks are nested (jump from one # to the other). self.common.seen_blocks.add(block_id) # Track which size has been used to run an analysis to avoid re-running multiple # times the same calculation. self.common.blocks_startsizes[block_id].add(fingerprint) # If this block is an exception handler reached through the exception table # we will push some extra objects on the stack before processing start. if self.exception_handler is not None: self._update_size(0, 1 + self.exception_handler) # True is used to indicated that push_lasti is True, leading to pushing # an extra object on the stack. 
for i, instr in enumerate(self.block): # Ignore SetLineno if isinstance(instr, (SetLineno)): continue # When we encounter a TryBegin, we: # - store it as the current TryBegin (since TryBegin cannot be nested) # - record its existence to remember to update its stack size when # the computation ends # - update the minsize to the current size value since we need to # know the minimal stack usage between the TryBegin/TryEnd pair to # set the startsize of the exception handling block # # This approach does not require any special handling for with statements. if isinstance(instr, TryBegin): assert self._current_try_begin is None self.common.try_begins.append(instr) self._current_try_begin = instr self.minsize = self.size continue elif isinstance(instr, TryEnd): # When we encounter a TryEnd we can start the computation for the # exception block using the minimum stack size encountered since # the TryBegin matching this TryEnd. # TryBegin cannot be nested so a TryEnd should always match the # current try begin. However inside the CFG some blocks may # start with a TryEnd relevant only when reaching this block # through a particular jump. So we are lenient here. if instr.entry is not self._current_try_begin: continue # Compute the stack usage of the exception handler assert isinstance(instr.entry.target, BasicBlock) yield from self._compute_exception_handler_stack_usage( instr.entry.target, instr.entry.push_lasti, ) self._current_try_begin = None continue # For instructions with a jump first compute the stacksize required when the # jump is taken. if instr.has_jump(): effect = ( instr.pre_and_post_stack_effect(jump=True) if self.common.check_pre_and_post else (instr.stack_effect(jump=True), 0) ) taken_size, maxsize, minsize = _update_size( *effect, self.size, self.maxsize, self.minsize ) # Yield the parameters required to compute the stacksize required # by the block to which the jump points to and resume when we now # the maxsize. 
assert isinstance(instr.arg, BasicBlock) maxsize = yield _StackSizeComputer( self.common, instr.arg, taken_size, maxsize, minsize, None, # Do not propagate the TryBegin if a final instruction is followed # by a TryEnd. None if instr.is_final() and self.block.get_trailing_try_end(i) else self._current_try_begin, ) # Update the maximum used size by the usage implied by the following # the jump self.maxsize = max(self.maxsize, maxsize) # For unconditional jumps abort early since the other instruction will # never be seen. if instr.is_uncond_jump(): # Check for TryEnd after the final instruction which is possible # TryEnd being only pseudo instructions if te := self.block.get_trailing_try_end(i): # TryBegin cannot be nested assert te.entry is self._current_try_begin assert isinstance(te.entry.target, BasicBlock) yield from self._compute_exception_handler_stack_usage( te.entry.target, te.entry.push_lasti, ) self.common.seen_blocks.remove(id(self.block)) yield self.maxsize # jump=False: non-taken path of jumps, or any non-jump effect = ( instr.pre_and_post_stack_effect(jump=False) if self.common.check_pre_and_post else (instr.stack_effect(jump=False), 0) ) self._update_size(*effect) # Instruction is final (return, raise, ...) so any following instruction # in the block is dead code. if instr.is_final(): # Check for TryEnd after the final instruction which is possible # TryEnd being only pseudo instructions. 
if te := self.block.get_trailing_try_end(i): assert isinstance(te.entry.target, BasicBlock) yield from self._compute_exception_handler_stack_usage( te.entry.target, te.entry.push_lasti, ) self.common.seen_blocks.remove(id(self.block)) yield self.maxsize if self.block.next_block: self.maxsize = yield _StackSizeComputer( self.common, self.block.next_block, self.size, self.maxsize, self.minsize, None, self._current_try_begin, ) self.common.seen_blocks.remove(id(self.block)) yield self.maxsize # --- Private API _current_try_begin: Optional[TryBegin] def _update_size(self, pre_delta: int, post_delta: int) -> None: size, maxsize, minsize = _update_size( pre_delta, post_delta, self.size, self.maxsize, self.minsize ) self.size = size self.minsize = minsize self.maxsize = maxsize def _compute_exception_handler_stack_usage( self, block: BasicBlock, push_lasti: bool ) -> Generator[Union["_StackSizeComputer", int], int, None]: b_id = id(block) if self.minsize < self.common.exception_block_startsize[b_id]: block_size = yield _StackSizeComputer( self.common, block, self.minsize, self.maxsize, self.minsize, push_lasti, None, ) # The entry cannot be smaller than abs(stc.minimal_entry_size) as otherwise # we an underflow would have occured. 
self.common.exception_block_startsize[b_id] = self.minsize self.common.exception_block_maxsize[b_id] = block_size def _is_stacksize_computation_relevant( self, block_id: int, fingerprint: Tuple[int, Optional[bool]] ) -> bool: if sys.version_info >= (3, 11): # The computation is relevant if the block was not visited previously # with the same starting size and exception handler status than the # one in use return fingerprint not in self.common.blocks_startsizes[block_id] else: # The computation is relevant if the block was only visited with smaller # starting sizes than the one in use if sizes := self.common.blocks_startsizes[block_id]: return fingerprint[0] > max(f[0] for f in sizes) else: return True class ControlFlowGraph(_bytecode.BaseBytecode): def __init__(self) -> None: super().__init__() self._blocks: List[BasicBlock] = [] self._block_index: Dict[int, int] = {} self.argnames: List[str] = [] self.add_block() def legalize(self) -> None: """Legalize all blocks.""" current_lineno = self.first_lineno for block in self._blocks: current_lineno = block.legalize(current_lineno) def get_block_index(self, block: BasicBlock) -> int: try: return self._block_index[id(block)] except KeyError: raise ValueError("the block is not part of this bytecode") def _add_block(self, block: BasicBlock) -> None: block_index = len(self._blocks) self._blocks.append(block) self._block_index[id(block)] = block_index def add_block( self, instructions: Optional[Iterable[Union[Instr, SetLineno]]] = None ) -> BasicBlock: block = BasicBlock(instructions) self._add_block(block) return block def compute_stacksize( self, *, check_pre_and_post: bool = True, compute_exception_stack_depths: bool = True, ) -> int: """Compute the stack size by iterating through the blocks The implementation make use of a generator function to avoid issue with deeply nested recursions. 
""" # In the absence of any block return 0 if not self: return 0 # Create the common storage for the calculation common = _StackSizeComputationStorage( check_pre_and_post, seen_blocks=set(), blocks_startsizes={id(b): set() for b in self}, exception_block_startsize=dict.fromkeys([id(b) for b in self], 32768), exception_block_maxsize=dict.fromkeys([id(b) for b in self], -32768), try_begins=[], ) # Starting with Python 3.10, generator and coroutines start with one object # on the stack (None, anything is an error). initial_stack_size = 0 if sys.version_info >= (3, 10) and self.flags & ( CompilerFlags.GENERATOR | CompilerFlags.COROUTINE | CompilerFlags.ASYNC_GENERATOR ): initial_stack_size = 1 # Create a generator/coroutine responsible of dealing with the first block coro = _StackSizeComputer( common, self[0], initial_stack_size, 0, 0, None, None ).run() # Create a list of generator that have not yet been exhausted coroutines: List[Generator[Union[_StackSizeComputer, int], int, None]] = [] push_coroutine = coroutines.append pop_coroutine = coroutines.pop args = None try: while True: # Mypy does not seem to honor the fact that one must send None # to a brand new generator irrespective of its send type. args = coro.send(None) # type: ignore # Consume the stored generators as long as they return a simple # integer that is to be used to resume the last stored generator. while isinstance(args, int): coro = pop_coroutine() args = coro.send(args) # Otherwise we enter a new block and we store the generator under # use and create a new one to process the new block push_coroutine(coro) coro = args.run() except IndexError: # The exception occurs when all the generators have been exhausted # in which case the last yielded value is the stacksize. 
assert args is not None and isinstance(args, int) # Exception handling block size is reported separately since we need # to report only the stack usage for the smallest start size for the # block args = max(args, *common.exception_block_maxsize.values()) # Check if there is dead code that may contain TryBegin/TryEnd pairs. # For any such pair we set a huge size (the exception table format does not # mandate a maximum value). We do so so that if the pair is fused with # another it does not alter the computed size. for block in self: if not common.blocks_startsizes[id(block)]: for i in block: if isinstance(i, TryBegin) and i.stack_depth is UNSET: i.stack_depth = 32768 # If requested update the TryBegin stack size if compute_exception_stack_depths: for tb in common.try_begins: size = common.exception_block_startsize[id(tb.target)] assert size >= 0 tb.stack_depth = size return args def __repr__(self) -> str: return "" % len(self._blocks) # Helper to obtain a flat list of instr, which does not refer to block at # anymore. Used for comparison of different CFG. def _get_instructions( self, ) -> List: instructions: List = [] try_begins: Dict[TryBegin, int] = {} for block in self: for index, instr in enumerate(block): if isinstance(instr, TryBegin): assert isinstance(instr.target, BasicBlock) try_begins.setdefault(instr, len(try_begins)) instructions.append( ( "TryBegin", try_begins[instr], self.get_block_index(instr.target), instr.push_lasti, ) ) elif isinstance(instr, TryEnd): instructions.append(("TryEnd", try_begins[instr.entry])) elif isinstance(instr, Instr) and ( instr.has_jump() or instr.is_final() ): if instr.has_jump(): target_block = instr.arg assert isinstance(target_block, BasicBlock) # We use a concrete instr here to be able to use an integer as # argument rather than a Label. This is fine for comparison # purposes which is our sole goal here. 
c_instr = ConcreteInstr( instr.name, self.get_block_index(target_block), location=instr.location, ) instructions.append(c_instr) else: instructions.append(instr) if te := block.get_trailing_try_end(index): instructions.append(("TryEnd", try_begins[te.entry])) break else: instructions.append(instr) return instructions def __eq__(self, other: Any) -> bool: if type(self) is not type(other): return False if self.argnames != other.argnames: return False instrs1 = self._get_instructions() instrs2 = other._get_instructions() if instrs1 != instrs2: return False # FIXME: compare block.next_block return super().__eq__(other) def __len__(self) -> int: return len(self._blocks) def __iter__(self) -> Iterator[BasicBlock]: return iter(self._blocks) @overload def __getitem__(self, index: Union[int, BasicBlock]) -> BasicBlock: ... @overload def __getitem__(self: U, index: slice) -> U: ... def __getitem__(self, index): if isinstance(index, BasicBlock): index = self.get_block_index(index) return self._blocks[index] def __delitem__(self, index: Union[int, BasicBlock]) -> None: if isinstance(index, BasicBlock): index = self.get_block_index(index) block = self._blocks[index] del self._blocks[index] del self._block_index[id(block)] for index in range(index, len(self)): block = self._blocks[index] self._block_index[id(block)] -= 1 def split_block(self, block: BasicBlock, index: int) -> BasicBlock: if not isinstance(block, BasicBlock): raise TypeError("expected block") block_index = self.get_block_index(block) if index < 0: raise ValueError("index must be positive") block = self._blocks[block_index] if index == 0: return block if index > len(block): raise ValueError("index out of the block") instructions = block[index:] if not instructions: if block_index + 1 < len(self): return self[block_index + 1] del block[index:] block2 = BasicBlock(instructions) block.next_block = block2 for block in self[block_index + 1 :]: self._block_index[id(block)] += 1 self._blocks.insert(block_index + 1, 
block2) self._block_index[id(block2)] = block_index + 1 return block2 def get_dead_blocks(self) -> List[BasicBlock]: if not self: return [] seen_block_ids = set() stack = [self[0]] while stack: block = stack.pop() if id(block) in seen_block_ids: continue seen_block_ids.add(id(block)) for i in block: if isinstance(i, Instr) and isinstance(i.arg, BasicBlock): stack.append(i.arg) elif isinstance(i, TryBegin): assert isinstance(i.target, BasicBlock) stack.append(i.target) return [b for b in self if id(b) not in seen_block_ids] @staticmethod def from_bytecode(bytecode: _bytecode.Bytecode) -> "ControlFlowGraph": # label => instruction index label_to_block_index = {} jumps = [] try_end_locations = {} for index, instr in enumerate(bytecode): if isinstance(instr, Label): label_to_block_index[instr] = index elif isinstance(instr, Instr) and isinstance(instr.arg, Label): jumps.append((index, instr.arg)) elif isinstance(instr, TryBegin): assert isinstance(instr.target, Label) jumps.append((index, instr.target)) elif isinstance(instr, TryEnd): try_end_locations[instr.entry] = index # Figure out on which index block targeted by a label start block_starts = {} for target_index, target_label in jumps: target_index = label_to_block_index[target_label] block_starts[target_index] = target_label bytecode_blocks = ControlFlowGraph() bytecode_blocks._copy_attr_from(bytecode) bytecode_blocks.argnames = list(bytecode.argnames) # copy instructions, convert labels to block labels block = bytecode_blocks[0] labels = {} jumping_instrs: List[Instr] = [] # Map input TryBegin to CFG TryBegins (split across blocks may yield multiple # TryBegin from a single in the bytecode). try_begins: Dict[TryBegin, list[TryBegin]] = {} # Storage for TryEnds that need to be inserted at the beginning of a block. 
# We use a list because the same block can be reached through several paths # with different active TryBegins add_try_end: Dict[Label, List[TryEnd]] = defaultdict(list) # Track the currently active try begin active_try_begin: Optional[TryBegin] = None try_begin_inserted_in_block = False last_instr: Optional[Instr] = None for index, instr in enumerate(bytecode): # Reference to the current block if we create a new one in the following. old_block: BasicBlock | None = None # First we determine if we need to create a new block: # - by checking the current instruction index if index in block_starts: old_label = block_starts[index] # Create a new block if the last created one is not empty # (of real instructions) if index != 0 and (li := block.get_last_non_artificial_instruction()): old_block = block new_block = bytecode_blocks.add_block() # If the last non artificial instruction is not final connect # this block to the next. if not li.is_final(): block.next_block = new_block block = new_block if old_label is not None: labels[old_label] = block # - by inspecting the last instr elif block.get_last_non_artificial_instruction() and last_instr is not None: # The last instruction is final but we did not create a block # -> sounds like a block of dead code but we preserve it if last_instr.is_final(): old_block = block block = bytecode_blocks.add_block() # We are dealing with a conditional jump elif last_instr.has_jump(): assert isinstance(last_instr.arg, Label) old_block = block new_block = bytecode_blocks.add_block() block.next_block = new_block block = new_block # If we created a new block, we check: # - if the current instruction is a TryEnd and if the last instruction # is final in which case we insert the TryEnd in the old block. # - if we have a currently active TryBegin for which we may need to # create a TryEnd in the previous block and a new TryBegin in the # new one because the blocks are not connected. 
if old_block is not None: temp = try_begin_inserted_in_block try_begin_inserted_in_block = False if old_block is not None and last_instr is not None: # The last instruction is final, if the current instruction is a # TryEnd insert it in the same block and move to the next instruction if last_instr.is_final() and isinstance(instr, TryEnd): assert active_try_begin nte = instr.copy() nte.entry = try_begins[active_try_begin][-1] old_block.append(nte) active_try_begin = None continue # If we have an active TryBegin and last_instr is: elif active_try_begin is not None: # - a jump whose target is beyond the TryEnd of the active # TryBegin: we remember TryEnd should be prepended to the # target block. if ( last_instr.has_jump() and active_try_begin in try_end_locations and ( # last_instr is a jump so arg is a Label label_to_block_index[last_instr.arg] # type: ignore >= try_end_locations[active_try_begin] ) ): assert isinstance(last_instr.arg, Label) add_try_end[last_instr.arg].append( TryEnd(try_begins[active_try_begin][-1]) ) # - final and the try begin originate from the current block: # we insert a TryEnd in the old block and a new TryBegin in # the new one since the blocks are disconnected. if last_instr.is_final() and temp: old_block.append(TryEnd(try_begins[active_try_begin][-1])) new_tb = TryBegin( active_try_begin.target, active_try_begin.push_lasti ) block.append(new_tb) # Add this new TryBegin to the map to properly update # the target. 
try_begins[active_try_begin].append(new_tb) try_begin_inserted_in_block = True last_instr = None if isinstance(instr, Label): continue # don't copy SetLineno objects if isinstance(instr, (Instr, TryBegin, TryEnd)): new = instr.copy() if isinstance(instr, TryBegin): assert active_try_begin is None active_try_begin = instr try_begin_inserted_in_block = True assert isinstance(new, TryBegin) try_begins[instr] = [new] elif isinstance(instr, TryEnd): assert isinstance(new, TryEnd) new.entry = try_begins[instr.entry][-1] active_try_begin = None try_begin_inserted_in_block = False else: last_instr = instr if isinstance(instr.arg, Label): assert isinstance(new, Instr) jumping_instrs.append(new) instr = new block.append(instr) # Insert the necessary TryEnds at the beginning of block that were marked # (if we did not already insert an equivalent TryEnd earlier). for lab, tes in add_try_end.items(): block = labels[lab] existing_te_entries = set() index = 0 # We use a while loop since the block cannot yet be iterated on since # jumps still use labels instead of blocks while index < len(block): i = block[index] index += 1 if isinstance(i, TryEnd): existing_te_entries.add(i.entry) else: break for te in tes: if te.entry not in existing_te_entries: labels[lab].insert(0, te) existing_te_entries.add(te.entry) # Replace labels by block in jumping instructions for instr in jumping_instrs: label = instr.arg assert isinstance(label, Label) instr.arg = labels[label] # Replace labels by block in TryBegin for b_tb, c_tbs in try_begins.items(): label = b_tb.target assert isinstance(label, Label) for c_tb in c_tbs: c_tb.target = labels[label] return bytecode_blocks def to_bytecode(self) -> _bytecode.Bytecode: """Convert to Bytecode.""" used_blocks = set() for block in self: target_block = block.get_jump() if target_block is not None: used_blocks.add(id(target_block)) for tb in (i for i in block if isinstance(i, TryBegin)): used_blocks.add(id(tb.target)) labels = {} jumps = [] try_begins = {} 
seen_try_end: Set[TryBegin] = set() instructions: List[Union[Instr, Label, TryBegin, TryEnd, SetLineno]] = [] # Track the last seen TryBegin and TryEnd to be able to fuse adjacent # TryEnd/TryBegin pair which share the same target. # In each case, we store the value found in the CFG and the value # inserted in the bytecode. last_try_begin: tuple[TryBegin, TryBegin] | None = None last_try_end: tuple[TryEnd, TryEnd] | None = None for block in self: if id(block) in used_blocks: new_label = Label() labels[id(block)] = new_label instructions.append(new_label) for instr in block: # don't copy SetLineno objects if isinstance(instr, (Instr, TryBegin, TryEnd)): new = instr.copy() if isinstance(instr, TryBegin): # If due to jumps and split TryBegin, we encounter a TryBegin # while we still have a TryBegin ensure they can be fused. if last_try_begin is not None: cfg_tb, byt_tb = last_try_begin assert instr.target is cfg_tb.target assert instr.push_lasti == cfg_tb.push_lasti byt_tb.stack_depth = min( byt_tb.stack_depth, instr.stack_depth ) # If the TryBegin share the target and push_lasti of the # entry of an adjacent TryEnd, omit the new TryBegin that # was inserted to allow analysis of the CFG and remove # the already inserted TryEnd. 
def to_code(
    self,
    stacksize: Optional[int] = None,
    *,
    check_pre_and_post: bool = True,
    compute_exception_stack_depths: bool = True,
) -> types.CodeType:
    """Convert the control flow graph to a code object.

    When *stacksize* is not provided it is computed from the graph, forwarding
    *check_pre_and_post* and *compute_exception_stack_depths* to
    ``compute_stacksize``. The graph is then converted to a ``Bytecode`` whose
    own ``to_code`` is called with both checks disabled, since the stack size
    is already known at that point.
    """
    depth = stacksize
    if depth is None:
        depth = self.compute_stacksize(
            check_pre_and_post=check_pre_and_post,
            compute_exception_stack_depths=compute_exception_stack_depths,
        )
    return self.to_bytecode().to_code(
        stacksize=depth,
        check_pre_and_post=False,
        compute_exception_stack_depths=False,
    )
types from typing import ( Any, Dict, Iterable, Iterator, List, MutableSequence, Optional, Sequence, Set, Tuple, Type, TypeVar, Union, ) # alias to keep the 'bytecode' variable free import bytecode as _bytecode from bytecode.flags import CompilerFlags from bytecode.instr import ( _UNSET, BITFLAG2_INSTRUCTIONS, BITFLAG_INSTRUCTIONS, INTRINSIC, INTRINSIC_1OP, INTRINSIC_2OP, PLACEHOLDER_LABEL, UNSET, BaseInstr, CellVar, Compare, FreeVar, Instr, InstrArg, InstrLocation, Intrinsic1Op, Intrinsic2Op, Label, SetLineno, TryBegin, TryEnd, _check_arg_int, const_key, opcode_has_argument, ) # - jumps use instruction # - lineno use bytes (dis.findlinestarts(code)) # - dis displays bytes OFFSET_AS_INSTRUCTION = sys.version_info >= (3, 10) def _set_docstring(code: _bytecode.BaseBytecode, consts: Sequence) -> None: if not consts: return first_const = consts[0] if isinstance(first_const, str) or first_const is None: code.docstring = first_const T = TypeVar("T", bound="ConcreteInstr") class ConcreteInstr(BaseInstr[int]): """Concrete instruction. arg must be an integer in the range 0..2147483647. It has a read-only size attribute. 
""" # For ConcreteInstr the argument is always an integer _arg: int __slots__ = ("_size", "_extended_args") def __init__( self, name: str, arg: int = UNSET, *, lineno: Union[int, None, _UNSET] = UNSET, location: Optional[InstrLocation] = None, extended_args: Optional[int] = None, ): # Allow to remember a potentially meaningless EXTENDED_ARG emitted by # Python to properly compute the size and avoid messing up the jump # targets self._extended_args = extended_args super().__init__(name, arg, lineno=lineno, location=location) def _check_arg(self, name: str, opcode: int, arg: int) -> None: if opcode_has_argument(opcode): if arg is UNSET: raise ValueError("operation %s requires an argument" % name) _check_arg_int(arg, name) # opcode == 0 corresponds to CACHE instruction in 3.11+ and was unused before elif opcode == 0: arg = arg if arg is not UNSET else 0 _check_arg_int(arg, name) else: if arg is not UNSET: raise ValueError("operation %s has no argument" % name) def _set( self, name: str, arg: int, ) -> None: super()._set(name, arg) size = 2 if arg is not UNSET: while arg > 0xFF: size += 2 arg >>= 8 if self._extended_args is not None: size = 2 + 2 * self._extended_args self._size = size @property def size(self) -> int: return self._size def _cmp_key(self) -> Tuple[Optional[InstrLocation], str, int]: return (self._location, self._name, self._arg) def get_jump_target(self, instr_offset: int) -> Optional[int]: # When a jump arg is zero the jump always points to the first non-CACHE # opcode following the jump. The passed in offset is the offset at # which the jump opcode starts. So to compute the target, we add to it # the instruction size (accounting for extended args) and the # number of caches expected to follow the jump instruction. 
class ExceptionTableEntry:
    """Entry for a given line in the exception table.

    All offsets are expressed in instructions, not in bytes.

    """

    #: Offset in instruction between the beginning of the bytecode and the beginning
    #: of this entry.
    start_offset: int

    #: Offset in instruction between the beginning of the bytecode and the end
    #: of this entry. This offset is inclusive meaning that the instruction it points
    #: to is included in the try/except handling.
    stop_offset: int

    #: Offset in instruction to the first instruction of the exception handling block.
    target: int

    #: Minimal stack depth in the block delineated by start and stop
    #: offset of the exception table entry. Used to restore the stack (by
    #: popping items) when entering the exception handling block.
    stack_depth: int

    #: Should the offset, at which an exception was raised, be pushed on the stack
    #: before the exception itself (which is pushed as a single value).
    push_lasti: bool

    __slots__ = ("start_offset", "stop_offset", "target", "stack_depth", "push_lasti")

    def __init__(
        self,
        start_offset: int,
        stop_offset: int,
        target: int,
        stack_depth: int,
        push_lasti: bool,
    ) -> None:
        self.start_offset = start_offset
        self.stop_offset = stop_offset
        self.target = target
        self.stack_depth = stack_depth
        self.push_lasti = push_lasti

    def __repr__(self) -> str:
        """Return a constructor-like representation listing every field."""
        # The closing parenthesis was previously missing, which produced an
        # unbalanced (and thus misleading) representation.
        return (
            "ExceptionTableEntry("
            f"start_offset={self.start_offset}, "
            f"stop_offset={self.stop_offset}, "
            f"target={self.target}, "
            f"stack_depth={self.stack_depth}, "
            f"push_lasti={self.push_lasti}"
            ")"
        )
= (), varnames: Iterable[str] = (), exception_table: Optional[List[ExceptionTableEntry]] = None, ): super().__init__() self.consts = list(consts) self.names = list(names) self.varnames = list(varnames) self.exception_table = exception_table or [] for instr in instructions: self._check_instr(instr) self.extend(instructions) def __iter__(self) -> Iterator[Union[ConcreteInstr, SetLineno]]: instructions = super().__iter__() for instr in instructions: self._check_instr(instr) yield instr def _check_instr(self, instr: Any) -> None: if not isinstance(instr, (ConcreteInstr, SetLineno)): raise ValueError( "ConcreteBytecode must only contain " "ConcreteInstr and SetLineno objects, " "but %s was found" % type(instr).__name__ ) def _copy_attr_from(self, bytecode): super()._copy_attr_from(bytecode) if isinstance(bytecode, ConcreteBytecode): self.consts = bytecode.consts self.names = bytecode.names self.varnames = bytecode.varnames def __repr__(self) -> str: return "" % len(self) def __eq__(self, other: Any) -> bool: if type(self) is not type(other): return False const_keys1 = list(map(const_key, self.consts)) const_keys2 = list(map(const_key, other.consts)) if const_keys1 != const_keys2: return False if self.names != other.names: return False if self.varnames != other.varnames: return False return super().__eq__(other) @staticmethod def from_code( code: types.CodeType, *, extended_arg: bool = False ) -> "ConcreteBytecode": instructions: MutableSequence[Union[SetLineno, ConcreteInstr]] # For Python 3.11+ we use dis to extract the detailed location information at # reduced maintenance cost. 
if sys.version_info >= (3, 11): instructions = [ # dis.get_instructions automatically handle extended arg which # we do not want, so we fold back arguments to be between 0 and 255 ConcreteInstr( i.opname, i.arg % 256 if i.arg is not None else UNSET, location=InstrLocation.from_positions(i.positions) if i.positions else None, ) for i in dis.get_instructions(code, show_caches=True) ] else: if sys.version_info >= (3, 10): line_starts = dict( (offset, lineno) for offset, _, lineno in code.co_lines() ) else: line_starts = dict(dis.findlinestarts(code)) # find block starts instructions = [] offset = 0 lineno: Optional[int] = code.co_firstlineno while offset < (len(code.co_code) // (2 if OFFSET_AS_INSTRUCTION else 1)): lineno_off = (2 * offset) if OFFSET_AS_INSTRUCTION else offset if lineno_off in line_starts: lineno = line_starts[lineno_off] instr = ConcreteInstr.disassemble(lineno, code.co_code, offset) instructions.append(instr) offset += (instr.size // 2) if OFFSET_AS_INSTRUCTION else instr.size bytecode = ConcreteBytecode() # HINT : in some cases Python generate useless EXTENDED_ARG opcode # with a value of zero. Such opcodes do not increases the size of the # following opcode the way a normal EXTENDED_ARG does. As a # consequence, they need to be tracked manually as otherwise the # offsets in jump targets can end up being wrong. 
@staticmethod
def _normalize_lineno(
    instructions: Sequence[Union[ConcreteInstr, SetLineno]], first_lineno: int
) -> Iterator[Tuple[int, ConcreteInstr]]:
    """Yield ``(lineno, instr)`` for each ConcreteInstr of *instructions*.

    The yielded line number is "inherited": it is the last line number that
    was explicitly set (neither ``None`` nor ``UNSET``) by an item seen so
    far, the current one included, and *first_lineno* before any was set.
    Items that are not ConcreteInstr may update the running line number but
    are never yielded.
    """
    current = first_lineno
    for item in instructions:
        own = item.lineno
        # Keep the running value unless this item carries its own line number.
        if not (own is None or own is UNSET):
            current = own

        if isinstance(item, ConcreteInstr):
            yield (current, item)
@staticmethod def _assemble_lnotab( first_lineno: int, linenos: List[Tuple[int, int, int, Optional[InstrLocation]]] ) -> bytes: lnotab = [] old_offset = 0 old_lineno = first_lineno for offset, _, lineno, _ in linenos: dlineno = lineno - old_lineno if dlineno == 0: continue old_lineno = lineno doff = offset - old_offset old_offset = offset while doff > 255: lnotab.append(b"\xff\x00") doff -= 255 while dlineno < -128: lnotab.append(struct.pack("Bb", doff, -128)) doff = 0 dlineno -= -128 while dlineno > 127: lnotab.append(struct.pack("Bb", doff, 127)) doff = 0 dlineno -= 127 assert 0 <= doff <= 255 assert -128 <= dlineno <= 127 lnotab.append(struct.pack("Bb", doff, dlineno)) return b"".join(lnotab) @staticmethod def _pack_linetable( linetable: List[bytes], doff: int, dlineno: Optional[int] ) -> None: if dlineno is not None: # Ensure linenos are between -126 and +126, by using 127 lines jumps with # a 0 byte offset while dlineno < -127: linetable.append(struct.pack("Bb", 0, -127)) dlineno -= -127 while dlineno > 127: linetable.append(struct.pack("Bb", 0, 127)) dlineno -= 127 assert -127 <= dlineno <= 127 else: dlineno = -128 # Ensure offsets are less than 255. # If an offset is larger, we first mark the line change with an offset of 254 # then use as many 254 offset with no line change to reduce the offset to # less than 254. 
if doff > 254: linetable.append(struct.pack("Bb", 254, dlineno)) doff -= 254 while doff > 254: linetable.append(b"\xfe\x00") doff -= 254 linetable.append(struct.pack("Bb", doff, 0)) else: linetable.append(struct.pack("Bb", doff, dlineno)) assert 0 <= doff <= 254 # Used on 3.10 def _assemble_linestable( self, first_lineno: int, linenos: Iterable[Tuple[int, int, int, Optional[InstrLocation]]], ) -> bytes: if not linenos: return b"" linetable: List[bytes] = [] old_offset = 0 iter_in = iter(linenos) offset, i_size, old_lineno, old_location = next(iter_in) if old_location is not None: old_dlineno = ( old_location.lineno - first_lineno if old_location.lineno is not None else None ) else: old_dlineno = old_lineno - first_lineno for offset, i_size, lineno, location in iter_in: if location is not None: dlineno = ( location.lineno - old_lineno if location.lineno is not None else None ) else: dlineno = lineno - old_lineno if dlineno == 0 or (old_dlineno is None and dlineno is None): continue old_lineno = lineno doff = offset - old_offset old_offset = offset self._pack_linetable(linetable, doff, old_dlineno) old_dlineno = dlineno # Pack the line of the last instruction. 
doff = offset + i_size - old_offset self._pack_linetable(linetable, doff, old_dlineno) return b"".join(linetable) # The formats are describes in CPython/Objects/locations.md @staticmethod def _encode_location_varint(varint: int) -> bytearray: encoded = bytearray() # We encode on 6 bits while True: encoded.append(varint & 0x3F) varint >>= 6 if varint: encoded[-1] |= 0x40 # bit 6 is set except on the last entry else: break return encoded def _encode_location_svarint(self, svarint: int) -> bytearray: if svarint < 0: return self._encode_location_varint(((-svarint) << 1) | 1) else: return self._encode_location_varint(svarint << 1) # Python 3.11+ location format encoding @staticmethod def _pack_location_header(code: int, size: int) -> int: return (1 << 7) + (code << 3) + (size - 1 if size <= 8 else 7) def _pack_location( self, size: int, lineno: int, location: Optional[InstrLocation] ) -> bytearray: packed = bytearray() l_lineno: Optional[int] # The location was not set so we infer a line. if location is None: l_lineno, end_lineno, col_offset, end_col_offset = ( lineno, None, None, None, ) else: l_lineno, end_lineno, col_offset, end_col_offset = ( location.lineno, location.end_lineno, location.col_offset, location.end_col_offset, ) # We have no location information so the code is 15 if l_lineno is None: packed.append(self._pack_location_header(15, size)) # No column info, code 13 elif col_offset is None: if end_lineno is not None and end_lineno != l_lineno: raise ValueError( "An instruction cannot have no column offset and span " f"multiple lines (lineno: {l_lineno}, end lineno: {end_lineno}" ) packed.extend( ( self._pack_location_header(13, size), *self._encode_location_svarint(l_lineno - lineno), ) ) # We enforce the end_lineno to be defined else: assert end_lineno is not None assert end_col_offset is not None # Short forms if ( end_lineno == l_lineno and l_lineno - lineno == 0 and col_offset < 72 and (end_col_offset - col_offset) <= 15 ): packed.extend( ( 
self._pack_location_header(col_offset // 8, size), ((col_offset % 8) << 4) + (end_col_offset - col_offset), ) ) # One line form elif ( end_lineno == l_lineno and l_lineno - lineno in (1, 2) and col_offset < 256 and end_col_offset < 256 ): packed.extend( ( self._pack_location_header(10 + l_lineno - lineno, size), col_offset, end_col_offset, ) ) # Long form else: packed.extend( ( self._pack_location_header(14, size), *self._encode_location_svarint(l_lineno - lineno), *self._encode_location_varint(end_lineno - l_lineno), # When decoding in codeobject.c::advance_with_locations # we remove 1 from the offset ... *self._encode_location_varint(col_offset + 1), *self._encode_location_varint(end_col_offset + 1), ) ) return packed def _push_locations( self, locations: List[bytearray], size: int, lineno: int, location: InstrLocation, ) -> int: # We need the size in instruction not in bytes size //= 2 # Repeatedly add element since we cannot cover more than 8 code # elements. We recompute each time since in practice we will # rarely loop. 
while True: locations.append(self._pack_location(size, lineno, location)) # Update the lineno since if we need more than one entry the # reference for the delta of the lineno change lineno = location.lineno if location.lineno is not None else lineno size -= 8 if size < 1: break return lineno def _assemble_locations( self, first_lineno: int, linenos: Iterable[Tuple[int, int, int, Optional[InstrLocation]]], ) -> bytes: if not linenos: return b"" locations: List[bytearray] = [] iter_in = iter(linenos) _, size, lineno, old_location = next(iter_in) # Infer the line if location is None old_location = old_location or InstrLocation(lineno, None, None, None) lineno = first_lineno # We track the last set lineno to be able to compute deltas for _, i_size, new_lineno, location in iter_in: # Infer the line if location is None location = location or InstrLocation(new_lineno, None, None, None) # Group together instruction with equivalent locations if old_location.lineno and old_location == location: size += i_size continue lineno = self._push_locations(locations, size, lineno, old_location) size = i_size old_location = location # Pack the line of the last instruction. self._push_locations(locations, size, lineno, old_location) return b"".join(locations) @staticmethod def _remove_extended_args( instructions: MutableSequence[Union[SetLineno, ConcreteInstr]] ) -> None: # replace jump targets with blocks # HINT : in some cases Python generate useless EXTENDED_ARG opcode # with a value of zero. Such opcodes do not increases the size of the # following opcode the way a normal EXTENDED_ARG does. As a # consequence, they need to be tracked manually as otherwise the # offsets in jump targets can end up being wrong. 
nb_extended_args = 0 extended_arg = None index = 0 while index < len(instructions): instr = instructions[index] # Skip SetLineno meta instruction if isinstance(instr, SetLineno): index += 1 continue if instr.name == "EXTENDED_ARG": nb_extended_args += 1 if extended_arg is not None: extended_arg = (extended_arg << 8) + instr.arg else: extended_arg = instr.arg del instructions[index] continue if extended_arg is not None: arg = UNSET if instr.name == "NOP" else (extended_arg << 8) + instr.arg extended_arg = None instr = ConcreteInstr( instr.name, arg, location=instr.location, extended_args=nb_extended_args, ) instructions[index] = instr nb_extended_args = 0 index += 1 if extended_arg is not None: raise ValueError("EXTENDED_ARG at the end of the code") # Taken and adapted from exception_handling_notes.txt in cpython/Objects @staticmethod def _parse_varint(except_table_iterator: Iterator[int]) -> int: b = next(except_table_iterator) val = b & 63 while b & 64: val <<= 6 b = next(except_table_iterator) val |= b & 63 return val def _parse_exception_table( self, exception_table: bytes ) -> List[ExceptionTableEntry]: assert sys.version_info >= (3, 11) table = [] iterator = iter(exception_table) try: while True: start = self._parse_varint(iterator) length = self._parse_varint(iterator) end = start + length - 1 # Present as inclusive target = self._parse_varint(iterator) dl = self._parse_varint(iterator) depth = dl >> 1 lasti = bool(dl & 1) table.append(ExceptionTableEntry(start, end, target, depth, lasti)) except StopIteration: return table @staticmethod def _encode_varint(value: int, set_begin_marker: bool = False) -> Iterator[int]: # Encode value as a varint on 7 bits (MSB should come first) and set # the begin marker if requested. 
temp: List[int] = [] assert value >= 0 while value: temp.append(value & 63 | (64 if temp else 0)) value >>= 6 temp = temp or [0] if set_begin_marker: temp[-1] |= 128 return reversed(temp) def _assemble_exception_table(self) -> bytes: table = bytearray() for entry in self.exception_table or []: size = entry.stop_offset - entry.start_offset + 1 depth = (entry.stack_depth << 1) + entry.push_lasti table.extend(self._encode_varint(entry.start_offset, True)) table.extend(self._encode_varint(size)) table.extend(self._encode_varint(entry.target)) table.extend(self._encode_varint(depth)) return bytes(table) def compute_stacksize(self, *, check_pre_and_post: bool = True) -> int: bytecode = self.to_bytecode() cfg = _bytecode.ControlFlowGraph.from_bytecode(bytecode) return cfg.compute_stacksize(check_pre_and_post=check_pre_and_post) def to_code( self, stacksize: Optional[int] = None, *, check_pre_and_post: bool = True, compute_exception_stack_depths: bool = True, ) -> types.CodeType: # Prevent reconverting the concrete bytecode to bytecode and cfg to do the # calculation if we need to do it. if stacksize is None or ( sys.version_info >= (3, 11) and compute_exception_stack_depths ): cfg = _bytecode.ControlFlowGraph.from_bytecode(self.to_bytecode()) stacksize = cfg.compute_stacksize( check_pre_and_post=check_pre_and_post, compute_exception_stack_depths=compute_exception_stack_depths, ) self = cfg.to_bytecode().to_concrete_bytecode( compute_exception_stack_depths=False ) # Assemble the code string after round tripping to CFG if necessary. 
def to_bytecode(
    self,
    prune_caches: bool = True,
    conserve_exception_block_stackdepth: bool = False,
) -> _bytecode.Bytecode:
    """Convert this concrete bytecode into an abstract ``Bytecode``.

    Works on a copy of the instructions from which extended args have been
    removed. Jump offsets are replaced by ``Label`` objects and every
    exception table entry is turned into a ``TryBegin``/``TryEnd`` pair of
    pseudo-instructions.

    Parameters
    ----------
    prune_caches : bool, optional
        When True, ``CACHE`` instructions are not copied into the result.
    conserve_exception_block_stackdepth : bool, optional
        When True, the stack depth stored in each exception table entry is
        kept on the matching ``TryBegin``; otherwise it is left ``UNSET``.

    Returns
    -------
    Bytecode
        New object carrying the converted instructions, the attributes
        copied from ``self``, the argument names and the docstring.

    """
    # On 3.11 we generate pseudo-instruction from the exception table

    # Copy instruction and remove extended args if any (in-place)
    c_instructions = self[:]
    self._remove_extended_args(c_instructions)

    # Find jump targets
    jump_targets: Set[int] = set()
    offset = 0
    for c_instr in c_instructions:
        if isinstance(c_instr, SetLineno):
            continue
        target = c_instr.get_jump_target(offset)
        if target is not None:
            jump_targets.add(target)
        offset += (c_instr.size // 2) if OFFSET_AS_INSTRUCTION else c_instr.size

    # On 3.11+ we need to also look at the exception table for jump targets
    for ex_entry in self.exception_table:
        jump_targets.add(ex_entry.target)

    # Create look up dict to find entries based on either exception handling
    # block exit or entry offsets. Several blocks can end on the same instruction
    # so we store a list of entry per offset.
    ex_start: Dict[int, ExceptionTableEntry] = {}
    ex_end: Dict[int, List[ExceptionTableEntry]] = {}
    for entry in self.exception_table:
        # Ensure we do not have more than one entry with identical starting
        # offsets
        assert entry.start_offset not in ex_start
        ex_start[entry.start_offset] = entry
        ex_end.setdefault(entry.stop_offset, []).append(entry)

    # Create labels and instructions
    # jumps stores (index in `instructions`, target offset) pairs that are
    # resolved to labels once all labels have been created.
    jumps: List[Tuple[int, int]] = []
    instructions: List[Union[Instr, Label, TryBegin, TryEnd, SetLineno]] = []
    labels = {}
    tb_instrs: Dict[ExceptionTableEntry, TryBegin] = {}
    offset = 0

    # In Python 3.11+ cell and varnames can be shared and are indexed in a single
    # array.
    # As a consequence, the instruction argument can be either:
    # - < len(varnames): the name is shared and we can directly use
    #   the index to access the name in cellvars
    # - > len(varnames): the name is not shared and is offset by the
    #   number of unshared varnames.
    # Free vars are never shared and correspond to index larger than the
    # largest cell var.
    # See PyCode_NewWithPosOnlyArgs
    if sys.version_info >= (3, 11):
        cells_lookup = self.varnames + [
            n for n in self.cellvars if n not in self.varnames
        ]
        ncells = len(cells_lookup)
    else:
        ncells = len(self.cellvars)
        cells_lookup = self.cellvars

    for lineno, c_instr in self._normalize_lineno(
        c_instructions, self.first_lineno
    ):
        if offset in jump_targets:
            label = Label()
            labels[offset] = label
            instructions.append(label)

        # Handle TryBegin pseudo instructions
        if offset in ex_start:
            entry = ex_start[offset]
            # The Label() passed here is a temporary target; the real one is
            # assigned after the loop, once all labels exist.
            tb_instr = TryBegin(
                Label(),
                entry.push_lasti,
                entry.stack_depth if conserve_exception_block_stackdepth else UNSET,
            )
            # Per entry store the pseudo instruction associated
            tb_instrs[entry] = tb_instr
            instructions.append(tb_instr)

        jump_target = c_instr.get_jump_target(offset)
        size = c_instr.size
        # If an instruction uses extended args, those appear before the instruction
        # causing the instruction to appear at offset that accounts for extended
        # args. So we first update the offset to account for extended args, then
        # record the instruction offset and then add the instruction itself to the
        # offset.
        offset += (size // 2 - 1) if OFFSET_AS_INSTRUCTION else (size - 2)
        current_instr_offset = offset
        offset += 1 if OFFSET_AS_INSTRUCTION else 2

        # on Python 3.11+ remove CACHE opcodes if we are requested to do so.
        # We are careful to first advance the offset and check that the CACHE
        # is not a jump target. It should never be the case but we double check.
        if prune_caches and c_instr.name == "CACHE":
            assert jump_target is None

        # We may need to insert a TryEnd after a CACHE so we still need to run
        # through the last block below.
        else:
            arg: InstrArg
            c_arg = c_instr.arg
            # FIXME: better error reporting
            if c_instr.opcode in _opcode.hasconst:
                arg = self.consts[c_arg]
            elif c_instr.opcode in _opcode.haslocal:
                arg = self.varnames[c_arg]
            elif c_instr.opcode in _opcode.hasname:
                # For bit-flag instructions the low bit(s) of the argument hold
                # flags and the remaining bits hold the index into names.
                if c_instr.name in BITFLAG_INSTRUCTIONS:
                    arg = (bool(c_arg & 1), self.names[c_arg >> 1])
                elif c_instr.name in BITFLAG2_INSTRUCTIONS:
                    arg = (bool(c_arg & 1), bool(c_arg & 2), self.names[c_arg >> 2])
                else:
                    arg = self.names[c_arg]
            elif c_instr.opcode in _opcode.hasfree:
                if c_arg < ncells:
                    name = cells_lookup[c_arg]
                    arg = CellVar(name)
                else:
                    name = self.freevars[c_arg - ncells]
                    arg = FreeVar(name)
            elif c_instr.opcode in _opcode.hascompare:
                # On 3.12+ the comparison kind is stored above the 4 lowest bits
                arg = Compare(
                    (c_arg >> 4) if sys.version_info >= (3, 12) else c_arg
                )
            elif c_instr.opcode in INTRINSIC_1OP:
                arg = Intrinsic1Op(c_arg)
            elif c_instr.opcode in INTRINSIC_2OP:
                arg = Intrinsic2Op(c_arg)
            else:
                arg = c_arg

            # Fall back to a lineno-only location when the concrete instruction
            # carries no location of its own.
            location = c_instr.location or InstrLocation(lineno, None, None, None)

            if jump_target is not None:
                arg = PLACEHOLDER_LABEL
                instr_index = len(instructions)
                jumps.append((instr_index, jump_target))

            instructions.append(Instr(c_instr.name, arg, location=location))

        # We now insert the TryEnd entries
        if current_instr_offset in ex_end:
            entries = ex_end[current_instr_offset]
            for entry in reversed(entries):
                instructions.append(TryEnd(tb_instrs[entry]))

    # Replace jump targets with labels
    for index, jump_target in jumps:
        instr = instructions[index]
        assert isinstance(instr, Instr) and instr.arg is PLACEHOLDER_LABEL
        # FIXME: better error reporting on missing label
        instr.arg = labels[jump_target]

    # Set the label for TryBegin
    for entry, tb in tb_instrs.items():
        tb.target = labels[entry.target]

    bytecode = _bytecode.Bytecode()
    bytecode._copy_attr_from(self)

    # The argument names are the first nargs variable names
    nargs = bytecode.argcount + bytecode.kwonlyargcount
    nargs += bytecode.posonlyargcount
    if bytecode.flags & inspect.CO_VARARGS:
        nargs += 1
    if bytecode.flags & inspect.CO_VARKEYWORDS:
        nargs += 1
    bytecode.argnames = self.varnames[:nargs]
    _set_docstring(bytecode, self.consts)

    bytecode.extend(instructions)
    return bytecode
def concrete_instructions(self) -> None:
    """Translate ``self.bytecode`` into concrete instructions.

    Fills ``self.instructions`` with ``ConcreteInstr`` objects and records,
    as indexes into that list (not yet bytecode offsets):

    - ``self.labels``: position of each ``Label``;
    - ``self.jumps``: ``(index, label, instruction)`` for each jump, whose
      argument is a fake 0 until ``compute_jumps()`` runs;
    - ``self.exception_handling_blocks``: one ``ExceptionTableEntry`` per
      ``TryBegin``.

    Constants, names and variable names are registered as they are met.

    Raises
    ------
    RuntimeError
        If a ``CACHE`` instruction is found when none is expected, or if
        manual ``CACHE`` instructions were provided but fewer than required.

    """
    lineno = self.bytecode.first_lineno
    # Track instruction (index) using cell vars and free vars to be able to update
    # the index used once all the names are known.
    cell_instrs: List[int] = []
    free_instrs: List[int] = []

    for instr in self.bytecode:
        # Enforce proper use of CACHE opcode on Python 3.11+ by checking we get the
        # number we expect or directly generate the needed ones.
        if isinstance(instr, Instr) and instr.name == "CACHE":
            if not self.required_caches:
                raise RuntimeError("Found a CACHE opcode when none was expected.")
            self.seen_manual_cache = True
            self.required_caches -= 1

        elif self.required_caches:
            if not self.seen_manual_cache:
                # We preserve the location of the instruction requiring the
                # presence of cache instructions
                self.instructions.extend(
                    [
                        ConcreteInstr(
                            "CACHE", 0, location=self.instructions[-1].location
                        )
                        for i in range(self.required_caches)
                    ]
                )
                self.required_caches = 0
                self.seen_manual_cache = False
            else:
                raise RuntimeError(
                    "Found some manual opcode but less than expected. "
                    f"Missing {self.required_caches} CACHE opcodes."
                )

        if isinstance(instr, Label):
            self.labels[instr] = len(self.instructions)
            continue

        if isinstance(instr, SetLineno):
            lineno = instr.lineno
            continue

        if isinstance(instr, TryBegin):
            # We expect the stack depth to have been provided or computed earlier
            assert instr.stack_depth is not UNSET
            # NOTE here we store the index of the instruction at which the
            # exception table entry starts. This is not the final value we want,
            # we want the offset in the bytecode but that requires to compute
            # the jumps first to resolve any possible extended arg needed in a
            # jump.
            self.exception_handling_blocks[instr] = ExceptionTableEntry(
                len(self.instructions), 0, 0, instr.stack_depth, instr.push_lasti
            )
            continue

        # Do not handle TryEnd before we insert possible CACHE opcode
        if isinstance(instr, TryEnd):
            entry = self.exception_handling_blocks[instr.entry]
            # The TryEnd is located after the last opcode in the exception entry
            # so we move the offset by one. We choose one so that the end does
            # encompass a possible EXTENDED_ARG
            entry.stop_offset = len(self.instructions) - 1
            continue

        assert isinstance(instr, Instr)

        # An explicit lineno on the instruction becomes the current lineno; an
        # UNSET lineno is filled in (on the instruction itself) with the
        # current one. A lineno of None is left untouched.
        if instr.lineno is not UNSET and instr.lineno is not None:
            lineno = instr.lineno
        elif instr.lineno is UNSET:
            instr.lineno = lineno

        arg = instr.arg
        is_jump = False
        if isinstance(arg, Label):
            label = arg
            # fake value, real value is set in compute_jumps()
            arg = 0
            is_jump = True
        elif instr.opcode in _opcode.hasconst:
            arg = self.add_const(arg)
        elif instr.opcode in _opcode.haslocal:
            assert isinstance(arg, str)
            arg = self.add(self.varnames, arg)
        elif instr.opcode in _opcode.hasname:
            # Bit-flag instructions pack their boolean flag(s) in the lowest
            # bit(s) and the index of the name in the remaining bits.
            if instr.name in BITFLAG_INSTRUCTIONS:
                assert (
                    isinstance(arg, tuple)
                    and len(arg) == 2
                    and isinstance(arg[0], bool)
                    and isinstance(arg[1], str)
                ), arg
                index = self.add(self.names, arg[1])
                arg = int(arg[0]) + (index << 1)
            elif instr.name in BITFLAG2_INSTRUCTIONS:
                assert (
                    isinstance(arg, tuple)
                    and len(arg) == 3
                    and isinstance(arg[0], bool)
                    and isinstance(arg[1], bool)
                    and isinstance(arg[2], str)
                ), arg
                index = self.add(self.names, arg[2])
                arg = int(arg[0]) + 2 * int(arg[1]) + (index << 2)
            else:
                assert isinstance(arg, str), f"Got {arg}, expected a str"
                arg = self.add(self.names, arg)
        elif instr.opcode in _opcode.hasfree:
            # Naive index for now; it is corrected after the loop once all the
            # variable names are known.
            if isinstance(arg, CellVar):
                cell_instrs.append(len(self.instructions))
                arg = self.bytecode.cellvars.index(arg.name)
            else:
                assert isinstance(arg, FreeVar)
                free_instrs.append(len(self.instructions))
                arg = self.bytecode.freevars.index(arg.name)
        elif instr.opcode in _opcode.hascompare:
            if isinstance(arg, Compare):
                # In Python 3.12 the 4 lowest bits are used for caching
                # See compare_masks in compile.c
                if sys.version_info >= (3, 12):
                    arg = arg._get_mask() + (arg.value << 4)
                else:
                    arg = arg.value
        elif instr.opcode in INTRINSIC:
            if isinstance(arg, (Intrinsic1Op, Intrinsic2Op)):
                arg = arg.value

        # The above should have performed all the necessary conversion
        assert isinstance(arg, int)

        c_instr = ConcreteInstr(instr.name, arg, location=instr.location)
        if is_jump:
            self.jumps.append((len(self.instructions), label, c_instr))

        # If the instruction expects some CACHE entries, remember how many
        if sys.version_info >= (3, 11):
            self.required_caches = c_instr.use_cache_opcodes()
            self.seen_manual_cache = False

        self.instructions.append(c_instr)

    # On Python 3.11 varnames and cells can share some names. Find the shared
    # names and update the arg argument of instructions using cell vars.
    # We also track by how much to offset free vars which are stored in a
    # contiguous array after the cell vars
    if sys.version_info >= (3, 11):
        # Map naive cell index to shared index
        shared_name_indexes: Dict[int, int] = {}
        n_shared = 0
        n_unshared = 0
        for i, name in enumerate(self.bytecode.cellvars):
            if name in self.varnames:
                shared_name_indexes[i] = self.varnames.index(name)
                n_shared += 1
            else:
                shared_name_indexes[i] = len(self.varnames) + n_unshared
                n_unshared += 1

        for index in cell_instrs:
            c_instr = self.instructions[index]
            c_instr.arg = shared_name_indexes[c_instr.arg]

        free_offset = len(self.varnames) + len(self.bytecode.cellvars) - n_shared
    else:
        free_offset = len(self.bytecode.cellvars)

    for index in free_instrs:
        c_instr = self.instructions[index]
        c_instr.arg += free_offset
def compute_jumps(self) -> bool:
    """Resolve labels into jump arguments and exception table offsets.

    Returns True as soon as a pass changed the size of at least one jump
    instruction (the offsets are then stale and the caller has to run
    another pass; the exception table is left untouched in that case).
    Returns False once every jump and every exception table entry has been
    resolved.
    """
    # Two offsets are tracked per instruction: the one *before* it (what a
    # label placed in front of it must point to) and the one of the opcode
    # itself, which is further away when extended args precede it.
    before_instr = []
    at_instr = []
    position = 0
    for c_instr in self.instructions:
        before_instr.append(position)
        # Skip over the extended args, if any, sitting before the opcode
        if OFFSET_AS_INSTRUCTION:
            position += c_instr.size // 2 - 1
        else:
            position += c_instr.size - 2
        at_instr.append(position)
        position += 1 if OFFSET_AS_INSTRUCTION else 2
    # A label can sit after the very last instruction
    before_instr.append(position)

    # FIXME may need some extra check to validate jump forward vs jump backward
    resized = False
    for jump_index, jump_label, jump in self.jumps:
        destination = before_instr[self.labels[jump_label]]

        # FIXME use opcode
        # Under 3.12+, the FOR_ITER and SEND jumps are implicitly increased by 1
        # to skip over END_FOR, END_SEND see Python/instrumentation.c
        if sys.version_info >= (3, 12) and jump.name in ("FOR_ITER", "SEND"):
            destination -= 1

        if jump.is_forward_rel_jump():
            origin = before_instr[jump_index]
            width = jump.size // 2 if OFFSET_AS_INSTRUCTION else jump.size
            destination -= origin + width
        elif jump.is_backward_rel_jump():
            origin = before_instr[jump_index]
            width = jump.size // 2 if OFFSET_AS_INSTRUCTION else jump.size
            destination = origin + width - destination

        size_before = jump.size
        # FIXME: better error report if destination is negative
        jump.arg = destination
        if jump.size != size_before:
            resized = True

    # A jump that grew (or shrank) invalidates every offset computed above:
    # bail out before touching the exception table.
    if resized:
        return True

    # Exception table entries still hold instruction indexes at this point;
    # turn them into real offsets.
    for try_begin, table_entry in self.exception_handling_blocks.items():
        table_entry.start_offset = at_instr[table_entry.start_offset]
        table_entry.stop_offset = at_instr[table_entry.stop_offset]

        handler = try_begin.target
        assert isinstance(handler, Label)
        table_entry.target = before_instr[self.labels[handler]]

    return False
def to_concrete_bytecode(
    self,
    compute_jumps_passes: Optional[int] = None,
    compute_exception_stack_depths: bool = True,
) -> ConcreteBytecode:
    """Build the ``ConcreteBytecode`` matching ``self.bytecode``.

    Parameters
    ----------
    compute_jumps_passes : int | None, optional
        Maximum number of ``compute_jumps()`` passes to run before giving
        up. Defaults to ``self._compute_jumps_passes`` when None.
    compute_exception_stack_depths : bool, optional
        On Python 3.11+, when True, ``self.bytecode`` is first rebuilt
        through a ``ControlFlowGraph`` on which the stack size is computed
        with ``compute_exception_stack_depths=True``.

    Returns
    -------
    ConcreteBytecode
        Concrete bytecode holding the generated instructions, constants,
        names, variable names and exception table.

    Raises
    ------
    RuntimeError
        If the jumps did not converge within the allowed number of passes
        (this includes a non-positive ``compute_jumps_passes``).

    """
    if sys.version_info >= (3, 11) and compute_exception_stack_depths:
        cfg = _bytecode.ControlFlowGraph.from_bytecode(self.bytecode)
        cfg.compute_stacksize(compute_exception_stack_depths=True)
        self.bytecode = cfg.to_bytecode()

    if compute_jumps_passes is None:
        compute_jumps_passes = self._compute_jumps_passes

    # The docstring, when set, has to be the first constant
    first_const = self.bytecode.docstring
    if first_const is not UNSET:
        self.add_const(first_const)

    self.varnames.extend(self.bytecode.argnames)

    self.concrete_instructions()
    # Setting a jump argument can change the size of the jump (extended
    # args) which shifts every following offset, hence the repeated passes.
    for _ in range(compute_jumps_passes):
        if not self.compute_jumps():
            break
    else:
        # Report the pass budget rather than the loop variable: the latter is
        # unbound when the budget is zero or negative and the loop never ran.
        raise RuntimeError(
            "compute_jumps() failed to converge after"
            " %d passes" % compute_jumps_passes
        )

    concrete = ConcreteBytecode(
        self.instructions,
        consts=tuple(self.consts_list),
        names=tuple(self.names),
        varnames=self.varnames,
        exception_table=list(self.exception_handling_blocks.values()),
    )
    concrete._copy_attr_from(self.bytecode)
    return concrete
def infer_flags(
    bytecode: Union[
        "_bytecode.Bytecode", "_bytecode.ConcreteBytecode", "_bytecode.ControlFlowGraph"
    ],
    is_async: Optional[bool] = None,
) -> "CompilerFlags":
    """Infer the proper flags for a bytecode based on the instructions.

    Because the bytecode does not have enough context to guess if a function
    is asynchronous the algorithm tries to be conservative and will never turn
    a previously async code into a sync one.

    Parameters
    ----------
    bytecode : Bytecode | ConcreteBytecode | ControlFlowGraph
        Bytecode for which to infer the proper flags
    is_async : bool | None, optional
        Force the code to be marked as asynchronous if True, prevent it from
        being marked as asynchronous if False and simply infer the best
        solution based on the opcode and the existing flag if None.

    Returns
    -------
    CompilerFlags
        The inferred flags. ``bytecode.flags`` itself is not modified.

    Raises
    ------
    ValueError
        If ``bytecode`` is not one of the supported types, if the
        ITERABLE_COROUTINE flag is set while async-only instructions are
        present, or if ``is_async`` is False while async-only instructions
        are present.

    """
    flags = CompilerFlags(0)
    if not isinstance(
        bytecode,
        (_bytecode.Bytecode, _bytecode.ConcreteBytecode, _bytecode.ControlFlowGraph),
    ):
        msg = (
            "Expected a Bytecode, ConcreteBytecode or ControlFlowGraph "
            "instance not %s"
        )
        # Wrap the value in a 1-tuple: formatting directly with a tuple
        # argument would raise TypeError instead of the intended ValueError.
        raise ValueError(msg % (bytecode,))

    instructions = (
        bytecode._get_instructions()
        if isinstance(bytecode, _bytecode.ControlFlowGraph)
        else bytecode
    )
    # Pseudo-instructions have no opcode name and are ignored
    instr_names = {
        i.name
        for i in instructions
        if not isinstance(
            i,
            (
                _bytecode.SetLineno,
                _bytecode.Label,
                _bytecode.TryBegin,
                _bytecode.TryEnd,
            ),
        )
    }

    # Identify optimized code
    if not (instr_names & {"STORE_NAME", "LOAD_NAME", "DELETE_NAME"}):
        flags |= CompilerFlags.OPTIMIZED

    # Check for free variables
    if not (instr_names & {opcode.opname[i] for i in opcode.hasfree}):
        flags |= CompilerFlags.NOFREE

    # Copy flags for which we cannot infer the right value
    flags |= bytecode.flags & (
        CompilerFlags.NEWLOCALS
        | CompilerFlags.VARARGS
        | CompilerFlags.VARKEYWORDS
        | CompilerFlags.NESTED
    )

    sure_generator = instr_names & {"YIELD_VALUE"}
    maybe_generator = instr_names & {"YIELD_VALUE", "YIELD_FROM"}

    sure_async = instr_names & {
        "GET_AWAITABLE",
        "GET_AITER",
        "GET_ANEXT",
        "BEFORE_ASYNC_WITH",
        "SETUP_ASYNC_WITH",
        "END_ASYNC_FOR",
        "ASYNC_GEN_WRAP",  # New in 3.11
    }

    # If performing inference or forcing an async behavior, first inspect
    # the flags since this is the only way to identify iterable coroutines
    if is_async in (None, True):
        if bytecode.flags & CompilerFlags.COROUTINE:
            if sure_generator:
                flags |= CompilerFlags.ASYNC_GENERATOR
            else:
                flags |= CompilerFlags.COROUTINE
        elif bytecode.flags & CompilerFlags.ITERABLE_COROUTINE:
            if sure_async:
                msg = (
                    "The ITERABLE_COROUTINE flag is set but bytecode that "
                    "can only be used in async functions have been "
                    "detected. Please unset that flag before performing "
                    "inference."
                )
                raise ValueError(msg)
            flags |= CompilerFlags.ITERABLE_COROUTINE
        elif bytecode.flags & CompilerFlags.ASYNC_GENERATOR:
            if not sure_generator:
                flags |= CompilerFlags.COROUTINE
            else:
                flags |= CompilerFlags.ASYNC_GENERATOR

        # If the code was not asynchronous before determine if it should now be
        # asynchronous based on the opcode and the is_async argument.
        else:
            if sure_async:
                # YIELD_FROM is not allowed in async generator
                if sure_generator:
                    flags |= CompilerFlags.ASYNC_GENERATOR
                else:
                    flags |= CompilerFlags.COROUTINE

            elif maybe_generator:
                if is_async:
                    if sure_generator:
                        flags |= CompilerFlags.ASYNC_GENERATOR
                    else:
                        flags |= CompilerFlags.COROUTINE
                else:
                    flags |= CompilerFlags.GENERATOR

            elif is_async:
                flags |= CompilerFlags.COROUTINE

    # If the code should not be asynchronous, check first it is possible and
    # next set the GENERATOR flag if relevant
    else:
        if sure_async:
            raise ValueError(
                "The is_async argument is False but bytecodes "
                "that can only be used in async functions have "
                "been detected."
            )

        if maybe_generator:
            flags |= CompilerFlags.GENERATOR

    flags |= bytecode.flags & CompilerFlags.FUTURE_GENERATOR_STOP

    return flags
# Used for COMPARE_OP opcode argument
@enum.unique
class Compare(enum.IntEnum):
    """Comparison kinds accepted as argument of COMPARE_OP."""

    LT = 0
    LE = 1
    EQ = 2
    NE = 3
    GT = 4
    GE = 5
    # Before Python 3.9 the containment, identity and exception matching
    # tests were also expressed through COMPARE_OP.
    if sys.version_info < (3, 9):
        IN = 6
        NOT_IN = 7
        IS = 8
        IS_NOT = 9
        EXC_MATCH = 10

    if sys.version_info >= (3, 12):

        def _get_mask(self):
            """Return the value to store in the 4 lowest bits of the oparg.

            Members without a mask yield None.
            """
            # The table is built inside the method on purpose: assigned in the
            # class body it would be turned into an enum member.
            mask_by_kind = {
                Compare.EQ: 8,
                Compare.NE: 1 + 2 + 4,
                Compare.LT: 2,
                Compare.LE: 2 + 8,
                Compare.GT: 4,
                Compare.GE: 4 + 8,
            }
            return mask_by_kind.get(self)
# Deriving from int keeps type checkers quiet wherever an int is expected, at
# the price of not catching statically an attempt to manipulate an unset value.
# Catching those would require guards narrowing object attributes through
# methods.
class _UNSET(int):
    """Singleton int subclass used as the "no value provided" marker."""

    # The one and only instance, created on first instantiation
    instance = None

    def __new__(cls):
        existing = cls.instance
        if existing is not None:
            return existing
        cls.instance = super().__new__(cls)
        return cls.instance

    def __eq__(self, other) -> bool:
        # Equality is identity: the marker never equals a genuine integer
        return other is self


# Neutralize every numeric/ordering special method inherited from int so that
# the marker cannot silently take part in arithmetic or comparisons.
for op in (
    "__abs__",
    "__add__",
    "__and__",
    "__bool__",
    "__ceil__",
    "__divmod__",
    "__float__",
    "__floor__",
    "__floordiv__",
    "__ge__",
    "__gt__",
    "__hash__",
    "__index__",
    "__int__",
    "__invert__",
    "__le__",
    "__lshift__",
    "__lt__",
    "__mod__",
    "__mul__",
    "__ne__",
    "__neg__",
    "__or__",
    "__pos__",
    "__pow__",
    "__radd__",
    "__rand__",
    "__rdivmod__",
    "__rfloordiv__",
    "__rlshift__",
    "__rmod__",
    "__rmul__",
    "__ror__",
    "__round__",
    "__rpow__",
    "__rrshift__",
    "__rshift__",
    "__rsub__",
    "__rtruediv__",
    "__rxor__",
    "__sub__",
    "__truediv__",
    "__trunc__",
    "__xor__",
):
    setattr(_UNSET, op, lambda *args: NotImplemented)


UNSET = _UNSET()
class _Variable:
    """Named variable used as instruction argument (base of CellVar/FreeVar)."""

    __slots__ = ("name",)

    def __init__(self, name: str) -> None:
        self.name: str = name

    def __eq__(self, other: Any) -> bool:
        # Variables of different kinds never compare equal, even when they
        # share the same name.
        same_kind = type(other) is type(self)
        return same_kind and self.name == other.name

    def __str__(self) -> str:
        return self.name

    def __repr__(self) -> str:
        kind = self.__class__.__name__
        return "<%s %r>" % (kind, self.name)


class CellVar(_Variable):
    """Variable stored in a cell."""

    __slots__ = ()


class FreeVar(_Variable):
    """Free variable."""

    __slots__ = ()
# Stack effects that do not depend on the argument of the instruction.
# Each value is a (pre, post) pair: the effect of fetching the operands from
# the stack before execution, and of what is pushed back afterwards.
# Later keys override earlier ones, so the explicit entries placed after the
# generated UNARY_*/BINARY_*/INPLACE_* ones must stay after them.
STATIC_STACK_EFFECTS: Dict[str, Tuple[int, int]] = {
    "ROT_TWO": (-2, 2),
    "ROT_THREE": (-3, 3),
    "ROT_FOUR": (-4, 4),
    "DUP_TOP": (-1, 2),
    "DUP_TOP_TWO": (-2, 4),
    "GET_LEN": (-1, 2),
    "GET_ITER": (-1, 1),
    "GET_YIELD_FROM_ITER": (-1, 1),
    "GET_AWAITABLE": (-1, 1),
    "GET_AITER": (-1, 1),
    "GET_ANEXT": (-1, 2),
    "LIST_TO_TUPLE": (-1, 1),
    "LIST_EXTEND": (-2, 1),
    "SET_UPDATE": (-2, 1),
    "DICT_UPDATE": (-2, 1),
    "DICT_MERGE": (-2, 1),
    "COMPARE_OP": (-2, 1),
    "IS_OP": (-2, 1),
    "CONTAINS_OP": (-2, 1),
    "IMPORT_NAME": (-2, 1),
    "ASYNC_GEN_WRAP": (-1, 1),
    "PUSH_EXC_INFO": (-1, 2),
    # Pop TOS and push TOS.__aexit__ and result of TOS.__aenter__()
    "BEFORE_ASYNC_WITH": (-1, 2),
    # Replace TOS based on TOS and TOS1
    "IMPORT_FROM": (-1, 2),
    "COPY_DICT_WITHOUT_KEYS": (-2, 2),
    # Call a function at position 7 (4 3.11+) on the stack and push the return value
    "WITH_EXCEPT_START": (-4, 5) if sys.version_info >= (3, 11) else (-7, 8),
    # Starting with Python 3.11 MATCH_CLASS does not push a boolean anymore
    "MATCH_CLASS": (-3, 1 if sys.version_info >= (3, 11) else 2),
    "MATCH_MAPPING": (-1, 2),
    "MATCH_SEQUENCE": (-1, 2),
    "MATCH_KEYS": (-2, 3 if sys.version_info >= (3, 11) else 4),
    "CHECK_EXC_MATCH": (-2, 2),  # (TOS1, TOS) -> (TOS1, bool)
    "CHECK_EG_MATCH": (-2, 2),  # (TOS, TOS1) -> non-matched, matched or TOS1, None)
    "PREP_RERAISE_STAR": (-2, 1),  # (TOS1, TOS) -> new exception group)
    **{k: (-1, 1) for k in (o for o in _opcode.opmap if (o.startswith("UNARY_")))},
    **{
        k: (-2, 1)
        for k in (
            o
            for o in _opcode.opmap
            if (o.startswith("BINARY_") or o.startswith("INPLACE_"))
        )
    },
    # Python 3.12 changes not covered by dis.stack_effect
    "BINARY_SLICE": (-3, 1),
    # "STORE_SLICE" handled by dis.stack_effect
    "LOAD_FROM_DICT_OR_GLOBALS": (-1, 1),
    "LOAD_FROM_DICT_OR_DEREF": (-1, 1),
    # The intrinsic call opcodes consume their operand(s) and push the result
    "CALL_INTRINSIC_1": (-1, 1),
    "CALL_INTRINSIC_2": (-2, 1),
    # Misspelled historical keys: they match no opcode and are only kept so
    # that code reading them from this mapping keeps working.
    "LOAD_INTRISIC_1": (-1, 1),
    "LOAD_INTRISIC_2": (-2, 1),
}
def _check_location(
    location: Optional[int], location_name: str, min_value: int
) -> None:
    """Validate one component of an instruction location.

    None is always accepted. Otherwise ``location`` must be an int (TypeError
    if not) greater than or equal to ``min_value`` (ValueError if not).
    ``location_name`` is only used to build the error messages.
    """
    if location is None:
        return

    if not isinstance(location, int):
        raise TypeError(f"{location_name} must be an int, got {type(location)}")

    if location < min_value:
        raise ValueError(
            f"invalid {location_name}, expected >= {min_value}, got {location}"
        )


@dataclass(frozen=True)
class InstrLocation:
    """Location information for an instruction."""

    #: Lineno at which the instruction corresponds.
    #: Optional so that a location of None in an instruction encode an unset value.
    lineno: Optional[int]

    #: End lineno at which the instruction corresponds (Python 3.11+ only)
    end_lineno: Optional[int]

    #: Column offset at which the instruction corresponds (Python 3.11+ only)
    col_offset: Optional[int]

    #: End column offset at which the instruction corresponds (Python 3.11+ only)
    end_col_offset: Optional[int]

    __slots__ = ["lineno", "end_lineno", "col_offset", "end_col_offset"]

    def __init__(
        self,
        lineno: Optional[int],
        end_lineno: Optional[int],
        col_offset: Optional[int],
        end_col_offset: Optional[int],
    ) -> None:
        """Store the four components and check that they are consistent.

        Raises TypeError for a non-int component and ValueError for an out of
        range component or an inconsistent combination (end lineno without
        lineno or smaller than it, column offsets without complete lineno
        information, only one of the two column offsets, end column offset
        smaller than the column offset on a single line).
        """
        # Needed because we want the class to be frozen
        object.__setattr__(self, "lineno", lineno)
        object.__setattr__(self, "end_lineno", end_lineno)
        object.__setattr__(self, "col_offset", col_offset)
        object.__setattr__(self, "end_col_offset", end_col_offset)
        # In Python 3.11 0 is a valid lineno for some instructions (RESUME for example)
        _check_location(lineno, "lineno", 0 if sys.version_info >= (3, 11) else 1)
        _check_location(end_lineno, "end_lineno", 1)
        _check_location(col_offset, "col_offset", 0)
        _check_location(end_col_offset, "end_col_offset", 0)

        if end_lineno:
            if lineno is None:
                raise ValueError("End lineno specified with no lineno.")
            elif lineno > end_lineno:
                raise ValueError(
                    f"End lineno {end_lineno} cannot be smaller than lineno {lineno}."
                )

        if col_offset is not None or end_col_offset is not None:
            if lineno is None or end_lineno is None:
                raise ValueError(
                    "Column offsets were specified but lineno information are "
                    f"incomplete. Lineno: {lineno}, end lineno: {end_lineno}."
                )
            if end_col_offset is not None:
                if col_offset is None:
                    raise ValueError(
                        "End column offset specified with no column offset."
                    )
                # Column offset must be increasing inside a single line but
                # have no relations between different lines.
                elif lineno == end_lineno and col_offset > end_col_offset:
                    raise ValueError(
                        f"End column offset {end_col_offset} cannot be smaller than "
                        f"column offset: {col_offset}."
                    )
            else:
                raise ValueError(
                    "No end column offset was specified but a column offset was given."
                )

    @classmethod
    def from_positions(cls, position: "dis.Positions") -> "InstrLocation":  # type: ignore
        """Build a location from an object exposing the four position attributes.

        The instance is created through ``cls`` so that calling this on a
        subclass returns an instance of that subclass.
        """
        return cls(
            position.lineno,
            position.end_lineno,
            position.col_offset,
            position.end_col_offset,
        )
issue with the default value of arg def set(self, name: str, arg: A = UNSET) -> None: # type: ignore """Modify the instruction in-place. Replace name and arg attributes. Don't modify lineno. """ self._set(name, arg) def require_arg(self) -> bool: """Does the instruction require an argument?""" return opcode_has_argument(self._opcode) @property def name(self) -> str: return self._name @name.setter def name(self, name: str) -> None: self._set(name, self._arg) @property def opcode(self) -> int: return self._opcode @opcode.setter def opcode(self, op: int) -> None: if not isinstance(op, int): raise TypeError("operator code must be an int") if 0 <= op <= 255: name = _opcode.opname[op] valid = name != "<%r>" % op else: valid = False if not valid: raise ValueError("invalid operator code") self._set(name, self._arg) @property def arg(self) -> A: return self._arg @arg.setter def arg(self, arg: A): self._set(self._name, arg) @property def lineno(self) -> Union[int, _UNSET, None]: return self._location.lineno if self._location is not None else UNSET @lineno.setter def lineno(self, lineno: Union[int, _UNSET, None]) -> None: loc = self._location if loc and ( loc.end_lineno is not None or loc.col_offset is not None or loc.end_col_offset is not None ): raise RuntimeError( "The lineno of an instruction with detailed location information " "cannot be set." ) if lineno is UNSET: self._location = None else: self._location = InstrLocation(lineno, None, None, None) @property def location(self) -> Optional[InstrLocation]: return self._location @location.setter def location(self, location: Optional[InstrLocation]) -> None: if location and not isinstance(location, InstrLocation): raise TypeError( "The instr location must be an instance of InstrLocation or None." 
) self._location = location def stack_effect(self, jump: Optional[bool] = None) -> int: if not self.require_arg(): arg = None # 3.11 where LOAD_GLOBAL arg encode whether or we push a null # 3.12 does the same for LOAD_ATTR elif self.name in BITFLAG_INSTRUCTIONS and isinstance(self._arg, tuple): assert len(self._arg) == 2 arg = self._arg[0] # 3.12 does a similar trick for LOAD_SUPER_ATTR elif self.name in BITFLAG2_INSTRUCTIONS and isinstance(self._arg, tuple): assert len(self._arg) == 3 arg = self._arg[0] elif not isinstance(self._arg, int) or self._opcode in _opcode.hasconst: # Argument is either a non-integer or an integer constant, # not oparg. arg = 0 else: arg = self._arg return dis.stack_effect(self._opcode, arg, jump=jump) def pre_and_post_stack_effect(self, jump: Optional[bool] = None) -> Tuple[int, int]: # Allow to check that execution will not cause a stack underflow _effect = self.stack_effect(jump=jump) n = self.name if n in STATIC_STACK_EFFECTS: return STATIC_STACK_EFFECTS[n] elif n in DYNAMIC_STACK_EFFECTS: return DYNAMIC_STACK_EFFECTS[n](_effect, self.arg, jump) else: # For instruction with no special value we simply consider the effect apply # before execution return (_effect, 0) def copy(self: T) -> T: return self.__class__(self._name, self._arg, location=self._location) def has_jump(self) -> bool: return self._has_jump(self._opcode) def is_cond_jump(self) -> bool: """Is a conditional jump?""" # Ex: POP_JUMP_IF_TRUE, JUMP_IF_FALSE_OR_POP # IN 3.11+ the JUMP and the IF are no necessary adjacent in the name. 
name = self._name return "JUMP_" in name and "IF_" in name def is_uncond_jump(self) -> bool: """Is an unconditional jump?""" # JUMP_BACKWARD has been introduced in 3.11+ # JUMP_ABSOLUTE was removed in 3.11+ return self.name in { "JUMP_FORWARD", "JUMP_ABSOLUTE", "JUMP_BACKWARD", "JUMP_BACKWARD_NO_INTERRUPT", } def is_abs_jump(self) -> bool: """Is an absolute jump.""" return self._opcode in _opcode.hasjabs def is_forward_rel_jump(self) -> bool: """Is a forward relative jump.""" return self._opcode in _opcode.hasjrel and "BACKWARD" not in self._name def is_backward_rel_jump(self) -> bool: """Is a backward relative jump.""" return self._opcode in _opcode.hasjrel and "BACKWARD" in self._name def is_final(self) -> bool: if self._name in { "RETURN_VALUE", "RETURN_CONST", "RAISE_VARARGS", "RERAISE", "BREAK_LOOP", "CONTINUE_LOOP", }: return True if self.is_uncond_jump(): return True return False def __repr__(self) -> str: if self._arg is not UNSET: return "<%s arg=%r location=%s>" % (self._name, self._arg, self._location) else: return "<%s location=%s>" % (self._name, self._location) def __eq__(self, other: Any) -> bool: if type(self) is not type(other): return False return self._cmp_key() == other._cmp_key() # --- Private API _name: str _location: Optional[InstrLocation] _opcode: int _arg: A def _set(self, name: str, arg: A) -> None: if not isinstance(name, str): raise TypeError("operation name must be a str") try: opcode = _opcode.opmap[name] except KeyError: raise ValueError(f"invalid operation name: {name}") if opcode >= MIN_INSTRUMENTED_OPCODE: raise ValueError( f"operation {name} is an instrumented or pseudo opcode. 
" "Only base opcodes are supported" ) self._check_arg(name, opcode, arg) self._name = name self._opcode = opcode self._arg = arg @staticmethod def _has_jump(opcode) -> bool: return opcode in _opcode.hasjrel or opcode in _opcode.hasjabs @abstractmethod def _check_arg(self, name: str, opcode: int, arg: A) -> None: pass @abstractmethod def _cmp_key(self) -> Tuple[Optional[InstrLocation], str, Any]: pass InstrArg = Union[ int, str, Label, CellVar, FreeVar, "_bytecode.BasicBlock", Compare, Tuple[bool, str], Tuple[bool, bool, str], ] class Instr(BaseInstr[InstrArg]): __slots__ = () def _cmp_key(self) -> Tuple[Optional[InstrLocation], str, Any]: arg: Any = self._arg if self._opcode in _opcode.hasconst: arg = const_key(arg) return (self._location, self._name, arg) def _check_arg(self, name: str, opcode: int, arg: InstrArg) -> None: if name == "EXTENDED_ARG": raise ValueError( "only concrete instruction can contain EXTENDED_ARG, " "highlevel instruction can represent arbitrary argument without it" ) if opcode_has_argument(opcode): if arg is UNSET: raise ValueError("operation %s requires an argument" % name) else: if arg is not UNSET: raise ValueError("operation %s has no argument" % name) if self._has_jump(opcode): if not isinstance(arg, (Label, _bytecode.BasicBlock)): raise TypeError( "operation %s argument type must be " "Label or BasicBlock, got %s" % (name, type(arg).__name__) ) elif opcode in _opcode.hasfree: if not isinstance(arg, (CellVar, FreeVar)): raise TypeError( "operation %s argument must be CellVar " "or FreeVar, got %s" % (name, type(arg).__name__) ) elif opcode in _opcode.haslocal or opcode in _opcode.hasname: if name in BITFLAG_INSTRUCTIONS: if not ( isinstance(arg, tuple) and len(arg) == 2 and isinstance(arg[0], bool) and isinstance(arg[1], str) ): raise TypeError( "operation %s argument must be a tuple[bool, str], " "got %s (value=%s)" % (name, type(arg).__name__, str(arg)) ) elif name in BITFLAG2_INSTRUCTIONS: if not ( isinstance(arg, tuple) and len(arg) 
== 3 and isinstance(arg[0], bool) and isinstance(arg[1], bool) and isinstance(arg[2], str) ): raise TypeError( "operation %s argument must be a tuple[bool, bool, str], " "got %s (value=%s)" % (name, type(arg).__name__, str(arg)) ) elif not isinstance(arg, str): raise TypeError( "operation %s argument must be a str, " "got %s" % (name, type(arg).__name__) ) elif opcode in _opcode.hasconst: if isinstance(arg, Label): raise ValueError( "label argument cannot be used " "in %s operation" % name ) if isinstance(arg, _bytecode.BasicBlock): raise ValueError( "block argument cannot be used " "in %s operation" % name ) elif opcode in _opcode.hascompare: if not isinstance(arg, Compare): raise TypeError( "operation %s argument type must be " "Compare, got %s" % (name, type(arg).__name__) ) elif opcode in INTRINSIC_1OP: if not isinstance(arg, Intrinsic1Op): raise TypeError( "operation %s argument type must be " "Intrinsic1Op, got %s" % (name, type(arg).__name__) ) elif opcode in INTRINSIC_2OP: if not isinstance(arg, Intrinsic2Op): raise TypeError( "operation %s argument type must be " "Intrinsic2Op, got %s" % (name, type(arg).__name__) ) elif opcode_has_argument(opcode): _check_arg_int(arg, name) bytecode-0.15.1/src/bytecode/py.typed000066400000000000000000000000001451217043400174470ustar00rootroot00000000000000bytecode-0.15.1/tests/000077500000000000000000000000001451217043400145375ustar00rootroot00000000000000bytecode-0.15.1/tests/__init__.py000066400000000000000000000210131451217043400166450ustar00rootroot00000000000000import dis import sys import textwrap import types import unittest from bytecode import BasicBlock # noqa from bytecode import ( UNSET, Bytecode, ConcreteBytecode, ConcreteInstr, ControlFlowGraph, Instr, Label, ) def _format_instr_list(block, labels, lineno): instr_list = [] for instr in block: if not isinstance(instr, Label): if isinstance(instr, ConcreteInstr): cls_name = "ConcreteInstr" else: cls_name = "Instr" arg = instr.arg if arg is not UNSET: if 
isinstance(arg, Label): arg = labels[arg] elif isinstance(arg, BasicBlock): arg = labels[id(arg)] else: arg = repr(arg) if lineno: text = "%s(%r, %s, lineno=%s)" % ( cls_name, instr.name, arg, instr.lineno, ) else: text = "%s(%r, %s)" % (cls_name, instr.name, arg) else: if lineno: text = "%s(%r, lineno=%s)" % (cls_name, instr.name, instr.lineno) else: text = "%s(%r)" % (cls_name, instr.name) else: text = labels[instr] instr_list.append(text) return "[%s]" % ",\n ".join(instr_list) def dump_bytecode(code, lineno=False): """ Use this function to write unit tests: copy/paste its output to write a self.assertBlocksEqual() check. """ print() if isinstance(code, (Bytecode, ConcreteBytecode)): is_concrete = isinstance(code, ConcreteBytecode) if is_concrete: block = list(code) else: block = code indent = " " * 8 labels = {} for index, instr in enumerate(block): if isinstance(instr, Label): name = "label_instr%s" % index labels[instr] = name if is_concrete: name = "ConcreteBytecode" print(indent + "code = %s()" % name) if code.argcount: print(indent + "code.argcount = %s" % code.argcount) if code.posonlyargcount: print(indent + "code.posonlyargcount = %s" % code.posonlyargcount) if code.kwonlyargcount: print(indent + "code.kwargonlycount = %s" % code.kwonlyargcount) print(indent + "code.flags = %#x" % code.flags) if code.consts: print(indent + "code.consts = %r" % code.consts) if code.names: print(indent + "code.names = %r" % code.names) if code.varnames: print(indent + "code.varnames = %r" % code.varnames) for name in sorted(labels.values()): print(indent + "%s = Label()" % name) if is_concrete: text = indent + "code.extend(" indent = " " * len(text) else: text = indent + "code = Bytecode(" indent = " " * len(text) lines = _format_instr_list(code, labels, lineno).splitlines() last_line = len(lines) - 1 for index, line in enumerate(lines): if index == 0: print(text + lines[0]) elif index == last_line: print(indent + line + ")") else: print(indent + line) print() else: 
assert isinstance(code, ControlFlowGraph) labels = {} for block_index, block in enumerate(code): labels[id(block)] = "code[%s]" % block_index for block_index, block in enumerate(code): text = _format_instr_list(block, labels, lineno) if block_index != len(code) - 1: text += "," print(text) print() def get_code(source, *, filename="", function=False): source = textwrap.dedent(source).strip() code = compile(source, filename, "exec") if function: sub_code = [ const for const in code.co_consts if isinstance(const, types.CodeType) ] if len(sub_code) != 1: raise ValueError("unable to find function code") code = sub_code[0] return code def disassemble(source, *, filename="", function=False): code = get_code(source, filename=filename, function=function) return Bytecode.from_code(code) class TestCase(unittest.TestCase): def assertInstructionListEqual(self, l1, l2): # DO not check location information self.assertEqual(len(l1), len(l2)) for i1, i2 in zip(l1, l2): if isinstance(i1, Instr): self.assertEqual(i1.name, i2.name) if not isinstance(i1.arg, Label): self.assertEqual(i1.arg, i2.arg) else: self.assertIs(l1.index(i1.arg), l2.index(i2.arg)) self.assertEqual(i1.lineno, i2.lineno) else: assert type(i1) is type(i2) def assertCodeObjectEqual(self, code1: types.CodeType, code2: types.CodeType): self.assertEqual(code1.co_stacksize, code2.co_stacksize) self.assertEqual(code1.co_firstlineno, code2.co_firstlineno) self.assertSequenceEqual(code1.co_cellvars, code2.co_cellvars) self.assertSequenceEqual(code1.co_freevars, code2.co_freevars) self.assertSetEqual(set(code1.co_varnames), set(code2.co_varnames)) if sys.version_info >= (3, 11): self.assertSequenceEqual(code1.co_exceptiontable, code2.co_exceptiontable) # We do not compare linetables because CPython does not always optimize # the packing of the table self.assertSequenceEqual( list(code1.co_positions()), list(code2.co_positions()) ) self.assertEqual(code1.co_qualname, code2.co_qualname) elif sys.version_info >= (3, 10): 
self.assertSequenceEqual(list(code1.co_lines()), list(code2.co_lines())) else: # This is safer than directly comparing co_lnotab that sometimes contains # cruft self.assertSequenceEqual( list(dis.findlinestarts(code1)), list(dis.findlinestarts(code2)) ) # If names have been re-ordered compared the output of dis.instructions if sys.version_info >= (3, 12) and ( code1.co_names != code2.co_names or code1.co_varnames != code2.co_varnames ): instrs1 = list(dis.get_instructions(code1)) instrs2 = list(dis.get_instructions(code2)) self.assertEqual(len(instrs1), len(instrs2)) for i1, i2 in zip(instrs1, instrs2): self.assertEqual(i1.opcode, i2.opcode) self.assertEqual(i1.argval, i2.argval) elif sys.version_info >= (3, 9): self.assertSequenceEqual(code1.co_code, code2.co_code) # On Python 3.8 it happens that fast storage index vary in a roundtrip else: import opcode fast_storage = opcode.opmap["LOAD_FAST"], opcode.opmap["STORE_FAST"] load_const = opcode.opmap["LOAD_CONST"] load_by_name = ( opcode.opmap["LOAD_GLOBAL"], opcode.opmap["LOAD_NAME"], opcode.opmap["LOAD_METHOD"], ) if code1.co_code != code2.co_code: for b1, a1, b2, a2 in zip( code1.co_code[::2], code1.co_code[1::2], code2.co_code[::2], code2.co_code[1::2], ): if b1 != b2: self.assertSequenceEqual(code1.co_code, code2.co_code) # Do not check the argument of fast storage manipulation opcode elif b1 in fast_storage: pass elif b1 == load_const: self.assertEqual(code1.co_consts[a1], code2.co_consts[a2]) elif b1 in load_by_name: self.assertEqual(code1.co_names[a1], code2.co_names[a2]) elif a1 != a2: self.assertSequenceEqual(code1.co_code, code2.co_code) self.assertEqual(code1.co_flags, code2.co_flags) def assertBlocksEqual(self, code, *expected_blocks): self.assertEqual(len(code), len(expected_blocks)) for block1, block2 in zip(code, expected_blocks): self.assertInstructionListEqual(list(block1), block2) 
bytecode-0.15.1/tests/cell_free_vars_cases.py000066400000000000000000000034111451217043400212410ustar00rootroot00000000000000
# Functions making heavy use of cell and free variables, used to test bytecode
# round-tripping capabilities.
#
# In the trailing comments below, "cellvar" refers to the enclosing (outer)
# function and "freevar" to the nested function g that reads the variable.


# Local variable captured by a nested function; returns the nested function.
def simple_cellvar():  # a is a cellvar in the outer function
    a = 1

    def g():  # a is a freevar in g
        return a

    return g


# Same as above, except that the captured variable is an argument of the
# outer function.
def cellvar_share_name(a=1):  # a is a cellvar in the outer function, but stored as varname
    def g():  # a is a freevar in g
        return a

    return g


# Mixes a captured argument (a) with a captured plain local (b).
def cellvar_shared_and_unshared(a=1):  # a, b are cellvars in the outer function, but a is stored as varname
    b = 1

    def g():  # a, b are freevars in g
        return a + b

    return g


# Base class for the class B defined inside class_loadderef and class_super.
class A:
    a = 1

    def f(self):
        return 1


# The body of the nested class B reads `a` from the enclosing function scope.
def class_loadderef():
    a = 1

    class B(A):
        b = a

    return B.b


# NOTE: aliasing super, so that there is no LOAD_GLOBAL super, causes the omission
# of the required implicit __class__ cell, which breaks the subsequent call.
# Under Python 3.11 the creation of cellvars is made explicit by MAKE_CELL.
def class_super():
    class B(A):
        def f(self):
            super().f()

    return B().f


# NOTE: this is not really a cell var case but it ensures proper
# placements of CACHE vs labels
_localedirs = {}
_default_localedir = ""


# Records localedir for domain when one is given, then returns the directory
# registered for domain (falling back to _default_localedir).
def bindtextdomain(domain="", localedir=None):
    global _localedirs
    if localedir is not None:
        _localedirs[domain] = localedir
    return _localedirs.get(domain, _default_localedir)


# The cases exposed by this module; the script entry point below iterates over
# this list.
TEST_CASES = [
    simple_cellvar,
    cellvar_share_name,
    cellvar_shared_and_unshared,
    class_super,
    class_loadderef,
    bindtextdomain,
]


# When run as a script, print the source and the disassembly of every case.
if __name__ == "__main__":
    import dis
    import inspect

    for f in TEST_CASES:
        print("--------------------------------------------------------------")
        for line in inspect.getsourcelines(f)[0]:  # type: ignore
            print(line.rstrip())
        print()
        dis.dis(f.__code__)
        print()
bytecode-0.15.1/tests/exception_handling_cases.py000066400000000000000000000170271451217043400221400ustar00rootroot00000000000000
# flake8: noqa
import contextlib
import sys

# Functions attempting to cover most combinations of exception/error handling
# mechanisms, used to test bytecode round-tripping capabilities.
# NOTE we use call in except/finally clause expression requiring a larger stack usage def try_except(): try: a = 1 except Exception: return min(1, 2) return a def try_multi_except(): try: a = 1 except ValueError: return min(1, 2) except Exception: return min(1, 2) return a def try_finally(): try: a = 1 finally: c = min(1, 2) return a def try_except_else(): try: a = 1 except Exception: return min(1, 2) else: b = 1 return a def try_except_finally(): try: a = 1 except Exception: return min(1, 2) finally: c = 1 return a def try_except_else_finally(): try: a = 1 except Exception: return min(1, 2) else: b = 1 finally: c = min(1, 2) return a def nested_try(): try: a = 1 try: b = 2 except Exception: e = min(1, 2) c = 3 except Exception: d = min(1, 2) return a def nested_try_finally(): try: a = 1 try: b = 2 finally: e = min(1, 2) c = 3 finally: d = min(1, 2) return a # This case exhibits several pitfalls: # - a TryBegin appears in the block as a reraise requiring to create an artificial # TryBegin/TryEnd pair # - complex exit conditions through jumps # - TryEnd following a non conditional jump def nested_try_with_looping_construct(): try: try: a = 1 finally: b = min(1, 2) while a: c = 0 if min(5, 6): break finally: c = 3 return a # Test converting from bytecode to concrete in the presence of extended arg # which means the number of instruction before generating extended arg is not # the offset. # Here if we ignore this we end with wrong start/stop value in the table def try_except_with_extended_arg(): a = [1] b = [(1, 2), (3, 4)] for x in a: if a[0] is b[1]: try: a.append(b.index((a[0], 2))) except BrokenPipeError: sys.stdout.write(str(a)) sys.stdout.flush() else: c = 1 d = 2 b.append(a.append((c, d))) sys.stdout.write(str(b)) sys.stdout.flush() # Here extended arg can lead to omitting a TryEnd because we went over the offset # value at which we expected it. 
def try_except_with_extended_arg2(): a = list(range(10)) with contextlib.nullcontext() as selector: while a.pop(): # timeout = self._remaining_time(endtime) if sys is not None and sys.hexversion < 0: sys.stdout.write(a) raise RuntimeError("test") for key in sys.version_info: # Dead code for the execution but help trigger the bug this test # is meant to avoid regressing. if key is sys.stdin: chunk = a[self._input_offset : self._input_offset + _PIPE_BUF] try: self._input_offset += os.write(key.fd, chunk) except BrokenPipeError: selector.unregister(key.fileobj) key.fileobj.close() else: if self._input_offset >= len(self._input): selector.unregister(key.fileobj) key.fileobj.close() def try_except_in_except(): try: a = 1 except Exception: d = 4 try: b = 2 except Exception: return min(1, 2) c = 3 return a def try_finally_in_except(): try: a = min(1, 2) except Exception: try: b = min(3, 4) finally: c = 1 return c return a def try_except_in_else(): try: a = min(1, 2) except Exception: a = 1 else: try: b = min(3, 4) except Exception: b = 1 return b return a def try_finally_in_else(): try: a = 1 except ValueError as e: return else: try: pass finally: a = 1 def try_except_in_finally(): try: a = min(1, 2) finally: try: a = max(1, 2) except Exception: a = 1 return a def try_finally_in_finally(): a = 0 try: a = min(1, 2) finally: try: a = max(1, 2) finally: a = min(a, 1) return a # Trick since the syntax does not exist pre-3.11 if sys.version_info >= (3, 11): src = """ def try_except_group(): try: a = 1 except* ValueError: b = min(1, 2) return a """ exec(src) def with_no_store(): with contextlib.nullcontext(1): a = 1 return a def with_store(): with contextlib.nullcontext(1) as b: a = 1 return a def try_with(): try: with contextlib.nullcontext(1): a = 1 except Exception: return min(1, 2) return a def with_try(): with contextlib.nullcontext(1): try: b = 1 except Exception: return min(1, 2) return b async def async_with_no_store(): async with contextlib.nullcontext(): a = 1 return 
a async def async_with_store(): async with contextlib.nullcontext() as b: a = 1 return a async def try_async_with(): try: async with contextlib.nullcontext(1): a = 1 except Exception: return min(1, 2) return a async def async_with_try(): async with contextlib.nullcontext(1): try: b = 1 except Exception: return min(1, 2) return b TEST_CASES = [ try_except, try_multi_except, try_finally, try_except_else, try_except_finally, try_except_else_finally, nested_try, nested_try_finally, nested_try_with_looping_construct, try_except_in_except, try_except_in_else, try_except_in_finally, try_finally_in_except, try_finally_in_else, try_finally_in_finally, try_except_with_extended_arg, try_except_with_extended_arg2, with_no_store, with_store, try_with, with_try, async_with_no_store, async_with_store, try_async_with, async_with_try, ] if sys.version_info >= (3, 11): TEST_CASES.insert(0, try_except_group) # type: ignore # On 3.8 those two cases fail due to a re-ordering of the fast variables if sys.version_info < (3, 9): TEST_CASES.remove(try_except_else_finally) TEST_CASES.remove(try_except_finally) # Fail due to a varname re-ordering TEST_CASES.remove(try_finally) TEST_CASES.remove(nested_try_finally) TEST_CASES.remove(try_finally_in_except) TEST_CASES.remove(nested_try_with_looping_construct) TEST_CASES.remove(try_except_with_extended_arg) TEST_CASES.remove(try_except_with_extended_arg2) if __name__ == "__main__": import dis import inspect for f in TEST_CASES: print("--------------------------------------------------------------") for l in inspect.getsourcelines(f)[0]: print(l.rstrip()) print() dis.dis(f) print() bytecode-0.15.1/tests/frameworks/000077500000000000000000000000001451217043400167175ustar00rootroot00000000000000bytecode-0.15.1/tests/frameworks/function.py000066400000000000000000000122671451217043400211260ustar00rootroot00000000000000from collections import deque from collections.abc import Iterator from os.path import abspath from types import FunctionType, 
ModuleType from typing import Any, Dict, Optional, Protocol, Tuple, Type, Union, cast from module import origin # type: ignore FunctionContainerType = Union[ type, property, classmethod, staticmethod, Tuple, ModuleType ] ContainerKey = Union[str, int, Type[staticmethod], Type[classmethod]] CONTAINER_TYPES = (type, property, classmethod, staticmethod) def set_cell_contents(cell, contents): # type: ignore[misc] cell.cell_contents = contents class FullyNamed(Protocol): """A fully named object.""" __name__ = None # type: Optional[str] __fullname__ = None # type: Optional[str] class FullyNamedFunction(FullyNamed): """A fully named function object.""" def __call__(self, *args, **kwargs): pass class ContainerIterator(Iterator, FullyNamedFunction): """Wrapper around different types of function containers. A container comes with an origin, i.e. a parent container and a position within it in the form of a key. """ def __init__( self, container, # type: FunctionContainerType origin=None, # type: Optional[Union[Tuple[ContainerIterator, ContainerKey], Tuple[FullyNamedFunction, str]]] ): # type: (...) 
-> None if isinstance(container, (type, ModuleType)): self._iter = iter(container.__dict__.items()) self.__name__ = container.__name__ elif isinstance(container, tuple): self._iter = iter(enumerate(_.cell_contents for _ in container)) # type: ignore[arg-type] self.__name__ = "" elif isinstance(container, property): self._iter = iter( (m, getattr(container, a)) for m, a in { ("getter", "fget"), ("setter", "fset"), ("deleter", "fdel"), } ) assert container.fget is not None self.__name__ = container.fget.__name__ elif isinstance(container, (classmethod, staticmethod)): self._iter = iter([(type(container), container.__func__)]) # type: ignore[list-item] self.__name__ = None else: raise TypeError("Unsupported container type: %s", type(container)) self._container = container if origin is not None and origin[0].__fullname__ is not None: origin_fullname = origin[0].__fullname__ self.__fullname__ = ( ".".join((origin_fullname, self.__name__)) if self.__name__ else origin_fullname ) else: self.__fullname__ = self.__name__ def __iter__(self): # type: () -> Iterator[Tuple[ContainerKey, Any]] return self._iter def __next__(self): # type: () -> Tuple[ContainerKey, Any] return next(self._iter) next = __next__ def _collect_functions(module): # type: (ModuleType) -> Dict[str, FullyNamedFunction] """Collect functions from a given module.""" assert isinstance(module, ModuleType) path = origin(module) containers = deque([ContainerIterator(module)]) functions = {} seen_containers = set() seen_functions = set() while containers: c = containers.pop() if id(c._container) in seen_containers: continue seen_containers.add(id(c._container)) for k, o in c: code = getattr(o, "__code__", None) if isinstance(o, FunctionType) else None if code is not None and abspath(code.co_filename) == path: if o not in seen_functions: seen_functions.add(o) o = cast(FullyNamedFunction, o) o.__fullname__ = ( ".".join((c.__fullname__, o.__name__)) if c.__fullname__ else o.__name__ ) for name in (k, o.__name__) if 
isinstance(k, str) else (o.__name__,): fullname = ( ".".join((c.__fullname__, name)) if c.__fullname__ else name ) functions[fullname] = o try: if o.__closure__: containers.append( ContainerIterator(o.__closure__, origin=(o, "")) ) except AttributeError: pass elif isinstance(o, CONTAINER_TYPES): if isinstance(o, property) and not isinstance(o.fget, FunctionType): continue containers.append(ContainerIterator(o, origin=(c, k))) return functions class FunctionDiscovery(dict): """Discover all function objects in a module.""" def __init__(self, module): # type: (ModuleType) -> None super(FunctionDiscovery, self).__init__() self._module = module functions = _collect_functions(module) seen_functions = set() for fname, function in functions.items(): self[fname] = function seen_functions.add(function) bytecode-0.15.1/tests/frameworks/module.py000066400000000000000000000146771451217043400205750ustar00rootroot00000000000000import sys from importlib.abc import Loader from importlib.machinery import ModuleSpec from importlib.util import find_spec from pathlib import Path from types import ModuleType from typing import Any, Callable, Dict, Optional, Set, Union, cast def origin(module): # type: (ModuleType) -> str """Get the origin source file of the module.""" try: assert module.__file__ is not None orig = str(Path(module.__file__).resolve()) # type: ignore[type-var] except (AttributeError, TypeError): # Module is probably only partially initialised, so we look at its # spec instead try: orig = str(Path(module.__spec__.origin).resolve()) # type: ignore except (AttributeError, ValueError, TypeError): orig = None if orig is not None and Path(orig).is_file(): if orig.endswith(".pyc"): orig = orig[:-1] return orig return "" def find_loader(fullname): # type: (str) -> Optional[Loader] return getattr(find_spec(fullname), "loader", None) class _ImportHookChainedLoader(Loader): def __init__(self, loader): # type: (Loader) -> None self.loader = loader self.callbacks = {} # type: 
Dict[Any, Callable[[ModuleType], None]] # DEV: load_module is deprecated so we define it at runtime if also # defined by the default loader. We also check and define for the # methods that are supposed to replace the load_module functionality. if hasattr(loader, "load_module"): self.load_module = self._load_module # type: ignore[assignment] if hasattr(loader, "create_module"): self.create_module = self._create_module # type: ignore[assignment] if hasattr(loader, "exec_module"): self.exec_module = self._exec_module # type: ignore[assignment] def __getattribute__(self, name): if name == "__class__": # Make isinstance believe that self is also an instance of # type(self.loader). This is required, e.g. by some tools, like # slotscheck, that can handle known loaders only. return self.loader.__class__ return super(_ImportHookChainedLoader, self).__getattribute__(name) def __getattr__(self, name): # Proxy any other attribute access to the underlying loader. return getattr(self.loader, name) def add_callback(self, key, callback): # type: (Any, Callable[[ModuleType], None]) -> None self.callbacks[key] = callback def _load_module(self, fullname): # type: (str) -> ModuleType module = self.loader.load_module(fullname) for callback in self.callbacks.values(): callback(module) return module def _create_module(self, spec): return self.loader.create_module(spec) def _exec_module(self, module): self.loader.exec_module(module) for callback in self.callbacks.values(): callback(module) class ModuleWatchdog: """Module watchdog. Replace the standard ``sys.modules`` dictionary to detect when modules are loaded/unloaded. This is also responsible for triggering any registered import hooks. Subclasses might customize the default behavior by overriding the ``after_import`` method, which is triggered on every module import, once the subclass is installed. 
""" _instance = None # type: Optional[ModuleWatchdog] def __init__(self): # type: () -> None self._finding = set() # type: Set[str] def _add_to_meta_path(self): # type: () -> None sys.meta_path.insert(0, self) # type: ignore[arg-type] @classmethod def _find_in_meta_path(cls): # type: () -> Optional[int] for i, meta_path in enumerate(sys.meta_path): if type(meta_path) is cls: return i return None @classmethod def _remove_from_meta_path(cls): # type: () -> None i = cls._find_in_meta_path() if i is not None: sys.meta_path.pop(i) def after_import(self, module): raise NotImplementedError() def find_module(self, fullname, path=None): # type: (str, Optional[str]) -> Union[ModuleWatchdog, _ImportHookChainedLoader, None] if fullname in self._finding: return None self._finding.add(fullname) try: loader = find_loader(fullname) if loader is not None: if not isinstance(loader, _ImportHookChainedLoader): loader = _ImportHookChainedLoader(loader) loader.add_callback(type(self), self.after_import) return loader finally: self._finding.remove(fullname) return None def find_spec(self, fullname, path=None, target=None): # type: (str, Optional[str], Optional[ModuleType]) -> Optional[ModuleSpec] if fullname in self._finding: return None self._finding.add(fullname) try: spec = find_spec(fullname) if spec is None: return None loader = getattr(spec, "loader", None) if loader is not None: if not isinstance(loader, _ImportHookChainedLoader): spec.loader = _ImportHookChainedLoader(loader) cast(_ImportHookChainedLoader, spec.loader).add_callback( type(self), self.after_import ) return spec finally: self._finding.remove(fullname) @classmethod def _check_installed(cls): # type: () -> None if not cls.is_installed(): raise RuntimeError("%s is not installed" % cls.__name__) @classmethod def install(cls): # type: () -> None """Install the module watchdog.""" if cls.is_installed(): raise RuntimeError("%s is already installed" % cls.__name__) cls._instance = cls() cls._instance._add_to_meta_path() 
@classmethod def is_installed(cls): """Check whether this module watchdog class is installed.""" return cls._instance is not None and type(cls._instance) is cls @classmethod def uninstall(cls): # type: () -> None """Uninstall the module watchdog. This will uninstall only the most recently installed instance of this class. """ cls._check_installed() cls._remove_from_meta_path() bytecode-0.15.1/tests/frameworks/sitecustomize.py000066400000000000000000000020171451217043400222000ustar00rootroot00000000000000import dis import io import sys import typing as t from types import FunctionType, ModuleType from function import FunctionDiscovery # type: ignore from module import ModuleWatchdog # type: ignore from bytecode import Bytecode, ControlFlowGraph class FunctionCollector(ModuleWatchdog): def after_import(self, module): # type: (ModuleType) -> None discovery = FunctionDiscovery(module) for fname, f in discovery.items(): function = t.cast(FunctionType, f) try: byt = Bytecode.from_code(function.__code__) cfg = ControlFlowGraph.from_bytecode(byt) new = cfg.to_code() # Check we can still disassemble the code dis.dis(new, file=io.StringIO()) except Exception: print("Failed to recompile %s" % fname) dis.dis(function) raise else: function.__code__ = new print("Collecting functions") FunctionCollector.install() bytecode-0.15.1/tests/long_lines_example.py000066400000000000000000000053751451217043400207670ustar00rootroot00000000000000# flake8: noqa # fmt: off def long_lines(): a = 1 b = 1 if a > 1 else 2 if a > 0 else 3 if a > 4 else 23 if a > 1 else 2 if a > 0 else 3 if a > 4 else 23 if a > 1 else 2 if a > 0 else 3 if a > 4 else 23 if a > 1 else 2 if a > 0 else 3 if a > 4 else 23 if a > 1 else 2 if a > 0 else 3 if a > 4 else 23 if a > 1 else 2 if a > 0 else 3 if a > 4 else 23 if a > 1 else 2 if a > 0 else 3 if a > 4 else 23 if a > 1 else 2 if a > 0 else 3 if a > 4 else 23 if a > 1 else 2 if a > 0 else 3 if a > 4 else 23 if a > 1 else 2 if a > 0 else 3 if a > 4 else 23 # Notice 
the huge space here to the next instruction (we want to hit some odd conditions # in the line table generation, which is why this sample file has such long lines). c = 1 if b > 1 else 2 if b > 0 else 3 if a > 4 else 23 if a > 1 else 2 if a > 0 else 3 if a > 4 else 23 if a > 1 else 2 if a > 0 else 3 if a > 4 else 23 if a > 1 else 2 if a > 0 else 3 if a > 4 else 23 if a > 1 else 2 if a > 0 else 3 if a > 4 else 23 if a > 1 else 2 if a > 0 else 3 if a > 4 else 23 if a > 1 else 2 if a > 0 else 3 if a > 4 else 23 if a > 1 else 2 if a > 0 else 3 if a > 4 else 23 if a > 1 else 2 if a > 0 else 3 if a > 4 else 23 if a > 1 else 2 if a > 0 else 3 if a > 4 else 23 d = 1 if c > 1 else 2 if c > 0 else 3 if a > 4 else 23 if a > 1 else 2 if a > 0 else 3 if a > 4 else 23 if a > 1 else 2 if a > 0 else 3 if a > 4 else 23 if a > 1 else 2 if a > 0 else 3 if a > 4 else 23 if a > 1 else 2 if a > 0 else 3 if a > 4 else 23 if a > 1 else 2 if a > 0 else 3 if a > 4 else 23 if a > 1 else 2 if a > 0 else 3 if a > 4 else 23 if a > 1 else 2 if a > 0 else 3 if a > 4 else 23 if a > 1 else 2 if a > 0 else 3 if a > 4 else 23 if a > 1 else 2 if a > 0 else 3 if a > 4 else 23 e = d + 1 return e # fmt: on bytecode-0.15.1/tests/test_bytecode.py000066400000000000000000000573741451217043400177660ustar00rootroot00000000000000#!/usr/bin/env python3 import asyncio import inspect import sys import textwrap import types import unittest from bytecode import Bytecode, ConcreteInstr, FreeVar, Instr, Label, SetLineno from bytecode.instr import BinaryOp from . 
import TestCase, get_code class BytecodeTests(TestCase): maxDiff = 80 * 100 def test_constructor(self): code = Bytecode() self.assertEqual(code.name, "") self.assertEqual(code.filename, "") self.assertEqual(code.flags, 0) self.assertEqual(code, []) def test_invalid_types(self): code = Bytecode() code.append(123) with self.assertRaises(ValueError): list(code) with self.assertRaises(ValueError): code.legalize() with self.assertRaises(ValueError): Bytecode([123]) def test_legalize(self): code = Bytecode() code.first_lineno = 3 code.extend( [ Instr("LOAD_CONST", 7), Instr("STORE_NAME", "x"), Instr("LOAD_CONST", 8, lineno=4), Instr("STORE_NAME", "y"), Label(), SetLineno(5), Instr("LOAD_CONST", 9, lineno=6), Instr("STORE_NAME", "z"), ] ) code.legalize() self.assertListEqual( code, [ Instr("LOAD_CONST", 7, lineno=3), Instr("STORE_NAME", "x", lineno=3), Instr("LOAD_CONST", 8, lineno=4), Instr("STORE_NAME", "y", lineno=4), Label(), Instr("LOAD_CONST", 9, lineno=5), Instr("STORE_NAME", "z", lineno=5), ], ) def test_slice(self): code = Bytecode() code.first_lineno = 3 code.extend( [ Instr("LOAD_CONST", 7), Instr("STORE_NAME", "x"), SetLineno(4), Instr("LOAD_CONST", 8), Instr("STORE_NAME", "y"), SetLineno(5), Instr("LOAD_CONST", 9), Instr("STORE_NAME", "z"), ] ) sliced_code = code[:] self.assertEqual(code, sliced_code) for name in ( "argcount", "posonlyargcount", "kwonlyargcount", "first_lineno", "name", "filename", "docstring", "cellvars", "freevars", "argnames", ): self.assertEqual( getattr(code, name, None), getattr(sliced_code, name, None) ) def test_copy(self): code = Bytecode() code.first_lineno = 3 code.extend( [ Instr("LOAD_CONST", 7), Instr("STORE_NAME", "x"), SetLineno(4), Instr("LOAD_CONST", 8), Instr("STORE_NAME", "y"), SetLineno(5), Instr("LOAD_CONST", 9), Instr("STORE_NAME", "z"), ] ) copy_code = code.copy() self.assertEqual(code, copy_code) for name in ( "argcount", "posonlyargcount", "kwonlyargcount", "first_lineno", "name", "filename", "docstring", "cellvars", 
"freevars", "argnames", ): self.assertEqual(getattr(code, name, None), getattr(copy_code, name, None)) def test_eq(self): code = get_code( """ if test: x = 1 else: x = 2 """ ) b1 = Bytecode.from_code(code) b2 = Bytecode.from_code(code) self.assertEqual(b1, b2) def test_eq_with_try(self): code = get_code( """ try: x = 1 except Exception: pass finally: print() """ ) b1 = Bytecode.from_code(code) b2 = Bytecode.from_code(code) self.assertEqual(b1, b2) def test_from_code(self): code = get_code( """ if test: x = 1 else: x = 2 """ ) bytecode = Bytecode.from_code(code) label_else = Label() label_exit = Label() if sys.version_info < (3, 10): self.assertEqual( bytecode, [ Instr("LOAD_NAME", "test", lineno=1), Instr("POP_JUMP_IF_FALSE", label_else, lineno=1), Instr("LOAD_CONST", 1, lineno=2), Instr("STORE_NAME", "x", lineno=2), Instr("JUMP_FORWARD", label_exit, lineno=2), label_else, Instr("LOAD_CONST", 2, lineno=4), Instr("STORE_NAME", "x", lineno=4), label_exit, Instr("LOAD_CONST", None, lineno=4), Instr("RETURN_VALUE", lineno=4), ], ) # Control flow handling appears to have changed under Python 3.10 elif sys.version_info < (3, 11): self.assertEqual( bytecode, [ Instr("LOAD_NAME", "test", lineno=1), Instr("POP_JUMP_IF_FALSE", label_else, lineno=1), Instr("LOAD_CONST", 1, lineno=2), Instr("STORE_NAME", "x", lineno=2), Instr("LOAD_CONST", None, lineno=2), Instr("RETURN_VALUE", lineno=2), label_else, Instr("LOAD_CONST", 2, lineno=4), Instr("STORE_NAME", "x", lineno=4), Instr("LOAD_CONST", None, lineno=4), Instr("RETURN_VALUE", lineno=4), ], ) elif sys.version_info < (3, 12): self.assertInstructionListEqual( bytecode, [ Instr("RESUME", 0, lineno=0), Instr("LOAD_NAME", "test", lineno=1), Instr("POP_JUMP_FORWARD_IF_FALSE", label_else, lineno=1), Instr("LOAD_CONST", 1, lineno=2), Instr("STORE_NAME", "x", lineno=2), Instr("LOAD_CONST", None, lineno=2), Instr("RETURN_VALUE", lineno=2), label_else, Instr("LOAD_CONST", 2, lineno=4), Instr("STORE_NAME", "x", lineno=4), 
Instr("LOAD_CONST", None, lineno=4), Instr("RETURN_VALUE", lineno=4), ], ) else: self.assertInstructionListEqual( bytecode, [ Instr("RESUME", 0, lineno=0), Instr("LOAD_NAME", "test", lineno=1), Instr("POP_JUMP_IF_FALSE", label_else, lineno=1), Instr("LOAD_CONST", 1, lineno=2), Instr("STORE_NAME", "x", lineno=2), Instr("RETURN_CONST", None, lineno=2), label_else, Instr("LOAD_CONST", 2, lineno=4), Instr("STORE_NAME", "x", lineno=4), Instr("RETURN_CONST", None, lineno=4), ], ) def test_from_code_freevars(self): ns = {} exec( textwrap.dedent( """ def create_func(): x = 1 def func(): return x return func func = create_func() """ ), ns, ns, ) code = ns["func"].__code__ bytecode = Bytecode.from_code(code) self.assertInstructionListEqual( bytecode, ( [ Instr("COPY_FREE_VARS", 1, lineno=None), Instr("RESUME", 0, lineno=4), ] if sys.version_info >= (3, 11) else [] ) + [ Instr("LOAD_DEREF", FreeVar("x"), lineno=5), Instr("RETURN_VALUE", lineno=5), ], ) def test_from_code_load_fast(self): code = get_code( """ def func(): x = 33 y = x """, function=True, ) code = Bytecode.from_code(code) self.assertInstructionListEqual( code, ( [ Instr("RESUME", 0, lineno=1), ] if sys.version_info >= (3, 11) else [] ) + [ Instr("LOAD_CONST", 33, lineno=2), Instr("STORE_FAST", "x", lineno=2), Instr("LOAD_FAST", "x", lineno=3), Instr("STORE_FAST", "y", lineno=3), ] + ( [Instr("RETURN_CONST", None, lineno=3)] if sys.version_info >= (3, 12) else [ Instr("LOAD_CONST", None, lineno=3), Instr("RETURN_VALUE", lineno=3), ] ), ) def test_setlineno(self): # x = 7 # y = 8 # z = 9 code = Bytecode() code.first_lineno = 3 code.extend( [ Instr("LOAD_CONST", 7), Instr("STORE_NAME", "x"), SetLineno(4), Instr("LOAD_CONST", 8), Instr("STORE_NAME", "y"), SetLineno(5), Instr("LOAD_CONST", 9), Instr("STORE_NAME", "z"), ] ) concrete = code.to_concrete_bytecode() self.assertEqual(concrete.consts, [7, 8, 9]) self.assertEqual(concrete.names, ["x", "y", "z"]) self.assertListEqual( list(concrete), [ 
ConcreteInstr("LOAD_CONST", 0, lineno=3), ConcreteInstr("STORE_NAME", 0, lineno=3), ConcreteInstr("LOAD_CONST", 1, lineno=4), ConcreteInstr("STORE_NAME", 1, lineno=4), ConcreteInstr("LOAD_CONST", 2, lineno=5), ConcreteInstr("STORE_NAME", 2, lineno=5), ], ) def test_to_code(self): code = Bytecode() code.first_lineno = 50 code.extend( [ Instr("LOAD_NAME", "print"), Instr("LOAD_CONST", "%s"), Instr( "LOAD_GLOBAL", (False, "a") if sys.version_info >= (3, 11) else "a" ), Instr("BINARY_OP", BinaryOp.ADD) if sys.version_info >= (3, 11) else Instr("BINARY_ADD"), ] # For 3.12+ we need a NULL before a CALL to a free function + ([Instr("PUSH_NULL")] if sys.version_info >= (3, 12) else []) + [ # On 3.11 we should have a pre-call Instr("CALL" if sys.version_info >= (3, 11) else "CALL_FUNCTION", 1), Instr("RETURN_VALUE"), ] ) co = code.to_code() # hopefully this is obvious from inspection? :-) self.assertEqual(co.co_stacksize, 3) co = code.to_code(stacksize=42, compute_exception_stack_depths=False) self.assertEqual(co.co_stacksize, 42) def test_negative_size_unary(self): opnames = ( "UNARY_POSITIVE", "UNARY_NEGATIVE", "UNARY_NOT", "UNARY_INVERT", ) for opname in opnames: # Replaced by an intrinsic in 3.12 if sys.version_info >= (3, 12) and opname == "UNARY_POSITIVE": continue with self.subTest(opname): code = Bytecode() code.first_lineno = 1 code.extend([Instr(opname)]) with self.assertRaises(RuntimeError): code.compute_stacksize() def test_negative_size_unary_with_disable_check_of_pre_and_post(self): opnames = ( "UNARY_POSITIVE", "UNARY_NEGATIVE", "UNARY_NOT", "UNARY_INVERT", ) for opname in opnames: # Replaced by an intrinsic in 3.12 if sys.version_info >= (3, 12) and opname == "UNARY_POSITIVE": continue with self.subTest(opname): code = Bytecode() code.first_lineno = 1 code.extend([Instr(opname)]) co = code.to_code(check_pre_and_post=False) self.assertEqual(co.co_stacksize, 0) def test_negative_size_binary(self): operations = ( "SUBSCR", # Subscr is special "POWER", 
"MULTIPLY", "MATRIX_MULTIPLY", "FLOOR_DIVIDE", "TRUE_DIVIDE", "ADD", "SUBTRACT", "LSHIFT", "RSHIFT", "AND", "XOR", "OR", ) if sys.version_info >= (3, 11): operations += ("REMAINDER",) else: operations += ("MODULO",) for opname in operations: ops = (opname,) if opname != "SUBSCR": ops += ("INPLACE_" + opname,) for op in ops: with self.subTest(op): code = Bytecode() code.first_lineno = 1 if sys.version_info >= (3, 11): if op == "SUBSCR": i = Instr("BINARY_SUBSCR") else: i = Instr("BINARY_OP", getattr(BinaryOp, op)) else: if "INPLACE" not in op: op = "BINARY_" + op i = Instr(op) code.extend([Instr("LOAD_CONST", 1), i]) with self.assertRaises(RuntimeError): code.compute_stacksize() def test_negative_size_binary_with_disable_check_of_pre_and_post(self): operations = ( "SUBSCR", # Subscr is special "POWER", "MULTIPLY", "MATRIX_MULTIPLY", "FLOOR_DIVIDE", "TRUE_DIVIDE", "ADD", "SUBTRACT", "LSHIFT", "RSHIFT", "AND", "XOR", "OR", ) if sys.version_info >= (3, 11): operations += ("REMAINDER",) else: operations += ("MODULO",) for opname in operations: ops = (opname,) if opname != "SUBSCR": ops += ("INPLACE_" + opname,) for op in ops: with self.subTest(op): code = Bytecode() code.first_lineno = 1 if sys.version_info >= (3, 11): if op == "SUBSCR": i = Instr("BINARY_SUBSCR") else: i = Instr("BINARY_OP", getattr(BinaryOp, op)) else: if "INPLACE" not in op: op = "BINARY_" + op i = Instr(op) code.extend([Instr("LOAD_CONST", 1), i]) co = code.to_code(check_pre_and_post=False) self.assertEqual(co.co_stacksize, 1) def test_negative_size_call(self): code = Bytecode() code.first_lineno = 1 code.extend( [Instr("CALL" if sys.version_info >= (3, 11) else "CALL_FUNCTION", 0)] ) with self.assertRaises(RuntimeError): code.compute_stacksize() def test_negative_size_unpack(self): opnames = ( "UNPACK_SEQUENCE", "UNPACK_EX", ) for opname in opnames: with self.subTest(opname): code = Bytecode() code.first_lineno = 1 code.extend([Instr(opname, 1)]) with self.assertRaises(RuntimeError): 
code.compute_stacksize() def test_negative_size_build(self): opnames = ( "BUILD_TUPLE", "BUILD_LIST", "BUILD_SET", ) opnames = (*opnames, "BUILD_STRING") for opname in opnames: with self.subTest(opname): code = Bytecode() code.first_lineno = 1 code.extend([Instr(opname, 1)]) with self.assertRaises(RuntimeError): code.compute_stacksize() def test_negative_size_build_map(self): code = Bytecode() code.first_lineno = 1 code.extend([Instr("LOAD_CONST", 1), Instr("BUILD_MAP", 1)]) with self.assertRaises(RuntimeError): code.compute_stacksize() def test_negative_size_build_map_with_disable_check_of_pre_and_post(self): code = Bytecode() code.first_lineno = 1 code.extend([Instr("LOAD_CONST", 1), Instr("BUILD_MAP", 1)]) co = code.to_code(check_pre_and_post=False) self.assertEqual(co.co_stacksize, 1) def test_negative_size_build_const_map(self): code = Bytecode() code.first_lineno = 1 code.extend([Instr("LOAD_CONST", ("a",)), Instr("BUILD_CONST_KEY_MAP", 1)]) with self.assertRaises(RuntimeError): code.compute_stacksize() def test_negative_size_build_const_map_with_disable_check_of_pre_and_post(self): code = Bytecode() code.first_lineno = 1 code.extend([Instr("LOAD_CONST", ("a",)), Instr("BUILD_CONST_KEY_MAP", 1)]) co = code.to_code(check_pre_and_post=False) self.assertEqual(co.co_stacksize, 1) def test_empty_dup(self): if sys.version_info >= (3, 11): self.skipTest("Instructions DUP_TOP do not exist in 3.11+") code = Bytecode() code.first_lineno = 1 code.extend([Instr("DUP_TOP")]) with self.assertRaises(RuntimeError): code.compute_stacksize() def test_not_enough_dup(self): if sys.version_info >= (3, 11): self.skipTest("Instructions DUP_TOP_TWO do not exist in 3.11+") code = Bytecode() code.first_lineno = 1 code.extend([Instr("LOAD_CONST", 1), Instr("DUP_TOP_TWO")]) with self.assertRaises(RuntimeError): code.compute_stacksize() def test_not_enough_rot(self): if sys.version_info >= (3, 11): self.skipTest("Instructions ROT_* do not exist in 3.11+") opnames = ["ROT_TWO", 
"ROT_THREE", "ROT_FOUR"] for opname in opnames: with self.subTest(opname): code = Bytecode() code.first_lineno = 1 code.extend([Instr("LOAD_CONST", 1), Instr(opname)]) with self.assertRaises(RuntimeError): code.compute_stacksize() def test_not_enough_rot_with_disable_check_of_pre_and_post(self): if sys.version_info >= (3, 11): self.skipTest("Instructions ROT_* do not exist in 3.11+") opnames = ["ROT_TWO", "ROT_THREE", "ROT_FOUR"] for opname in opnames: with self.subTest(opname): code = Bytecode() code.first_lineno = 1 code.extend([Instr("LOAD_CONST", 1), Instr(opname)]) co = code.to_code(check_pre_and_post=False) self.assertEqual(co.co_stacksize, 1) def test_not_enough_copy(self): if sys.version_info < (3, 11): self.skipTest("Instruction COPY does not exist before 3.11") code = Bytecode() code.first_lineno = 1 code.extend([Instr("LOAD_CONST", 1), Instr("COPY", 2)]) with self.assertRaises(RuntimeError): code.compute_stacksize() def test_not_enough_copy_with_disable_check_of_pre_and_post(self): if sys.version_info < (3, 11): self.skipTest("Instruction COPY does not exist before 3.11") code = Bytecode() code.first_lineno = 1 code.extend([Instr("LOAD_CONST", 1), Instr("COPY", 2)]) co = code.to_code(check_pre_and_post=False) self.assertEqual(co.co_stacksize, 2) def test_not_enough_swap(self): if sys.version_info < (3, 11): self.skipTest("Instruction SWAP does not exist before 3.11") code = Bytecode() code.first_lineno = 1 code.extend([Instr("LOAD_CONST", 1), Instr("SWAP", 2)]) with self.assertRaises(RuntimeError): code.compute_stacksize() def test_not_enough_swap_with_disable_check_of_pre_and_post(self): if sys.version_info < (3, 11): self.skipTest("Instruction SWAP does not exist before 3.11") code = Bytecode() code.first_lineno = 1 code.extend([Instr("LOAD_CONST", 1), Instr("SWAP", 2)]) co = code.to_code(check_pre_and_post=False) self.assertEqual(co.co_stacksize, 1) def test_for_iter_stack_effect_computation(self): code = Bytecode() code.first_lineno = 1 lab1 = 
Label() lab2 = Label() code.extend( [ lab1, Instr("FOR_ITER", lab2), Instr("STORE_FAST", "i"), Instr( "JUMP_BACKWARD" if sys.version_info >= (3, 11) else "JUMP_ABSOLUTE", lab1, ), lab2, ] ) # Under 3.12+ FOR_ITER does not pop the iterator on completion so this # does not fail a coarse stack effect computation. if sys.version_info >= (3, 12): self.skipTest("Irrelevant on 3.12+") with self.assertRaises(RuntimeError): # Use compute_stacksize since the code is so broken that conversion # to from concrete is actually broken code.compute_stacksize(check_pre_and_post=False) def test_exception_table_round_trip(self): from . import exception_handling_cases as ehc for f in ehc.TEST_CASES: print(f.__name__) with self.subTest(f.__name__): origin = f.__code__ bytecode = Bytecode.from_code( origin, conserve_exception_block_stackdepth=True, ) as_code = bytecode.to_code( stacksize=f.__code__.co_stacksize, compute_exception_stack_depths=False, ) self.assertCodeObjectEqual(origin, as_code) if inspect.iscoroutinefunction(f): # contextlib.nullcontext support async context only in 3.10+ if sys.version_info >= (3, 10): asyncio.run(f()) else: f() def test_cellvar_freevar_roundtrip(self): from . 
import cell_free_vars_cases as cfc def recompile_code_and_inner(code): bytecode = Bytecode.from_code( code, conserve_exception_block_stackdepth=True, ) for instr in bytecode: if isinstance(instr, Instr) and isinstance(instr.arg, types.CodeType): instr.arg = recompile_code_and_inner(instr.arg) as_code = bytecode.to_code( stacksize=code.co_stacksize, compute_exception_stack_depths=False, ) self.assertCodeObjectEqual(code, as_code) return as_code for f in cfc.TEST_CASES: print(f.__name__) with self.subTest(f.__name__): origin = f.__code__ f.__code__ = recompile_code_and_inner(origin) while callable(f := f()): pass if __name__ == "__main__": unittest.main() # pragma: no cover bytecode-0.15.1/tests/test_cfg.py000066400000000000000000000772671451217043400167320ustar00rootroot00000000000000#!/usr/bin/env python3 import asyncio import contextlib import inspect import io import sys import textwrap import types import unittest from bytecode import ( BasicBlock, Bytecode, Compare, ControlFlowGraph, Instr, Label, SetLineno, dump_bytecode, ) from bytecode.concrete import OFFSET_AS_INSTRUCTION from . import TestCase from . 
import disassemble as _disassemble def disassemble( source, *, filename="", function=False, remove_last_return_none=False ): code = _disassemble(source, filename=filename, function=function) blocks = ControlFlowGraph.from_bytecode(code) if remove_last_return_none: # drop LOAD_CONST+RETURN_VALUE to only keep 2 instructions, # to make unit tests shorter block = blocks[-1] test = ( (block[-1].name == "RETURN_CONST" and block[-1].arg is None) if sys.version_info >= (3, 12) else ( block[-2].name == "LOAD_CONST" and block[-2].arg is None and block[-1].name == "RETURN_VALUE" ) ) if not test: raise ValueError( "unable to find implicit RETURN_VALUE : %s" % block[-2:] ) if sys.version_info >= (3, 12): del block[-1] else: del block[-2:] return blocks class BlockTests(unittest.TestCase): def test_iter_invalid_types(self): # Labels are not allowed in basic blocks block = BasicBlock() block.append(Label()) with self.assertRaises(ValueError): list(block) with self.assertRaises(ValueError): block.legalize(1) # Only one jump allowed and only at the end block = BasicBlock() block2 = BasicBlock() block.extend( [ Instr( "JUMP_FORWARD" if sys.version_info >= (3, 11) else "JUMP_ABSOLUTE", block2, ), Instr("NOP"), ] ) with self.assertRaises(ValueError): list(block) with self.assertRaises(ValueError): block.legalize(1) # jump target must be a BasicBlock block = BasicBlock() label = Label() block.extend( [ Instr( "JUMP_FORWARD" if sys.version_info >= (3, 11) else "JUMP_ABSOLUTE", label, ) ] ) with self.assertRaises(ValueError): list(block) with self.assertRaises(ValueError): block.legalize(1) def test_slice(self): block = BasicBlock([Instr("NOP")]) next_block = BasicBlock() block.next_block = next_block self.assertEqual(block, block[:]) self.assertIs(next_block, block[:].next_block) def test_copy(self): block = BasicBlock([Instr("NOP")]) next_block = BasicBlock() block.next_block = next_block self.assertEqual(block, block.copy()) self.assertIs(next_block, block.copy().next_block) class 
BytecodeBlocksTests(TestCase): maxDiff = 80 * 100 def test_constructor(self): code = ControlFlowGraph() self.assertEqual(code.name, "") self.assertEqual(code.filename, "") self.assertEqual(code.flags, 0) self.assertBlocksEqual(code, []) def test_attr(self): source = """ first_line = 1 def func(arg1, arg2, *, arg3): x = 1 y = 2 return arg1 """ code = disassemble(source, filename="hello.py", function=True) self.assertEqual(code.argcount, 2) self.assertEqual(code.filename, "hello.py") self.assertEqual(code.first_lineno, 3) self.assertEqual(code.posonlyargcount, 0) self.assertEqual(code.kwonlyargcount, 1) self.assertEqual(code.name, "func") self.assertEqual(code.cellvars, []) code.name = "name" code.filename = "filename" code.flags = 123 self.assertEqual(code.name, "name") self.assertEqual(code.filename, "filename") self.assertEqual(code.flags, 123) # FIXME: test non-empty cellvars def test_add_del_block(self): code = ControlFlowGraph() code[0].append(Instr("LOAD_CONST", 0)) block = code.add_block() self.assertEqual(len(code), 2) self.assertIs(block, code[1]) code[1].append(Instr("LOAD_CONST", 2)) self.assertBlocksEqual(code, [Instr("LOAD_CONST", 0)], [Instr("LOAD_CONST", 2)]) del code[0] self.assertBlocksEqual(code, [Instr("LOAD_CONST", 2)]) del code[0] self.assertEqual(len(code), 0) def test_setlineno(self): # x = 7 # y = 8 # z = 9 code = Bytecode() code.first_lineno = 3 code.extend( [ Instr("LOAD_CONST", 7), Instr("STORE_NAME", "x"), SetLineno(4), Instr("LOAD_CONST", 8), Instr("STORE_NAME", "y"), SetLineno(5), Instr("LOAD_CONST", 9), Instr("STORE_NAME", "z"), ] ) blocks = ControlFlowGraph.from_bytecode(code) self.assertBlocksEqual( blocks, [ Instr("LOAD_CONST", 7), Instr("STORE_NAME", "x"), SetLineno(4), Instr("LOAD_CONST", 8), Instr("STORE_NAME", "y"), SetLineno(5), Instr("LOAD_CONST", 9), Instr("STORE_NAME", "z"), ], ) def test_legalize(self): code = Bytecode() code.first_lineno = 3 code.extend( [ Instr("LOAD_CONST", 7), Instr("STORE_NAME", "x"), 
Instr("LOAD_CONST", 8, lineno=4), Instr("STORE_NAME", "y"), SetLineno(5), Instr("LOAD_CONST", 9, lineno=6), Instr("STORE_NAME", "z"), ] ) blocks = ControlFlowGraph.from_bytecode(code) blocks.legalize() self.assertBlocksEqual( blocks, [ Instr("LOAD_CONST", 7, lineno=3), Instr("STORE_NAME", "x", lineno=3), Instr("LOAD_CONST", 8, lineno=4), Instr("STORE_NAME", "y", lineno=4), Instr("LOAD_CONST", 9, lineno=5), Instr("STORE_NAME", "z", lineno=5), ], ) def test_repr(self): r = repr(ControlFlowGraph()) self.assertIn("ControlFlowGraph", r) self.assertIn("1", r) def test_to_bytecode(self): # if test: # x = 2 # x = 5 blocks = ControlFlowGraph() blocks.add_block() blocks.add_block() blocks[0].extend( [ Instr("LOAD_NAME", "test", lineno=1), Instr( "POP_JUMP_FORWARD_IF_FALSE" if (3, 12) > sys.version_info >= (3, 11) else "POP_JUMP_IF_FALSE", blocks[2], lineno=1, ), ] ) blocks[1].extend( [ Instr("LOAD_CONST", 5, lineno=2), Instr("STORE_NAME", "x", lineno=2), Instr("JUMP_FORWARD", blocks[2], lineno=2), ] ) blocks[2].extend( [ Instr("LOAD_CONST", 7, lineno=3), Instr("STORE_NAME", "x", lineno=3), Instr("LOAD_CONST", None, lineno=3), Instr("RETURN_VALUE", lineno=3), ] ) bytecode = blocks.to_bytecode() label = Label() self.assertEqual( bytecode, [ Instr("LOAD_NAME", "test", lineno=1), Instr( "POP_JUMP_FORWARD_IF_FALSE" if (3, 12) > sys.version_info >= (3, 11) else "POP_JUMP_IF_FALSE", label, lineno=1, ), Instr("LOAD_CONST", 5, lineno=2), Instr("STORE_NAME", "x", lineno=2), Instr("JUMP_FORWARD", label, lineno=2), label, Instr("LOAD_CONST", 7, lineno=3), Instr("STORE_NAME", "x", lineno=3), Instr("LOAD_CONST", None, lineno=3), Instr("RETURN_VALUE", lineno=3), ], ) # FIXME: test other attributes def test_label_at_the_end(self): label = Label() code = Bytecode( [ Instr("LOAD_NAME", "x"), Instr("UNARY_NOT"), Instr( "POP_JUMP_FORWARD_IF_FALSE" if (3, 12) > sys.version_info >= (3, 11) else "POP_JUMP_IF_FALSE", label, ), Instr("LOAD_CONST", 9), Instr("STORE_NAME", "y"), label, ] ) cfg = 
ControlFlowGraph.from_bytecode(code) self.assertBlocksEqual( cfg, [ Instr("LOAD_NAME", "x"), Instr("UNARY_NOT"), Instr( "POP_JUMP_FORWARD_IF_FALSE" if (3, 12) > sys.version_info >= (3, 11) else "POP_JUMP_IF_FALSE", cfg[2], ), ], [Instr("LOAD_CONST", 9), Instr("STORE_NAME", "y")], [], ) def test_from_bytecode(self): bytecode = Bytecode() label = Label() bytecode.extend( [ Instr("LOAD_NAME", "test", lineno=1), Instr( "POP_JUMP_FORWARD_IF_FALSE" if (3, 12) > sys.version_info >= (3, 11) else "POP_JUMP_IF_FALSE", label, lineno=1, ), Instr("LOAD_CONST", 5, lineno=2), Instr("STORE_NAME", "x", lineno=2), Instr("JUMP_FORWARD", label, lineno=2), # dead code! Instr("LOAD_CONST", 7, lineno=4), Instr("STORE_NAME", "x", lineno=4), Label(), # unused label label, Label(), # unused label Instr("LOAD_CONST", None, lineno=4), Instr("RETURN_VALUE", lineno=4), ] ) blocks = ControlFlowGraph.from_bytecode(bytecode) label2 = blocks[3] self.assertBlocksEqual( blocks, [ Instr("LOAD_NAME", "test", lineno=1), Instr( "POP_JUMP_FORWARD_IF_FALSE" if (3, 12) > sys.version_info >= (3, 11) else "POP_JUMP_IF_FALSE", label2, lineno=1, ), ], [ Instr("LOAD_CONST", 5, lineno=2), Instr("STORE_NAME", "x", lineno=2), Instr("JUMP_FORWARD", label2, lineno=2), ], [Instr("LOAD_CONST", 7, lineno=4), Instr("STORE_NAME", "x", lineno=4)], [Instr("LOAD_CONST", None, lineno=4), Instr("RETURN_VALUE", lineno=4)], ) # FIXME: test other attributes def test_from_bytecode_loop(self): # for x in (1, 2, 3): # if x == 2: # break # continue label_loop_start = Label() label_loop_exit = Label() code = Bytecode() code.extend( ( Instr("LOAD_CONST", (1, 2, 3), lineno=1), Instr("GET_ITER", lineno=1), label_loop_start, Instr("FOR_ITER", label_loop_exit, lineno=1), Instr("STORE_NAME", "x", lineno=1), Instr("LOAD_NAME", "x", lineno=2), Instr("LOAD_CONST", 2, lineno=2), Instr("COMPARE_OP", Compare.EQ, lineno=2), Instr( "POP_JUMP_BACKWARD_IF_FALSE" if (3, 12) > sys.version_info >= (3, 11) else "POP_JUMP_IF_FALSE", label_loop_start, 
lineno=2, ), Instr( "JUMP_FORWARD" if sys.version_info >= (3, 11) else "JUMP_ABSOLUTE", label_loop_exit, lineno=3, ), Instr( "JUMP_BACKWARD" if sys.version_info >= (3, 11) else "JUMP_ABSOLUTE", label_loop_start, lineno=4, ), Instr( "JUMP_BACKWARD" if sys.version_info >= (3, 11) else "JUMP_ABSOLUTE", label_loop_start, lineno=4, ), label_loop_exit, Instr("LOAD_CONST", None, lineno=4), Instr("RETURN_VALUE", lineno=4), ) ) blocks = ControlFlowGraph.from_bytecode(code) expected = [ [Instr("LOAD_CONST", (1, 2, 3), lineno=1), Instr("GET_ITER", lineno=1)], [Instr("FOR_ITER", blocks[6], lineno=1)], [ Instr("STORE_NAME", "x", lineno=1), Instr("LOAD_NAME", "x", lineno=2), Instr("LOAD_CONST", 2, lineno=2), Instr("COMPARE_OP", Compare.EQ, lineno=2), Instr( "POP_JUMP_BACKWARD_IF_FALSE" if (3, 12) > sys.version_info >= (3, 11) else "POP_JUMP_IF_FALSE", blocks[1], lineno=2, ), ], [ Instr( "JUMP_FORWARD" if sys.version_info >= (3, 11) else "JUMP_ABSOLUTE", blocks[6], lineno=3, ) ], [ Instr( "JUMP_BACKWARD" if sys.version_info >= (3, 11) else "JUMP_ABSOLUTE", blocks[1], lineno=4, ) ], [ Instr( "JUMP_BACKWARD" if sys.version_info >= (3, 11) else "JUMP_ABSOLUTE", blocks[1], lineno=4, ) ], [Instr("LOAD_CONST", None, lineno=4), Instr("RETURN_VALUE", lineno=4)], ] self.assertBlocksEqual(blocks, *expected) class BytecodeBlocksFunctionalTests(TestCase): def test_eq(self): # compare codes with multiple blocks and labels, # Code.__eq__() renumbers labels to get equal labels source = "x = 1 if test else 2" code1 = disassemble(source) code2 = disassemble(source) self.assertEqual(code1, code2) # Type mismatch self.assertFalse(code1 == 1) # argnames mismatch cfg = ControlFlowGraph() cfg.argnames = 10 self.assertFalse(code1 == cfg) # instr mismatch cfg = ControlFlowGraph() cfg.argnames = code1.argnames self.assertFalse(code1 == cfg) def test_eq_with_try_except(self): source = "try:\n x = 1\nexcept Exception:\n pass\nfinally:\n print()" code1 = disassemble(source) code2 = disassemble(source) 
self.assertEqual(code1, code2) def check_getitem(self, code): # check internal Code block indexes (index by index, index by label) for block_index, block in enumerate(code): self.assertIs(code[block_index], block) self.assertIs(code[block], block) self.assertEqual(code.get_block_index(block), block_index) def test_delitem(self): cfg = ControlFlowGraph() b = cfg.add_block() del cfg[b] self.assertEqual(len(cfg._get_instructions()), 0) def sample_code(self): code = disassemble("x = 1", remove_last_return_none=True) self.assertBlocksEqual( code, ([Instr("RESUME", 0, lineno=0)] if sys.version_info >= (3, 11) else []) + [Instr("LOAD_CONST", 1, lineno=1), Instr("STORE_NAME", "x", lineno=1)], ) if sys.version_info >= (3, 11): del code[0][0] return code def test_split_block(self): code = self.sample_code() code[0].append(Instr("NOP", lineno=1)) label = code.split_block(code[0], 2) self.assertIs(label, code[1]) self.assertBlocksEqual( code, [Instr("LOAD_CONST", 1, lineno=1), Instr("STORE_NAME", "x", lineno=1)], [Instr("NOP", lineno=1)], ) self.check_getitem(code) label2 = code.split_block(code[0], 1) self.assertIs(label2, code[1]) self.assertBlocksEqual( code, [Instr("LOAD_CONST", 1, lineno=1)], [Instr("STORE_NAME", "x", lineno=1)], [Instr("NOP", lineno=1)], ) self.check_getitem(code) with self.assertRaises(TypeError): code.split_block(1, 1) with self.assertRaises(ValueError) as e: code.split_block(code[0], -2) self.assertIn("positive", e.exception.args[0]) def test_split_block_end(self): code = self.sample_code() # split at the end of the last block requires to add a new empty block label = code.split_block(code[0], 2) self.assertIs(label, code[1]) self.assertBlocksEqual( code, [Instr("LOAD_CONST", 1, lineno=1), Instr("STORE_NAME", "x", lineno=1)], [], ) self.check_getitem(code) # split at the end of a block which is not the end doesn't require to # add a new block label = code.split_block(code[0], 2) self.assertIs(label, code[1]) self.assertBlocksEqual( code, 
[Instr("LOAD_CONST", 1, lineno=1), Instr("STORE_NAME", "x", lineno=1)], [], ) def test_split_block_dont_split(self): code = self.sample_code() # FIXME: is it really useful to support that? block = code.split_block(code[0], 0) self.assertIs(block, code[0]) self.assertBlocksEqual( code, [Instr("LOAD_CONST", 1, lineno=1), Instr("STORE_NAME", "x", lineno=1)] ) def test_split_block_error(self): code = self.sample_code() with self.assertRaises(ValueError): # invalid index code.split_block(code[0], 3) def test_to_code(self): # test resolution of jump labels bytecode = ControlFlowGraph() bytecode.first_lineno = 3 bytecode.argcount = 3 bytecode.posonlyargcount = 0 bytecode.kwonlyargcount = 2 bytecode.name = "func" bytecode.filename = "hello.py" bytecode.flags = 0x43 bytecode.argnames = ("arg", "arg2", "arg3", "kwonly", "kwonly2") bytecode.docstring = None block0 = bytecode[0] block1 = bytecode.add_block() block2 = bytecode.add_block() block0.extend( [ Instr("LOAD_FAST", "x", lineno=4), Instr( "POP_JUMP_FORWARD_IF_FALSE" if (3, 12) > sys.version_info >= (3, 11) else "POP_JUMP_IF_FALSE", block2, lineno=4, ), ] ) block1.extend( [Instr("LOAD_FAST", "arg", lineno=5), Instr("STORE_FAST", "x", lineno=5)] ) block2.extend( [ Instr("LOAD_CONST", 3, lineno=6), Instr("STORE_FAST", "x", lineno=6), Instr("LOAD_FAST", "x", lineno=7), Instr("RETURN_VALUE", lineno=7), ] ) if sys.version_info >= (3, 11): # jump is relative not absolute expected = ( b"|\x05" b"r\x02" b"|\x00" b"}\x05" b"d\x01" b"}\x05" b"|\x05" b"S\x00" ) elif OFFSET_AS_INSTRUCTION: # The argument of the jump is divided by 2 expected = ( b"|\x05" b"r\x04" b"|\x00" b"}\x05" b"d\x01" b"}\x05" b"|\x05" b"S\x00" ) else: expected = ( b"|\x05" b"r\x08" b"|\x00" b"}\x05" b"d\x01" b"}\x05" b"|\x05" b"S\x00" ) code = bytecode.to_code() self.assertEqual(code.co_consts, (None, 3)) self.assertEqual(code.co_argcount, 3) self.assertEqual(code.co_posonlyargcount, 0) self.assertEqual(code.co_kwonlyargcount, 2) 
self.assertEqual(code.co_nlocals, 6) self.assertEqual(code.co_stacksize, 1) # FIXME: don't use hardcoded constants self.assertEqual(code.co_flags, 0x43) self.assertEqual(code.co_code, expected) self.assertEqual(code.co_names, ()) self.assertEqual( code.co_varnames, ("arg", "arg2", "arg3", "kwonly", "kwonly2", "x") ) self.assertEqual(code.co_filename, "hello.py") self.assertEqual(code.co_name, "func") self.assertEqual(code.co_firstlineno, 3) # verify stacksize argument is honored explicit_stacksize = code.co_stacksize + 42 code = bytecode.to_code( stacksize=explicit_stacksize, compute_exception_stack_depths=False ) self.assertEqual(code.co_stacksize, explicit_stacksize) def test_get_block_index(self): blocks = ControlFlowGraph() block0 = blocks[0] block1 = blocks.add_block() block2 = blocks.add_block() self.assertEqual(blocks.get_block_index(block0), 0) self.assertEqual(blocks.get_block_index(block1), 1) self.assertEqual(blocks.get_block_index(block2), 2) other_block = BasicBlock() self.assertRaises(ValueError, blocks.get_block_index, other_block) class CFGStacksizeComputationTests(TestCase): def check_stack_size(self, func): code = func.__code__ bytecode = Bytecode.from_code(code) cfg = ControlFlowGraph.from_bytecode(bytecode) as_code = cfg.to_code(check_pre_and_post=False) self.assertCodeObjectEqual(code, as_code) self.assertEqual(code.co_stacksize, cfg.compute_stacksize()) def test_empty_code(self): cfg = ControlFlowGraph() del cfg[0] self.assertEqual(cfg.compute_stacksize(), 0) def test_handling_of_set_lineno(self): code = Bytecode() code.first_lineno = 3 code.extend( [ Instr("LOAD_CONST", 7), Instr("STORE_NAME", "x"), SetLineno(4), Instr("LOAD_CONST", 8), Instr("STORE_NAME", "y"), SetLineno(5), Instr("LOAD_CONST", 9), Instr("STORE_NAME", "z"), ] ) self.assertEqual(code.compute_stacksize(), 1) def test_invalid_stacksize(self): code = Bytecode() code.extend([Instr("STORE_NAME", "x")]) with self.assertRaises(RuntimeError): code.compute_stacksize() def 
test_stack_size_computation_and(self): def test(arg1, *args, **kwargs): # pragma: no cover return arg1 and args # Test JUMP_IF_FALSE_OR_POP self.check_stack_size(test) def test_stack_size_computation_or(self): def test(arg1, *args, **kwargs): # pragma: no cover return arg1 or args # Test JUMP_IF_TRUE_OR_POP self.check_stack_size(test) def test_stack_size_computation_if_else(self): def test(arg1, *args, **kwargs): # pragma: no cover if args: return 0 elif kwargs: return 1 else: return 2 self.check_stack_size(test) def test_stack_size_computation_for_loop_continue(self): def test(arg1, *args, **kwargs): # pragma: no cover for k in kwargs: if k in args: continue else: return 1 self.check_stack_size(test) def test_stack_size_computation_while_loop_break(self): def test(arg1, *args, **kwargs): # pragma: no cover while True: if arg1: break self.check_stack_size(test) def test_stack_size_computation_with(self): def test(arg1, *args, **kwargs): # pragma: no cover with open(arg1) as f: return f.read() self.check_stack_size(test) def test_stack_size_computation_try_except(self): def test(arg1, *args, **kwargs): # pragma: no cover try: return args[0] except Exception: return 2 self.check_stack_size(test) def test_stack_size_computation_try_finally(self): def test(arg1, *args, **kwargs): # pragma: no cover try: return args[0] finally: return 2 self.check_stack_size(test) def test_stack_size_computation_try_except_finally(self): def test(arg1, *args, **kwargs): # pragma: no cover try: return args[0] except Exception: return 2 finally: print("Interrupt") self.check_stack_size(test) def test_stack_size_computation_try_except_else_finally(self): def test(arg1, *args, **kwargs): # pragma: no cover try: return args[0] except Exception: return 2 else: return arg1 finally: print("Interrupt") self.check_stack_size(test) def test_stack_size_computation_nested_try_except_finally(self): def test(arg1, *args, **kwargs): # pragma: no cover k = 1 try: getattr(arg1, k) except AttributeError: 
pass except Exception: try: assert False except Exception: return 2 finally: print("unexpected") finally: print("attempted to get {}".format(k)) self.check_stack_size(test) def test_stack_size_computation_nested_try_except_else_finally(self): def test(*args, **kwargs): try: v = args[1] except IndexError: try: w = kwargs["value"] except KeyError: return -1 else: return w finally: print("second finally") else: return v finally: print("first finally") # A direct comparison of the stack depth fails because CPython # generate dead code that is used in stack computation. cpython_stacksize = test.__code__.co_stacksize test.__code__ = Bytecode.from_code(test.__code__).to_code() self.assertLessEqual(test.__code__.co_stacksize, cpython_stacksize) with contextlib.redirect_stdout(io.StringIO()) as stdout: self.assertEqual(test(1, 4), 4) self.assertEqual(stdout.getvalue(), "first finally\n") with contextlib.redirect_stdout(io.StringIO()) as stdout: self.assertEqual(test([], value=3), 3) self.assertEqual(stdout.getvalue(), "second finally\nfirst finally\n") with contextlib.redirect_stdout(io.StringIO()) as stdout: self.assertEqual(test([], name=None), -1) self.assertEqual(stdout.getvalue(), "second finally\nfirst finally\n") def test_stack_size_with_dead_code(self): # Simply demonstrate more directly the previously mentioned issue. 
def test(*args): # pragma: no cover a = 0 return a try: a = args[0] except IndexError: return -1 else: return a test.__code__ = Bytecode.from_code(test.__code__).to_code() self.assertEqual(test.__code__.co_stacksize, 1) self.assertEqual(test(1), 0) def test_stack_size_with_dead_code2(self): # See GH #118 source = """ try: pass except Exception as e: pass """ source = textwrap.dedent(source).strip() code = compile(source, "", "exec") bytecode = Bytecode.from_code(code) cfg = ControlFlowGraph.from_bytecode(bytecode) cfg.to_bytecode() def test_huge_code_with_numerous_blocks(self): def base_func(x): pass def mk_if_then_else(depth): instructions = [] for i in range(depth): label_else = Label() instructions.extend( [ Instr("LOAD_FAST", "x"), Instr( "POP_JUMP_FORWARD_IF_FALSE" if (3, 12) > sys.version_info >= (3, 11) else "POP_JUMP_IF_FALSE", label_else, ), Instr( "LOAD_GLOBAL", (False, f"f{i}") if sys.version_info >= (3, 11) else f"f{i}", ), Instr("RETURN_VALUE"), label_else, ] ) instructions.extend([Instr("LOAD_CONST", None), Instr("RETURN_VALUE")]) return instructions bytecode = Bytecode(mk_if_then_else(5000)) bytecode.compute_stacksize() class CFGRoundTripTests(TestCase): def test_roundtrip_exception_handling(self): from . import exception_handling_cases as ehc for f in ehc.TEST_CASES: # 3.12 use one less exception table entry causing to optimize this case # less than we could otherwise if sys.version_info >= (3, 12) and f.__name__ == "try_except_finally": continue print(f.__name__) with self.subTest(f.__name__): origin = f.__code__ print("Bytecode:") bytecode = Bytecode.from_code( f.__code__, conserve_exception_block_stackdepth=True ) dump_bytecode(bytecode) print() print("CFG:") cfg = ControlFlowGraph.from_bytecode(bytecode) dump_bytecode(cfg) as_code = cfg.to_code() self.assertCodeObjectEqual(origin, as_code) if inspect.iscoroutinefunction(f): if sys.version_info >= (3, 10): asyncio.run(f()) else: f() def test_cellvar_freevar_roundtrip(self): from . 
import cell_free_vars_cases as cfc def recompile_code_and_inner(code): cfg = ControlFlowGraph.from_bytecode(Bytecode.from_code(code)) for block in cfg: for instr in block: if isinstance(instr.arg, types.CodeType): instr.arg = recompile_code_and_inner(instr.arg) as_code = cfg.to_code() self.assertCodeObjectEqual(code, as_code) return as_code for f in cfc.TEST_CASES: print(f.__name__) with self.subTest(f.__name__): origin = f.__code__ f.__code__ = recompile_code_and_inner(origin) while callable(f := f()): pass if __name__ == "__main__": unittest.main() # pragma: no cover bytecode-0.15.1/tests/test_code.py000066400000000000000000000041061451217043400170630ustar00rootroot00000000000000import unittest from bytecode import Bytecode, ConcreteBytecode, ControlFlowGraph from . import TestCase, get_code class CodeTests(TestCase): """Check that bytecode.from_code(code).to_code() returns code.""" def check(self, source, function=False): ref_code = get_code(source, function=function) code = ConcreteBytecode.from_code(ref_code).to_code() self.assertCodeObjectEqual(ref_code, code) code = Bytecode.from_code(ref_code).to_code() self.assertCodeObjectEqual(ref_code, code) bytecode = Bytecode.from_code(ref_code) blocks = ControlFlowGraph.from_bytecode(bytecode) code = blocks.to_bytecode().to_code() self.assertCodeObjectEqual(ref_code, code) def test_loop(self): self.check( """ for x in range(1, 10): x += 1 if x == 3: continue x -= 1 if x > 7: break x = 0 print(x) """ ) def test_varargs(self): self.check( """ def func(a, b, *varargs): pass """, function=True, ) def test_kwargs(self): self.check( """ def func(a, b, **kwargs): pass """, function=True, ) def test_kwonlyargs(self): self.check( """ def func(*, arg, arg2): pass """, function=True, ) # Added because Python 3.10 added some special behavior with respect to # generators in term of stack size def test_generator_func(self): self.check( """ def func(arg, arg2): yield """, function=True, ) def test_async_func(self): self.check( """ 
async def func(arg, arg2): pass """, function=True, ) if __name__ == "__main__": unittest.main() # pragma: no cover bytecode-0.15.1/tests/test_concrete.py000066400000000000000000001615171451217043400177650ustar00rootroot00000000000000#!/usr/bin/env python3 import asyncio import dis import inspect import opcode import sys import textwrap import types import unittest from bytecode import ( UNSET, Bytecode, CellVar, CompilerFlags, ConcreteBytecode, ConcreteInstr, FreeVar, Instr, Label, SetLineno, ) from bytecode.concrete import OFFSET_AS_INSTRUCTION, ExceptionTableEntry from . import TestCase, get_code class ConcreteInstrTests(TestCase): def test_constructor(self): with self.assertRaises(ValueError): # need an argument ConcreteInstr("LOAD_CONST") with self.assertRaises(ValueError): # must not have an argument ConcreteInstr("ROT_TWO", 33) # invalid argument with self.assertRaises(TypeError): ConcreteInstr("LOAD_CONST", 1.0) with self.assertRaises(ValueError): ConcreteInstr("LOAD_CONST", -1) with self.assertRaises(TypeError): ConcreteInstr("LOAD_CONST", 5, lineno=1.0) with self.assertRaises(ValueError): ConcreteInstr("LOAD_CONST", 5, lineno=-1) # test maximum argument with self.assertRaises(ValueError): ConcreteInstr("LOAD_CONST", 2147483647 + 1) instr = ConcreteInstr("LOAD_CONST", 2147483647) self.assertEqual(instr.arg, 2147483647) # test meaningless extended args instr = ConcreteInstr("LOAD_FAST", 8, lineno=3, extended_args=1) self.assertEqual(instr.name, "LOAD_FAST") self.assertEqual(instr.arg, 8) self.assertEqual(instr.lineno, 3) self.assertEqual(instr.size, 4) def test_attr(self): instr = ConcreteInstr("LOAD_CONST", 5, lineno=12) self.assertEqual(instr.name, "LOAD_CONST") self.assertEqual(instr.opcode, 100) self.assertEqual(instr.arg, 5) self.assertEqual(instr.lineno, 12) self.assertEqual(instr.size, 2) def test_set(self): instr = ConcreteInstr("LOAD_CONST", 5, lineno=3) instr.set("NOP") self.assertEqual(instr.name, "NOP") self.assertIs(instr.arg, UNSET) 
self.assertEqual(instr.lineno, 3) instr.set("LOAD_FAST", 8) self.assertEqual(instr.name, "LOAD_FAST") self.assertEqual(instr.arg, 8) self.assertEqual(instr.lineno, 3) # invalid with self.assertRaises(ValueError): instr.set("LOAD_CONST") with self.assertRaises(ValueError): instr.set("NOP", 5) def test_set_attr(self): instr = ConcreteInstr("LOAD_CONST", 5, lineno=12) # operator name instr.name = "LOAD_FAST" self.assertEqual(instr.name, "LOAD_FAST") self.assertEqual(instr.opcode, 124) self.assertRaises(TypeError, setattr, instr, "name", 3) self.assertRaises(ValueError, setattr, instr, "name", "xxx") # operator code instr.opcode = 100 self.assertEqual(instr.name, "LOAD_CONST") self.assertEqual(instr.opcode, 100) self.assertRaises(ValueError, setattr, instr, "opcode", -12) self.assertRaises(TypeError, setattr, instr, "opcode", "abc") # extended argument instr.arg = 0x1234ABCD self.assertEqual(instr.arg, 0x1234ABCD) self.assertEqual(instr.size, 8) # small argument instr.arg = 0 self.assertEqual(instr.arg, 0) self.assertEqual(instr.size, 2) # invalid argument self.assertRaises(ValueError, setattr, instr, "arg", -1) self.assertRaises(ValueError, setattr, instr, "arg", 2147483647 + 1) # size attribute is read-only self.assertRaises(AttributeError, setattr, instr, "size", 3) # lineno instr.lineno = 33 self.assertEqual(instr.lineno, 33) self.assertRaises(TypeError, setattr, instr, "lineno", 1.0) self.assertRaises(ValueError, setattr, instr, "lineno", -1) def test_size(self): self.assertEqual(ConcreteInstr("LOAD_CONST", 3).size, 2) self.assertEqual(ConcreteInstr("LOAD_CONST", 0x1234ABCD).size, 8) def test_disassemble(self): code = b"\t\x00d\x03" instr = ConcreteInstr.disassemble(1, code, 0) self.assertEqual(instr, ConcreteInstr("NOP", lineno=1)) instr = ConcreteInstr.disassemble(2, code, 1 if OFFSET_AS_INSTRUCTION else 2) self.assertEqual(instr, ConcreteInstr("LOAD_CONST", 3, lineno=2)) code = b"\x90\x12\x904\x90\xabd\xcd" instr = ConcreteInstr.disassemble(3, code, 0) 
self.assertEqual(instr, ConcreteInstr("EXTENDED_ARG", 0x12, lineno=3)) def test_assemble(self): instr = ConcreteInstr("NOP") self.assertEqual(instr.assemble(), b"\t\x00") instr = ConcreteInstr("LOAD_CONST", 3) self.assertEqual(instr.assemble(), b"d\x03") instr = ConcreteInstr("LOAD_CONST", 0x1234ABCD) self.assertEqual( instr.assemble(), (b"\x90\x12\x904\x90\xabd\xcd"), ) instr = ConcreteInstr("LOAD_CONST", 3, extended_args=1) self.assertEqual( instr.assemble(), (b"\x90\x00d\x03"), ) def test_get_jump_target(self): if sys.version_info < (3, 11): jump_abs = ConcreteInstr("JUMP_ABSOLUTE", 3) self.assertEqual(jump_abs.get_jump_target(100), 3) jump_forward = ConcreteInstr("JUMP_FORWARD", 5) self.assertEqual( jump_forward.get_jump_target(10), 16 if OFFSET_AS_INSTRUCTION else 17 ) class ConcreteBytecodeTests(TestCase): def test_repr(self): r = repr(ConcreteBytecode()) self.assertIn("ConcreteBytecode", r) self.assertIn("0", r) def test_exception_table_repr(self): t = ExceptionTableEntry(0, 1, 2, 3, True) self.assertSequenceEqual( repr(t), ( "ExceptionTableEntry(" "start_offset=0, " "stop_offset=1, " "target=2, " "stack_depth=3, " "push_lasti=True" ), ) def test_eq(self): code = ConcreteBytecode() self.assertFalse(code == 1) for name, val in ( ("names", ["a"]), ("varnames", ["a"]), ("consts", [1]), ("argcount", 1), ("kwonlyargcount", 2), ("flags", CompilerFlags(CompilerFlags.GENERATOR)), ("first_lineno", 10), ("filename", "xxxx.py"), ("name", "__x"), ("docstring", "x-x-x"), ("cellvars", [CellVar("x")]), ("freevars", [FreeVar("x")]), ): c = ConcreteBytecode() setattr(c, name, val) # For obscure reasons using assertNotEqual here fail self.assertFalse(code == c) c = ConcreteBytecode() c.posonlyargcount = 10 self.assertFalse(code == c) c = ConcreteBytecode() c.consts = [1] code.consts = [1] c.append(ConcreteInstr("LOAD_CONST", 0)) self.assertFalse(code == c) def test_attr(self): code_obj = get_code("x = 5") code = ConcreteBytecode.from_code(code_obj) 
self.assertEqual(code.consts, [5, None]) self.assertEqual(code.names, ["x"]) self.assertEqual(code.varnames, []) self.assertEqual(code.freevars, []) self.assertInstructionListEqual( list(code), ( [ConcreteInstr("RESUME", 0, lineno=0)] if sys.version_info >= (3, 11) else [] ) + [ ConcreteInstr("LOAD_CONST", 0, lineno=1), ConcreteInstr("STORE_NAME", 0, lineno=1), ] + ( [ConcreteInstr("RETURN_CONST", 1, lineno=1)] if sys.version_info >= (3, 12) else [ ConcreteInstr("LOAD_CONST", 1, lineno=1), ConcreteInstr("RETURN_VALUE", lineno=1), ] ), ) # FIXME: test other attributes def test_invalid_types(self): code = ConcreteBytecode() code.append(Label()) with self.assertRaises(ValueError): list(code) with self.assertRaises(ValueError): code.legalize() with self.assertRaises(ValueError): ConcreteBytecode([Label()]) def test_to_code_lnotab(self): # We use an actual function for the simple case to # ensure we get lnotab right def f(): # # x = 7 # noqa y = 8 # noqa z = 9 # noqa fl = f.__code__.co_firstlineno concrete = ConcreteBytecode() concrete.consts = [None, 7, 8, 9] concrete.varnames = ["x", "y", "z"] concrete.first_lineno = fl concrete.extend( ( [ConcreteInstr("RESUME", 0), SetLineno(1)] if sys.version_info >= (3, 11) else [] ) + [ SetLineno(fl + 3), ConcreteInstr("LOAD_CONST", 1), ConcreteInstr("STORE_FAST", 0), SetLineno(fl + 4), ConcreteInstr("LOAD_CONST", 2), ConcreteInstr("STORE_FAST", 1), SetLineno(fl + 5), ConcreteInstr("LOAD_CONST", 3), ConcreteInstr("STORE_FAST", 2), ] + ( [ConcreteInstr("RETURN_CONST", 0)] if sys.version_info >= (3, 12) else [ ConcreteInstr("LOAD_CONST", 0), ConcreteInstr("RETURN_VALUE"), ] ) ) code = concrete.to_code() self.assertSequenceEqual(code.co_code, f.__code__.co_code) if sys.version_info >= (3, 11): # Offset cannot be right so only check the lines self.assertSequenceEqual( list(dis.findlinestarts(code)), list(dis.findlinestarts(f.__code__)) ) else: self.assertEqual(code.co_lnotab, f.__code__.co_lnotab) if sys.version_info >= (3, 10): 
self.assertEqual(code.co_linetable, f.__code__.co_linetable) def test_negative_lnotab(self): # x = 7 # y = 8 concrete = ConcreteBytecode( [ ConcreteInstr("LOAD_CONST", 0), ConcreteInstr("STORE_NAME", 0), # line number goes backward! SetLineno(2), ConcreteInstr("LOAD_CONST", 1), ConcreteInstr("STORE_NAME", 1), ] ) concrete.consts = [7, 8] concrete.names = ["x", "y"] concrete.first_lineno = 5 code = concrete.to_code() expected = b"d\x00Z\x00d\x01Z\x01" self.assertEqual(code.co_code, expected) self.assertEqual(code.co_firstlineno, 5) if sys.version_info >= (3, 12): self.skipTest("lnotab is deprecated in Python 3.12+") self.assertEqual(code.co_lnotab, b"\x04\xfd") def test_extended_lnotab(self): # x = 7 # 200 blank lines # y = 8 concrete = ConcreteBytecode( [ ConcreteInstr("LOAD_CONST", 0), SetLineno(1 + 128), ConcreteInstr("STORE_NAME", 0), # line number goes backward! SetLineno(1 + 129), ConcreteInstr("LOAD_CONST", 1), SetLineno(1), ConcreteInstr("STORE_NAME", 1), ] ) concrete.consts = [7, 8] concrete.names = ["x", "y"] concrete.first_lineno = 1 code = concrete.to_code() expected = b"d\x00Z\x00d\x01Z\x01" self.assertEqual(code.co_code, expected) self.assertEqual(code.co_firstlineno, 1) if sys.version_info >= (3, 11): self.assertSequenceEqual( list(code.co_positions()), [ (1, 1, None, None), (129, 129, None, None), (130, 130, None, None), (1, 1, None, None), ], ) else: self.assertEqual( code.co_lnotab, b"\x02\x7f\x00\x01\x02\x01\x02\x80\x00\xff" ) def test_extended_lnotab2(self): # x = 7 # 200 blank lines # y = 8 base_code = compile("x = 7" + "\n" * 200 + "y = 8", "", "exec") concrete = ConcreteBytecode( ( [ConcreteInstr("RESUME", 0, lineno=0), SetLineno(1)] if sys.version_info >= (3, 11) else [] ) + [ ConcreteInstr("LOAD_CONST", 0), ConcreteInstr("STORE_NAME", 0), SetLineno(201), ConcreteInstr("LOAD_CONST", 1), ConcreteInstr("STORE_NAME", 1), ] + ( [ConcreteInstr("RETURN_CONST", 2)] if sys.version_info >= (3, 12) else [ ConcreteInstr("LOAD_CONST", 2), 
ConcreteInstr("RETURN_VALUE"), ] ) ) concrete.consts = [None, 7, 8] concrete.names = ["x", "y"] concrete.first_lineno = 1 code = concrete.to_code() self.assertSequenceEqual(code.co_code, base_code.co_code) self.assertEqual(code.co_firstlineno, base_code.co_firstlineno) if sys.version_info >= (3, 11): # Offset cannot be right so only check the lines self.assertSequenceEqual( list(dis.findlinestarts(code)), list(dis.findlinestarts(base_code)) ) else: self.assertSequenceEqual(code.co_lnotab, base_code.co_lnotab) if sys.version_info >= (3, 10): self.assertSequenceEqual(code.co_linetable, base_code.co_linetable) def test_to_bytecode_consts(self): # x = -0.0 # x = +0.0 # # code optimized by the CPython 3.6 peephole optimizer which emits # duplicated constants (0.0 is twice in consts). code = ConcreteBytecode() code.consts = [0.0, None, -0.0, 0.0] code.names = ["x", "y"] code.extend( [ ConcreteInstr("LOAD_CONST", 2, lineno=1), ConcreteInstr("STORE_NAME", 0, lineno=1), ConcreteInstr("LOAD_CONST", 3, lineno=2), ConcreteInstr("STORE_NAME", 1, lineno=2), ConcreteInstr("LOAD_CONST", 1, lineno=2), ConcreteInstr("RETURN_VALUE", lineno=2), ] ) code = code.to_bytecode().to_concrete_bytecode() # the conversion changes the constant order: the order comes from # the order of LOAD_CONST instructions self.assertEqual(code.consts, [-0.0, 0.0, None]) code.names = ["x", "y"] self.assertListEqual( list(code), [ ConcreteInstr("LOAD_CONST", 0, lineno=1), ConcreteInstr("STORE_NAME", 0, lineno=1), ConcreteInstr("LOAD_CONST", 1, lineno=2), ConcreteInstr("STORE_NAME", 1, lineno=2), ConcreteInstr("LOAD_CONST", 2, lineno=2), ConcreteInstr("RETURN_VALUE", lineno=2), ], ) def test_cellvar(self): concrete = ConcreteBytecode() concrete.cellvars = ["x"] concrete.append(ConcreteInstr("LOAD_DEREF", 0)) code = concrete.to_code() concrete = ConcreteBytecode.from_code(code) self.assertEqual(concrete.cellvars, ["x"]) self.assertEqual(concrete.freevars, []) self.assertInstructionListEqual( list(concrete), 
[ConcreteInstr("LOAD_DEREF", 0, lineno=1)] ) bytecode = concrete.to_bytecode() self.assertEqual(bytecode.cellvars, ["x"]) self.assertInstructionListEqual( list(bytecode), [Instr("LOAD_DEREF", CellVar("x"), lineno=1)] ) def test_freevar(self): concrete = ConcreteBytecode() concrete.freevars = ["x"] concrete.append(ConcreteInstr("LOAD_DEREF", 0)) code = concrete.to_code() concrete = ConcreteBytecode.from_code(code) self.assertEqual(concrete.cellvars, []) self.assertEqual(concrete.freevars, ["x"]) self.assertInstructionListEqual( list(concrete), [ConcreteInstr("LOAD_DEREF", 0, lineno=1)] ) bytecode = concrete.to_bytecode() self.assertEqual(bytecode.cellvars, []) self.assertInstructionListEqual( list(bytecode), [Instr("LOAD_DEREF", FreeVar("x"), lineno=1)] ) def test_cellvar_freevar(self): concrete = ConcreteBytecode() concrete.cellvars = ["cell"] concrete.freevars = ["free"] concrete.append(ConcreteInstr("LOAD_DEREF", 0)) concrete.append(ConcreteInstr("LOAD_DEREF", 1)) code = concrete.to_code() concrete = ConcreteBytecode.from_code(code) self.assertEqual(concrete.cellvars, ["cell"]) self.assertEqual(concrete.freevars, ["free"]) self.assertInstructionListEqual( list(concrete), [ ConcreteInstr("LOAD_DEREF", 0, lineno=1), ConcreteInstr("LOAD_DEREF", 1, lineno=1), ], ) bytecode = concrete.to_bytecode() self.assertEqual(bytecode.cellvars, ["cell"]) self.assertInstructionListEqual( list(bytecode), [ Instr("LOAD_DEREF", CellVar("cell"), lineno=1), Instr("LOAD_DEREF", FreeVar("free"), lineno=1), ], ) def test_load_classderef(self): i_name = ( "LOAD_FROM_DICT_OR_DEREF" if sys.version_info >= (3, 12) else "LOAD_CLASSDEREF" ) i_arg = 2 if sys.version_info >= (3, 11) else 1 concrete = ConcreteBytecode() concrete.varnames = ["a"] concrete.cellvars = ["__class__"] concrete.freevars = ["__class__"] concrete.extend( [ ConcreteInstr("LOAD_FAST", 0, lineno=1), ConcreteInstr(i_name, i_arg, lineno=1), ConcreteInstr("STORE_DEREF", i_arg, lineno=1), ] ) bytecode = concrete.to_bytecode() 
self.assertEqual(bytecode.freevars, ["__class__"]) self.assertEqual(bytecode.cellvars, ["__class__"]) self.assertInstructionListEqual( list(bytecode), [ Instr("LOAD_FAST", "a", lineno=1), Instr(i_name, FreeVar("__class__"), lineno=1), Instr("STORE_DEREF", FreeVar("__class__"), lineno=1), ], ) concrete = bytecode.to_concrete_bytecode() self.assertEqual(concrete.freevars, ["__class__"]) self.assertEqual(concrete.cellvars, ["__class__"]) self.assertInstructionListEqual( list(concrete), [ ConcreteInstr("LOAD_FAST", 1, lineno=1), ConcreteInstr(i_name, i_arg, lineno=1), ConcreteInstr("STORE_DEREF", i_arg, lineno=1), ], ) code = concrete.to_code() self.assertEqual(code.co_freevars, ("__class__",)) self.assertEqual(code.co_cellvars, ("__class__",)) self.assertEqual( code.co_code, bytes( [ opcode.opmap["LOAD_FAST"], 0, opcode.opmap[i_name], i_arg, opcode.opmap["STORE_DEREF"], i_arg, ] ), ) def test_explicit_stacksize(self): # Passing stacksize=... to ConcreteBytecode.to_code should result in a # code object with the specified stacksize. We pass some silly values # and assert that they are honored. code_obj = get_code("print('%s' % (a,b,c))") original_stacksize = code_obj.co_stacksize concrete = ConcreteBytecode.from_code(code_obj) # First with something bigger than necessary. explicit_stacksize = original_stacksize + 42 new_code_obj = concrete.to_code( stacksize=explicit_stacksize, compute_exception_stack_depths=False ) self.assertEqual(new_code_obj.co_stacksize, explicit_stacksize) # Then with something bogus. We probably don't want to advertise this # in the documentation. If this fails then decide if it's for good # reason, and remove if so. 
explicit_stacksize = 0 new_code_obj = concrete.to_code( stacksize=explicit_stacksize, compute_exception_stack_depths=False ) self.assertEqual(new_code_obj.co_stacksize, explicit_stacksize) def test_legalize(self): concrete = ConcreteBytecode() concrete.first_lineno = 3 concrete.consts = [7, 8, 9] concrete.names = ["x", "y", "z"] concrete.extend( [ ConcreteInstr("LOAD_CONST", 0), ConcreteInstr("STORE_NAME", 0), ConcreteInstr("LOAD_CONST", 1, lineno=4), ConcreteInstr("STORE_NAME", 1), SetLineno(5), ConcreteInstr("LOAD_CONST", 2, lineno=6), ConcreteInstr("STORE_NAME", 2), ] ) concrete.legalize() self.assertInstructionListEqual( list(concrete), [ ConcreteInstr("LOAD_CONST", 0, lineno=3), ConcreteInstr("STORE_NAME", 0, lineno=3), ConcreteInstr("LOAD_CONST", 1, lineno=4), ConcreteInstr("STORE_NAME", 1, lineno=4), ConcreteInstr("LOAD_CONST", 2, lineno=5), ConcreteInstr("STORE_NAME", 2, lineno=5), ], ) def test_slice(self): concrete = ConcreteBytecode() concrete.first_lineno = 3 concrete.consts = [7, 8, 9] concrete.names = ["x", "y", "z"] concrete.extend( [ ConcreteInstr("LOAD_CONST", 0), ConcreteInstr("STORE_NAME", 0), SetLineno(4), ConcreteInstr("LOAD_CONST", 1), ConcreteInstr("STORE_NAME", 1), SetLineno(5), ConcreteInstr("LOAD_CONST", 2), ConcreteInstr("STORE_NAME", 2), ] ) self.assertInstructionListEqual(concrete, concrete[:]) def test_copy(self): concrete = ConcreteBytecode() concrete.first_lineno = 3 concrete.consts = [7, 8, 9] concrete.names = ["x", "y", "z"] concrete.extend( [ ConcreteInstr("LOAD_CONST", 0), ConcreteInstr("STORE_NAME", 0), SetLineno(4), ConcreteInstr("LOAD_CONST", 1), ConcreteInstr("STORE_NAME", 1), SetLineno(5), ConcreteInstr("LOAD_CONST", 2), ConcreteInstr("STORE_NAME", 2), ] ) self.assertInstructionListEqual(concrete, concrete.copy()) def test_encode_varint(self): self.assertListEqual(list(ConcreteBytecode._encode_varint(0)), [0]) self.assertListEqual(list(ConcreteBytecode._encode_varint(0, True)), [128]) 
self.assertListEqual(list(ConcreteBytecode._encode_varint(64, False)), [65, 0]) class ConcreteFromCodeTests(TestCase): def test_extended_arg(self): # Create a code object from arbitrary bytecode co_code = b"\x90\x12\x904\x90\xabd\xcd" code = get_code("x=1") if sys.version_info >= (3, 11): self.skipTest("Under Python 3.11 we cannot easily disassemble invalid code") else: args = ( code.co_argcount, code.co_posonlyargcount, code.co_kwonlyargcount, code.co_nlocals, code.co_stacksize, code.co_flags, co_code, code.co_consts, code.co_names, code.co_varnames, code.co_filename, code.co_name, code.co_firstlineno, code.co_linetable if sys.version_info >= (3, 10) else code.co_lnotab, code.co_freevars, code.co_cellvars, ) new_code = types.CodeType(*args) # without EXTENDED_ARG opcode bytecode = ConcreteBytecode.from_code(new_code) self.assertInstructionListEqual( list(bytecode), [ConcreteInstr("LOAD_CONST", 0x1234ABCD, lineno=1)] ) # with EXTENDED_ARG opcode bytecode = ConcreteBytecode.from_code(new_code, extended_arg=True) expected = [ ConcreteInstr("EXTENDED_ARG", 0x12, lineno=1), ConcreteInstr("EXTENDED_ARG", 0x34, lineno=1), ConcreteInstr("EXTENDED_ARG", 0xAB, lineno=1), ConcreteInstr("LOAD_CONST", 0xCD, lineno=1), ] self.assertInstructionListEqual(list(bytecode), expected) def test_extended_arg_make_function(self): if (3, 9) <= sys.version_info < (3, 10): from .util_annotation import get_code as get_code_future code_obj = get_code_future( """ def foo(x: int, y: int): pass """ ) else: code_obj = get_code( """ def foo(x: int, y: int): pass """ ) # without EXTENDED_ARG concrete = ConcreteBytecode.from_code(code_obj) if sys.version_info >= (3, 11): func_code = concrete.consts[2] names = ["int", "foo"] consts = ["x", "y", func_code, None] const_offset = 1 name_offset = 1 first_instrs = [ ConcreteInstr("LOAD_CONST", 0, lineno=1), ConcreteInstr("LOAD_NAME", 0, lineno=1), ConcreteInstr("LOAD_CONST", 1, lineno=1), ConcreteInstr("LOAD_NAME", 0, lineno=1), 
ConcreteInstr("BUILD_TUPLE", 4, lineno=1), ] elif sys.version_info >= (3, 10): func_code = concrete.consts[2] names = ["int", "foo"] consts = ["x", "y", func_code, "foo", None] const_offset = 1 name_offset = 1 first_instrs = [ ConcreteInstr("LOAD_CONST", 0, lineno=1), ConcreteInstr("LOAD_NAME", 0, lineno=1), ConcreteInstr("LOAD_CONST", 1, lineno=1), ConcreteInstr("LOAD_NAME", 0, lineno=1), ConcreteInstr("BUILD_TUPLE", 4, lineno=1), ] elif ( sys.version_info >= (3, 7) and concrete.flags & CompilerFlags.FUTURE_ANNOTATIONS ): func_code = concrete.consts[2] names = ["foo"] consts = ["int", ("x", "y"), func_code, "foo", None] const_offset = 1 name_offset = 0 first_instrs = [ ConcreteInstr("LOAD_CONST", 0, lineno=1), ConcreteInstr("LOAD_CONST", 0, lineno=1), ConcreteInstr("LOAD_CONST", 0 + const_offset, lineno=1), ConcreteInstr("BUILD_CONST_KEY_MAP", 2, lineno=1), ] else: func_code = concrete.consts[1] names = ["int", "foo"] consts = [("x", "y"), func_code, "foo", None] const_offset = 0 name_offset = 1 first_instrs = [ ConcreteInstr("LOAD_NAME", 0, lineno=1), ConcreteInstr("LOAD_NAME", 0, lineno=1), ConcreteInstr("LOAD_CONST", 0 + const_offset, lineno=1), ConcreteInstr("BUILD_CONST_KEY_MAP", 2, lineno=1), ] self.assertSequenceEqual(concrete.names, names) self.assertSequenceEqual(concrete.consts, consts) expected = ( first_instrs + [ ConcreteInstr("LOAD_CONST", 1 + const_offset, lineno=1), ConcreteInstr("LOAD_CONST", 2 + const_offset, lineno=1), ConcreteInstr("MAKE_FUNCTION", 4, lineno=1), ConcreteInstr("STORE_NAME", name_offset, lineno=1), ] + ( [ConcreteInstr("RETURN_CONST", 3 + const_offset, lineno=1)] if sys.version_info >= (3, 12) else [ ConcreteInstr("LOAD_CONST", 3 + const_offset, lineno=1), ConcreteInstr("RETURN_VALUE", lineno=1), ] ) ) self.assertInstructionListEqual(list(concrete), expected) # with EXTENDED_ARG concrete = ConcreteBytecode.from_code(code_obj, extended_arg=True) # With future annotation the int annotation is stringified and # stored as constant 
this the default behavior under Python 3.10 if sys.version_info >= (3, 11): func_code = concrete.consts[2] names = ["int", "foo"] consts = ["x", "y", func_code, None] elif sys.version_info >= (3, 10): func_code = concrete.consts[2] names = ["int", "foo"] consts = ["x", "y", func_code, "foo", None] elif concrete.flags & CompilerFlags.FUTURE_ANNOTATIONS: func_code = concrete.consts[2] names = ["foo"] consts = ["int", ("x", "y"), func_code, "foo", None] else: func_code = concrete.consts[1] names = ["int", "foo"] consts = [("x", "y"), func_code, "foo", None] self.assertEqual(concrete.names, names) self.assertEqual(concrete.consts, consts) self.assertInstructionListEqual(list(concrete), expected) # Ensure that concrete._remove_extended_args can handle extended_arg NOPs that get # passed in from other to_code/from_code methods. def test_extended_arg_nop(self): constants = [None] * (0x000129 + 1) constants[0x000129] = "Arbitrary String" # EXTENDED_ARG 0x01, NOP 0xFF, EXTENDED_ARG 0x01, # LOAD_CONST 0x29, RETURN_VALUE 0x00 codestring = bytes([0x90, 0x01, 0x09, 0xFF, 0x90, 0x01, 0x64, 0x29, 0x53, 0x00]) codetype_list = [ 0, 0, 0, 1, 64, codestring, tuple(constants), (), (), "", "code", 1, b"", (), (), ] if sys.version_info >= (3, 8): codetype_list.insert(1, 0) if sys.version_info >= (3, 11): codetype_list.insert(12, "code") codetype_list.insert(14, bytes()) codetype_args = tuple(codetype_list) code = types.CodeType(*codetype_args) # Check it can be encoded and decoded codetype_output = Bytecode.from_code(code).to_code().co_consts code = ConcreteBytecode() code.consts = constants code.extend( [ ConcreteInstr("EXTENDED_ARG", 0x01), ConcreteInstr("NOP"), ConcreteInstr("EXTENDED_ARG", 0x01), ConcreteInstr("LOAD_CONST", 0x29), ConcreteInstr("RETURN_VALUE"), ] ) concrete_output = ConcreteBytecode.to_code(code).co_consts self.assertEqual(codetype_output, concrete_output) # The next three tests ensure we can round trip ConcreteBytecode generated # with extended_args=True def 
test_extended_arg_unpack_ex(self): def test(): p = [1, 2, 3, 4, 5, 6] q, r, *s, t = p return q, r, s, t cpython_stacksize = test.__code__.co_stacksize test.__code__ = ConcreteBytecode.from_code( test.__code__, extended_arg=True ).to_code() self.assertEqual(test.__code__.co_stacksize, cpython_stacksize) self.assertEqual(test(), (1, 2, [3, 4, 5], 6)) def test_expected_arg_with_many_consts(self): def test(): var = 0 var = 1 var = 2 var = 3 var = 4 var = 5 var = 6 var = 7 var = 8 var = 9 var = 10 var = 11 var = 12 var = 13 var = 14 var = 15 var = 16 var = 17 var = 18 var = 19 var = 20 var = 21 var = 22 var = 23 var = 24 var = 25 var = 26 var = 27 var = 28 var = 29 var = 30 var = 31 var = 32 var = 33 var = 34 var = 35 var = 36 var = 37 var = 38 var = 39 var = 40 var = 41 var = 42 var = 43 var = 44 var = 45 var = 46 var = 47 var = 48 var = 49 var = 50 var = 51 var = 52 var = 53 var = 54 var = 55 var = 56 var = 57 var = 58 var = 59 var = 60 var = 61 var = 62 var = 63 var = 64 var = 65 var = 66 var = 67 var = 68 var = 69 var = 70 var = 71 var = 72 var = 73 var = 74 var = 75 var = 76 var = 77 var = 78 var = 79 var = 80 var = 81 var = 82 var = 83 var = 84 var = 85 var = 86 var = 87 var = 88 var = 89 var = 90 var = 91 var = 92 var = 93 var = 94 var = 95 var = 96 var = 97 var = 98 var = 99 var = 100 var = 101 var = 102 var = 103 var = 104 var = 105 var = 106 var = 107 var = 108 var = 109 var = 110 var = 111 var = 112 var = 113 var = 114 var = 115 var = 116 var = 117 var = 118 var = 119 var = 120 var = 121 var = 122 var = 123 var = 124 var = 125 var = 126 var = 127 var = 128 var = 129 var = 130 var = 131 var = 132 var = 133 var = 134 var = 135 var = 136 var = 137 var = 138 var = 139 var = 140 var = 141 var = 142 var = 143 var = 144 var = 145 var = 146 var = 147 var = 148 var = 149 var = 150 var = 151 var = 152 var = 153 var = 154 var = 155 var = 156 var = 157 var = 158 var = 159 var = 160 var = 161 var = 162 var = 163 var = 164 var = 165 var = 166 var = 167 var = 168 var = 169 
var = 170 var = 171 var = 172 var = 173 var = 174 var = 175 var = 176 var = 177 var = 178 var = 179 var = 180 var = 181 var = 182 var = 183 var = 184 var = 185 var = 186 var = 187 var = 188 var = 189 var = 190 var = 191 var = 192 var = 193 var = 194 var = 195 var = 196 var = 197 var = 198 var = 199 var = 200 var = 201 var = 202 var = 203 var = 204 var = 205 var = 206 var = 207 var = 208 var = 209 var = 210 var = 211 var = 212 var = 213 var = 214 var = 215 var = 216 var = 217 var = 218 var = 219 var = 220 var = 221 var = 222 var = 223 var = 224 var = 225 var = 226 var = 227 var = 228 var = 229 var = 230 var = 231 var = 232 var = 233 var = 234 var = 235 var = 236 var = 237 var = 238 var = 239 var = 240 var = 241 var = 242 var = 243 var = 244 var = 245 var = 246 var = 247 var = 248 var = 249 var = 250 var = 251 var = 252 var = 253 var = 254 var = 255 var = 256 var = 257 var = 258 var = 259 return var test.__code__ = ConcreteBytecode.from_code( test.__code__, extended_arg=True ).to_code() self.assertEqual(test.__code__.co_stacksize, 1) self.assertEqual(test(), 259) def test_fail_extended_arg_jump(self): def test(): var = None for _ in range(0, 1): var = 0 var = 1 var = 2 var = 3 var = 4 var = 5 var = 6 var = 7 var = 8 var = 9 var = 10 var = 11 var = 12 var = 13 var = 14 var = 15 var = 16 var = 17 var = 18 var = 19 var = 20 var = 21 var = 22 var = 23 var = 24 var = 25 var = 26 var = 27 var = 28 var = 29 var = 30 var = 31 var = 32 var = 33 var = 34 var = 35 var = 36 var = 37 var = 38 var = 39 var = 40 var = 41 var = 42 var = 43 var = 44 var = 45 var = 46 var = 47 var = 48 var = 49 var = 50 var = 51 var = 52 var = 53 var = 54 var = 55 var = 56 var = 57 var = 58 var = 59 var = 60 var = 61 var = 62 var = 63 var = 64 var = 65 var = 66 var = 67 var = 68 var = 69 var = 70 return var # Generate the bytecode with extended arguments bytecode = ConcreteBytecode.from_code(test.__code__, extended_arg=True) bytecode.to_code() # XXX add tests for linenumbers which are None def 
test_packing_lines(self): import dis from .long_lines_example import long_lines line_starts = list(dis.findlinestarts(long_lines.__code__)) concrete = ConcreteBytecode.from_code(long_lines.__code__) as_code = concrete.to_code() self.assertEqual(line_starts, list(dis.findlinestarts(as_code))) def test_exception_table_round_trip(self): from . import exception_handling_cases as ehc for f in ehc.TEST_CASES: print(f.__name__) with self.subTest(f.__name__): origin = f.__code__ concrete = ConcreteBytecode.from_code(f.__code__) as_code = concrete.to_code( stacksize=f.__code__.co_stacksize, compute_exception_stack_depths=False, ) self.assertCodeObjectEqual(origin, as_code) f.__code__ = as_code if inspect.iscoroutinefunction(f): if sys.version_info >= (3, 10): asyncio.run(f()) else: f() def test_cellvar_freevar_roundtrip(self): from . import cell_free_vars_cases as cfc def recompile_code_and_inner(code): concrete = ConcreteBytecode.from_code(code) for i, c in enumerate(concrete.consts): if isinstance(c, types.CodeType): concrete.consts[i] = recompile_code_and_inner(c) as_code = concrete.to_code( stacksize=code.co_stacksize, compute_exception_stack_depths=False ) self.assertCodeObjectEqual(code, as_code) return as_code for f in cfc.TEST_CASES: print(f.__name__) with self.subTest(f.__name__): origin = f.__code__ f.__code__ = recompile_code_and_inner(origin) while callable(f := f()): pass class BytecodeToConcreteTests(TestCase): def test_label(self): code = Bytecode() label = Label() code.extend( [ Instr("LOAD_CONST", "hello", lineno=1), Instr("JUMP_FORWARD", label, lineno=1), label, Instr("POP_TOP", lineno=1), ] ) code = code.to_concrete_bytecode() expected = [ ConcreteInstr("LOAD_CONST", 0, lineno=1), ConcreteInstr("JUMP_FORWARD", 0, lineno=1), ConcreteInstr("POP_TOP", lineno=1), ] self.assertInstructionListEqual(list(code), expected) self.assertListEqual(code.consts, ["hello"]) def test_label2(self): bytecode = Bytecode() label = Label() bytecode.extend( [ Instr("LOAD_NAME", 
"test", lineno=1), Instr( "POP_JUMP_FORWARD_IF_FALSE" if (3, 12) > sys.version_info >= (3, 11) else "POP_JUMP_IF_FALSE", label, ), Instr("LOAD_CONST", 5, lineno=2), Instr("STORE_NAME", "x"), Instr("JUMP_FORWARD", label), Instr("LOAD_CONST", 7, lineno=4), Instr("STORE_NAME", "x"), label, Instr("LOAD_CONST", None), Instr("RETURN_VALUE"), ] ) concrete = bytecode.to_concrete_bytecode() expected = [ ConcreteInstr("LOAD_NAME", 0, lineno=1), ConcreteInstr( "POP_JUMP_FORWARD_IF_FALSE" if (3, 12) > sys.version_info >= (3, 11) else "POP_JUMP_IF_FALSE", 7 if OFFSET_AS_INSTRUCTION else 14, lineno=1, ), ConcreteInstr("LOAD_CONST", 0, lineno=2), ConcreteInstr("STORE_NAME", 1, lineno=2), ConcreteInstr("JUMP_FORWARD", 2 if OFFSET_AS_INSTRUCTION else 4, lineno=2), ConcreteInstr("LOAD_CONST", 1, lineno=4), ConcreteInstr("STORE_NAME", 1, lineno=4), ConcreteInstr("LOAD_CONST", 2, lineno=4), ConcreteInstr("RETURN_VALUE", lineno=4), ] self.assertInstructionListEqual(list(concrete), expected) self.assertListEqual(concrete.consts, [5, 7, None]) self.assertListEqual(concrete.names, ["test", "x"]) self.assertListEqual(concrete.varnames, []) def test_label3(self): """ CPython generates useless EXTENDED_ARG 0 in some cases. We need to properly track them as otherwise we can end up with broken offset for jumps. 
""" source = """ def func(x): if x == 1: return x + 0 elif x == 2: return x + 1 elif x == 3: return x + 2 elif x == 4: return x + 3 elif x == 5: return x + 4 elif x == 6: return x + 5 elif x == 7: return x + 6 elif x == 8: return x + 7 elif x == 9: return x + 8 elif x == 10: return x + 9 elif x == 11: return x + 10 elif x == 12: return x + 11 elif x == 13: return x + 12 elif x == 14: return x + 13 elif x == 15: return x + 14 elif x == 16: return x + 15 elif x == 17: return x + 16 return -1 """ code = get_code(source, function=True) bcode = Bytecode.from_code(code) concrete = bcode.to_concrete_bytecode() self.assertIsInstance(concrete, ConcreteBytecode) # Ensure that we do not generate broken code loc = {} exec(textwrap.dedent(source), loc) func = loc["func"] func.__code__ = bcode.to_code() for i, x in enumerate(range(1, 18)): self.assertEqual(func(x), x + i) self.assertEqual(func(18), -1) # Ensure that we properly round trip in such cases self.assertSequenceEqual( ConcreteBytecode.from_code(code) .to_code(stacksize=code.co_stacksize, compute_exception_stack_depths=False) .co_code, code.co_code, ) def test_setlineno(self): # x = 7 # y = 8 # z = 9 concrete = ConcreteBytecode() concrete.consts = [7, 8, 9] concrete.names = ["x", "y", "z"] concrete.first_lineno = 3 concrete.extend( [ ConcreteInstr("LOAD_CONST", 0), ConcreteInstr("STORE_NAME", 0), SetLineno(4), ConcreteInstr("LOAD_CONST", 1), ConcreteInstr("STORE_NAME", 1), SetLineno(5), ConcreteInstr("LOAD_CONST", 2), ConcreteInstr("STORE_NAME", 2), ] ) code = concrete.to_bytecode() self.assertInstructionListEqual( code, [ Instr("LOAD_CONST", 7, lineno=3), Instr("STORE_NAME", "x", lineno=3), Instr("LOAD_CONST", 8, lineno=4), Instr("STORE_NAME", "y", lineno=4), Instr("LOAD_CONST", 9, lineno=5), Instr("STORE_NAME", "z", lineno=5), ], ) def test_extended_jump(self): NOP = bytes((opcode.opmap["NOP"], 0)) # code using jumps > 0xffff to test extended arg nb_nop = 2**16 if OFFSET_AS_INSTRUCTION else 2**15 # The length of the 
jump is independent of the number of instruction # per the above logic. jump = 2**16 code = ConcreteBytecode( [ConcreteInstr("JUMP_FORWARD", jump)] + [ConcreteInstr("NOP")] * nb_nop + [ ConcreteInstr("LOAD_CONST", 0), ConcreteInstr("RETURN_VALUE"), ], consts=(None,), ) code_obj = code.to_code() # We use 2 extended args (0x90) out of the maximum 3 which are allowed i_code = opcode.opmap["JUMP_FORWARD"].to_bytes(1, "little") expected = b"\x90\x01\x90\x00" + i_code + b"\x00" + NOP * nb_nop + b"d\x00S\x00" self.assertSequenceEqual(code_obj.co_code, expected) def test_jumps(self): # if test: # x = 12 # else: # x = 37 code = Bytecode() label_else = Label() label_return = Label() code.extend( [ Instr("LOAD_NAME", "test", lineno=1), Instr( "POP_JUMP_FORWARD_IF_FALSE" if (3, 12) > sys.version_info >= (3, 11) else "POP_JUMP_IF_FALSE", label_else, ), Instr("LOAD_CONST", 12, lineno=2), Instr("STORE_NAME", "x"), Instr("JUMP_FORWARD", label_return), label_else, Instr("LOAD_CONST", 37, lineno=4), Instr("STORE_NAME", "x"), label_return, Instr("LOAD_CONST", None, lineno=4), Instr("RETURN_VALUE"), ] ) code = code.to_concrete_bytecode() expected = [ ConcreteInstr("LOAD_NAME", 0, lineno=1), ConcreteInstr( "POP_JUMP_FORWARD_IF_FALSE" if (3, 12) > sys.version_info >= (3, 11) else "POP_JUMP_IF_FALSE", 5 if OFFSET_AS_INSTRUCTION else 10, lineno=1, ), ConcreteInstr("LOAD_CONST", 0, lineno=2), ConcreteInstr("STORE_NAME", 1, lineno=2), ConcreteInstr("JUMP_FORWARD", 2 if OFFSET_AS_INSTRUCTION else 4, lineno=2), ConcreteInstr("LOAD_CONST", 1, lineno=4), ConcreteInstr("STORE_NAME", 1, lineno=4), ConcreteInstr("LOAD_CONST", 2, lineno=4), ConcreteInstr("RETURN_VALUE", lineno=4), ] self.assertInstructionListEqual(list(code), expected) self.assertListEqual(code.consts, [12, 37, None]) self.assertListEqual(code.names, ["test", "x"]) self.assertListEqual(code.varnames, []) def test_dont_merge_constants(self): # test two constants which are equal but have a different type code = Bytecode() 
code.extend( [ Instr("LOAD_CONST", 5, lineno=1), Instr("LOAD_CONST", 5.0, lineno=1), Instr("LOAD_CONST", -0.0, lineno=1), Instr("LOAD_CONST", +0.0, lineno=1), ] ) code = code.to_concrete_bytecode() expected = [ ConcreteInstr("LOAD_CONST", 0, lineno=1), ConcreteInstr("LOAD_CONST", 1, lineno=1), ConcreteInstr("LOAD_CONST", 2, lineno=1), ConcreteInstr("LOAD_CONST", 3, lineno=1), ] self.assertInstructionListEqual(list(code), expected) self.assertListEqual(code.consts, [5, 5.0, -0.0, +0.0]) def test_cellvars(self): code = Bytecode() code.cellvars = ["x"] code.freevars = ["y"] code.extend( [ Instr("LOAD_DEREF", CellVar("x"), lineno=1), Instr("LOAD_DEREF", FreeVar("y"), lineno=1), ] ) concrete = code.to_concrete_bytecode() self.assertEqual(concrete.cellvars, ["x"]) self.assertEqual(concrete.freevars, ["y"]) def test_compute_jumps_convergence(self): # Consider the following sequence of instructions: # # JUMP_FORWARD Label1 # JUMP_FORWARD Label2 # ...126 instructions... # Label1: Offset 254 on first pass, 256 second pass # NOP # ... many more instructions ... # Label2: Offset > 256 on first pass # # On first pass of compute_jumps(), Label2 will be at address 254, so # that value encodes into the single byte arg of JUMP_ABSOLUTE. # # On second pass compute_jumps() the instr at Label1 will have offset # of 256 so will also be given an EXTENDED_ARG. # # Thus we need to make an additional pass. This test only verifies # case where 2 passes is insufficient but three is enough. # # On Python > 3.10 we need to double the number since the offset is now # in term of instructions and not bytes. # Create code from comment above. code = Bytecode() label1 = Label() label2 = Label() nop = "NOP" code.append(Instr("JUMP_FORWARD", label1)) code.append(Instr("JUMP_FORWARD", label2)) # range excludes the last point ... 
for _ in range(4, 511 if OFFSET_AS_INSTRUCTION else 255, 2): code.append(Instr(nop)) code.append(label1) code.append(Instr(nop)) for _ in range( 514 if OFFSET_AS_INSTRUCTION else 256, 600 if OFFSET_AS_INSTRUCTION else 300, 2, ): code.append(Instr(nop)) code.append(label2) code.append(Instr(nop)) # This should pass by default. code.to_code() # Try with max of two passes: it should raise with self.assertRaises(RuntimeError): code.to_code(compute_jumps_passes=2) def test_extreme_compute_jumps_convergence(self): """Test of compute_jumps() requiring absurd number of passes. NOTE: This test also serves to demonstrate that there is no worst case: the number of passes can be unlimited (or, actually, limited by the size of the provided code). This is an extension of test_compute_jumps_convergence. Instead of two jumps, where the earlier gets extended after the latter, we instead generate a series of many jumps. Each pass of compute_jumps() extends one more instruction, which in turn causes the one behind it to be extended on the next pass. """ # N: the number of unextended instructions that can be squeezed into a # set of bytes adressable by the arg of an unextended instruction. # The answer is "128", but here's how we arrive at it. max_unextended_offset = 1 << 8 unextended_branch_instr_size = 2 N = max_unextended_offset // unextended_branch_instr_size # When using instruction rather than bytes in the offset multiply by 2 if OFFSET_AS_INSTRUCTION: N *= 2 nop = "UNARY_NEGATIVE" # don't use NOP, dis.stack_effect will raise # The number of jumps will be equal to the number of labels. The # number of passes of compute_jumps() required will be one greater # than this. labels = [Label() for x in range(0, 3 * N)] code = Bytecode() code.extend( Instr("JUMP_FORWARD", labels[len(labels) - x - 1]) for x in range(0, len(labels)) ) end_of_jumps = len(code) code.extend(Instr(nop) for x in range(0, N)) # Now insert the labels. The first is N instructions (i.e. 
256 # bytes) after the last jump. Then they proceed to earlier positions # 4 bytes at a time. While the targets are in the range of the nop # instructions, 4 bytes is two instructions. When the targets are in # the range of JUMP_FORWARD instructions we have to allow for the fact # that the instructions will have been extended to four bytes each, so # working backwards 4 bytes per label means just one instruction per # label. offset = end_of_jumps + N for index in range(0, len(labels)): code.insert(offset, labels[index]) if offset <= end_of_jumps: offset -= 1 else: offset -= 2 code.insert(0, Instr("LOAD_CONST", 0)) del end_of_jumps code.append(Instr("RETURN_VALUE")) code.to_code(compute_jumps_passes=(len(labels) + 1)) def test_general_constants(self): """Test if general object could be linked as constants.""" class CustomObject: pass class UnHashableCustomObject: __hash__ = None obj1 = [1, 2, 3] obj2 = {1, 2, 3} obj3 = CustomObject() obj4 = UnHashableCustomObject() code = Bytecode( [ Instr("LOAD_CONST", obj1, lineno=1), Instr("LOAD_CONST", obj2, lineno=1), Instr("LOAD_CONST", obj3, lineno=1), Instr("LOAD_CONST", obj4, lineno=1), Instr("BUILD_TUPLE", 4, lineno=1), Instr("RETURN_VALUE", lineno=1), ] ) self.assertEqual(code.to_code().co_consts, (obj1, obj2, obj3, obj4)) def f(): return # pragma: no cover f.__code__ = code.to_code() self.assertEqual(f(), (obj1, obj2, obj3, obj4)) # FIXME test more cases for line encoding in particular with extended args if __name__ == "__main__": unittest.main() # pragma: no cover bytecode-0.15.1/tests/test_flags.py000066400000000000000000000163231451217043400172510ustar00rootroot00000000000000#!/usr/bin/env python3 import sys import unittest from bytecode import ( Bytecode, CompilerFlags, ConcreteBytecode, ConcreteInstr, ControlFlowGraph, ) from bytecode.flags import infer_flags from bytecode.instr import UNSET, FreeVar, Instr # Py 3.11 # - new opcodes could modify inference: # - SEND, ASYNC_GEN_WRAP, RETURN_GENERATOR, class 
FlagsTests(unittest.TestCase): def test_type_validation_on_inference(self): with self.assertRaises(ValueError): infer_flags(1) def test_flag_inference(self): # Check no loss of non-infered flags code = ControlFlowGraph() code.flags |= ( CompilerFlags.NEWLOCALS | CompilerFlags.VARARGS | CompilerFlags.VARKEYWORDS | CompilerFlags.NESTED | CompilerFlags.FUTURE_GENERATOR_STOP ) code.update_flags() for f in ( CompilerFlags.NEWLOCALS, CompilerFlags.VARARGS, CompilerFlags.VARKEYWORDS, CompilerFlags.NESTED, CompilerFlags.NOFREE, CompilerFlags.OPTIMIZED, CompilerFlags.FUTURE_GENERATOR_STOP, ): self.assertTrue(bool(code.flags & f)) # Infer optimized and nofree code = Bytecode() flags = infer_flags(code) self.assertTrue(bool(flags & CompilerFlags.OPTIMIZED)) self.assertTrue(bool(flags & CompilerFlags.NOFREE)) code.append(Instr("STORE_NAME", "a")) flags = infer_flags(code) self.assertFalse(bool(flags & CompilerFlags.OPTIMIZED)) self.assertTrue(bool(flags & CompilerFlags.NOFREE)) code.append(Instr("STORE_DEREF", FreeVar("b"))) code.update_flags() self.assertFalse(bool(code.flags & CompilerFlags.OPTIMIZED)) self.assertFalse(bool(code.flags & CompilerFlags.NOFREE)) def test_async_gen_no_flag_is_async_None(self): # Test inference in the absence of any flag set on the bytecode # Infer generator code = ConcreteBytecode() code.append( ConcreteInstr("YIELD_VALUE", 0) if sys.version_info >= (3, 12) else ConcreteInstr("YIELD_VALUE") ) code.update_flags() self.assertTrue(bool(code.flags & CompilerFlags.GENERATOR)) # Infer coroutine code = ConcreteBytecode() code.append( ConcreteInstr("GET_AWAITABLE", 0 if sys.version_info >= (3, 11) else UNSET) ) code.update_flags() self.assertTrue(bool(code.flags & CompilerFlags.COROUTINE)) # Infer coroutine or async generator for i, expected in ( ("YIELD_VALUE", CompilerFlags.ASYNC_GENERATOR), ("YIELD_FROM", CompilerFlags.COROUTINE), ): with self.subTest(i): if sys.version_info >= (3, 11) and i == "YIELD_FROM": self.skipTest("YIELD_FROM does not exist 
on 3.11") code = ConcreteBytecode() code.append( ConcreteInstr( "GET_AWAITABLE", 0 if sys.version_info >= (3, 11) else UNSET ) ) code.append( ConcreteInstr(i, 0) if sys.version_info >= (3, 12) else ConcreteInstr(i) ) code.update_flags() self.assertTrue(bool(code.flags & expected)) def test_async_gen_no_flag_is_async_True(self): # Test inference when we request an async function # Force coroutine code = ConcreteBytecode() code.update_flags(is_async=True) self.assertTrue(bool(code.flags & CompilerFlags.COROUTINE)) # Infer coroutine or async generator for i, expected in ( ("YIELD_VALUE", CompilerFlags.ASYNC_GENERATOR), ("YIELD_FROM", CompilerFlags.COROUTINE), ): with self.subTest(i): if sys.version_info >= (3, 11) and i == "YIELD_FROM": self.skipTest("YIELD_FROM does not exist on 3.11") code = ConcreteBytecode() code.append( ConcreteInstr(i, 0) if sys.version_info >= (3, 12) else ConcreteInstr(i) ) code.update_flags(is_async=True) self.assertTrue(bool(code.flags & expected)) def test_async_gen_no_flag_is_async_False(self): # Test inference when we request a non-async function # Infer generator code = ConcreteBytecode() code.append( ConcreteInstr("YIELD_VALUE", 0) if sys.version_info >= (3, 12) else ConcreteInstr("YIELD_VALUE") ) code.flags = CompilerFlags(CompilerFlags.COROUTINE) code.update_flags(is_async=False) self.assertTrue(bool(code.flags & CompilerFlags.GENERATOR)) # Abort on coroutine code = ConcreteBytecode() code.append( ConcreteInstr("GET_AWAITABLE", 0 if sys.version_info >= (3, 11) else UNSET) ) code.flags = CompilerFlags(CompilerFlags.COROUTINE) with self.assertRaises(ValueError): code.update_flags(is_async=False) def test_async_gen_flags(self): # Test inference in the presence of pre-existing flags for is_async in (None, True): # Infer generator code = ConcreteBytecode() code.append( ConcreteInstr("YIELD_VALUE", 0) if sys.version_info >= (3, 12) else ConcreteInstr("YIELD_VALUE") ) for f, expected in ( (CompilerFlags.COROUTINE, 
CompilerFlags.ASYNC_GENERATOR), (CompilerFlags.ASYNC_GENERATOR, CompilerFlags.ASYNC_GENERATOR), (CompilerFlags.ITERABLE_COROUTINE, CompilerFlags.ITERABLE_COROUTINE), ): code.flags = CompilerFlags(f) code.update_flags(is_async=is_async) self.assertTrue(bool(code.flags & expected)) # Infer coroutine if sys.version_info < (3, 11): code = ConcreteBytecode() code.append(ConcreteInstr("YIELD_FROM")) for f, expected in ( (CompilerFlags.COROUTINE, CompilerFlags.COROUTINE), (CompilerFlags.ASYNC_GENERATOR, CompilerFlags.COROUTINE), ( CompilerFlags.ITERABLE_COROUTINE, CompilerFlags.ITERABLE_COROUTINE, ), ): code.flags = CompilerFlags(f) code.update_flags(is_async=is_async) self.assertTrue(bool(code.flags & expected)) # Crash on ITERABLE_COROUTINE with async bytecode code = ConcreteBytecode() code.append( ConcreteInstr( "GET_AWAITABLE", 0 if sys.version_info >= (3, 11) else UNSET ) ) code.flags = CompilerFlags(CompilerFlags.ITERABLE_COROUTINE) with self.assertRaises(ValueError): code.update_flags(is_async=is_async) if __name__ == "__main__": unittest.main() # pragma: no cover bytecode-0.15.1/tests/test_instr.py000066400000000000000000000376271451217043400173260ustar00rootroot00000000000000#!/usr/bin/env python3 import opcode import sys import unittest from bytecode import ( UNSET, BasicBlock, CellVar, Compare, FreeVar, Instr, Label, SetLineno, ) from bytecode.instr import ( BITFLAG2_INSTRUCTIONS, BITFLAG_INSTRUCTIONS, INTRINSIC_1OP, INTRINSIC_2OP, InstrLocation, Intrinsic1Op, Intrinsic2Op, opcode_has_argument, ) from . import TestCase # XXX tests for location and lineno setter # Starting with Python 3.11 jump opcode have changed quite a bit. 
We define here # opcode useful to test for both Python < 3.11 and Python >= 3.11 UNCONDITIONAL_JUMP = "JUMP_FORWARD" if sys.version_info >= (3, 11) else "JUMP_ABSOLUTE" CONDITIONAL_JUMP = ( "POP_JUMP_FORWARD_IF_TRUE" if (3, 12) > sys.version_info >= (3, 11) else "POP_JUMP_IF_TRUE" ) CALL = "CALL" if sys.version_info >= (3, 11) else "CALL_FUNCTION" class SetLinenoTests(TestCase): def test_lineno(self): lineno = SetLineno(1) self.assertEqual(lineno.lineno, 1) def test_equality(self): lineno = SetLineno(1) self.assertNotEqual(lineno, 1) self.assertEqual(lineno, SetLineno(1)) self.assertNotEqual(lineno, SetLineno(2)) class VariableTests(TestCase): def test_str(self): for cls in (CellVar, FreeVar): var = cls("a") self.assertEqual(str(var), "a") def test_repr(self): for cls in (CellVar, FreeVar): var = cls("_a_x_a_") r = repr(var) self.assertIn("_a_x_a_", r) self.assertIn(cls.__name__, r) def test_eq(self): f1 = FreeVar("a") f2 = FreeVar("b") c1 = CellVar("a") c2 = CellVar("b") for v1, v2, eq in ( (f1, f1, True), (f1, f2, False), (f1, c1, False), (c1, c1, True), (c1, c2, False), ): if eq: self.assertEqual(v1, v2) else: self.assertNotEqual(v1, v2) class InstrLocationTests(TestCase): def test_init(self): for args, error in [ ((None, None, None, None), ""), ((None, 1, None, None), "End lineno specified with no lineno"), ((12, 1, None, None), "cannot be smaller than lineno"), ((12, 13, None, None), ""), ((None, None, 1, None), "lineno information are incomplete"), ((None, None, None, 1), "lineno information are incomplete"), ((1, None, 1, None), "lineno information are incomplete"), ((1, None, None, 1), "lineno information are incomplete"), ((1, 2, None, 1), "with no column offset"), ((1, 2, 12, 1), ""), ((1, 1, 12, 1), "cannot be smaller than column offset"), ((1, 1, 12, None), "No end column offset was"), ]: print(f"{args}, {error}") with self.subTest(f"{args}, {error}"): if error: with self.assertRaises(ValueError) as e: InstrLocation(*args) self.assertIn(error, 
str(e.exception)) else: InstrLocation(*args) class InstrTests(TestCase): def test_constructor(self): # invalid line number with self.assertRaises(TypeError): Instr("NOP", lineno="x") with self.assertRaises(ValueError): Instr("NOP", lineno=-1 if sys.version_info >= (3, 11) else 0) # invalid name with self.assertRaises(TypeError): Instr(1) with self.assertRaises(ValueError): Instr("xxx") def test_repr(self): # No arg r = repr(Instr("NOP", lineno=10)) self.assertIn("NOP", r) self.assertIn("10", r) self.assertIn("lineno", r) # Arg r = repr(Instr("LOAD_FAST", "_x_", lineno=10)) self.assertIn("LOAD_FAST", r) self.assertIn("lineno", r) self.assertIn("10", r) self.assertIn("arg", r) self.assertIn("_x_", r) def test_reject_pseudo_opcode(self): if sys.version_info >= (3, 12): with self.assertRaises(ValueError) as e: Instr("LOAD_METHOD", "x") self.assertIn("is an instrumented or pseudo opcode", str(e.exception)) def test_invalid_arg(self): label = Label() block = BasicBlock() # EXTENDED_ARG self.assertRaises(ValueError, Instr, "EXTENDED_ARG", 0) # has_jump() self.assertRaises(TypeError, Instr, UNCONDITIONAL_JUMP, 1) self.assertRaises(TypeError, Instr, UNCONDITIONAL_JUMP, 1.0) Instr(UNCONDITIONAL_JUMP, label) Instr(UNCONDITIONAL_JUMP, block) # hasfree self.assertRaises(TypeError, Instr, "LOAD_DEREF", "x") Instr("LOAD_DEREF", CellVar("x")) Instr("LOAD_DEREF", FreeVar("x")) # haslocal self.assertRaises(TypeError, Instr, "LOAD_FAST", 1) Instr("LOAD_FAST", "x") # hasname self.assertRaises(TypeError, Instr, "LOAD_NAME", 1) Instr("LOAD_NAME", "x") # hasconst self.assertRaises(ValueError, Instr, "LOAD_CONST") # UNSET self.assertRaises(ValueError, Instr, "LOAD_CONST", label) self.assertRaises(ValueError, Instr, "LOAD_CONST", block) Instr("LOAD_CONST", 1.0) Instr("LOAD_CONST", object()) # hascompare self.assertRaises(TypeError, Instr, "COMPARE_OP", 1) Instr("COMPARE_OP", Compare.EQ) # HAVE_ARGUMENT self.assertRaises(ValueError, Instr, CALL, -1) self.assertRaises(TypeError, Instr, CALL, 
3.0) Instr(CALL, 3) # test maximum argument self.assertRaises(ValueError, Instr, CALL, 2147483647 + 1) instr = Instr(CALL, 2147483647) self.assertEqual(instr.arg, 2147483647) # not HAVE_ARGUMENT self.assertRaises(ValueError, Instr, "NOP", 0) Instr("NOP") # Instructions using a bitflag in their oparg for name in BITFLAG_INSTRUCTIONS: self.assertRaises(TypeError, Instr, name, "arg") self.assertRaises(TypeError, Instr, name, ("arg",)) self.assertRaises(TypeError, Instr, name, ("", "arg")) self.assertRaises(TypeError, Instr, name, (False, 1)) Instr(name, (True, "arg")) # Instructions using 2 bitflag in their oparg for name in BITFLAG2_INSTRUCTIONS: self.assertRaises(TypeError, Instr, name, "arg") self.assertRaises(TypeError, Instr, name, ("arg",)) self.assertRaises(TypeError, Instr, name, ("", True, "arg")) self.assertRaises(TypeError, Instr, name, (True, "", "arg")) self.assertRaises(TypeError, Instr, name, (False, True, 1)) Instr(name, (False, True, "arg")) for name in [opcode.opname[i] for i in INTRINSIC_1OP]: self.assertRaises(TypeError, Instr, name, 1) Instr(name, Intrinsic1Op.INTRINSIC_PRINT) for name in [opcode.opname[i] for i in INTRINSIC_2OP]: self.assertRaises(TypeError, Instr, name, 1) Instr(name, Intrinsic2Op.INTRINSIC_PREP_RERAISE_STAR) def test_require_arg(self): i = Instr(CALL, 3) self.assertTrue(i.require_arg()) i = Instr("NOP") self.assertFalse(i.require_arg()) def test_attr(self): instr = Instr("LOAD_CONST", 3, lineno=5) self.assertEqual(instr.name, "LOAD_CONST") self.assertEqual(instr.opcode, 100) self.assertEqual(instr.arg, 3) self.assertEqual(instr.lineno, 5) # invalid values/types self.assertRaises( ValueError, setattr, instr, "lineno", -1 if sys.version_info >= (3, 11) else 0, ) self.assertRaises(TypeError, setattr, instr, "lineno", 1.0) self.assertRaises(TypeError, setattr, instr, "name", 5) self.assertRaises(TypeError, setattr, instr, "opcode", 1.0) self.assertRaises(ValueError, setattr, instr, "opcode", -1) self.assertRaises(ValueError, 
setattr, instr, "opcode", 255) # arg can take any attribute but cannot be deleted instr.arg = -8 instr.arg = object() self.assertRaises(AttributeError, delattr, instr, "arg") # no argument instr = Instr("RETURN_VALUE") self.assertIs(instr.arg, UNSET) def test_modify_op(self): instr = Instr("LOAD_NAME", "x") load_fast = opcode.opmap["LOAD_FAST"] instr.opcode = load_fast self.assertEqual(instr.name, "LOAD_FAST") self.assertEqual(instr.opcode, load_fast) def test_extended_arg(self): instr = Instr("LOAD_CONST", 0x1234ABCD) self.assertEqual(instr.arg, 0x1234ABCD) def test_slots(self): instr = Instr("NOP") with self.assertRaises(AttributeError): instr.myattr = 1 def test_compare(self): instr = Instr("LOAD_CONST", 3, lineno=7) self.assertEqual(instr, Instr("LOAD_CONST", 3, lineno=7)) self.assertNotEqual(instr, 1) # different lineno self.assertNotEqual(instr, Instr("LOAD_CONST", 3)) self.assertNotEqual(instr, Instr("LOAD_CONST", 3, lineno=6)) # different op self.assertNotEqual(instr, Instr("LOAD_FAST", "x", lineno=7)) # different arg self.assertNotEqual(instr, Instr("LOAD_CONST", 4, lineno=7)) def test_has_jump(self): label = Label() jump = Instr(UNCONDITIONAL_JUMP, label) self.assertTrue(jump.has_jump()) instr = Instr("LOAD_FAST", "x") self.assertFalse(instr.has_jump()) def test_is_cond_jump(self): label = Label() jump = Instr(CONDITIONAL_JUMP, label) self.assertTrue(jump.is_cond_jump()) instr = Instr("LOAD_FAST", "x") self.assertFalse(instr.is_cond_jump()) def test_is_uncond_jump(self): label = Label() jump = Instr(UNCONDITIONAL_JUMP, label) self.assertTrue(jump.is_uncond_jump()) instr = Instr(CONDITIONAL_JUMP, label) self.assertFalse(instr.is_uncond_jump()) def test_const_key_not_equal(self): def check(value): self.assertEqual(Instr("LOAD_CONST", value), Instr("LOAD_CONST", value)) def func(): pass check(None) check(0) check(0.0) check(b"bytes") check("text") check(Ellipsis) check((1, 2, 3)) check(frozenset({1, 2, 3})) check(func.__code__) check(object()) def 
test_const_key_equal(self): neg_zero = -0.0 pos_zero = +0.0 # int and float: 0 == 0.0 self.assertNotEqual(Instr("LOAD_CONST", 0), Instr("LOAD_CONST", 0.0)) # float: -0.0 == +0.0 self.assertNotEqual( Instr("LOAD_CONST", neg_zero), Instr("LOAD_CONST", pos_zero) ) # complex self.assertNotEqual( Instr("LOAD_CONST", complex(neg_zero, 1.0)), Instr("LOAD_CONST", complex(pos_zero, 1.0)), ) self.assertNotEqual( Instr("LOAD_CONST", complex(1.0, neg_zero)), Instr("LOAD_CONST", complex(1.0, pos_zero)), ) # tuple self.assertNotEqual(Instr("LOAD_CONST", (0,)), Instr("LOAD_CONST", (0.0,))) nested_tuple1 = (0,) nested_tuple1 = (nested_tuple1,) nested_tuple2 = (0.0,) nested_tuple2 = (nested_tuple2,) self.assertNotEqual( Instr("LOAD_CONST", nested_tuple1), Instr("LOAD_CONST", nested_tuple2) ) # frozenset self.assertNotEqual( Instr("LOAD_CONST", frozenset({0})), Instr("LOAD_CONST", frozenset({0.0})) ) def test_stack_effects(self): # Verify all opcodes are handled and that "jump=None" really returns # the max of the other cases. from bytecode.concrete import ConcreteInstr def check_pre_post(instr, jump): effect = instr.stack_effect(jump) pre, post = instr.pre_and_post_stack_effect(jump) self.assertEqual(pre + post, effect) return effect def check(instr): jump = check_pre_post(instr, jump=True) no_jump = check_pre_post(instr, jump=False) max_effect = check_pre_post(instr, jump=None) self.assertEqual(instr.stack_effect(), max_effect) self.assertEqual(max_effect, max(jump, no_jump)) if not instr.has_jump(): self.assertEqual(jump, no_jump) for name, op in opcode.opmap.items(): if sys.version_info >= (3, 12) and op >= opcode.MIN_INSTRUMENTED_OPCODE: continue print(name) with self.subTest(name): # Use ConcreteInstr instead of Instr because it doesn't care # what kind of argument it is constructed with. 
# The 0 handles the CACHE case if not opcode_has_argument(op) and op != 0: check(ConcreteInstr(name)) else: for arg in range(256): check(ConcreteInstr(name, arg)) # LOAD_CONST uses a concrete python object as its oparg, however, in # dis.stack_effect(opcode.opmap['LOAD_CONST'], oparg), # oparg should be the index of that python object in the constants. # # Fortunately, for an instruction whose oparg isn't equivalent to its # form in binary files(pyc format), the stack effect is a # constant which does not depend on its oparg. # # The second argument of dis.stack_effect cannot be # more than 2**31 - 1. If stack effect of an instruction is # independent of its oparg, we pass 0 as the second argument # of dis.stack_effect. # (As a result we can calculate stack_effect for # any LOAD_CONST instructions, even for large integers) for arg in 2**31, 2**32, 2**63, 2**64, -1: self.assertEqual(Instr("LOAD_CONST", arg).stack_effect(), 1) def test_code_object_containing_mutable_data(self): from types import CodeType from bytecode import Bytecode, Instr def f(): def g(): # Under Python 3.12+ we need a temporary var to be sure we use # LOAD_CONST rather than RETURN_CONST a = "value" return a return g f_code = Bytecode.from_code(f.__code__) instr_load_code = None mutable_datum = [4, 2] for each in f_code: if ( isinstance(each, Instr) and each.name == "LOAD_CONST" and isinstance(each.arg, CodeType) ): instr_load_code = each break self.assertIsNotNone(instr_load_code) g_code = Bytecode.from_code(instr_load_code.arg) # Under Python 3.11+, the first instruction is not LOAD_CONST but RESUME for instr in g_code: if isinstance(each, Instr) and instr.name == "LOAD_CONST": instr.arg = mutable_datum instr_load_code.arg = g_code.to_code() f.__code__ = f_code.to_code() self.assertIs(f()(), mutable_datum) class CompareTests(TestCase): def test_compare_ops(self): from bytecode import Bytecode, Instr def f(): pass params = zip(iter(Compare), (True, True, False, True, False, False)) for cmp, 
def check_dump_bytecode(self, code, expected, lineno=None):
    """Assert that ``bytecode.dump_bytecode(code)`` prints exactly *expected*.

    Parameters:
        code: object handed to ``bytecode.dump_bytecode``.
        expected: the complete text expected on standard output.
        lineno: when not ``None``, forwarded as the ``lineno`` keyword of
            ``dump_bytecode``; when ``None`` the keyword is omitted so the
            dumper applies its own default.
    """
    # Forward the caller's value instead of coercing every non-None value
    # to True, so that an explicit ``lineno=False`` means what it says.
    options = {} if lineno is None else {"lineno": lineno}

    # The dump is read back from standard output, hence the redirection.
    with contextlib.redirect_stdout(io.StringIO()) as captured:
        bytecode.dump_bytecode(code, **options)
    output = captured.getvalue()

    self.assertMultiLineEqual(output, expected)
label_instr6: LOAD_FAST 'test' LOAD_CONST 2 COMPARE_OP {enum_repr} POP_JUMP_IF_FALSE LOAD_CONST 2 RETURN_VALUE label_instr13: LOAD_CONST 3 RETURN_VALUE """ self.check_dump_bytecode(code, expected[1:].rstrip(" ")) # with line numbers if sys.version_info >= (3, 12): expected = f""" L. 1 0: RESUME 0 L. 2 1: LOAD_FAST 'test' 2: LOAD_CONST 1 3: COMPARE_OP {enum_repr} 4: POP_JUMP_IF_FALSE L. 3 5: RETURN_CONST 1 label_instr6: L. 4 7: LOAD_FAST 'test' 8: LOAD_CONST 2 9: COMPARE_OP {enum_repr} 10: POP_JUMP_IF_FALSE L. 5 11: RETURN_CONST 2 label_instr12: L. 6 13: RETURN_CONST 3 """ elif sys.version_info >= (3, 11): expected = f""" L. 1 0: RESUME 0 L. 2 1: LOAD_FAST 'test' 2: LOAD_CONST 1 3: COMPARE_OP {enum_repr} 4: POP_JUMP_FORWARD_IF_FALSE L. 3 5: LOAD_CONST 1 6: RETURN_VALUE label_instr7: L. 4 8: LOAD_FAST 'test' 9: LOAD_CONST 2 10: COMPARE_OP {enum_repr} 11: POP_JUMP_FORWARD_IF_FALSE L. 5 12: LOAD_CONST 2 13: RETURN_VALUE label_instr14: L. 6 15: LOAD_CONST 3 16: RETURN_VALUE """ else: expected = f""" L. 2 0: LOAD_FAST 'test' 1: LOAD_CONST 1 2: COMPARE_OP {enum_repr} 3: POP_JUMP_IF_FALSE L. 3 4: LOAD_CONST 1 5: RETURN_VALUE label_instr6: L. 4 7: LOAD_FAST 'test' 8: LOAD_CONST 2 9: COMPARE_OP {enum_repr} 10: POP_JUMP_IF_FALSE L. 5 11: LOAD_CONST 2 12: RETURN_VALUE label_instr13: L. 
6 14: LOAD_CONST 3 15: RETURN_VALUE """ self.check_dump_bytecode(code, expected[1:].rstrip(" "), lineno=True) def test_bytecode_broken_label(self): label = Label() code = Bytecode([Instr("JUMP_FORWARD", label)]) expected = " JUMP_FORWARD \n\n" self.check_dump_bytecode(code, expected) def test_blocks_broken_jump(self): block = BasicBlock() code = ControlFlowGraph() code[0].append(Instr("JUMP_FORWARD", block)) expected = textwrap.dedent( """ block1: JUMP_FORWARD """ ).lstrip("\n") self.check_dump_bytecode(code, expected) def test_bytecode_blocks(self): source = """ def func(test): if test == 1: return 1 elif test == 2: return 2 return 3 """ code = disassemble(source, function=True) code = ControlFlowGraph.from_bytecode(code) # without line numbers enum_repr = "" if sys.version_info >= (3, 12): expected = textwrap.dedent( f""" block1: RESUME 0 LOAD_FAST 'test' LOAD_CONST 1 COMPARE_OP {enum_repr} POP_JUMP_IF_FALSE -> block2 block2: RETURN_CONST 1 block3: LOAD_FAST 'test' LOAD_CONST 2 COMPARE_OP {enum_repr} POP_JUMP_IF_FALSE -> block4 block4: RETURN_CONST 2 block5: RETURN_CONST 3 """ ) elif sys.version_info >= (3, 11): expected = textwrap.dedent( f""" block1: RESUME 0 LOAD_FAST 'test' LOAD_CONST 1 COMPARE_OP {enum_repr} POP_JUMP_FORWARD_IF_FALSE -> block2 block2: LOAD_CONST 1 RETURN_VALUE block3: LOAD_FAST 'test' LOAD_CONST 2 COMPARE_OP {enum_repr} POP_JUMP_FORWARD_IF_FALSE -> block4 block4: LOAD_CONST 2 RETURN_VALUE block5: LOAD_CONST 3 RETURN_VALUE """ ) else: expected = textwrap.dedent( f""" block1: LOAD_FAST 'test' LOAD_CONST 1 COMPARE_OP {enum_repr} POP_JUMP_IF_FALSE -> block2 block2: LOAD_CONST 1 RETURN_VALUE block3: LOAD_FAST 'test' LOAD_CONST 2 COMPARE_OP {enum_repr} POP_JUMP_IF_FALSE -> block4 block4: LOAD_CONST 2 RETURN_VALUE block5: LOAD_CONST 3 RETURN_VALUE """ ) self.check_dump_bytecode(code, expected.lstrip()) # with line numbers if sys.version_info >= (3, 12): expected = textwrap.dedent( f""" block1: L. 1 0: RESUME 0 L. 
2 1: LOAD_FAST 'test' 2: LOAD_CONST 1 3: COMPARE_OP {enum_repr} 4: POP_JUMP_IF_FALSE -> block2 block2: L. 3 0: RETURN_CONST 1 block3: L. 4 0: LOAD_FAST 'test' 1: LOAD_CONST 2 2: COMPARE_OP {enum_repr} 3: POP_JUMP_IF_FALSE -> block4 block4: L. 5 0: RETURN_CONST 2 block5: L. 6 0: RETURN_CONST 3 """ ) elif sys.version_info >= (3, 11): expected = textwrap.dedent( f""" block1: L. 1 0: RESUME 0 L. 2 1: LOAD_FAST 'test' 2: LOAD_CONST 1 3: COMPARE_OP {enum_repr} 4: POP_JUMP_FORWARD_IF_FALSE -> block2 block2: L. 3 0: LOAD_CONST 1 1: RETURN_VALUE block3: L. 4 0: LOAD_FAST 'test' 1: LOAD_CONST 2 2: COMPARE_OP {enum_repr} 3: POP_JUMP_FORWARD_IF_FALSE -> block4 block4: L. 5 0: LOAD_CONST 2 1: RETURN_VALUE block5: L. 6 0: LOAD_CONST 3 1: RETURN_VALUE """ ) else: expected = textwrap.dedent( f""" block1: L. 2 0: LOAD_FAST 'test' 1: LOAD_CONST 1 2: COMPARE_OP {enum_repr} 3: POP_JUMP_IF_FALSE -> block2 block2: L. 3 0: LOAD_CONST 1 1: RETURN_VALUE block3: L. 4 0: LOAD_FAST 'test' 1: LOAD_CONST 2 2: COMPARE_OP {enum_repr} 3: POP_JUMP_IF_FALSE -> block4 block4: L. 5 0: LOAD_CONST 2 1: RETURN_VALUE block5: L. 
6 0: LOAD_CONST 3 1: RETURN_VALUE """ ) self.check_dump_bytecode(code, expected.lstrip(), lineno=True) def test_concrete_bytecode(self): source = """ def func(test): if test == 1: return 1 elif test == 2: return 2 return 3 """ code = disassemble(source, function=True) code = code.to_concrete_bytecode() # without line numbers if sys.version_info >= (3, 12): # COMPARE_OP use the 4 lowest bits as a cache expected = """ 0 RESUME 0 2 LOAD_FAST 0 4 LOAD_CONST 1 6 COMPARE_OP 40 8 CACHE 0 10 POP_JUMP_IF_FALSE 1 12 RETURN_CONST 1 14 LOAD_FAST 0 16 LOAD_CONST 2 18 COMPARE_OP 40 20 CACHE 0 22 POP_JUMP_IF_FALSE 1 24 RETURN_CONST 2 26 RETURN_CONST 3 """ elif sys.version_info >= (3, 11): expected = """ 0 RESUME 0 2 LOAD_FAST 0 4 LOAD_CONST 1 6 COMPARE_OP 2 8 CACHE 0 10 CACHE 0 12 POP_JUMP_FORWARD_IF_FALSE 2 14 LOAD_CONST 1 16 RETURN_VALUE 18 LOAD_FAST 0 20 LOAD_CONST 2 22 COMPARE_OP 2 24 CACHE 0 26 CACHE 0 28 POP_JUMP_FORWARD_IF_FALSE 2 30 LOAD_CONST 2 32 RETURN_VALUE 34 LOAD_CONST 3 36 RETURN_VALUE """ else: expected = f""" 0 LOAD_FAST 0 2 LOAD_CONST 1 4 COMPARE_OP 2 6 POP_JUMP_IF_FALSE {6 if OFFSET_AS_INSTRUCTION else 12} 8 LOAD_CONST 1 10 RETURN_VALUE 12 LOAD_FAST 0 14 LOAD_CONST 2 16 COMPARE_OP 2 18 POP_JUMP_IF_FALSE {12 if OFFSET_AS_INSTRUCTION else 24} 20 LOAD_CONST 2 22 RETURN_VALUE 24 LOAD_CONST 3 26 RETURN_VALUE """ self.check_dump_bytecode(code, expected.lstrip("\n")) # with line numbers if sys.version_info >= (3, 12): expected = """ L. 1 0: RESUME 0 L. 2 2: LOAD_FAST 0 4: LOAD_CONST 1 6: COMPARE_OP 40 8: CACHE 0 10: POP_JUMP_IF_FALSE 1 L. 3 12: RETURN_CONST 1 L. 4 14: LOAD_FAST 0 16: LOAD_CONST 2 18: COMPARE_OP 40 20: CACHE 0 22: POP_JUMP_IF_FALSE 1 L. 5 24: RETURN_CONST 2 L. 6 26: RETURN_CONST 3 """ elif sys.version_info >= (3, 11): expected = """ L. 1 0: RESUME 0 L. 2 2: LOAD_FAST 0 4: LOAD_CONST 1 6: COMPARE_OP 2 8: CACHE 0 10: CACHE 0 12: POP_JUMP_FORWARD_IF_FALSE 2 L. 3 14: LOAD_CONST 1 16: RETURN_VALUE L. 
def get_code(source, *, filename="", function=False):
    """Compile *source* and return the resulting code object.

    The text is dedented and stripped, then compiled in ``"exec"`` mode
    under *filename*.  When *function* is true, the constants of the
    compiled module must contain exactly one code object, and that nested
    code object is returned instead of the module one.

    Raises:
        ValueError: *function* is true and the module constants do not
            hold exactly one code object.
    """
    cleaned = textwrap.dedent(source).strip()
    module_code = compile(cleaned, filename, "exec")

    # Nothing more to do when the caller wants the module-level code.
    if not function:
        return module_code

    nested = list(
        filter(lambda item: isinstance(item, types.CodeType), module_code.co_consts)
    )
    if len(nested) != 1:
        raise ValueError("unable to find function code")
    return nested[0]
src tests [testenv:docs] basepython = python3 deps= -r doc/requirements.txt commands = pip install . sphinx-build doc docs_output -W -b html [flake8] ignore = E203, E266, E501, W503, F403, F401 max-line-length = 80 select = B,C,E,F,W,T4,B9,B950