pax_global_header00006660000000000000000000000064146345775010014526gustar00rootroot0000000000000052 comment=1fd3c7bde7b943fe8985c893310b5269a09b46c5 libplacebo-v7.349.0/000077500000000000000000000000001463457750100141745ustar00rootroot00000000000000libplacebo-v7.349.0/.github/000077500000000000000000000000001463457750100155345ustar00rootroot00000000000000libplacebo-v7.349.0/.github/FUNDING.yml000066400000000000000000000000641463457750100173510ustar00rootroot00000000000000github: haasn patreon: haasn open_collective: haasn libplacebo-v7.349.0/.github/workflows/000077500000000000000000000000001463457750100175715ustar00rootroot00000000000000libplacebo-v7.349.0/.github/workflows/ci.yml000066400000000000000000000005351463457750100207120ustar00rootroot00000000000000name: ci on: push: branches: - master - pages-test permissions: contents: write jobs: deploy: runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 - uses: actions/setup-python@v4 with: python-version: 3.x - run: pip install mkdocs-material - run: mkdocs gh-deploy --force libplacebo-v7.349.0/.gitignore000066400000000000000000000001131463457750100161570ustar00rootroot00000000000000/build* /tags /TAGS /demos/3rdparty /3rdparty *.exe *.o .cache __pycache__ libplacebo-v7.349.0/.gitlab-ci.yml000066400000000000000000000124141463457750100166320ustar00rootroot00000000000000workflow: rules: - if: $CI_PIPELINE_SOURCE == 'merge_request_event' - if: $CI_COMMIT_TAG - if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH stages: - compile - test - sanitize variables: GIT_SUBMODULE_STRATEGY: recursive IMAGE_UBUNTU_JAMMY: registry.videolan.org/libplacebo-ubuntu-jammy:20230730213642 IMAGE_UBUNTU_JAMMY_AARCH: registry.videolan.org/libplacebo-ubuntu-jammy-aarch64:20230203024122 linux: image: $IMAGE_UBUNTU_JAMMY stage: compile tags: - docker - amd64 script: - meson build --buildtype release --werror -Dtests=true -Dshaderc=enabled -Dglslang=enabled - ninja -C build static: image: $IMAGE_UBUNTU_JAMMY stage: compile tags: - docker - amd64 script: - meson build --buildtype release --default-library static --werror -Dshaderc=enabled -Dglslang=enabled - ninja -C build win32: image: $IMAGE_UBUNTU_JAMMY stage: compile tags: - docker - amd64 script: - meson build --buildtype release --werror -Dtests=true -Ddebug-abort=true -Dd3d11=enabled --cross-file /opt/crossfiles/i686-w64-mingw32.meson - ninja -C build - cd build && meson test -t 5 -v --num-processes=1 win64: image: $IMAGE_UBUNTU_JAMMY stage: compile tags: - docker - amd64 script: - meson build --buildtype release --werror -Dtests=true -Ddebug-abort=true -Dd3d11=enabled --cross-file /opt/crossfiles/x86_64-w64-mingw32.meson - ninja -C build - cd build && meson test -t 5 -v --num-processes=1 aarch64: image: $IMAGE_UBUNTU_JAMMY_AARCH stage: compile tags: - docker - aarch64 script: - meson build --buildtype release --werror -Dtests=true - ninja -C build - cd build && meson test -t 5 -v --num-processes=1 macos: stage: compile tags: - amd64 - monterey script: - meson build --buildtype release -Ddefault_library=both -Dtests=true -Ddebug-abort=true -Dc_args='-mmacosx-version-min=10.11 -Wunguarded-availability' --werror - ninja -C build - cd build && meson test -t 5 -v --num-processes=1 scan: image: $IMAGE_UBUNTU_JAMMY stage: compile tags: - docker - amd64 script: - env CC=clang CXX=clang++ CC_LD=lld CXX_LD=lld meson build --buildtype debugoptimized --werror -Dtests=true -Dbench=true -Dshaderc=enabled -Dglslang=enabled - ninja -C build scan-build llvmpipe: image: $IMAGE_UBUNTU_JAMMY stage: test tags: - docker - amd64 
script: - meson build --buildtype release --werror -Dtests=true -Ddebug-abort=true -Dc_args='-DCI_ALLOW_SW -DCI_MAXGL' -Dshaderc=enabled -Dglslang=enabled - ninja -C build - cd build && meson test -t 20 -v --num-processes=1 gpu: image: $IMAGE_UBUNTU_JAMMY stage: test tags: - gpu script: - meson build --buildtype release --werror -Dtests=true -Ddemos=false -Ddebug-abort=true -Dshaderc=enabled -Dglslang=enabled -Db_coverage=true - ninja -C build - vulkaninfo - cd build && meson test -t 5 -v --num-processes=1 - ninja coverage-html - mv meson-logs/coveragereport ../coverage - ninja coverage-xml - grep -Eo 'line-rate="[^"]+"' meson-logs/coverage.xml | head -n 1 | grep -Eo '[0-9.]+' | awk '{ print "coverage:", $1 * 100 } ' coverage: '/^coverage: (\d+.\d+)$/' artifacts: expose_as: 'Coverage HTML report' paths: - coverage/ reports: coverage_report: coverage_format: cobertura path: build/meson-logs/coverage.xml sanitize: image: $IMAGE_UBUNTU_JAMMY stage: sanitize tags: - gpu variables: UBSAN_OPTIONS: 'print_stacktrace=1:halt_on_error=1' script: - env CC=clang CXX=clang++ CC_LD=lld CXX_LD=lld meson build --buildtype debugoptimized --werror -Dtests=true -Ddebug-abort=true -Dc_args='-DCI_MAXGL -Wno-deprecated-declarations' -Db_sanitize=address,undefined -Db_lundef=false -Dshaderc=enabled - ninja -C build - cd build && time meson test -t 5 -v --num-processes=1 libplacebo-v7.349.0/.gitmodules000066400000000000000000000012111463457750100163440ustar00rootroot00000000000000[submodule "demos/3rdparty/nuklear"] path = demos/3rdparty/nuklear url = https://github.com/Immediate-Mode-UI/Nuklear.git [submodule "3rdparty/glad"] path = 3rdparty/glad url = https://github.com/Dav1dde/glad [submodule "3rdparty/jinja"] path = 3rdparty/jinja url = https://github.com/pallets/jinja [submodule "3rdparty/markupsafe"] path = 3rdparty/markupsafe url = https://github.com/pallets/markupsafe [submodule "3rdparty/Vulkan-Headers"] path = 3rdparty/Vulkan-Headers url = https://github.com/KhronosGroup/Vulkan-Headers [submodule "3rdparty/fast_float"] path = 3rdparty/fast_float url = https://github.com/fastfloat/fast_float.git libplacebo-v7.349.0/3rdparty/000077500000000000000000000000001463457750100157445ustar00rootroot00000000000000libplacebo-v7.349.0/3rdparty/Vulkan-Headers/000077500000000000000000000000001463457750100205555ustar00rootroot00000000000000libplacebo-v7.349.0/3rdparty/fast_float/000077500000000000000000000000001463457750100200665ustar00rootroot00000000000000libplacebo-v7.349.0/3rdparty/glad/000077500000000000000000000000001463457750100166535ustar00rootroot00000000000000libplacebo-v7.349.0/3rdparty/jinja/000077500000000000000000000000001463457750100170375ustar00rootroot00000000000000libplacebo-v7.349.0/3rdparty/markupsafe/000077500000000000000000000000001463457750100201025ustar00rootroot00000000000000libplacebo-v7.349.0/LICENSE000066400000000000000000000576361463457750100152220ustar00rootroot00000000000000 GNU LESSER GENERAL PUBLIC LICENSE Version 2.1, February 1999 Copyright (C) 1991, 1999 Free Software Foundation, Inc. 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. [This is the first released version of the Lesser GPL. It also counts as the successor of the GNU Library Public License, version 2, hence the version number 2.1.] Preamble The licenses for most software are designed to take away your freedom to share and change it. 
By contrast, the GNU General Public Licenses are intended to guarantee your freedom to share and change free software--to make sure the software is free for all its users. This license, the Lesser General Public License, applies to some specially designated software packages--typically libraries--of the Free Software Foundation and other authors who decide to use it. You can use it too, but we suggest you first think carefully about whether this license or the ordinary General Public License is the better strategy to use in any particular case, based on the explanations below. When we speak of free software, we are referring to freedom of use, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for this service if you wish); that you receive source code or can get it if you want it; that you can change the software and use pieces of it in new free programs; and that you are informed that you can do these things. To protect your rights, we need to make restrictions that forbid distributors to deny you these rights or to ask you to surrender these rights. These restrictions translate to certain responsibilities for you if you distribute copies of the library or if you modify it. For example, if you distribute copies of the library, whether gratis or for a fee, you must give the recipients all the rights that we gave you. You must make sure that they, too, receive or can get the source code. If you link other code with the library, you must provide complete object files to the recipients, so that they can relink them with the library after making changes to the library and recompiling it. And you must show them these terms so they know their rights. We protect your rights with a two-step method: (1) we copyright the library, and (2) we offer you this license, which gives you legal permission to copy, distribute and/or modify the library. To protect each distributor, we want to make it very clear that there is no warranty for the free library. Also, if the library is modified by someone else and passed on, the recipients should know that what they have is not the original version, so that the original author's reputation will not be affected by problems that might be introduced by others. Finally, software patents pose a constant threat to the existence of any free program. We wish to make sure that a company cannot effectively restrict the users of a free program by obtaining a restrictive license from a patent holder. Therefore, we insist that any patent license obtained for a version of the library must be consistent with the full freedom of use specified in this license. Most GNU software, including some libraries, is covered by the ordinary GNU General Public License. This license, the GNU Lesser General Public License, applies to certain designated libraries, and is quite different from the ordinary General Public License. We use this license for certain libraries in order to permit linking those libraries into non-free programs. When a program is linked with a library, whether statically or using a shared library, the combination of the two is legally speaking a combined work, a derivative of the original library. The ordinary General Public License therefore permits such linking only if the entire combination fits its criteria of freedom. The Lesser General Public License permits more lax criteria for linking other code with the library. 
We call this license the "Lesser" General Public License because it does Less to protect the user's freedom than the ordinary General Public License. It also provides other free software developers Less of an advantage over competing non-free programs. These disadvantages are the reason we use the ordinary General Public License for many libraries. However, the Lesser license provides advantages in certain special circumstances. For example, on rare occasions, there may be a special need to encourage the widest possible use of a certain library, so that it becomes a de-facto standard. To achieve this, non-free programs must be allowed to use the library. A more frequent case is that a free library does the same job as widely used non-free libraries. In this case, there is little to gain by limiting the free library to free software only, so we use the Lesser General Public License. In other cases, permission to use a particular library in non-free programs enables a greater number of people to use a large body of free software. For example, permission to use the GNU C Library in non-free programs enables many more people to use the whole GNU operating system, as well as its variant, the GNU/Linux operating system. Although the Lesser General Public License is Less protective of the users' freedom, it does ensure that the user of a program that is linked with the Library has the freedom and the wherewithal to run that program using a modified version of the Library. The precise terms and conditions for copying, distribution and modification follow. Pay close attention to the difference between a "work based on the library" and a "work that uses the library". The former contains code derived from the library, whereas the latter must be combined with the library in order to run. GNU LESSER GENERAL PUBLIC LICENSE TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 0. This License Agreement applies to any software library or other program which contains a notice placed by the copyright holder or other authorized party saying it may be distributed under the terms of this Lesser General Public License (also called "this License"). Each licensee is addressed as "you". A "library" means a collection of software functions and/or data prepared so as to be conveniently linked with application programs (which use some of those functions and data) to form executables. The "Library", below, refers to any such software library or work which has been distributed under these terms. A "work based on the Library" means either the Library or any derivative work under copyright law: that is to say, a work containing the Library or a portion of it, either verbatim or with modifications and/or translated straightforwardly into another language. (Hereinafter, translation is included without limitation in the term "modification".) "Source code" for a work means the preferred form of the work for making modifications to it. For a library, complete source code means all the source code for all modules it contains, plus any associated interface definition files, plus the scripts used to control compilation and installation of the library. Activities other than copying, distribution and modification are not covered by this License; they are outside its scope. The act of running a program using the Library is not restricted, and output from such a program is covered only if its contents constitute a work based on the Library (independent of the use of the Library in a tool for writing it). 
Whether that is true depends on what the Library does and what the program that uses the Library does. 1. You may copy and distribute verbatim copies of the Library's complete source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice and disclaimer of warranty; keep intact all the notices that refer to this License and to the absence of any warranty; and distribute a copy of this License along with the Library. You may charge a fee for the physical act of transferring a copy, and you may at your option offer warranty protection in exchange for a fee. 2. You may modify your copy or copies of the Library or any portion of it, thus forming a work based on the Library, and copy and distribute such modifications or work under the terms of Section 1 above, provided that you also meet all of these conditions: a) The modified work must itself be a software library. b) You must cause the files modified to carry prominent notices stating that you changed the files and the date of any change. c) You must cause the whole of the work to be licensed at no charge to all third parties under the terms of this License. d) If a facility in the modified Library refers to a function or a table of data to be supplied by an application program that uses the facility, other than as an argument passed when the facility is invoked, then you must make a good faith effort to ensure that, in the event an application does not supply such function or table, the facility still operates, and performs whatever part of its purpose remains meaningful. (For example, a function in a library to compute square roots has a purpose that is entirely well-defined independent of the application. Therefore, Subsection 2d requires that any application-supplied function or table used by this function must be optional: if the application does not supply it, the square root function must still compute square roots.) These requirements apply to the modified work as a whole. If identifiable sections of that work are not derived from the Library, and can be reasonably considered independent and separate works in themselves, then this License, and its terms, do not apply to those sections when you distribute them as separate works. But when you distribute the same sections as part of a whole which is a work based on the Library, the distribution of the whole must be on the terms of this License, whose permissions for other licensees extend to the entire whole, and thus to each and every part regardless of who wrote it. Thus, it is not the intent of this section to claim rights or contest your rights to work written entirely by you; rather, the intent is to exercise the right to control the distribution of derivative or collective works based on the Library. In addition, mere aggregation of another work not based on the Library with the Library (or with a work based on the Library) on a volume of a storage or distribution medium does not bring the other work under the scope of this License. 3. You may opt to apply the terms of the ordinary GNU General Public License instead of this License to a given copy of the Library. To do this, you must alter all the notices that refer to this License, so that they refer to the ordinary GNU General Public License, version 2, instead of to this License. (If a newer version than version 2 of the ordinary GNU General Public License has appeared, then you can specify that version instead if you wish.) 
Do not make any other change in these notices. Once this change is made in a given copy, it is irreversible for that copy, so the ordinary GNU General Public License applies to all subsequent copies and derivative works made from that copy. This option is useful when you wish to copy part of the code of the Library into a program that is not a library. 4. You may copy and distribute the Library (or a portion or derivative of it, under Section 2) in object code or executable form under the terms of Sections 1 and 2 above provided that you accompany it with the complete corresponding machine-readable source code, which must be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange. If distribution of object code is made by offering access to copy from a designated place, then offering equivalent access to copy the source code from the same place satisfies the requirement to distribute the source code, even though third parties are not compelled to copy the source along with the object code. 5. A program that contains no derivative of any portion of the Library, but is designed to work with the Library by being compiled or linked with it, is called a "work that uses the Library". Such a work, in isolation, is not a derivative work of the Library, and therefore falls outside the scope of this License. However, linking a "work that uses the Library" with the Library creates an executable that is a derivative of the Library (because it contains portions of the Library), rather than a "work that uses the library". The executable is therefore covered by this License. Section 6 states terms for distribution of such executables. When a "work that uses the Library" uses material from a header file that is part of the Library, the object code for the work may be a derivative work of the Library even though the source code is not. Whether this is true is especially significant if the work can be linked without the Library, or if the work is itself a library. The threshold for this to be true is not precisely defined by law. If such an object file uses only numerical parameters, data structure layouts and accessors, and small macros and small inline functions (ten lines or less in length), then the use of the object file is unrestricted, regardless of whether it is legally a derivative work. (Executables containing this object code plus portions of the Library will still fall under Section 6.) Otherwise, if the work is a derivative of the Library, you may distribute the object code for the work under the terms of Section 6. Any executables containing that work also fall under Section 6, whether or not they are linked directly with the Library itself. 6. As an exception to the Sections above, you may also combine or link a "work that uses the Library" with the Library to produce a work containing portions of the Library, and distribute that work under terms of your choice, provided that the terms permit modification of the work for the customer's own use and reverse engineering for debugging such modifications. You must give prominent notice with each copy of the work that the Library is used in it and that the Library and its use are covered by this License. You must supply a copy of this License. If the work during execution displays copyright notices, you must include the copyright notice for the Library among them, as well as a reference directing the user to the copy of this License. 
Also, you must do one of these things: a) Accompany the work with the complete corresponding machine-readable source code for the Library including whatever changes were used in the work (which must be distributed under Sections 1 and 2 above); and, if the work is an executable linked with the Library, with the complete machine-readable "work that uses the Library", as object code and/or source code, so that the user can modify the Library and then relink to produce a modified executable containing the modified Library. (It is understood that the user who changes the contents of definitions files in the Library will not necessarily be able to recompile the application to use the modified definitions.) b) Use a suitable shared library mechanism for linking with the Library. A suitable mechanism is one that (1) uses at run time a copy of the library already present on the user's computer system, rather than copying library functions into the executable, and (2) will operate properly with a modified version of the library, if the user installs one, as long as the modified version is interface-compatible with the version that the work was made with. c) Accompany the work with a written offer, valid for at least three years, to give the same user the materials specified in Subsection 6a, above, for a charge no more than the cost of performing this distribution. d) If distribution of the work is made by offering access to copy from a designated place, offer equivalent access to copy the above specified materials from the same place. e) Verify that the user has already received a copy of these materials or that you have already sent this user a copy. For an executable, the required form of the "work that uses the Library" must include any data and utility programs needed for reproducing the executable from it. However, as a special exception, the materials to be distributed need not include anything that is normally distributed (in either source or binary form) with the major components (compiler, kernel, and so on) of the operating system on which the executable runs, unless that component itself accompanies the executable. It may happen that this requirement contradicts the license restrictions of other proprietary libraries that do not normally accompany the operating system. Such a contradiction means you cannot use both them and the Library together in an executable that you distribute. 7. You may place library facilities that are a work based on the Library side-by-side in a single library together with other library facilities not covered by this License, and distribute such a combined library, provided that the separate distribution of the work based on the Library and of the other library facilities is otherwise permitted, and provided that you do these two things: a) Accompany the combined library with a copy of the same work based on the Library, uncombined with any other library facilities. This must be distributed under the terms of the Sections above. b) Give prominent notice with the combined library of the fact that part of it is a work based on the Library, and explaining where to find the accompanying uncombined form of the same work. 8. You may not copy, modify, sublicense, link with, or distribute the Library except as expressly provided under this License. Any attempt otherwise to copy, modify, sublicense, link with, or distribute the Library is void, and will automatically terminate your rights under this License. 
However, parties who have received copies, or rights, from you under this License will not have their licenses terminated so long as such parties remain in full compliance. 9. You are not required to accept this License, since you have not signed it. However, nothing else grants you permission to modify or distribute the Library or its derivative works. These actions are prohibited by law if you do not accept this License. Therefore, by modifying or distributing the Library (or any work based on the Library), you indicate your acceptance of this License to do so, and all its terms and conditions for copying, distributing or modifying the Library or works based on it. 10. Each time you redistribute the Library (or any work based on the Library), the recipient automatically receives a license from the original licensor to copy, distribute, link with or modify the Library subject to these terms and conditions. You may not impose any further restrictions on the recipients' exercise of the rights granted herein. You are not responsible for enforcing compliance by third parties with this License. 11. If, as a consequence of a court judgment or allegation of patent infringement or for any other reason (not limited to patent issues), conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot distribute so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not distribute the Library at all. For example, if a patent license would not permit royalty-free redistribution of the Library by all those who receive copies directly or indirectly through you, then the only way you could satisfy both it and this License would be to refrain entirely from distribution of the Library. If any portion of this section is held invalid or unenforceable under any particular circumstance, the balance of the section is intended to apply, and the section as a whole is intended to apply in other circumstances. It is not the purpose of this section to induce you to infringe any patents or other property right claims or to contest validity of any such claims; this section has the sole purpose of protecting the integrity of the free software distribution system which is implemented by public license practices. Many people have made generous contributions to the wide range of software distributed through that system in reliance on consistent application of that system; it is up to the author/donor to decide if he or she is willing to distribute software through any other system and a licensee cannot impose that choice. This section is intended to make thoroughly clear what is believed to be a consequence of the rest of this License. 12. If the distribution and/or use of the Library is restricted in certain countries either by patents or by copyrighted interfaces, the original copyright holder who places the Library under this License may add an explicit geographical distribution limitation excluding those countries, so that distribution is permitted only in or among countries not thus excluded. In such case, this License incorporates the limitation as if written in the body of this License. 13. The Free Software Foundation may publish revised and/or new versions of the Lesser General Public License from time to time. 
Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Library specifies a version number of this License which applies to it and "any later version", you have the option of following the terms and conditions either of that version or of any later version published by the Free Software Foundation. If the Library does not specify a license version number, you may choose any version ever published by the Free Software Foundation. 14. If you wish to incorporate parts of the Library into other free programs whose distribution conditions are incompatible with these, write to the author to ask for permission. For software which is copyrighted by the Free Software Foundation, write to the Free Software Foundation; we sometimes make exceptions for this. Our decision will be guided by the two goals of preserving the free status of all derivatives of our free software and of promoting the sharing and reuse of software generally. NO WARRANTY 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. END OF TERMS AND CONDITIONS libplacebo-v7.349.0/README.md000066400000000000000000000403331463457750100154560ustar00rootroot00000000000000# libplacebo [![gitlab-ci badge](https://code.videolan.org/videolan/libplacebo/badges/master/pipeline.svg)](https://code.videolan.org/videolan/libplacebo/pipelines) [![gitlab-ci coverage](https://code.videolan.org/videolan/libplacebo/badges/master/coverage.svg)](https://code.videolan.org/videolan/libplacebo/-/jobs/artifacts/master/file/coverage/index.html?job=test-gpu) [![GitHub](https://img.shields.io/github/sponsors/haasn?logo=github)](https://github.com/sponsors/haasn) [![PayPal](https://img.shields.io/badge/donate-PayPal-blue.svg?logo=paypal)](https://www.paypal.com/cgi-bin/webscr?cmd=_s-xclick&hosted_button_id=SFJUTMPSZEAHC) [![Patreon](https://img.shields.io/badge/pledge-Patreon-red.svg?logo=patreon)](https://www.patreon.com/haasn) **libplacebo** is, in a nutshell, the core rendering algorithms and ideas of [mpv](https://mpv.io) rewritten as an independent library. As of today, libplacebo contains a large assortment of video processing shaders, focusing on both quality and performance. 
These include features such as the following: - High-quality, optimized **upscaling and downscaling** including support for polar filters ("Jinc"), anti-aliasing, anti-ringing and gamma correct scaling. - Dynamic **HDR tone mapping**, including real-time measurement of scene histogram, scene change detection, dynamic exposure control, perceptual gamut stretching, contrast recovery and more. - Native support for **Dolby Vision HDR**, including Profile 5 conversion to HDR/PQ or SDR, reading DV side data, and reshaping. (BL only, currently) - A colorimetrically accurate **color management** engine with support for soft gamut mapping, ICC profiles, accurate ITU-R BT.1886 emulation, black point compensation, and custom 3DLUTs (.cube). - A pluggable, extensible [**custom shader system**](http://libplacebo.org/custom-shaders/). This can be used to arbitrarily extend the range of custom shaders to include popular user shaders like RAVU, FSRCNNX, or Anime4K. See the [mpv wiki on user scripts](https://github.com/mpv-player/mpv/wiki/User-Scripts#user-shaders) for more information. - High performance **film grain synthesis** for AV1 and H.274, allowing media players to offload this part of decoding from the CPU to the GPU. - Tunable, fast **debanding** and **deinterlacing** shaders. - High quality gamma-correct **dithering**, including error diffusion modes. Every attempt was made to provide these features at a **high level of abstraction**, taking away all the messy details of GPU programming, color spaces, obscure subsampling modes, image metadata manipulation, and so on. Expert-level functionality is packed into easy-to-use functions like `pl_frame_from_avframe` and `pl_render_image`. ### Hardware requirements libplacebo currently supports Vulkan (including MoltenVK), OpenGL, and Direct3D 11. It currently has the following minimum hardware requirements: - **Vulkan**: Core version 1.2 - **OpenGL**: GLSL version >= 130 (GL >= 3.0, GL ES >= 3.0) - **Direct3D**: Feature level >= 9_1 For more documentation, including an introduction to the API, see [the project website](https://libplacebo.org). ### Examples This screenshot from the included [plplay demo program](./demos/plplay.c) highlights just some of the features supported by the libplacebo rendering code, all of which are adjustable dynamically during video playback. [plplay settings 1](./demos/screenshots/plplay1.png) [plplay settings 2](./demos/screenshots/plplay2.png) [plplay settings 3](./demos/screenshots/plplay3.png) [plplay settings 4](./demos/screenshots/plplay4.png) [plplay settings 5](./demos/screenshots/plplay5.png) [plplay settings 6](./demos/screenshots/plplay6.png) ### History This project grew out of an interest to accomplish the following goals: - Clean up mpv's internal [RA](#tier-1-rendering-abstraction) API and make it reusable for other projects, as a general high-level backend-agnostic graphics API wrapper. - Provide a standard library of useful GPU-accelerated image processing primitives based on GLSL, so projects like media players or browsers can use them without incurring a heavy dependency on `libmpv`. - Rewrite core parts of mpv's GPU-accelerated video renderer on top of redesigned abstractions, in order to modernize it and allow supporting more features. It has since been adopted by [VLC](https://www.videolan.org/vlc/) as their optional Vulkan-based video output path, and is provided as a Vulkan-based video filter in the FFmpeg project. 
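To give a concrete feel for the high-level interface described in the following overview, here is a minimal sketch of a single render pass using the high-level renderer together with a swapchain, loosely modeled on the bundled demo programs. It assumes a `pl_renderer` (from `pl_renderer_create`), a `pl_swapchain` and a decoded `pl_frame` have already been set up through one of the backends; error handling and resource setup are omitted, so treat it as an illustration rather than a drop-in implementation:

```c
#include <libplacebo/renderer.h>
#include <libplacebo/swapchain.h>

// Sketch: render a single image to the next swapchain frame.
// `renderer` comes from pl_renderer_create(log, gpu), `swapchain` from one
// of the backend-specific swapchain constructors, and `image` describes the
// decoded picture (planes, color space, etc.).
static bool render_one_frame(pl_renderer renderer, pl_swapchain swapchain,
                             const struct pl_frame *image)
{
    struct pl_swapchain_frame sw_frame;
    if (!pl_swapchain_start_frame(swapchain, &sw_frame))
        return false; // e.g. the window is currently not visible

    // Wrap the swapchain framebuffer as a render target and draw into it,
    // letting the renderer pick the appropriate scaling/tone-mapping passes
    struct pl_frame target;
    pl_frame_from_swapchain(&target, &sw_frame);
    bool ok = pl_render_image(renderer, image, &target, &pl_render_default_params);

    ok &= pl_swapchain_submit_frame(swapchain);
    pl_swapchain_swap_buffers(swapchain);
    return ok;
}
```

The same building blocks can also be driven at lower levels of abstraction, as described in the tier breakdown that follows.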
## API Overview The public API of libplacebo is currently split up into the following components, the header files (and documentation) for which are available inside the [`src/include/libplacebo`](src/include/libplacebo) directory. The API is available in different "tiers", representing levels of abstraction inside libplacebo. The APIs in higher tiers depend on those in lower tiers. Which tier is used by a user depends on how much power/control they want over the actual rendering. The low-level tiers are more suitable for big projects that need strong control over the entire rendering pipeline; whereas the high-level tiers are more suitable for smaller or simpler projects that want libplacebo to take care of everything. ### Tier 0 (logging, raw math primitives) - `cache.h`: Caching subsystem. Used to cache large or computationally heavy binary blobs, such as compiled shaders, 3DLUTs, and so on. - `colorspace.h`: A collection of enums and structs for describing color spaces, as well as a collection of helper functions for computing various color space transformation matrices. - `common.h`: A collection of miscellaneous utility types and macros that are shared among multiple subsystems. Usually does not need to be included directly. - `log.h`: Logging subsystem. - `config.h`: Macros defining information about the way libplacebo was built, including the version strings and compiled-in features/dependencies. Usually does not need to be included directly. May be useful for feature tests. - `dither.h`: Some helper functions for generating various noise and dithering matrices. Might be useful for somebody else. - `filters.h`: A collection of reusable reconstruction filter kernels, which can be used for scaling. The generated weights arrays are semi-tailored to the needs of libplacebo, but may be useful to somebody else regardless. Also contains the structs needed to define a filter kernel for the purposes of libplacebo's upscaling routines. - `tone_mapping.h`: A collection of tone mapping functions, used for conversions between HDR and SDR content. - `gamut_mapping.h`: A collection of gamut mapping functions, used for conversions between wide gamut and standard gamut content, as well as for gamut recompression after tone-mapping. The API functions in this tier are either used throughout the program (context, common etc.) or are low-level implementations of filter kernels, color space conversion logic etc.; which are entirely independent of GLSL and even the GPU in general. ### Tier 1 (rendering abstraction) - `gpu.h`: Exports the GPU abstraction API used by libplacebo internally. - `swapchain.h`: Exports an API for wrapping platform-specific swapchains and other display APIs. This is the API used to actually queue up rendered frames for presentation (e.g. to a window or display device). - `vulkan.h`: GPU API implementation based on Vulkan. - `opengl.h`: GPU API implementation based on OpenGL. - `d3d11.h`: GPU API implementation based on Direct3D 11. - `dummy.h`: Dummy GPI API (interfaces with CPU only, no shader support) As part of the public API, libplacebo exports a middle-level abstraction for dealing with GPU objects and state. Basically, this is the API libplacebo uses internally to wrap OpenGL, Vulkan, Direct3D etc. into a single unifying API subset that abstracts away state, messy details, synchronization etc. into a fairly high-level API suitable for libplacebo's image processing tasks. 
It's made public both because it constitutes part of the public API of various image processing functions, but also in the hopes that it will be useful for other developers of GPU-accelerated image processing software. ### Tier 2 (GLSL generating primitives) - `shaders.h`: The low-level interface to shader generation. This can be used to generate GLSL stubs suitable for inclusion in other programs, as part of larger shaders. For example, a program might use this interface to generate a specialized tone-mapping function for performing color space conversions, then call that from their own fragment shader code. This abstraction has an optional dependency on `gpu.h`, but can also be used independently from it. In addition to this low-level interface, there are several available shader routines which libplacebo exports: - `shaders/colorspace.h`: Shader routines for decoding and transforming colors, tone mapping, and so forth. - `shaders/custom.h`: Allows directly ingesting custom GLSL logic into the `pl_shader` abstraction, either as bare GLSL or in [mpv .hook format](https://mpv.io/manual/master/#options-glsl-shaders). - `shaders/deinterlacing.h`: GPU deinterlacing shader based on yadif. - `shaders/dithering.h`: Shader routine for various GPU dithering methods. - `shaders/film_grain.h`: Film grain synthesis shaders for AV1 and H.274. - `shaders/icc.h`: Shader for ICC profile based color management. - `shaders/lut.h`: Code for applying arbitrary 1D/3D LUTs. - `shaders/sampling.h`: Shader routines for various algorithms that sample from images, such as debanding and scaling. ### Tier 3 (shader dispatch) - `dispatch.h`: A higher-level interface to the `pl_shader` system, based on `gpu.h`. This dispatch mechanism generates+executes complete GLSL shaders, subject to the constraints and limitations of the underlying GPU. This shader dispatch mechanism is designed to be combined with the shader processing routines exported by `shaders/*.h`, but takes care of the low-level translation of the resulting `pl_shader_res` objects into legal GLSL. It also takes care of resource binding, shader input placement, as well as shader caching and resource pooling; and makes sure all generated shaders have unique identifiers (so they can be freely merged together). ### Tier 4 (high level renderer) - `options.h`: A high-level options framework which wraps all of the options comprising `pl_render_params` into a memory-managed, serializable struct that can also be treated as a key/value dictionary. Also includes an options parser to load options provided by the API user in string format. - `renderer.h`: A high-level renderer which combines the shader primitives and dispatch mechanism into a fully-fledged rendering pipeline that takes raw texture data and transforms it into the desired output image. - `utils/frame_queue.h`: A high-level frame queuing abstraction. This API can be used to interface with a decoder (or other source of frames), and takes care of translating timestamped frames into a virtual stream of presentation events suitable for use with `renderer.h`, including any extra context required for frame interpolation (`pl_frame_mix`). - `utils/upload.h`: A high-level helper for uploading generic data in some user-described format to a plane texture suitable for use with `renderer.h`. These helpers essentially take care of picking/mapping a good image format supported by the GPU. 
(Note: Eventually, this function will also support on-CPU conversions to a different format where necessary, but for now, it will just fail)

- `utils/dav1d.h`: High-level helper for translating between Dav1dPicture and libplacebo's `pl_frame`. (Single header library)

- `utils/libav.h`: High-level helpers for interoperation between libplacebo and FFmpeg's libav* abstractions. (Single header library)

This is the "primary" interface to libplacebo, and the one most users will be interested in. It takes care of internal details such as degrading to simpler algorithms depending on the hardware's capabilities, combining the correct sequence of colorspace transformations and shader passes in order to get the best overall image quality, and so forth.

## Authors

libplacebo was founded and primarily developed by Niklas Haas ([@haasn](https://github.com/haasn)), but it would not be possible without the contributions of others, especially support for Windows.

[![contributor list](https://opencollective.com/libplacebo/contributors.svg?width=890&button=false)](https://github.com/haasn/libplacebo/graphs/contributors)

### License

libplacebo is currently available under the terms of the LGPLv2.1 (or later) license. However, it's possible to release it under a more permissive license (e.g. BSD2) if a use case emerges. Please open an issue if you have a use case for a BSD2-licensed libplacebo.

## Installing

### Obtaining

When cloning libplacebo, make sure to provide the `--recursive` flag:

```bash
$ git clone --recursive https://code.videolan.org/videolan/libplacebo
```

Alternatively (on an existing clone):

```bash
$ git submodule update --init
```

Doing either of these pulls in a handful of bundled 3rdparty dependencies. Alternatively, they can be provided via the system.

### Building from source

libplacebo is built using the [meson build system](http://mesonbuild.com/). You can build the project using the following steps:

```bash
$ DIR=./build
$ meson $DIR
$ ninja -C$DIR
```

To rebuild the project on changes, re-run `ninja -Cbuild`. If you wish to install the build products to the configured prefix (typically `/usr/local/`), you can run `ninja -Cbuild install`. Note that this is normally ill-advised except for developers who know what they're doing. Regular users should rely on distro packages.

### Dependencies

In principle, libplacebo has no mandatory dependencies - only optional ones. However, to get a useful version of libplacebo, you most likely want to build with support for either `opengl`, `vulkan` or `d3d11`. libplacebo built without these can still be used (e.g. to generate GLSL shaders such as the ones used in VLC), but the usefulness is severely impacted since most components will be missing, impaired or otherwise not functional.

A full list of optional dependencies each feature requires:

- **glslang**: `glslang` + its related libraries (e.g. `libSPIRV.so`)
- **lcms**: `liblcms2`
- **libdovi**: `libdovi`
- **opengl**: `glad2` (*)
- **shaderc**: `libshaderc`
- **vulkan**: `libvulkan`, `python3-jinja2` (*)
- **xxhash**: `libxxhash`

(*) This dependency is bundled automatically when doing a recursive clone.

#### Vulkan support

Because the Vulkan backend relies on code generation at compile time, `python3-jinja2` is a hard dependency of the build system. In addition to this, the path to the Vulkan registry (`vk.xml`) must be locatable, ideally by explicitly providing it via the `-Dvulkan-registry=/path/to/vk.xml` option, unless it can be found in one of the built-in hard-coded locations.
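For example, on a typical Linux system where the Vulkan headers package installs the registry to its default location, this might look as follows (the exact path is an assumption and varies between distributions and SDK installs):

```bash
$ meson $DIR -Dvulkan-registry=/usr/share/vulkan/registry/vk.xml
```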
### Configuring To get a list of configuration options supported by libplacebo, after running `meson $DIR` you can run `meson configure $DIR`, e.g.: ```bash $ meson $DIR $ meson configure $DIR ``` If you want to disable a component, for example Vulkan support, you can explicitly set it to `false`, i.e.: ```bash $ meson configure $DIR -Dvulkan=disabled -Dshaderc=disabled $ ninja -C$DIR ``` ### Testing To enable building and executing the tests, you need to build with `tests` enabled, i.e.: ```bash $ meson configure $DIR -Dtests=true $ ninja -C$DIR test ``` ### Benchmarking A naive benchmark suite is provided as an extra test case, disabled by default (due to the high execution time required). To enable it, use the `bench` option: ```bash $ meson configure $DIR -Dbench=true $ meson test -C$DIR benchmark --verbose ``` ## Using For a full documentation of the API, refer to the above [API Overview](#api-overview) as well as the [public header files](src/include/libplacebo). You can find additional examples of how to use the various components in the [demo programs](demos) as well as in the [unit tests](src/tests). libplacebo-v7.349.0/RELEASING.md000066400000000000000000000012151463457750100160260ustar00rootroot00000000000000# New release steps ## Pre-release (vX.Y.0-rcN) 1. Tag `vX.Y.0-rcN` on `master` ## Normal release (vX.Y.0) 1. Tag `vX.Y.0` on `master` 2. Create version branch `vX.Y` 3. Force-push `release` branch (or fast-forward if possible) 4. Update topic on IRC #libplacebo 5. Bump 'X' version number in meson.build, for next release (optional) 6. Tag release on github ## Bugfix release (vX.Y.Z) 1. Cherry-pick bug fixes onto version branch (`vX.Y`) 2. Update `Z` version number in `meson.build` 3. Tag `vX.Y.Z` on this branch 4. Fast-forward `release` branch iff this is the latest major release 5. Update topic on IRC #libplacebo 6. Tag release on github libplacebo-v7.349.0/compile000077500000000000000000000000751463457750100155540ustar00rootroot00000000000000#!/bin/sh DIR=./build [ -d $DIR ] || meson $DIR ninja -C$DIR libplacebo-v7.349.0/demos/000077500000000000000000000000001463457750100153035ustar00rootroot00000000000000libplacebo-v7.349.0/demos/3rdparty/000077500000000000000000000000001463457750100170535ustar00rootroot00000000000000libplacebo-v7.349.0/demos/3rdparty/nuklear/000077500000000000000000000000001463457750100205145ustar00rootroot00000000000000libplacebo-v7.349.0/demos/LICENSE000066400000000000000000000156101463457750100163130ustar00rootroot00000000000000Creative Commons Legal Code CC0 1.0 Universal CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED HEREUNDER. Statement of Purpose The laws of most jurisdictions throughout the world automatically confer exclusive Copyright and Related Rights (defined below) upon the creator and subsequent owner(s) (each and all, an "owner") of an original work of authorship and/or a database (each, a "Work"). 
Certain owners wish to permanently relinquish those rights to a Work for the purpose of contributing to a commons of creative, cultural and scientific works ("Commons") that the public can reliably and without fear of later claims of infringement build upon, modify, incorporate in other works, reuse and redistribute as freely as possible in any form whatsoever and for any purposes, including without limitation commercial purposes. These owners may contribute to the Commons to promote the ideal of a free culture and the further production of creative, cultural and scientific works, or to gain reputation or greater distribution for their Work in part through the use and efforts of others. For these and/or other purposes and motivations, and without any expectation of additional consideration or compensation, the person associating CC0 with a Work (the "Affirmer"), to the extent that he or she is an owner of Copyright and Related Rights in the Work, voluntarily elects to apply CC0 to the Work and publicly distribute the Work under its terms, with knowledge of his or her Copyright and Related Rights in the Work and the meaning and intended legal effect of CC0 on those rights. 1. Copyright and Related Rights. A Work made available under CC0 may be protected by copyright and related or neighboring rights ("Copyright and Related Rights"). Copyright and Related Rights include, but are not limited to, the following: i. the right to reproduce, adapt, distribute, perform, display, communicate, and translate a Work; ii. moral rights retained by the original author(s) and/or performer(s); iii. publicity and privacy rights pertaining to a person's image or likeness depicted in a Work; iv. rights protecting against unfair competition in regards to a Work, subject to the limitations in paragraph 4(a), below; v. rights protecting the extraction, dissemination, use and reuse of data in a Work; vi. database rights (such as those arising under Directive 96/9/EC of the European Parliament and of the Council of 11 March 1996 on the legal protection of databases, and under any national implementation thereof, including any amended or successor version of such directive); and vii. other similar, equivalent or corresponding rights throughout the world based on applicable law or treaty, and any national implementations thereof. 2. Waiver. To the greatest extent permitted by, but not in contravention of, applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and unconditionally waives, abandons, and surrenders all of Affirmer's Copyright and Related Rights and associated claims and causes of action, whether now known or unknown (including existing as well as future claims and causes of action), in the Work (i) in all territories worldwide, (ii) for the maximum duration provided by applicable law or treaty (including future time extensions), (iii) in any current or future medium and for any number of copies, and (iv) for any purpose whatsoever, including without limitation commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each member of the public at large and to the detriment of Affirmer's heirs and successors, fully intending that such Waiver shall not be subject to revocation, rescission, cancellation, termination, or any other legal or equitable action to disrupt the quiet enjoyment of the Work by the public as contemplated by Affirmer's express Statement of Purpose. 3. Public License Fallback. 
Should any part of the Waiver for any reason be judged legally invalid or ineffective under applicable law, then the Waiver shall be preserved to the maximum extent permitted taking into account Affirmer's express Statement of Purpose. In addition, to the extent the Waiver is so judged Affirmer hereby grants to each affected person a royalty-free, non transferable, non sublicensable, non exclusive, irrevocable and unconditional license to exercise Affirmer's Copyright and Related Rights in the Work (i) in all territories worldwide, (ii) for the maximum duration provided by applicable law or treaty (including future time extensions), (iii) in any current or future medium and for any number of copies, and (iv) for any purpose whatsoever, including without limitation commercial, advertising or promotional purposes (the "License"). The License shall be deemed effective as of the date CC0 was applied by Affirmer to the Work. Should any part of the License for any reason be judged legally invalid or ineffective under applicable law, such partial invalidity or ineffectiveness shall not invalidate the remainder of the License, and in such case Affirmer hereby affirms that he or she will not (i) exercise any of his or her remaining Copyright and Related Rights in the Work or (ii) assert any associated claims and causes of action with respect to the Work, in either case contrary to Affirmer's express Statement of Purpose. 4. Limitations and Disclaimers. a. No trademark or patent rights held by Affirmer are waived, abandoned, surrendered, licensed or otherwise affected by this document. b. Affirmer offers the Work as-is and makes no representations or warranties of any kind concerning the Work, express, implied, statutory or otherwise, including without limitation warranties of title, merchantability, fitness for a particular purpose, non infringement, or the absence of latent or other defects, accuracy, or the present or absence of errors, whether or not discoverable, all to the greatest extent permissible under applicable law. c. Affirmer disclaims responsibility for clearing rights of other persons that may apply to the Work or any use thereof, including without limitation any person's Copyright and Related Rights in the Work. Further, Affirmer disclaims responsibility for obtaining any necessary consents, permissions or other rights required for any use of the Work. d. Affirmer understands and acknowledges that Creative Commons is not a party to this document and has no duty or obligation with respect to this CC0 or use of the Work. libplacebo-v7.349.0/demos/colors.c000066400000000000000000000042261463457750100167540ustar00rootroot00000000000000/* Simplistic demo that just makes the window colorful, including alpha * transparency if supported by the windowing system. 
* * License: CC0 / Public Domain */ #include #include #include #include #include "common.h" #include "pl_clock.h" #include "window.h" static pl_log logger; static struct window *win; static void uninit(int ret) { window_destroy(&win); pl_log_destroy(&logger); exit(ret); } int main(int argc, char **argv) { logger = pl_log_create(PL_API_VER, pl_log_params( .log_cb = pl_log_color, .log_level = PL_LOG_DEBUG, )); win = window_create(logger, &(struct window_params) { .title = "colors demo", .width = 640, .height = 480, .alpha = true, }); if (!win) uninit(1); pl_clock_t ts_start, ts; if ((ts_start = pl_clock_now()) == 0) { uninit(1); } while (!win->window_lost) { if (window_get_key(win, KEY_ESC)) break; struct pl_swapchain_frame frame; bool ok = pl_swapchain_start_frame(win->swapchain, &frame); if (!ok) { // Something unexpected happened, perhaps the window is not // visible? Wait for events and try again. window_poll(win, true); continue; } if ((ts = pl_clock_now()) == 0) uninit(1); const double period = 10.; // in seconds double secs = fmod(pl_clock_diff(ts, ts_start), period); double pos = 2 * M_PI * secs / period; float alpha = (cos(pos) + 1.0) / 2.0; assert(frame.fbo->params.blit_dst); pl_tex_clear(win->gpu, frame.fbo, (float[4]) { alpha * (sinf(2 * pos + 0.0) + 1.0) / 2.0, alpha * (sinf(2 * pos + 2.0) + 1.0) / 2.0, alpha * (sinf(2 * pos + 4.0) + 1.0) / 2.0, alpha, }); ok = pl_swapchain_submit_frame(win->swapchain); if (!ok) { fprintf(stderr, "libplacebo: failed submitting frame!\n"); uninit(3); } pl_swapchain_swap_buffers(win->swapchain); window_poll(win, false); } uninit(0); } libplacebo-v7.349.0/demos/common.h000066400000000000000000000003021463457750100167370ustar00rootroot00000000000000// License: CC0 / Public Domain #pragma once #include #include #include #include #include #include "config_demos.h" libplacebo-v7.349.0/demos/meson.build000066400000000000000000000111531463457750100174460ustar00rootroot00000000000000glfw = dependency('glfw3', required: false) sdl = dependency('sdl2', required: false) sdl_image = dependency('SDL2_image', required: false) ffmpeg_deps = [ dependency('libavcodec', required: false), dependency('libavformat', required: false), dependency('libavutil', required: false), ] ffmpeg_found = true foreach dep : ffmpeg_deps ffmpeg_found = ffmpeg_found and dep.found() endforeach nuklear = disabler() nuklear_inc = include_directories('./3rdparty/nuklear') if cc.has_header('nuklear.h', include_directories: nuklear_inc) nuklear_lib = static_library('nuklear', include_directories: nuklear_inc, c_args: ['-O2', '-Wno-missing-prototypes'], dependencies: [ libplacebo, libm ], sources: 'ui.c', ) nuklear = declare_dependency( include_directories: nuklear_inc, link_with: nuklear_lib, ) else warning('Nuklear was not found in `demos/3rdparty`. 
Please run ' + '`git submodule update --init` followed by `meson --wipe`.') endif conf_demos = configuration_data() conf_demos.set('HAVE_NUKLEAR', nuklear.found()) conf_demos.set('HAVE_EGL', cc.check_header('EGL/egl.h', required: false)) apis = [] # Enable all supported combinations of API and windowing system if glfw.found() if components.get('vulkan') conf_demos.set('HAVE_GLFW_VULKAN', true) apis += static_library('glfw-vk', dependencies: [libplacebo, libm, glfw, vulkan_headers], sources: 'window_glfw.c', c_args: ['-DUSE_VK'], include_directories: vulkan_headers_inc, ) endif if components.get('opengl') conf_demos.set('HAVE_GLFW_OPENGL', true) apis += static_library('glfw-gl', dependencies: [libplacebo, glfw], sources: 'window_glfw.c', c_args: '-DUSE_GL', ) endif if components.get('d3d11') conf_demos.set('HAVE_GLFW_D3D11', true) apis += static_library('glfw-d3d11', dependencies: [libplacebo, glfw], sources: 'window_glfw.c', c_args: '-DUSE_D3D11', ) endif endif if sdl.found() if components.get('vulkan') conf_demos.set('HAVE_SDL_VULKAN', true) apis += static_library('sdl-vk', dependencies: [libplacebo, sdl, vulkan_headers], sources: 'window_sdl.c', c_args: ['-DUSE_VK'], include_directories: vulkan_headers_inc, ) endif if components.get('opengl') conf_demos.set('HAVE_SDL_OPENGL', true) apis += static_library('sdl-gl', dependencies: [libplacebo, sdl], sources: 'window_sdl.c', c_args: '-DUSE_GL', ) endif endif configure_file( output: 'config_demos.h', configuration: conf_demos, ) if apis.length() == 0 warning('Demos enabled but no supported combination of windowing system ' + 'and graphical APIs was found. Demo programs require either GLFW or ' + 'SDL and either Vulkan or OpenGL to function.') else additional_dep = [] if host_machine.system() == 'windows' additional_dep += cc.find_library('winmm') endif dep = declare_dependency( dependencies: [ libplacebo, build_deps ] + additional_dep, sources: ['window.c', 'utils.c'], include_directories: vulkan_headers_inc, link_with: apis, ) # Graphical demo programs executable('colors', 'colors.c', dependencies: [ dep, pl_clock, libm ], link_args: link_args, link_depends: link_depends, ) if sdl_image.found() executable('sdlimage', 'sdlimage.c', dependencies: [ dep, libm, sdl_image ], link_args: link_args, link_depends: link_depends, ) endif if ffmpeg_found plplay_deps = [ dep, pl_thread, pl_clock ] + ffmpeg_deps if nuklear.found() plplay_deps += nuklear endif if host_machine.system() == 'windows' plplay_deps += cc.find_library('shlwapi', required: true) endif plplay_sources = ['plplay.c', 'settings.c'] if host_machine.system() == 'windows' windows = import('windows') plplay_sources += windows.compile_resources(demos_rc, depends: version_h, include_directories: meson.project_source_root()/'win32') endif executable('plplay', plplay_sources, dependencies: plplay_deps, link_args: link_args, link_depends: link_depends, install: true, ) endif endif # Headless vulkan demos if components.get('vk-proc-addr') executable('video-filtering', 'video-filtering.c', dependencies: [ libplacebo, pl_clock, pl_thread, vulkan_loader ], c_args: '-O2', link_args: link_args, link_depends: link_depends, ) executable('multigpu-bench', 'multigpu-bench.c', dependencies: [ libplacebo, pl_clock, vulkan_loader ], c_args: '-O2', link_args: link_args, link_depends: link_depends, ) endif libplacebo-v7.349.0/demos/multigpu-bench.c000066400000000000000000000340211463457750100203720ustar00rootroot00000000000000/* GPU->GPU transfer benchmarks. Requires some manual setup. 
* * License: CC0 / Public Domain */ #include #include #include #include #include #include #include #include #include "pl_clock.h" #define ALIGN2(x, align) (((x) + (align) - 1) & ~((align) - 1)) enum { // Image configuration NUM_TEX = 16, WIDTH = 1920, HEIGHT = 1080, DEPTH = 16, COMPS = 1, // Queue configuration NUM_QUEUES = NUM_TEX, ASYNC_TX = 1, ASYNC_COMP = 1, // Buffer configuration PTR_ALIGN = 4096, PIXEL_PITCH = DEPTH / 8, ROW_PITCH = ALIGN2(WIDTH * PIXEL_PITCH, 256), IMAGE_SIZE = ROW_PITCH * HEIGHT, BUFFER_SIZE = IMAGE_SIZE + PTR_ALIGN - 1, // Test configuration TEST_MS = 1500, WARMUP_MS = 500, POLL_FREQ = 10, }; static uint8_t* page_align(uint8_t *data) { return (uint8_t *) ALIGN2((uintptr_t) data, PTR_ALIGN); } enum mem_owner { CPU, SRC, DST, NUM_MEM_OWNERS, }; enum mem_type { RAM, GPU, NUM_MEM_TYPES, }; // This is attached to every `pl_tex.params.user_data` struct buffers { pl_gpu gpu; pl_buf buf[NUM_MEM_TYPES]; pl_buf exported[NUM_MEM_TYPES]; pl_buf imported[NUM_MEM_TYPES]; struct pl_tex_transfer_params async; }; static struct buffers *alloc_buffers(pl_gpu gpu) { struct buffers *buffers = malloc(sizeof(*buffers)); *buffers = (struct buffers) { .gpu = gpu }; for (enum mem_type type = 0; type < NUM_MEM_TYPES; type++) { buffers->buf[type] = pl_buf_create(gpu, pl_buf_params( .size = BUFFER_SIZE, .memory_type = type == RAM ? PL_BUF_MEM_HOST : PL_BUF_MEM_DEVICE, .host_mapped = true, )); if (!buffers->buf[type]) exit(2); if (gpu->export_caps.buf & PL_HANDLE_DMA_BUF) { buffers->exported[type] = pl_buf_create(gpu, pl_buf_params( .size = BUFFER_SIZE, .memory_type = type == RAM ? PL_BUF_MEM_HOST : PL_BUF_MEM_DEVICE, .export_handle = PL_HANDLE_DMA_BUF, )); } } return buffers; } static void free_buffers(struct buffers *buffers) { for (enum mem_type type = 0; type < NUM_MEM_TYPES; type++) { pl_buf_destroy(buffers->gpu, &buffers->buf[type]); pl_buf_destroy(buffers->gpu, &buffers->exported[type]); pl_buf_destroy(buffers->gpu, &buffers->imported[type]); } free(buffers); } static void link_buffers(pl_gpu gpu, struct buffers *buffers, const struct buffers *import) { if (!(gpu->import_caps.buf & PL_HANDLE_DMA_BUF)) return; for (enum mem_type type = 0; type < NUM_MEM_TYPES; type++) { if (!import->exported[type]) continue; buffers->imported[type] = pl_buf_create(gpu, pl_buf_params( .size = BUFFER_SIZE, .memory_type = type == RAM ? PL_BUF_MEM_HOST : PL_BUF_MEM_DEVICE, .import_handle = PL_HANDLE_DMA_BUF, .shared_mem = import->exported[type]->shared_mem, )); } } struct ctx { pl_gpu srcgpu, dstgpu; pl_tex src, dst; // for copy-based methods enum mem_owner owner; enum mem_type type; bool noimport; bool async; }; static void await_buf(pl_gpu gpu, pl_buf buf) { while (pl_buf_poll(gpu, buf, UINT64_MAX)) ; // do nothing } static void async_upload(void *priv) { struct buffers *buffers = priv; pl_tex_upload(buffers->gpu, &buffers->async); } static inline void copy_ptr(struct ctx ctx) { const pl_gpu srcgpu = ctx.srcgpu, dstgpu = ctx.dstgpu; const pl_tex src = ctx.src, dst = ctx.dst; struct buffers *srcbuffers = src->params.user_data; struct buffers *dstbuffers = dst->params.user_data; pl_buf buf = NULL; uint8_t *data = NULL; if (ctx.owner == CPU) { static uint8_t static_buffer[BUFFER_SIZE]; data = page_align(static_buffer); } else { struct buffers *b = ctx.owner == SRC ? 
srcbuffers : dstbuffers; buf = b->buf[ctx.type]; data = page_align(buf->data); await_buf(b->gpu, buf); } struct pl_tex_transfer_params src_params = { .tex = src, .row_pitch = ROW_PITCH, .no_import = ctx.noimport, }; if (ctx.owner == SRC) { src_params.buf = buf; src_params.buf_offset = data - buf->data; } else { src_params.ptr = data; } struct pl_tex_transfer_params dst_params = { .tex = dst, .row_pitch = ROW_PITCH, .no_import = ctx.noimport, }; if (ctx.owner == DST) { dst_params.buf = buf; dst_params.buf_offset = data - buf->data; } else { dst_params.ptr = data; } if (ctx.async) { src_params.callback = async_upload; src_params.priv = dstbuffers; dstbuffers->async = dst_params; pl_tex_download(srcgpu, &src_params); } else { pl_tex_download(srcgpu, &src_params); pl_tex_upload(dstgpu, &dst_params); } } static inline void copy_interop(struct ctx ctx) { const pl_gpu srcgpu = ctx.srcgpu, dstgpu = ctx.dstgpu; const pl_tex src = ctx.src, dst = ctx.dst; struct buffers *srcbuffers = src->params.user_data; struct buffers *dstbuffers = dst->params.user_data; struct pl_tex_transfer_params src_params = { .tex = src, .row_pitch = ROW_PITCH, }; struct pl_tex_transfer_params dst_params = { .tex = dst, .row_pitch = ROW_PITCH, }; if (ctx.owner == SRC) { src_params.buf = srcbuffers->exported[ctx.type]; dst_params.buf = dstbuffers->imported[ctx.type]; } else { src_params.buf = srcbuffers->imported[ctx.type]; dst_params.buf = dstbuffers->exported[ctx.type]; } await_buf(srcgpu, src_params.buf); if (ctx.async) { src_params.callback = async_upload; src_params.priv = dstbuffers; dstbuffers->async = dst_params; pl_tex_download(srcgpu, &src_params); } else { pl_tex_download(srcgpu, &src_params); await_buf(srcgpu, src_params.buf); // manual cross-GPU synchronization pl_tex_upload(dstgpu, &dst_params); } } typedef void method(struct ctx ctx); static double bench(struct ctx ctx, pl_tex srcs[], pl_tex dsts[], method fun) { const pl_gpu srcgpu = ctx.srcgpu, dstgpu = ctx.dstgpu; pl_clock_t start_warmup = 0, start_test = 0; uint64_t frames = 0, frames_warmup = 0; start_warmup = pl_clock_now(); do { const int idx = frames % NUM_TEX; ctx.src = srcs[idx]; ctx.dst = dsts[idx]; // Generate some quasi-unique data in the source float x = M_E * (frames / 100.0); pl_tex_clear(srcgpu, ctx.src, (float[4]) { sinf(x + 0.0) / 2.0 + 0.5, sinf(x + 2.0) / 2.0 + 0.5, sinf(x + 4.0) / 2.0 + 0.5, 1.0, }); if (fun) fun(ctx); pl_gpu_flush(srcgpu); // to rotate queues pl_gpu_flush(dstgpu); frames++; if (frames % POLL_FREQ == 0) { pl_clock_t now = pl_clock_now(); if (start_test) { if (pl_clock_diff(now, start_test) > TEST_MS * 1e-3) break; } else if (pl_clock_diff(now, start_warmup) > WARMUP_MS * 1e-3) { start_test = now; frames_warmup = frames; } } } while (true); pl_gpu_finish(srcgpu); pl_gpu_finish(dstgpu); return pl_clock_diff(pl_clock_now(), start_test) / (frames - frames_warmup); } static void run_tests(pl_gpu srcgpu, pl_gpu dstgpu) { const enum pl_fmt_caps caps = PL_FMT_CAP_HOST_READABLE; pl_fmt srcfmt = pl_find_fmt(srcgpu, PL_FMT_UNORM, COMPS, DEPTH, DEPTH, caps); pl_fmt dstfmt = pl_find_fmt(dstgpu, PL_FMT_UNORM, COMPS, DEPTH, DEPTH, caps); if (!srcfmt || !dstfmt) exit(2); pl_tex src[NUM_TEX], dst[NUM_TEX]; for (int i = 0; i < NUM_TEX; i++) { struct buffers *srcbuffers = alloc_buffers(srcgpu); struct buffers *dstbuffers = alloc_buffers(dstgpu); if (!memcmp(srcgpu->uuid, dstgpu->uuid, sizeof(srcgpu->uuid))) { link_buffers(srcgpu, srcbuffers, dstbuffers); link_buffers(dstgpu, dstbuffers, srcbuffers); } src[i] = pl_tex_create(srcgpu, 
pl_tex_params( .w = WIDTH, .h = HEIGHT, .format = srcfmt, .host_readable = true, .blit_dst = true, .user_data = srcbuffers, )); dst[i] = pl_tex_create(dstgpu, pl_tex_params( .w = WIDTH, .h = HEIGHT, .format = dstfmt, .host_writable = true, .blit_dst = true, .user_data = dstbuffers, )); if (!src[i] || !dst[i]) exit(2); } struct ctx ctx = { .srcgpu = srcgpu, .dstgpu = dstgpu, }; static const char *owners[] = { [CPU] = "cpu", [SRC] = "src", [DST] = "dst", }; static const char *types[] = { [RAM] = "ram", [GPU] = "gpu", }; double baseline = bench(ctx, src, dst, NULL); // Test all possible generic copy methods for (enum mem_owner owner = 0; owner < NUM_MEM_OWNERS; owner++) { for (enum mem_type type = 0; type < NUM_MEM_TYPES; type++) { for (int async = 0; async <= 1; async++) { for (int noimport = 0; noimport <= 1; noimport++) { // Blacklist undesirable configurations: if (owner == CPU && type != RAM) continue; // impossible if (owner == CPU && async) continue; // no synchronization on static buffer if (owner == SRC && type == GPU) continue; // GPU readback is orders of magnitude too slow if (owner == DST && !noimport) continue; // exhausts source address space struct ctx cfg = ctx; cfg.noimport = noimport; cfg.owner = owner; cfg.type = type; cfg.async = async; printf(" %s %s %s %s : ", owners[owner], types[type], noimport ? "memcpy" : " ", async ? "async" : " "); double dur = bench(cfg, src, dst, copy_ptr) - baseline; printf("avg %.0f μs\t%.3f fps\n", 1e6 * dur, 1.0 / dur); } } } } // Test DMABUF interop when supported for (enum mem_owner owner = 0; owner < NUM_MEM_OWNERS; owner++) { for (enum mem_type type = 0; type < NUM_MEM_TYPES; type++) { for (int async = 0; async <= 1; async++) { struct buffers *buffers; switch (owner) { case SRC: buffers = dst[0]->params.user_data; if (!buffers->imported[type]) continue; break; case DST: buffers = src[0]->params.user_data; if (!buffers->imported[type]) continue; break; default: continue; } struct ctx cfg = ctx; cfg.owner = owner; cfg.type = type; printf(" %s %s %s %s : ", owners[owner], types[type], "dmabuf", async ? 
"async" : " "); double dur = bench(cfg, src, dst, copy_interop) - baseline; printf("avg %.0f μs\t%.3f fps\n", 1e6 * dur, 1.0 / dur); } } } for (int i = 0; i < NUM_TEX; i++) { free_buffers(src[i]->params.user_data); free_buffers(dst[i]->params.user_data); pl_tex_destroy(srcgpu, &src[i]); pl_tex_destroy(dstgpu, &dst[i]); } } int main(int argc, const char *argv[]) { if (argc < 3) { fprintf(stderr, "Usage: %s 'Device 1' 'Device 2'\n\n", argv[0]); fprintf(stderr, "(Use `vulkaninfo` for a list of devices)\n"); exit(1); } pl_log log = pl_log_create(PL_API_VER, pl_log_params( .log_cb = pl_log_color, .log_level = PL_LOG_WARN, )); pl_vk_inst inst = pl_vk_inst_create(log, pl_vk_inst_params( .debug = false, )); pl_vulkan dev1 = pl_vulkan_create(log, pl_vulkan_params( .device_name = argv[1], .queue_count = NUM_QUEUES, .async_transfer = ASYNC_TX, .async_compute = ASYNC_COMP, )); pl_vulkan dev2 = pl_vulkan_create(log, pl_vulkan_params( .device_name = argv[2], .queue_count = NUM_QUEUES, .async_transfer = ASYNC_TX, .async_compute = ASYNC_COMP, )); if (!dev1 || !dev2) { fprintf(stderr, "Failed creating Vulkan device!\n"); exit(1); } if (ROW_PITCH % dev1->gpu->limits.align_tex_xfer_pitch) { fprintf(stderr, "Warning: Row pitch %d is not a multiple of optimal " "transfer pitch (%zu) for GPU '%s'\n", ROW_PITCH, dev1->gpu->limits.align_tex_xfer_pitch, argv[1]); } if (ROW_PITCH % dev2->gpu->limits.align_tex_xfer_pitch) { fprintf(stderr, "Warning: Row pitch %d is not a multiple of optimal " "transfer pitch (%zu) for GPU '%s'\n", ROW_PITCH, dev2->gpu->limits.align_tex_xfer_pitch, argv[2]); } printf("%s -> %s:\n", argv[1], argv[2]); run_tests(dev1->gpu, dev2->gpu); if (strcmp(argv[1], argv[2])) { printf("%s -> %s:\n", argv[2], argv[1]); run_tests(dev2->gpu, dev1->gpu); } pl_vulkan_destroy(&dev1); pl_vulkan_destroy(&dev2); pl_vk_inst_destroy(&inst); pl_log_destroy(&log); } libplacebo-v7.349.0/demos/plplay.c000066400000000000000000000575361463457750100167700ustar00rootroot00000000000000/* Example video player based on ffmpeg. Designed to expose every libplacebo * option for testing purposes. Not a serious video player, no real error * handling. Simply infinitely loops its input. 
* * License: CC0 / Public Domain */ #include #include #include "common.h" #include "window.h" #include "utils.h" #include "plplay.h" #include "pl_clock.h" #include "pl_thread.h" #ifdef HAVE_NUKLEAR #include "ui.h" #else struct ui; static void ui_destroy(struct ui **ui) {} static bool ui_draw(struct ui *ui, const struct pl_swapchain_frame *frame) { return true; }; #endif #include static inline void log_time(struct timing *t, double ts) { t->sum += ts; t->sum2 += ts * ts; t->peak = fmax(t->peak, ts); t->count++; } static void uninit(struct plplay *p) { if (p->decoder_thread_created) { p->exit_thread = true; pl_queue_push(p->queue, NULL); // Signal EOF to wake up thread pl_thread_join(p->decoder_thread); } pl_queue_destroy(&p->queue); pl_renderer_destroy(&p->renderer); pl_options_free(&p->opts); for (int i = 0; i < p->shader_num; i++) { pl_mpv_user_shader_destroy(&p->shader_hooks[i]); free(p->shader_paths[i]); } for (int i = 0; i < MAX_FRAME_PASSES; i++) pl_shader_info_deref(&p->frame_info[i].shader); for (int j = 0; j < MAX_BLEND_FRAMES; j++) { for (int i = 0; i < MAX_BLEND_PASSES; i++) pl_shader_info_deref(&p->blend_info[j][i].shader); } free(p->shader_hooks); free(p->shader_paths); free(p->icc_name); pl_icc_close(&p->icc); if (p->cache) { if (pl_cache_signature(p->cache) != p->cache_sig) { FILE *file = fopen(p->cache_file, "wb"); if (file) { pl_cache_save_file(p->cache, file); fclose(file); } } pl_cache_destroy(&p->cache); } // Free this before destroying the window to release associated GPU buffers avcodec_free_context(&p->codec); avformat_free_context(p->format); ui_destroy(&p->ui); window_destroy(&p->win); pl_log_destroy(&p->log); memset(p, 0, sizeof(*p)); } static bool open_file(struct plplay *p, const char *filename) { static const int av_log_level[] = { [PL_LOG_NONE] = AV_LOG_QUIET, [PL_LOG_FATAL] = AV_LOG_PANIC, [PL_LOG_ERR] = AV_LOG_ERROR, [PL_LOG_WARN] = AV_LOG_WARNING, [PL_LOG_INFO] = AV_LOG_INFO, [PL_LOG_DEBUG] = AV_LOG_VERBOSE, [PL_LOG_TRACE] = AV_LOG_DEBUG, }; av_log_set_level(av_log_level[p->args.verbosity]); printf("Opening file: '%s'\n", filename); if (avformat_open_input(&p->format, filename, NULL, NULL) != 0) { fprintf(stderr, "libavformat: Failed opening file!\n"); return false; } printf("Format: %s\n", p->format->iformat->name); if (p->format->duration != AV_NOPTS_VALUE) printf("Duration: %.3f s\n", p->format->duration / 1e6); if (avformat_find_stream_info(p->format, NULL) < 0) { fprintf(stderr, "libavformat: Failed finding stream info!\n"); return false; } // Find "best" video stream int stream_idx = av_find_best_stream(p->format, AVMEDIA_TYPE_VIDEO, -1, -1, NULL, 0); if (stream_idx < 0) { fprintf(stderr, "plplay: File contains no video streams?\n"); return false; } const AVStream *stream = p->format->streams[stream_idx]; const AVCodecParameters *par = stream->codecpar; printf("Found video track (stream %d)\n", stream_idx); printf("Resolution: %d x %d\n", par->width, par->height); if (stream->avg_frame_rate.den && stream->avg_frame_rate.num) printf("FPS: %f\n", av_q2d(stream->avg_frame_rate)); if (stream->r_frame_rate.den && stream->r_frame_rate.num) printf("TBR: %f\n", av_q2d(stream->r_frame_rate)); if (stream->time_base.den && stream->time_base.num) printf("TBN: %f\n", av_q2d(stream->time_base)); if (par->bit_rate) printf("Bitrate: %"PRIi64" kbps\n", par->bit_rate / 1000); printf("Format: %s\n", av_get_pix_fmt_name(par->format)); p->stream = stream; return true; } static bool init_codec(struct plplay *p) { assert(p->stream); assert(p->win->gpu); const AVCodec *codec 
= avcodec_find_decoder(p->stream->codecpar->codec_id); if (!codec) { fprintf(stderr, "libavcodec: Failed finding matching codec\n"); return false; } p->codec = avcodec_alloc_context3(codec); if (!p->codec) { fprintf(stderr, "libavcodec: Failed allocating codec\n"); return false; } if (avcodec_parameters_to_context(p->codec, p->stream->codecpar) < 0) { fprintf(stderr, "libavcodec: Failed copying codec parameters to codec\n"); return false; } printf("Codec: %s (%s)\n", codec->name, codec->long_name); const AVCodecHWConfig *hwcfg = 0; if (p->args.hwdec) { for (int i = 0; (hwcfg = avcodec_get_hw_config(codec, i)); i++) { if (!pl_test_pixfmt(p->win->gpu, hwcfg->pix_fmt)) continue; if (!(hwcfg->methods & AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX)) continue; int ret = av_hwdevice_ctx_create(&p->codec->hw_device_ctx, hwcfg->device_type, NULL, NULL, 0); if (ret < 0) { fprintf(stderr, "libavcodec: Failed opening HW device context, skipping\n"); continue; } const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(hwcfg->pix_fmt); printf("Using hardware frame format: %s\n", desc->name); p->codec->extra_hw_frames = 4; break; } } if (!hwcfg) printf("Using software decoding\n"); p->codec->thread_count = FFMIN(av_cpu_count() + 1, 16); p->codec->get_buffer2 = pl_get_buffer2; p->codec->opaque = &p->win->gpu; #if LIBAVCODEC_VERSION_MAJOR < 60 AV_NOWARN_DEPRECATED({ p->codec->thread_safe_callbacks = 1; }); #endif #if LIBAVCODEC_VERSION_INT >= AV_VERSION_INT(58, 113, 100) p->codec->export_side_data |= AV_CODEC_EXPORT_DATA_FILM_GRAIN; #endif if (avcodec_open2(p->codec, codec, NULL) < 0) { fprintf(stderr, "libavcodec: Failed opening codec\n"); return false; } return true; } static bool map_frame(pl_gpu gpu, pl_tex *tex, const struct pl_source_frame *src, struct pl_frame *out_frame) { AVFrame *frame = src->frame_data; struct plplay *p = frame->opaque; bool ok = pl_map_avframe_ex(gpu, out_frame, pl_avframe_params( .frame = frame, .tex = tex, .map_dovi = !p->ignore_dovi, )); av_frame_free(&frame); // references are preserved by `out_frame` if (!ok) { fprintf(stderr, "Failed mapping AVFrame!\n"); return false; } p->stats.mapped++; pl_frame_copy_stream_props(out_frame, p->stream); return true; } static void unmap_frame(pl_gpu gpu, struct pl_frame *frame, const struct pl_source_frame *src) { pl_unmap_avframe(gpu, frame); } static void discard_frame(const struct pl_source_frame *src) { AVFrame *frame = src->frame_data; struct plplay *p = frame->opaque; p->stats.dropped++; av_frame_free(&frame); printf("Dropped frame with PTS %.3f\n", src->pts); } static PL_THREAD_VOID decode_loop(void *arg) { int ret; struct plplay *p = arg; AVPacket *packet = av_packet_alloc(); AVFrame *frame = av_frame_alloc(); if (!frame || !packet) goto done; float frame_duration = av_q2d(av_inv_q(p->stream->avg_frame_rate)); double first_pts = 0.0, base_pts = 0.0, last_pts = 0.0; uint64_t num_frames = 0; while (!p->exit_thread) { switch ((ret = av_read_frame(p->format, packet))) { case 0: if (packet->stream_index != p->stream->index) { // Ignore unrelated packets av_packet_unref(packet); continue; } ret = avcodec_send_packet(p->codec, packet); av_packet_unref(packet); break; case AVERROR_EOF: // Send empty input to flush decoder ret = avcodec_send_packet(p->codec, NULL); break; default: fprintf(stderr, "libavformat: Failed reading packet: %s\n", av_err2str(ret)); goto done; } if (ret < 0) { fprintf(stderr, "libavcodec: Failed sending packet to decoder: %s\n", av_err2str(ret)); goto done; } // Decode all frames from this packet while ((ret = 
avcodec_receive_frame(p->codec, frame)) == 0) { last_pts = frame->pts * av_q2d(p->stream->time_base); if (num_frames++ == 0) first_pts = last_pts; frame->opaque = p; (void) atomic_fetch_add(&p->stats.decoded, 1); pl_queue_push_block(p->queue, UINT64_MAX, &(struct pl_source_frame) { .pts = last_pts - first_pts + base_pts, .duration = frame_duration, .map = map_frame, .unmap = unmap_frame, .discard = discard_frame, .frame_data = frame, // allow soft-disabling deinterlacing at the source frame level .first_field = p->opts->params.deinterlace_params ? pl_field_from_avframe(frame) : PL_FIELD_NONE, }); frame = av_frame_alloc(); } switch (ret) { case AVERROR(EAGAIN): continue; case AVERROR_EOF: if (num_frames <= 1) goto done; // still image or empty file // loop infinitely ret = av_seek_frame(p->format, p->stream->index, 0, AVSEEK_FLAG_BACKWARD); if (ret < 0) { fprintf(stderr, "libavformat: Failed seeking in stream: %s\n", av_err2str(ret)); goto done; } avcodec_flush_buffers(p->codec); base_pts += last_pts; num_frames = 0; continue; default: fprintf(stderr, "libavcodec: Failed decoding frame: %s\n", av_err2str(ret)); goto done; } } done: pl_queue_push(p->queue, NULL); // Signal EOF to flush queue av_packet_free(&packet); av_frame_free(&frame); PL_THREAD_RETURN(); } static void update_colorspace_hint(struct plplay *p, const struct pl_frame_mix *mix) { const struct pl_frame *frame = NULL; for (int i = 0; i < mix->num_frames; i++) { if (mix->timestamps[i] > 0.0) break; frame = mix->frames[i]; } if (!frame) return; struct pl_color_space hint = {0}; if (p->colorspace_hint) hint = frame->color; if (p->target_override) apply_csp_overrides(p, &hint); pl_swapchain_colorspace_hint(p->win->swapchain, &hint); } static bool render_frame(struct plplay *p, const struct pl_swapchain_frame *frame, const struct pl_frame_mix *mix) { struct pl_frame target; pl_options opts = p->opts; pl_frame_from_swapchain(&target, frame); update_settings(p, &target); if (p->target_override) { target.repr = p->force_repr; pl_color_repr_merge(&target.repr, &frame->color_repr); apply_csp_overrides(p, &target.color); // Update ICC profile parameters dynamically float target_luma = 0.0f; if (!p->use_icc_luma) { pl_color_space_nominal_luma_ex(pl_nominal_luma_params( .metadata = PL_HDR_METADATA_HDR10, // use only static HDR nits .scaling = PL_HDR_NITS, .color = &target.color, .out_max = &target_luma, )); } pl_icc_update(p->log, &p->icc, NULL, pl_icc_params( .max_luma = target_luma, .force_bpc = p->force_bpc, )); target.icc = p->icc; } assert(mix->num_frames); pl_rect2df crop = mix->frames[0]->crop; if (p->stream->sample_aspect_ratio.num && p->target_zoom != ZOOM_RAW) { float sar = av_q2d(p->stream->sample_aspect_ratio); pl_rect2df_stretch(&crop, fmaxf(1.0f, sar), fmaxf(1.0f, 1.0 / sar)); } // Apply target rotation and un-rotate crop relative to target target.rotation = p->target_rot; pl_rect2df_rotate(&crop, mix->frames[0]->rotation - target.rotation); switch (p->target_zoom) { case ZOOM_PAD: pl_rect2df_aspect_copy(&target.crop, &crop, 0.0); break; case ZOOM_CROP: pl_rect2df_aspect_copy(&target.crop, &crop, 1.0); break; case ZOOM_STRETCH: break; // target.crop already covers full image case ZOOM_FIT: pl_rect2df_aspect_fit(&target.crop, &crop, 0.0); break; case ZOOM_RAW: ; // Ensure pixels are exactly aligned, to avoid fractional scaling int w = roundf(fabsf(pl_rect_w(crop))); int h = roundf(fabsf(pl_rect_h(crop))); target.crop.x0 = roundf((pl_rect_w(target.crop) - w) / 2.0f); target.crop.y0 = roundf((pl_rect_h(target.crop) - h) / 2.0f); 
target.crop.x1 = target.crop.x0 + w; target.crop.y1 = target.crop.y0 + h; break; case ZOOM_400: case ZOOM_200: case ZOOM_100: case ZOOM_50: case ZOOM_25: ; const float z = powf(2.0f, (int) ZOOM_100 - p->target_zoom); const float sx = z * fabsf(pl_rect_w(crop)) / pl_rect_w(target.crop); const float sy = z * fabsf(pl_rect_h(crop)) / pl_rect_h(target.crop); pl_rect2df_stretch(&target.crop, sx, sy); break; } struct pl_color_map_params *cpars = &opts->color_map_params; if (cpars->visualize_lut) { cpars->visualize_rect = (pl_rect2df) {0, 0, 1, 1}; float tar = pl_rect2df_aspect(&target.crop); pl_rect2df_aspect_set(&cpars->visualize_rect, 1.0f / tar, 0.0f); } pl_clock_t ts_pre = pl_clock_now(); if (!pl_render_image_mix(p->renderer, mix, &target, &opts->params)) return false; pl_clock_t ts_rendered = pl_clock_now(); if (!ui_draw(p->ui, frame)) return false; pl_clock_t ts_ui_drawn = pl_clock_now(); log_time(&p->stats.render, pl_clock_diff(ts_rendered, ts_pre)); log_time(&p->stats.draw_ui, pl_clock_diff(ts_ui_drawn, ts_rendered)); p->stats.rendered++; return true; } static bool render_loop(struct plplay *p) { pl_options opts = p->opts; struct pl_queue_params qparams = *pl_queue_params( .interpolation_threshold = 0.01, .timeout = UINT64_MAX, ); // Initialize the frame queue, blocking indefinitely until done struct pl_frame_mix mix; switch (pl_queue_update(p->queue, &mix, &qparams)) { case PL_QUEUE_OK: break; case PL_QUEUE_EOF: return true; case PL_QUEUE_ERR: goto error; default: abort(); } struct pl_swapchain_frame frame; update_colorspace_hint(p, &mix); if (!pl_swapchain_start_frame(p->win->swapchain, &frame)) goto error; // Disable background transparency by default if the swapchain does not // appear to support alpha transaprency if (frame.color_repr.alpha == PL_ALPHA_NONE) opts->params.background_transparency = 0.0; if (!render_frame(p, &frame, &mix)) goto error; if (!pl_swapchain_submit_frame(p->win->swapchain)) goto error; // Wait until rendering is complete. Do this before measuring the time // start, to ensure we don't count initialization overhead as part of the // first vsync. pl_gpu_finish(p->win->gpu); p->stats.render = p->stats.draw_ui = (struct timing) {0}; pl_clock_t ts_start = 0, ts_prev = 0; pl_swapchain_swap_buffers(p->win->swapchain); window_poll(p->win, false); double pts_target = 0.0, prev_pts = 0.0; while (!p->win->window_lost) { if (window_get_key(p->win, KEY_ESC)) break; if (p->toggle_fullscreen) window_toggle_fullscreen(p->win, !window_is_fullscreen(p->win)); update_colorspace_hint(p, &mix); pl_clock_t ts_acquire = pl_clock_now(); if (!pl_swapchain_start_frame(p->win->swapchain, &frame)) { // Window stuck/invisible? Block for events and try again. 
window_poll(p->win, true); continue; } pl_clock_t ts_pre_update = pl_clock_now(); log_time(&p->stats.acquire, pl_clock_diff(ts_pre_update, ts_acquire)); if (!ts_start) ts_start = ts_pre_update; qparams.timeout = 0; // non-blocking update qparams.radius = pl_frame_mix_radius(&p->opts->params); qparams.pts = fmax(pts_target, pl_clock_diff(ts_pre_update, ts_start)); p->stats.current_pts = qparams.pts; if (qparams.pts != prev_pts) log_time(&p->stats.pts_interval, qparams.pts - prev_pts); prev_pts = qparams.pts; retry: switch (pl_queue_update(p->queue, &mix, &qparams)) { case PL_QUEUE_ERR: goto error; case PL_QUEUE_EOF: printf("End of file reached\n"); return true; case PL_QUEUE_OK: break; case PL_QUEUE_MORE: qparams.timeout = UINT64_MAX; // retry in blocking mode goto retry; } pl_clock_t ts_post_update = pl_clock_now(); log_time(&p->stats.update, pl_clock_diff(ts_post_update, ts_pre_update)); if (qparams.timeout) { double stuck_ms = 1e3 * pl_clock_diff(ts_post_update, ts_pre_update); fprintf(stderr, "Stalled for %.4f ms due to frame queue underrun!\n", stuck_ms); ts_start += ts_post_update - ts_pre_update; // subtract time spent waiting p->stats.stalled++; p->stats.stalled_ms += stuck_ms; } if (!render_frame(p, &frame, &mix)) goto error; if (pts_target) { pl_gpu_flush(p->win->gpu); pl_clock_t ts_wait = pl_clock_now(); double pts_now = pl_clock_diff(ts_wait, ts_start); if (pts_target >= pts_now) { log_time(&p->stats.sleep, pts_target - pts_now); pl_thread_sleep(pts_target - pts_now); } else { double missed_ms = 1e3 * (pts_now - pts_target); fprintf(stderr, "Missed PTS target %.3f (%.3f ms in the past)\n", pts_target, missed_ms); p->stats.missed++; p->stats.missed_ms += missed_ms; } pts_target = 0.0; } pl_clock_t ts_pre_submit = pl_clock_now(); if (!pl_swapchain_submit_frame(p->win->swapchain)) { fprintf(stderr, "libplacebo: failed presenting frame!\n"); goto error; } pl_clock_t ts_post_submit = pl_clock_now(); log_time(&p->stats.submit, pl_clock_diff(ts_post_submit, ts_pre_submit)); if (ts_prev) log_time(&p->stats.vsync_interval, pl_clock_diff(ts_post_submit, ts_prev)); ts_prev = ts_post_submit; pl_swapchain_swap_buffers(p->win->swapchain); pl_clock_t ts_post_swap = pl_clock_now(); log_time(&p->stats.swap, pl_clock_diff(ts_post_swap, ts_post_submit)); window_poll(p->win, false); // In content-timed mode (frame mixing disabled), delay rendering // until the next frame should become visible if (!opts->params.frame_mixer) { struct pl_source_frame next; for (int i = 0;; i++) { if (!pl_queue_peek(p->queue, i, &next)) break; if (next.pts > qparams.pts) { pts_target = next.pts; break; } } } if (p->fps_override) pts_target = fmax(pts_target, qparams.pts + 1.0 / p->fps); } return true; error: fprintf(stderr, "Render loop failed, exiting early...\n"); return false; } static void info_callback(void *priv, const struct pl_render_info *info) { struct plplay *p = priv; switch (info->stage) { case PL_RENDER_STAGE_FRAME: if (info->index >= MAX_FRAME_PASSES) return; p->num_frame_passes = info->index + 1; pl_dispatch_info_move(&p->frame_info[info->index], info->pass); return; case PL_RENDER_STAGE_BLEND: if (info->index >= MAX_BLEND_PASSES || info->count >= MAX_BLEND_FRAMES) return; p->num_blend_passes[info->count] = info->index + 1; pl_dispatch_info_move(&p->blend_info[info->count][info->index], info->pass); return; case PL_RENDER_STAGE_COUNT: break; } abort(); } static struct plplay state; int main(int argc, char *argv[]) { state = (struct plplay) { .target_override = true, .use_icc_luma = true, .fps = 60.0, 
.args = { .preset = &pl_render_default_params, .verbosity = PL_LOG_INFO, }, }; if (!parse_args(&state.args, argc, argv)) return -1; state.log = pl_log_create(PL_API_VER, pl_log_params( .log_cb = pl_log_color, .log_level = state.args.verbosity, )); pl_options opts = state.opts = pl_options_alloc(state.log); pl_options_reset(opts, state.args.preset); // Enable this by default to save one click opts->params.cone_params = &opts->cone_params; // Enable dynamic parameters by default, due to plplay's heavy reliance on // GUI controls for dynamically adjusting render parameters. opts->params.dynamic_constants = true; // Hook up our pass info callback opts->params.info_callback = info_callback; opts->params.info_priv = &state; struct plplay *p = &state; if (!open_file(p, state.args.filename)) goto error; const AVCodecParameters *par = p->stream->codecpar; const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(par->format); if (!desc) goto error; struct window_params params = { .title = "plplay", .width = par->width, .height = par->height, .forced_impl = state.args.window_impl, }; if (desc->flags & AV_PIX_FMT_FLAG_ALPHA) { params.alpha = true; opts->params.background_transparency = 1.0; } p->win = window_create(p->log, ¶ms); if (!p->win) goto error; // Test the AVPixelFormat against the GPU capabilities if (!pl_test_pixfmt(p->win->gpu, par->format)) { fprintf(stderr, "Unsupported AVPixelFormat: %s\n", desc->name); goto error; } #ifdef HAVE_NUKLEAR p->ui = ui_create(p->win->gpu); if (!p->ui) goto error; #endif if (!init_codec(p)) goto error; const char *cache_dir = get_cache_dir(&(char[512]) {0}); if (cache_dir) { int ret = snprintf(p->cache_file, sizeof(p->cache_file), "%s/plplay.cache", cache_dir); if (ret > 0 && ret < sizeof(p->cache_file)) { p->cache = pl_cache_create(pl_cache_params( .log = p->log, .max_total_size = 50 << 20, // 50 MB )); pl_gpu_set_cache(p->win->gpu, p->cache); FILE *file = fopen(p->cache_file, "rb"); if (file) { pl_cache_load_file(p->cache, file); p->cache_sig = pl_cache_signature(p->cache); fclose(file); } } } p->queue = pl_queue_create(p->win->gpu); int ret = pl_thread_create(&p->decoder_thread, decode_loop, p); if (ret != 0) { fprintf(stderr, "Failed creating decode thread: %s\n", strerror(errno)); goto error; } p->decoder_thread_created = true; p->renderer = pl_renderer_create(p->log, p->win->gpu); if (!render_loop(p)) goto error; printf("Exiting...\n"); uninit(p); return 0; error: uninit(p); return 1; } libplacebo-v7.349.0/demos/plplay.h000066400000000000000000000066071463457750100167660ustar00rootroot00000000000000#include #include #include #include #include "common.h" #include "pl_thread.h" #define MAX_FRAME_PASSES 256 #define MAX_BLEND_PASSES 8 #define MAX_BLEND_FRAMES 8 enum { ZOOM_PAD = 0, ZOOM_CROP, ZOOM_STRETCH, ZOOM_FIT, ZOOM_RAW, ZOOM_400, ZOOM_200, ZOOM_100, ZOOM_50, ZOOM_25, ZOOM_COUNT, }; struct plplay_args { const struct pl_render_params *preset; enum pl_log_level verbosity; const char *window_impl; const char *filename; bool hwdec; }; bool parse_args(struct plplay_args *args, int argc, char *argv[]); struct plplay { struct plplay_args args; struct window *win; struct ui *ui; char cache_file[512]; // libplacebo pl_log log; pl_renderer renderer; pl_queue queue; pl_cache cache; uint64_t cache_sig; // libav* AVFormatContext *format; AVCodecContext *codec; const AVStream *stream; // points to first video stream of `format` pl_thread decoder_thread; bool decoder_thread_created; bool exit_thread; // settings / ui state pl_options opts; pl_rotation target_rot; int 
target_zoom; bool colorspace_hint; bool colorspace_hint_dynamic; bool ignore_dovi; bool toggle_fullscreen; bool advanced_scalers; bool target_override; // if false, fields below are ignored struct pl_color_repr force_repr; enum pl_color_primaries force_prim; enum pl_color_transfer force_trc; struct pl_hdr_metadata force_hdr; bool force_hdr_enable; bool fps_override; float fps; // ICC profile pl_icc_object icc; char *icc_name; bool use_icc_luma; bool force_bpc; // custom shaders const struct pl_hook **shader_hooks; char **shader_paths; size_t shader_num; size_t shader_size; // pass metadata struct pl_dispatch_info blend_info[MAX_BLEND_FRAMES][MAX_BLEND_PASSES]; struct pl_dispatch_info frame_info[MAX_FRAME_PASSES]; int num_frame_passes; int num_blend_passes[MAX_BLEND_FRAMES]; // playback statistics struct { _Atomic uint32_t decoded; uint32_t rendered; uint32_t mapped; uint32_t dropped; uint32_t missed; uint32_t stalled; double missed_ms; double stalled_ms; double current_pts; struct timing { double sum, sum2, peak; uint64_t count; } acquire, update, render, draw_ui, sleep, submit, swap, vsync_interval, pts_interval; } stats; }; void update_settings(struct plplay *p, const struct pl_frame *target); static inline void apply_csp_overrides(struct plplay *p, struct pl_color_space *csp) { if (p->force_prim) { csp->primaries = p->force_prim; csp->hdr.prim = *pl_raw_primaries_get(csp->primaries); } if (p->force_trc) csp->transfer = p->force_trc; if (p->force_hdr_enable) { struct pl_hdr_metadata fix = p->force_hdr; fix.prim = csp->hdr.prim; csp->hdr = fix; } else if (p->colorspace_hint_dynamic) { pl_color_space_nominal_luma_ex(pl_nominal_luma_params( .color = csp, .metadata = PL_HDR_METADATA_ANY, .scaling = PL_HDR_NITS, .out_min = &csp->hdr.min_luma, .out_max = &csp->hdr.max_luma, )); } } libplacebo-v7.349.0/demos/screenshots/000077500000000000000000000000001463457750100176435ustar00rootroot00000000000000libplacebo-v7.349.0/demos/screenshots/plplay1.png000066400000000000000000000616271463457750100217470ustar00rootroot00000000000000‰PNG  IHDRÁõ1fÐeXIfII* Áõ†Œ”(1 œ2ªi‡¾HHGIMP 2.10.342023:07:30 21:27:15 _|³x„iCCPICC profilexœ}‘=HÃ@Å_S¥"‡v"’¡:ÙÅŠ8Ö*¡B¨Zu0¹ô š´$).Ž‚kÁÁŪƒ‹³®®‚ øâêâ¤è"%þ/)´ˆñà¸ïî=îÞB«Ê4³/hºedRI1—_¯` !D—™YŸ“¤4<Ç×=||½‹ñ,ïsŽ!µ`2À''Xݰˆ7ˆg6­:ç}â0+Ë*ñ9ñ¤A$~äºâòç’ÃÏ ÙÌ À0« pHYs  šœtIMEçâš¶bKGDÿ‡Ì¿S8IDATxÚí½{{ÜÆ½çy^Ë>h¶ˆK_xiÞ/’(‘–¨KkhE‰c‰êH–Ʊ':”"i<:rNØŠ×Ãüq¬Iòôìî3É®½¾ϳxEûö=l€*ü (4ÐÝèn ùýþAPøáÃBUõÅ?]„ 2럨ì_€ ² C ‚@0`C†  A ¸ÖCü ‚lÙ\2çÂsV¸Ø|‚¡¢Üs¹"yæ]Éí dPq nü³éýþÅ  •‘àu×oÜ`{©¹€à5ö³ê­ùV.®‹‚Lôg5º‚&J°-S#Lºßx{É‚Ç\°×qŸ¬4‚çì_= ºgû7y—nÿŽå.K{ ŒßÉžžuMÜ`ËZ|C«ÏÁs7tBoOîy·;?7ÇZ/ŸÏ1=ü¦O;xAÌÛ¼ÿ÷òS¾à.'¸»Ê¶G|¡ü4¨Ú`¦So,⥶†!ØkEl#êP~ò›«Ñ¡²8Áµ……6/¼üãW}x‚/,,ìÞ©"îPnuð‚¯¹d‚«Áºþç€EÁ_¸°Ù›ÂÐÆ"$Áw¿‰à ¾ÇX4A‚«{W¸¶š[<±½ìÕÞ—øBž47øb;‰`«?𞜷£+ó14ɱˆð©2¾pá±èÔ]˜ y5‡ãÁs~£h²ç"ÒŠ€  A ÁÕcZÀ³8¨¬ÏÝaBT¡r·" C†@0`Á‚! A%Ø ² C ‚&¬GR Á‚!C ‚@0`ÁNÖNŠ/wò.ªÕ]Åõ‚ øÌUtæ/m˜ÁjÛæ?/&ìø‡ëêfIjåáØ; ¸^зU‚ÛþRw?X}rÒwÇ£Ü;’I·†k ÛŠ8ÓTÁÁõCÅ'¸­©‚=‚[n@°eÛÁ ßÿ~¢Ë7yò§ëb1_h‹Æ€e)‡7ÃÍ,¹½Ó{Ä—z9ÝM‘2ÅjÛ=®¥¤ ¯„ÏÁŽë¾õ¿òr-zé[^Ú#˜`>ìÓöÐd ’®ë1ê¥ÞYw}êÝpµ>+‚c•p›|ï qwǯƒ_‚ß6˜®±Û¿ÙÙæI^7:M–Zë˜>ÁhÛ#ÈÊ{kaVŽå§|©__f)¯%a-H‚oñÕÇ-±ÕNð·½]¡Ñ‚µ•ð™CfÍÔÓ‡‹OÁŸñŸ÷ÞsÀVE;xËå6`jÍ- în˜¦ßgYMÓ\õkS};Ø’kàÜÿ˜më ‚}%Ü Á¤i°å’¶Á0“ ‚£•pX;­‹Gß3‚ÿ|´QŒàë­›kQ‚íVëò‘í¼Þâ2‡&ØlµÖŽl\Q¬«„ÛdÑ–û߯;/þô¤2Ü»jýoŒûO¬§¢%Øéñ†„ŸÕ¶F¨ƒÙšÞ2Æ"@°¦>s‚)5 æ­F°APcqX‚ý@ 8Z ·!ØÚ¾¼Ïôóuž¼ÊR{Û&ßêrÍiÝU ¶ö=m;JVÇY¿È󤹽ÇR»Iû[=à{]÷vµ‹W'@p¼VªàT‚õƒ¼±ÚÒg12²ûP4¶ÃÅOä ¯†`2L¤!¬TÂ킟CÃE'¿]yvæ€`¨Ä·Û *3ÁÅÖ»^¯÷‘‚ApY ><::ZÃeÁ˜'`Á‚! 
A ‚@0àBÊ6ç¡¡dÚ xê².Tç ¡U½`à© øŽñ?Òà!œ3ÁV·+Ì,/uá5Y €ÿeÃûU9]*%ÂÃl6|EfVZ¿‰)遲9P€ ‹É˜ ßø;BtžE#Âìø;P=‘¦ ŽÝ¢’ nòhV§Hp±Äg=èG°5:Áu½Óî£ùÄ+ò›ƒ*ÓÝCÌU{6Ï¢±ð¬Ò§’àÐ¥Ú›_¸Gi &¶’Ò¡’me*f—aVS.³Íóш0ªŸxÑÃCžfòš• ÿƒ'+UO¢þ ÖÕJ°ž¨Âh³ŠŸþ¾Œà¸L?I²LIOžà5—ßêwOªæH0¦¬'˜LÊÉl«'ŠÙedƲßLy8[2”kpøuäŠùºÀÏ»ÎS/?{ì' ¿ÉVQo°l«fì^k¸KmVÄKW`îÐKÉ­ ,®¼ƒ{eq¿)Á_+w({d‚Ÿt:,û“^§8T¾êuV³ËЋÒ[¿å.†f–3"Sá×uu_½Á»tWïp‚{ÇK~ÒpùÒû ìÂ;,µ|lð­_ ’”à7鲆¿0Ž×yÒ¯ƒ+Ž$¸.¶òËòÙT¾plD v]ʰ92Á_u»Ý›Ynu-3xÔÜ]öóÕ·]¶ìàwÜÿá’ÉôãuÅÁRk013Í`ίž`Vñ2}È©yùí)»„W>÷*VÑb]sç ÃXâwÍýÓ%VOžVT‚ÿò:"#ØR‚åÈ^9Â’`MÖÉÜt5S†ç'ÜŠ0õ[y5S0¬q]á˜e‚}~Ó þ”4 B‚}UÕ¶Ç\$k„Å™!8dx¢/¶¸me2Á¿ðl+-ÅÁrv üKp«ÉeÌ8ÁF3`ÁðD fzáM‰'•”`ïÙ¥:Xž+‚—{†×0Œ {ƒ<ÿL¬éÉM—`á`)±Œ3»œ=‚ͤVÄÜ\¯Í>{–‘`kæ[‰‹V„™ÁK»Ü5òÉ{®’ž-%q¨ Ì.#–³H°Ô“››kmí1mÖæê›<±µ”DðÜúe¶þâz†VÉê'?޼¾Ëõs"Á~Y¾œ<Á´äI=9;?‚կŠVFvéx°”~ü¦ QžhÌƒà‚½Ù‚{½ÒÁ…z1ý‚0Â}8é15" ‚pm¦B×À˜é „K¨AfzBcèÎa¶ýHøb¶}mÀñŽ'‚! ¢(G` ‚¡A F r%Áç›`³»KºV–\/º]CƒlêÜÑèz7{¿Œïß{ã‚`h ‚ÓŒG&K°ÓÁРs´• ƒà)éÌ•:†àp²:™Éd@pà…&üѸ#Z,k°€ÿTLÑx†í(ÁÄJ$C‚òP6§ØNk x(µC‚Û£ì͇óÎd’L4&þhl«–l{h²Æ¦ÎÅl\ÉÔ9’$u°, 5`+¸_¦Œú?A©ÒUÂgCµ"¸?ÚK¿æ:îtæ|p[$=,; S˜ž]ñ ~r˜žE² ‚¥)ÙŠÔÀ¡•I‚ý6<£·ãUð s´n—#Ê=ÍÌÎ{GL4öMÏVó2·@S;º ‘…nøBY¿Ü‰Tò’`âQ°q‚ ýå<Á¢>®'ç÷áNN2«^±+ gÕG°Œ°68ÁŽ“…àÏÊדŸƒÜŽUÁC¼{[š½ˆüü —`¿>sF$8F w:3Ýÿ³×ÛµÆC°€¶l<ƒ·£Up>K,÷\{,󬃌žÜlÌ+á³aŸhÜðlÏv©?šŸÜ] ³·÷ëÂôìA„à0kàö³J0ÙŠ€H­Ôd’îŸ ‚Û‘*xЧÊn8ø£¥Ž‡»G‘o…Nפâ}<¸tCƒ?˜;ò©òØžŸëgùÐàæÚ Ïd,A0ÁRïz½ÞGÁe%øðèèh ÁÁ¥nE@ ÏnÔ!̶Á†fH ÁР‚û*ÜWK,8`ûԾý ¾€Ëý%˜L[Ýn'ýÒesF£jŽj’vGëR·(3@³|ûX¤NO3_ÕÛÇçáa ¦~jVçooS/Ý0oütúlÕu d›K‡AbõqAÞÖlIyë·?–WŽÿú&óE•[•Wó,ÁgÀx0Œ±~QõS{2‚ûëÖ?tKÝš–öbœðUÚГæYdÍã7ç©q~<ü<úؾJ«ú©àlšO¸L—ÜûY9Çמͳ,ÈP®Áá×Z‚Ù ôMœàC^ÔŠî¶ëòKneí‘õ¶\yë?Iþõ!ýË™àÐO-$˜š¤ÝvgZg4âÂ8£ýä±X~j,yCü6’Õï­ÝúN&)Á—Ùš€/bûe!†mýܽs—©ðëºú:Øz#Ø8^ç]›û ʵKôxäV¿KøÓX¿|\šZxîÂññ5Ñ3ŽWç£Ýʰ92Á¡ŸZHð«ÂZMo¡¬ÉÊþ`Z ²ÆÍ!Ìî†iZ™ÛÁOb³²ˆ´º,iO©ÌùM Xiü ‚ÝmƒéÇåŠ.œÎFE×d6Œ·ssk.KKny† íÓÓ£àŒŒùSSÓá žÏ¥ø©…»â&žN°£ÉªÞÙ5ö&¼é°;Á¤u#›)“&Øçw ‚IÓ@mEè¶šÛõkÁVÕ2¶"4}XÿlÃc"øÜZ­µàÐ…d­­°…7úl·Z—ì^÷Jèíx±Õº¹6y‚¿|ÔäŠu~šÍë˱­*ím/çšÛò¶2ÊG°Ñ^µ}†ÇD0ñSK!8Í*`¦žß²Áÿ älÔ˜:ÁKÞ¯JüŠ?ÿe°TnU1z«s>Á†÷g¥ìV I°aö.ÌM‘à{ïÇJ°hp O0)ËDÍÌá[Áßd¹íþ=Hø[eiEâ¾ü4ùÁhEŒÞ““~j•ˆŸšÏšÖ¸°?55kœ`Ë;f`¢VÛæéÈ£es{— ‰`ÿ{Û|ÇÞ®üäÄGÓíÉ­ïî1­«S•/Û»¸n(Y÷þó|1ÿoY¿LÖ_aq_þÙ;µÍ~=¹ÑGÓ耯n<8¡{¦¹Œ'÷ä”Qf†¥f<˜T#ÖLƃӾ36æ'‰£i:‚õãÁ~OíAŸN_ÉÆƒeq_úçðMŸÑ´‘Ÿh䯮Ïþ«•óÉO4 T)O4æApÞì½ÒÁ…}=ÊB°S(‚zL¦åahP€‹AðÒÓ91 ÂùŒ™ž@¸Üƒà)tç0Û~$|1Û¾‚ã O C øivFìÉBÓ3ýt5«¥d &¹­º–ûÿ -]Õu–Њø ­ g2xLŒÈÁ“÷G£ÀО,´Rs´öe[î“%«oz‚G&¸¯¸JJpÒ™ ^ÓÆ¯ ؽĭÎ~T§nüÊكƶaËýêJõþÇlO²R°·ò,ª®3§ì'IN»Tá‡âd!Ø¢Co‹Ú+!ÁíXÅUV‚Î$'‚á‘Yyl¶ZkEmTB‚ýªëÌ)?Á g’ÁÞÍÞÒìÙ“ T³ýõ–-1Z‚G$¸­¸JK°þLò$ø]+Â#t@‚cYAððóªëÌ™‚õg’ ÁéÙvd48´' ­Ô­}!x}_“@p;Rq•—`í™äBpãÁ²Éìj³â‰Æ³Îf婲îL² •—àv{VÖ ž}‚ug7ÜA0Á ƒ` ‚A0Áç‡`̶Çl{ ‚A0)Á†ÎYK­ `ŒÈ!– Ÿ;‚ïô7K{ÑíŽhòp§3ÚöV¡Ùâç? 
@°éM€¯ðJß4½î½qG$8å5Ý7ç¬FxP»³P‚¨×DHí•NçÆ ÇHð`ßÏn¹ã½½#ÝR·V²¨Ë×ûù,K¼Ó‰àºmÛ+'Î+ê‹'5RœE£ù¦³ŽO-Å7M:œÙ¾bK_4ËwG‹þ$ù¦‰|ÒM9@ï‘0V£nlî¦p[3C·5z€0«3ÕoßúQ'Ó\ïܵíæI öÕh¾ibÛ w_%†$Âá,p<±£+7J9™Ô¦ ¾i±ÙwÊzašL© ÝÖØVš‰x´ôäm‘.´ÇK+áa|Ó(Á¡oZèpfº·ˆoZHpMø¢i Öû¦… nlÊzŸŠ~¥EÜØÜËÂmÍZг-Šdqî˜>Áì4höJ¸="ÁáõÎT¿ˆ`n¦¶ó‹P÷õMstnlÊÂv°Å×wÞGÛÁ‰†A•7òp]>³85\hX 囦'8¼û%,ï÷éÓ–iÜ-`jÔ:ÁèÉàa+ávŽK‡3=Áµ£•V«uc‚¥›žàÝk|õ‹²l¶,<Á¦ÎglX­>ü}ÄÁûNïÈ9}(ÎFLãp–è›–B°åM¤árTÇ+n3ñ’чËD%<ªošt@óæÝ `u³ˆ?™¸$dý–ûä 4%› ÁVã‹àË1¸Dß´¬‡†qŒàïxÚŒxÇI›9J°.\,ëóLpî¾iÁüBzëŒÀ‚­¼)Kø¦9ÊD`áD¶å~uEdMàʃà7ÍßÞ š©!‹gní–è›'Ø;CÓ;w–µÙ徬'Ä0Ž´ƒï8i3wÿcÓ²ÍÛ¿sÂåÎŽy:•pn¾i “{Ò oÑ„`ÙøHì1‡`'¼É'ž‚Æ7M71_=¿’ ã‚õ6snÍŠûÚ‚àH%ÜΑ`óhsíÈŽ,À–»×¸gØ‹(ÁÒ‰¬Sß4MÜj)g, ãô+6s’`m¸Î=Á£û¦ÙÖÓG!Á»²,ä«foEoFœÆ”^Šj'b[±:زÆMðb/àˆ œê›–Ööbðûb§'˜ZªØ½†ÝkÆ:u X©„GñMkDüJÙ’ì¾O2ÓoÕǔ̙ÔX„ެ$qS}ÑÒ öœé‰§î'VÂCú¦m¹7„»Wè€æ¼újÛ4·¿ˆ'1«?¨—„¬/ÁQ‡³m§oZV‚I¸¸ÅS]!Xµ™{Õ† à›¦ ‹†¾iQCJ¿ÖÒ j»&tÈ’àhYœâ·"FôMãžaÑmèî¥\j/ö=Z­µ#ÛIö –NcÓ#ø^¬,Û­Öå#ÛI:qJ°Ük*Á±Èf#˜”ÅÒY-KÝ7-Jp‚ã µS î-[I¦d±¡*+\ËΓ`jk#X1`¦ži%8%x5rbÉÇÊ’`]Y´ˆ—¥ì›6Á W‡[߬*Áñ«—kOΉۚiÏ&ÁýN-âþÕmËòäl¢'¬œ¸—üùzºošž`²Õ«?ËÈjË¢=)K$Èû; §Ð•p;O‚µ=¹„aQý`«®'§Œ…Ž›`­ \Òxp¤ˆîû á!Áúñà4ß´¤¾€Ü*ŒlRY´ ãÁåx&çWÂðMióœ¯o¬™2Œè¿Èè*:ÁðMàœï±Å#X¶“JwAApFåÚÏ)Á²¯ ‚g•༾pÐcÊåùlY xÆ ÎKKGL¹|ŸswË9ÇÁ.Ù C˜m‚! Aù C ­­ `Œ¦a4 C ¸Ÿôs6›eðMë« Öoë]{¶ÕµF¾dµ]~‚}ß´FN¦ZvgA·¸3A‚ÍÆ ÂR­ßìÆåŽ-’†_ÀƒÍ–ƽbÄú (ÖƒDø>7ÛÔöÏ×ÖgM¾2kgµü'M@.íM§¥3e½êK!˜¼¤o)Ÿ—_)—uK;oZ«[ÿˆÌ6%¯ók²fÑØÍ°&Ap=tš‚_]àŠ@°}²hé 6O6½Èog)íLœ‡ošzbf…æ_ì.h&Ú‡yù,+ôM“6_Žô ³„yW‚X޾i¦ýt' ÆòöiZôlT+1bøœ=Å0ެõŽ3“å0yrÂsx‘µüÿ&Ë>:9YSI“€š'¦·KK- -6ÃòÛ í×8|¡ûo'5“àï'ú¬äz½Ò‹à8Ü·§ÿâ{¾iž¨˜˜dö1ûõg/Ñ>¬ÉóÕ¬Ð7úrÓhÙVÔñ$f–û<9'â‘àUFf™'FÏF™›FÖzÇ‘zÏKÊYÅâ½y·¦Ì:¦û¥;rNÕ²Ðb+nl¡ÆÝ篭ûæÚ¬äz½Ò‹àÅ~±@•ðp¾iO:ŽgL…î^ÿµÑhv.6;KgÖt7›6 Ö°‚ˆ™_¯97oø}ªÚ ApÔì§É¬õkóÏ&èödôlˆa!Xo¡–HpgÉJ$ØjìwüŽ–û ÑØê,5wöéˆó›së» Ó·å.ë)ÁÚ¬äz½†¡8è›öU·ÛõøC#°ÐÝëãÿ¶áš:û°&G†7¦|‡…Å7Íßތ܂“|rõMßÿØ´ló6?­_[«ËÎFœJ`@FÏF-¢8½…šBðÿØ–±¬;á è–_E—ºÝ‹ÜtޱèÃÂ$×)µÌ׳­ü Gn8´lÅB<¢omE„ æ„«LJ- ¦}Ÿ4‚óëÉ)ýÞ î²u#YÖÝP‚•û}tT^ûlh‚åVe'x4ß4ÁÒÝK%8nV‚õ~m‹­ÖÍ5¯ ! Ȳ¬õŽ“ÔXG;ÞúÝ#ËËêÀgk5>ÛÒâYApN¾i„`yš‘V„µK$ØZéÉÉ…©çæ›¶æÍ±´­§ØÏ^Ãî5Ããê¬Ä^|¤ejE8ï8I8€—ý· ‚l¸Ã¼Ø›‚óñMËFp#[+ÂïddlçÖ“óg•àU¦e%¸áf%˜R!8Ì:<Áº—Rœo!Xº{Q‚uöaIïoלÖî¾™@0±Ë™àõ‹aU¯²¨_›gE¦©?%kâôjNm{?`sûʪžà û—·)ÁäÄù€©žD°6+‰,Ù«JpëÊv1ŒPrñMKëÉéǃµ§yWŽk<ØQž*k¿]ž¸3 S `Û²Að°ÊÏ^lL[G‚§pÂ.é C˜m‚! Aù C x˜Êœ‡†’iƒà©ËºPƒ†Võ‚‚§*à;:ÄüH#@€ „ð N5åêv2fuœ;ópåtI³Ô>5ún•¶~N%ØI\¢s ³ˆÍWª)×ßÞfÌê8—g`ò4ËBþ*ÇŽf« Çý M[_XÍ[šxh`­Ù o3×”kl¸d‚›®û ÆŸ;^Û üûý¶@ð4ÔHLT{6_­VžUúTÒ£lmœœ´¹•—â´ånÚQ“4µ"¦\’`õ-–aªY™©8|•½¼òìÙÍ*»b•*—¼ÊÒ~ƒB,3ª™fUQÕ¿Èt+ƒ¤Åz#¶¾Ð¯¹¼¨–[ÍÐŒž`9ëWqÚ ­;ˆ)F& Öùw)¾e®ëgU&§ `NZ”Ê58üšÞ5ÛÁ{éU= ˜ë/³ën‰‹V×,D>•t«C/} ¬_ó–=ˆîµ\}¨tRs$˜:m¹—YÚkIHS.E5b+& &Yåzê[f5¾K³2ÕZ­2~]WKðÖ‡±®ÞñYÛai¯%Qq$Á¯XÖ²ßC›¿&–[Çë<ùÓ²~Í­[ÁVWoðõŸ`Ö캔ast‚ïaª dØ–S¹M_Þ?IlÅÂvp8R®W|ËDVbÒ¿­Ï$›Áœ_•à_Æ’Gð§üÏ¿‰¶ƒ+’à?]bkN+Aë9 Xne¸Þ¨Û÷ÊzR©Ñ˜`Vò8Á”áùÑ 7ô4‚/1Ò4ЬZžš³I°Ï¯J°· Á†Òÿ;g‡ J°ï%Öj]ßI¯ƒ[-á%&ÈVÖKß²#XðKžo/6¹6@pP\£™@°`xT‚½¤ã©N'q‚£¦M¾­Øì,°|ö '÷äÊHp4kÉ 6“Z 8;Á¢aއ`s{t%,ȦëCß2-ÁªµZÙFÓ"=¹‚õ‹{{{›ƒ¼¾Ë¶Ùû9‘àú&_ÿe¡ ž[¿ÌÊxqÝèÓ“³ÇTóOmVÎVÒxp¦žœâ[¦!¸ŒãÁô‰†2š–@ðÜÜc9r›•`e¡a½Î,Ìj¸‡áVÓ"ø1ë3‰y?'JpÌç+%œÞ% £„³"ÎVñM“"áT~ÄЧ·Uu“»±yu°õè;]‡) ذnß°ï„Ýò< ÖØ¢UçÁÜð;Jð0¾i‰F_,¯ñüzùÜ`: þeCéD,œÎá<ƒøÿf–ÍØ þÓééé/¥aó±¦8ÁŸ¯>ádgkÔ½Àè{OF_׸ûÖž!|¸ö.¯W’¾&<°Hôý­¶–¤Ñ×ú…ષSáœ%Î,ÌZ‚cæ^ý{rvß4%0ñÿ9‘àX8}ß´ÖVp‘hYS f[ùÇh± Øþ‚KÛ¼´nä¹°4­½½ÿõožMœŠDr<|¢þqépmèÕ6ÀT!65~Ð ‹àè³Ñ¾£iÕþáÔ&{O.Né›.ØSå°4aç>á©òP¾i9}%„ü(îó59ß´‘ÔÔ^²< Î)ž#¼Ù‚Tÿ7{Ц½ ;ºÈX¿¸R¸xŽðv%4âåz¿ýEÿ:cu‚o¸ƒà¹¡ßpŸšŒØ5«•Bª%\´+°ôhY߯,TOîE·ë˜Á«BO‚_}4n?þ¯ 6ÎÚ†kúÕu#èôµ‚eVKY±r6—º~ƒiÕߊ´ƒEVÓõFÕ~¸.î ×Á>Á¯_czô>Ò¸­ó¥?ÞìC°¦‘ü®^_¨­¼óöÍ·ß¶Š\s÷ÄØVµZ³ècL§b,"W‚OÆ;@ð`»a{€`éÝäÝÁfí» Ý é@›:‚]q/k­7[­ë;9l­ùÃÍG+­Vë†àSïí–‚eýî%¾ð߇'øî:ÛþV‚éÖ+Fkmíöf¡ £''ùl­ôLÒUl€àÁnÖ?ýdáõçÞø@]Á’°6ÁºSýËx“XGð†? 
!ö}\o˜`1‘7Á½£ ‚‡"ØÌ'øWïs!X®ÏH°?,±@²“`kûê>×Î"%xi—/ûƒJ°ŸuoÛT–Y‚÷¿{Ë›â<,Íí=–ÞU²ZÛ—yÎm ‚o]»víê.Ãlå2K]»¼¡`ÙØ½Â—þÌG v?`©‡Q‚ë»WùfI«[“½É]Vñ6xÂ/KIž*?‰‚•‘]I0ä•XÆÆƒåbY•ñ`¬>Uǃµ=9µO¶ö¹žÆÆƒ•õšžœ²W2à¬}Â]¼žÜhŠÏŸÆJð9Á ƒà2<Ø ›»ñ/[€U«é¼á>›Êþ†û`›GL6XÕ ³ŒòSÖYFP®ÂLϼ”y¦'”3˜mÙö%oHÀñŽ'ä€`èœ »P¸¯–Ø}–ÍpÀ.µ6⯔ú+ˆ8¾Sê/Ád‰øíãÌǶOéwž®|~ÞB>µp>?=5fá¡¿ˆ8oqùAÙþ ó¡/Ó8F¿DœªŠ9ˆñ³EÑ‹€~(Ь¤†|záüð‹¢~×ÏÈÚ>ýUÚžü2ø˜à„OY]‘o€{¢_²öÔï3åi_¥8›E%¸™!´ ñÌ@pÒAíÙ|•ëîáy Ø?Û…gj[ñ"ð‰8……gÿó _->íp•àßT«Ñ’½|í…c.-žNÚ‰øÔì! ¹Q­ŠKk6ÄO?W5,£Ìk(—?üߢYý¤OJp¼oeÚçhT+F<yÊ?[KSEVª×‚ú£ºÛŸàê´Â)²H‚ B?°U²t Mˆê'ñ¥/?ÍÞîO°ú…óïc!÷Úb®û&Lz•ÿ¯ØÏ¿ÏíºîAäÌ’†š5ñn-“þ-¥îÁKÈdÅ/‹æF4!‚ƒr±ÄV¬ñµR£ÚS §h›4•¬¼Ø†¯®Ì)ᜬô_ÝÖ‹g‚M%à®KZß¿:Ãz„üê ÞþþŒ…ü/–eo[ÖÆ±a;lᲟ{þZPb™U)T˜•$_¾áY¯Þñ ñO³@Çë?ñpÒVF*ÁnØ>›|+ÂÒŽÂDO'ÏÌ‹€GCn7›c#¸Õä2 >òÖÏMˆ`£Ù—àn3ýüeÛˆG\Ä|~â᜛«5›×—³,ÃYt‚IõY·–8¶Ÿˆ’pNXÚІeìÏŸh(Ã?êPããØ& ¹òH¶9Äx0ý‡¬hǃÇç°\LFª“§ßÖg-Êxpbhå#€~ñô©2‚ï§Ϫ”øùâ…3h:”8ž#½Ù‚RÚ›= x˜xŽôv%,â g© Î÷ ÷p¿|P©iØÄ€)œ%&¸:®YFدý^ píjù㉙ž“8™s<1Û~ìáÆlû±ÆŽ'p<™}Ç‚o`Á†  A ‚@0‚‰îtßæR·»šÿ^!Ìuæ*: › .Íg;/Æ—ÕR>ï¹úX~ª>‰ñC\$h8‚Û*Áí`ñ–÷×Ãl»ï¥d°Ò† ¡[gº*˜\·m{åÄÁPÁ nëª`F0g×>Yd¿,ƲíO{fIŸiÛW@ð#ž¶‚ ~VËtL±!X®§Iý^M;<.‚“{rgš*8 ØqNNؾjÑ_îºo}¼ü |Øzaº'Ú¯>sžÈ­B‚{aÛ„$µ{½å¥¯ãêA)·5U0%øƒÛ¼SwÍk)X/k·øÒã–Çâ§A·¯Ñi²ÄZ‡Uœ¯zU±!¸&Ö“¬Ú½šmžü Ci‡•pXS‚_½0™:ïýOkÞøØוv0ÛŠe]å•è«o»ìçÁï"›b}p€®•°WåîGp;^+û+³,›¬¡ëÉɇ8€‚¡Q •0©‚‚ÿØâZÈDðº—×L ¸v´ÂÖÞÁP¾·cU°‹°ž> X´¬l­o«¤:Ø_ß ö²‚`hD‚ýJ˜VÁ”5ŸÅ{ï3Üp3ÜpA0”ÁíhÌ»±¿¿¿·ÍÚK»,µ¿»šLðúEžƒeµ¶¯Š­´ûëplIVí^­íË|§?ƒ`(Á¼Vª`õ©2Öì´´ãÁ}{rÊx°v¯†²ÜŽTÁ…ZP&‚³3C%&¸ÝÁP™ . ¸§£iáêA%%xéˆÉÆÅƒÌ“ƒ@0`ÁÁ†  A ¸¨j,¯@#k¹‚§¤õ5hdm쬃àé¼¹å¢ÍõŒ?Ò Ý„Øzyi§1f‚ÍîBøÇ‹n;L˳CÐÑ£écyD‚}K­‡¥¿Þ%®ª÷Þ¸ ˜ie}Œ—´¶ÈUϘ»ž}Çumrïæt ^_‘`âÿN°ÓÁc'x×»¿É–¹w7ó~IÖ¶*Áu[šúàB¼Àî‰ëÏë 8#Á’]ËV,ý¤é„^“„`ë<›NŽ™`Îî¢Û`?½žr#LsgºFÀâãFø‡PjÖ0)wßly[5j^{£¡9@Q ö½§¤ƒ¥ŸT¼& Á½ìîÙ xh‚›<ö-1ߌ'=焦¥·4XjÖ0ɲ¾õ¡õ¯³ŸÕ÷ǹ™x€‚ü¤Óéìx¿¥–ARq°”S[J<6‚¯ßáºC~Ï÷“‹Œ°ú£]žz¸æöÛx·/=k˜¬/þAp[d ¶ú‰¼ôhy ~åtþªÛíÞô&Î=–0³ŒºF˜¢æ• 8‚kµÚÊ;FÍëß³Tí¯MÜg©€µ=¾ôÇ›I-Úô¬t«§‚`o„ð‡›Jrí+K³4í`Å{J˜Y&Üç2ñÚ ÛÑû½›HpzÖÌ{­ˆZ Þ½Æý)_ô!X:X‚àÜ ÞZ[û‹Oð©÷ö€×®¯­mÞ­{¬ýÒ[ZO$85ë·ÖÖno–à0™ØŠpB[Jœ{;ø÷×ÙÍûõçÞЂh…Ö{«u X½Ño\¬Ö^e™I‚hEŒ·'çü«÷rOª7ùÄ‘Ý~Y#8).ÁÜÁÒ³¥$ûf–ð~Ê“ÛêhšjK ‚ó'¸¶»¿±°rùÓeÔÖ5‘lì^áÉ]¯^ÜÚãiu¨ =+Ý*Np°ÕÏ<éíêên±Ç""_ˆõä0< ‚ž?§ãÁ~§®µä]X‹×fÈJ’q‚Éxð@O¸§D0T4‚‹¢&óÊÁ Á ø|i–Þp×é÷­7&r¬‘ßp‡†Ð¬Ï2Z¹ËԜ̱²Î2‚rfzæ¥Ì3=¡œÆl{̶/yCŽ'p< C |<ÊQ ƒ`hP‚ƒÜc ‚Að9%øNgð£^êvWóß«ÕívF(v«k‡àƒß§$ãÔì2ÅKhvë~H¿pÖ»ã›0 ØßeÒ—;Ñ%Í!LóÒ¯if‚Í$w´K‡ñeµVË/Ù'1~8Á¿½ÍþO$`Õƒ™m2‚âzÚð‚·¬íŽ§¶ÕaŠ!`/øÖ?œ½~S^¬Æ0³ äVªõRi>¬1WÑúšf&Ø“=ã;é½£´È¤<œž¼|vé§C°»üur’HpjHå§층aJ*¶Ê…à~Ñúš@0÷G[9±@0žÁg®ÔÙ0{³‹O-bŠæ%­àVh‡þh½GÒ Pú£Y¦cJ«´`âŸf¥ìÕ$¾láÿ]&Ï–d•{åG€ÅðÛ «ÿ?jG5iž±|rÅ;1˾w+RÛ~ú^98ŸÿñHã3gÙGwƒäætZ'¶¿·ÆŠD×ÝÔø3š"º¦ÎóŽl¥ØßiCÝëO×5—7ܰTF_-‹©¹¦ŽÓ nKpPa(þhoƒR2®¦å|¸WŸ9OäV!Ád¾Ij÷zËKGÿµIYHf•{egЊ€d ÛIü÷‘s:>C?êÊ„íˆåœû>(Ûnß6œ¦bUöª„Tì•V"ºÊ4e‰]ÞxH#{DßKúTR=’•”…î•ÞWÏâUðPp›ú£}!X#þh½OƒnñG{Õ묊­Á5±žZ©éöjv¶yò§ëѾ†,‹<[’•ìuË}r; Üw<¯·~1Èê>h4¶:KÆýql6~½æÜ¼áuÃÓð£û¹ïݵÄÉlüýS]?º?ÁŠ;{™ÛI:šèñè†'¯\^¹¹ ! k`môÙ•¾!&þz2úÁV~Yè^)Áíx<Á¯^S4J õ…6âöê[>Ø#F‰$Á¦X 0ŠïU5ž¯Aÿ²ë¹å~u%v€X;˜gå-ToVµ®…šçhÚ‹›æooFNC™ñMîUG·Êw:m;˜ÕüQ“:å\m! 
¯ ©Ó/«NT§Ñ7ÝKaY´ŠTÂgΈ»ôŽ”Bpd.¾£kE¸„`ÇŒ`§YÔë™ÐšÁÄ}@žF®“{¿–`Ò„€`WÍœ~YÓ vÕ²h{çíX<Áô<Ï2,ýÑ´׎VØÚô•š`r¹LÜé4Y­£¾úß%Xî5`j—Bð/¼¬V_‚ýJøÌž`Ûzú((eekE8?š–à`ÂJ-+ÁJ¸ËK°Ö°+VDRMV}œ2´"Ä^Ó ¦×4…àUAU?‚ÛÑ*xP‚㧘…à†›™`a¥‚s#¸áŽƒà†›™à†›àú·"¼JølØ'7„Ó™oж»šLðúÅÀJø£i ö×?২Z©Å÷jm_æ;ýY%84h#[‘¬d¯ ©mó¼õéLNC9£‚Íí½XÓ@R–•_´HV'½¼r+åB$„4¾Wõšê–ѶÚvRnGªà!Ÿ*kÆ`£•ºñà¾=9ÕJM·×äñàHO.q¤)ãÁZ‚eôÕñà>gxv6äSå¢(ÛÃÊA³NXù½]™ô}vmÛm§±Á:„Ùö ÁÐ C ‚@0`ãK0Ð9$øþ]ú×0Îg Òn¥X¯©e‰k‚&iPÑ ® G5å ࡜ψë5áVŠõšZ–¸&hÐàžv6ò¨Ѹ ƒàŒ/žÔ<¡{·@0TF‚ ·Á¦0ìÒ8ŸQíÀZºh™‰Ökd#³ÁÒ¦ëÕÖú€ÉøÅ¶lp ‚ÃÊLäªs>£u¬l{D]´tÖkt6Ùb‚ÃYynÄü+ru-tŽ 6¿ÆÌœ`³³áÍÏÖû©Iù†Z~ÿï-ua‹[¯Ã-Å$MC0ui{1"ãIÅ›‹í:ð,ƒÎyOîE·ë˜¦ß“3k]aÊ­ÃL³¼M"5·´ñRÛÁrF/5IÓL¬×¨ƒ´y ÊÂýñYVKx–Açœ`¦S1A¦Îö%˜zqRÿ 2M[G°~"º®{F vC ÑU’5Ü+tî n¶Z×wœWÿíèß2¼ØjÝ\SY#6^z‚“´ìK›7…à{Ï@0æc 4Üð§^ïDÎ0L!˜7?>²“#½q•“h•‘@°­î•ؼ)Þ\ÿ½×³Ð ÁЧågtòw:ÁQ›®œŽšiàe=}ˆë‚}k¬ýýE,b{o7BðSá‡Eåmãû©ýYØt)ÎhÒzÍѺ‹i ޹´Ý ÈȤ7Wkw ´ƒ£O•c>’ºñ`â–ðUb½–¹'si»‘a<÷veêgœr} ` ` `L¤ò%ØJ{Krw Á‡ÆO0`Á‚! A ‚@0`C†  àN3 XGæhp à$‚Ï\EâÛâfCz’e{ÕLLŽÑà‚²ÜV ß'3…†&‚&ÑŠ8ÓUÁŒàºmÛ+'† Np[W w†“E‹ÚCRÓHéiÚÔt’%·=‚õ—ÒÕ‚òêÉiª`êA¦k­(•q¥Ô\öôf—4<ÁmML í!õV”[î¢0•ôý%¯øu°Æà’®‡ œ+á° VÜz¤=¤ÞŠ’¸3¨F qc9@ã ¸¯‚‚¥=¤ÞŠCS&XT¤ V–öz+J M›àv¬ –N‘O{È>>{ š"Á~%L«à"A04M‚ÛÑ*˜±vC8E†öz+J‚¥ï/ù ‘`u=åE0¯„•*XäMv£+ïÕ™ c< ÁíH´þuö³úþ87P0‚Ÿt:à¨m%O[JB°ïPé™Y‚àñ|ýïÔò{¾Ÿ\d„ÕíòÔÃ5°ßÆ»}éYÃd}ñ¿‚Û"k°ÕOŒà¥GËõ+§CðWÝn÷¦G01ý³„™¥b! f57[¿z^çsŽ›àZ­¶òŽQóú÷,U{ÄkÓ÷Y*`m/ýñfR‹6=+Ýê© Ø!üᦒ\{ÇÊÒ,M;˜ì3Ë$‚Ýóìk2©±ˆ×n؈ÞïÝD‚Ó³f&ØkEÔÊHðî5n[ù¢ÁëžÛ%Zã xkmí/>Á§ÞÛ^C¸¾¶¶y·î±öKoi=‘àÔ¬ÜZ[»½YB‚µþj+Âá¾—¨ƒÇÔþýÇuvó~ý¹7´ Z¡õÞj]VoôëŸu‚DYf’àZãíÉùÿê½\Ç“êM>qd·_ÖÁÆEŠK0w°Üß7‚}3Ë?x?åÉmu4Íw¨ô|/AðX®íîo,¬\¾Æt™µuM$»Wxr׫·öxZ*HÏJ·Šlõ3Oz»ºº[ì±Ù'KèÉav,‘µÝ8 Á®Kcnf çË7"†s/{ÇKA8ÑÀp…‘'ûY/–’à¯X•(Á±xf x^ x"Á"ÛËçÓñ7œà¿¼f þeû¾LK~ih;øq@°ëý| v^ÁNÙf•rÕºZóùláôÛÁò[ÚËoOY$®|Îc´Íƒøã.²UH0Y_¶&„ñòR¼ä±x@°ðt‚]Ñ °ŒÝË2ìme”Ÿà0æóÙÂI –:#ª8P3»ëK‹gf‚EÀÓ þ×&WM!x¾½È–]˜àZ³y}y1ŸÏ΂¼ÈÎ)™e‚ï¶Ûí§ng£o<Ç@°ÇŠ¡¬V¬Ù fzþËò4$ÆL°# ?0³Kð2Ÿ2ÚûîÿÑ•`sÀV„Œc>—)ú©­3[8ûüý˜Ù%Xõcñh4-ÖõØ~äÞnøá]¿¸···ÉRõM–ØÛZRÛÁë—ÙÂýÒ´¶xC!x}—/[¿ „¼Ê—í]\/GS˜•–…Cœ˜¾ça÷ g ÁaŒH`H¸ÂÈÓÑ4ù!X‰ç€O4"Ã?d—ZÆÇƒµ=¹¹f˜|¬6Âú… — ÝæjF^•ÑŸjÿpö'X?fNëØ™Öç O•Õ!x(MÊü<™–Ðöß0²ošox´ð¬r® ÍÒ*Ïš¡]–Vóýmè~sÀm£îÝ0éñÁ7Np!–]^ò“o‚¤XV‘?Ke†–zŸ ÍÒ*g™ZnÕä]yóÊãfi2I–UÙæWÄ#K3bHß´„)ZÄBíÐÕ͘-÷ĭر:¹-ªì¾iÄ…’š¥‘È’p–iáð¡ÍÏQ|Ó”:XZvùî]ŸyWœ8Áå2CËГ÷Ž™¢ÿ˜Ù}Ó(ÁÒ,F–†³ÚÐÍ.ÁY<7Óâ9ŠoÚšËÝÐêžÁ}E˜¥¥º”Î -kOîå·§L‹F4âY}Ó¼ñ0Ä,FvvL 2ü'Ï£+¹ú¦ÅŒ?Ý,—Î -3ÁŸêº°Ù}Ó¼ý1æáNeçà±û¦­¹­fó¿yoísG¯“l—Ì -‚³¸Ny1²›Íw Á$² x.gß4ï¾{WÌ¢#Åû¨Lfh“"x¹çÇä™BðLZIM˜`³ïm/fÙ•-ä3ýAZý}Ó|C9œ½1¢ošcýâR`Ùõ%3IjÝ»Êe†–:š¥)¶e =þ¾i¾¡Üf¬ ç¹ Øg& J®¾ibø‡U±ïhDº ~˜…ŠÂ¥_‰?UÐ7Íßzr’™©ñ„oÚxß´qǾilrÀ7m ñ„oÚ#ŽpŽ#žðM›xÀΜÆ,£IáÌ7ž˜é9é€#œ9dzíÇn̶k<áxÇø¦A|Ó C C ‚@0`CÐì|§3Úñ­î*.4‚ÏÚÚ-Ì—x²wéPCe=…Ú†)ÓvgÁ®«exË›úò°Ï>oý#å n ‡&A°–á-·nÛöʉ‚¡â¬axËåìÚ'‹ì—eÛd–´ƒ¿n}Ë“¦Xê%Lö;H²=oЬá2%«.† ÑŽ1ìì8''^ž·Jö÷Á^ò:OöDƒƒmÕ’m%ën°=ÉšÖL AŽ0L ¶_pÛëày̓[ßñ$«D&K¬uL¾Õ“ƒ Éö|Ydµ‚•¬‹ ™‚r XaX©ƒ'Á÷?6M3Úö³6\›'¿ºÂÒ]+Ò¶‚•¬2 A9LÖLz‚-mOC!ØlµÖŽl •¤a[O©u0ƒ¸·l`¨=9%Øqî½ÁPáŽ¦ÝØßßßÛ6‚×÷¹v½—jÛ<]gxn_Y)ÁæöÏ«Ë ‚¡|Ö<Ѡõ‚àWÞÒEQ±êƃÉ>žˆñ`Ip,+†ò 8áí*Áà*3Áà*7Á‚!C†@0`Á‚! 
A ‚@0‚A0‚!C†@ðð[Ýî°VÂ_î‰Onâ‚@ùœü†»I„Á¿½² ?\7wúý‹Ô³ÿ75àX‚ûÍ2jñ)t±‰˜OF&¸¯R=]iÑaï ‚ûÌô||Û£Þ»‚!ôä ('ý' •`C¥CåCåCåCåCåCåCåCe—G0•X *·þ'}3d¹ôNIEND®B`‚libplacebo-v7.349.0/demos/screenshots/plplay2.png000066400000000000000000000523441463457750100217440ustar00rootroot00000000000000‰PNG  IHDRÁõ1fÐeXIfII* Áõ†Œ”(1 œ2ªi‡¾HHGIMP 2.10.342023:07:30 21:27:38  é´¨„iCCPICC profilexœ}‘=HÃ@Å_S¥"‡v"’¡:ÙÅŠ8Ö*¡B¨Zu0¹ô š´$).Ž‚kÁÁŪƒ‹³®®‚ øâêâ¤è"%þ/)´ˆñà¸ïî=îÞB«Ê4³/hºedRI1—_¯` !D—™YŸ“¤4<Ç×=||½‹ñ,ïsŽ!µ`2À''Xݰˆ7ˆg6­:ç}â0+Ë*ñ9ñ¤A$~äºâòç’ÃÏ ÙÌ Ï(å¸ pHYs  šœtIMEç& (.hbKGDÿ‡Ì¿D…IDATxÚíÙ{ÛF‚íçá"\@J¢$j£-YVdÙ²|·ãŒ™‘—›In{lÇvg2J:¢:7ã<Œ=3ýñÎKr¿äë$ü‹S –* @‚$à9R,,:ø¨* ÿáeYÿ  ¬<AY†@0`Á†  A ‚ ¸Ö+Á?(eë*™%wAIwk@0”6‚û&•§Î¼ép{ýGX¥—`ãŸ5öû×A0”E‚Û&o\'³RÉ"x•ü,³O~r—­EV%ñgÙ»‚%˜cKd¸Eó-#˜ß³%k-ª“U!(‚K•»¬îÙÕ´KwõDzCʬqýg§§§ï;dY‹®È×ú”\z°¦êBP¼=¹ç½Þ|©DZ/Ÿ—ˆ¼Ðn:ÓßÚÿ{ù ]p›Ü[!ëÃ_(:•,•LtÎÆ"^šn{`‚Y+b®Cщ7W½Ce~‚kÍæ{ës/¿iRÕÇ'x®ÙìÜ*Ãw(²kp“«LpÙúìŸÿÙb‘/“๹õ~ CS‹p¾ýv"‚çæþ†±(A‚Ëk;W¨6Œ¹Æ-l.±«÷Úeºµ5º¸D0_ëkÚ“cº²6‹¡$Ç"ܧÊÂxðÜÜC»S7Wry»ãÁ%ÞèF#J–àH$´" C†@pHõ‰šxe•àÒ-"¸ e»A ‚@0‚!C†  `J)ÁUʪ@0‚!(a:Á†  A Á‚!C†@p°nu>øj+êCÕ{+8_п9R­`hV¡R¡?/lø—kãTMw¨uvGvØmâ|A#lš*†Í«VáÙ³ž„àþ±»»Î46Á*†)Áõg†2A°ŸaJpË´Ö+ë†Ï¿?‘ßò5Züíš½˜.¬Ø]—¶¦¹«éÎúÕþ)]ÊjšëvI³?®p‰ûÕ¥‚‚½ K“:g|ñ+Vu•Y™L0 h{(ªZEÓ¬8ñΪÚáÔ›îÇêª –&,ÞÙ3noñk°ñ…Mð™A´OnÿZw“é5Øè6Hiµ«q‚»Þ¶‡U•öÖܪËOèR~ Þ&%֒ЛÁ‡ôã“–½ÖJð{7Ù¦ÐèÁ ‚E† ‹¤™zþÀbñ±Mð§ôçw°»¼aR ˜|åvî­i¿ï“ªš¦­ð«©º¬;+vP½¬ó ‚vŽ’`¡i°a mƒqZ4@pëÒñßÁ߯ÕG#ø`·ucÕKp¥ÕÚ>®0‚Û-*ml‚µVkõ¸‚3 ‚‡´"5ÿïZõÅVO*$Áý]ý¿ ÷ô'§Þ#ØêÓ†¯ZÑ'¸“OúK‹ÁCzrjBL[ „`{A–Åq æàA£iC Ö7·¯ý~wIigS£kmת­Û2ÁúU¦ÍªTµZm_¢‹iQÛÜ!¥NÁ|­èVÛlS¼:‚=ûžÊ #X=Èë»Zr=#»Öx0éô9‹;ƒ¼ ‚…ñ`q@Á6Áê·{R&ÃD‡~³CY!8#ü‚`œå9ý~ÿCƒà¬|p||¼ŠÓ‚1OÁ‚!C ‚@0`Á†  A ‚@0‚!Ó‹^Ï / († !¾á^³#¡âÒ/M‡àÀ€bBpà,#qNfMÿ}á%Xvå$ý ׎—IÅë šR+b@újEH“~­ªºîMÁP"=¹p«C¡VÁPœ£i—™·–</vh|ï×ùÁ‚ܤ_«ª›üQ‚…@Ð8O4¬‘Û€žÜÐñ`ßWÏUFëÉa< Oðøù•±ßäÑŠ€††9ÁÕê³gU14Xô+„@8ѯ>­>vÖr ¢%„"òƒ¡‰ ö0,Ì“€÷YKA•ôKXüÄêö Q¯úÝ{-`'?XLF~0ÁÃÁ¯^Ø¡ÁTªt§,D ¿ú©G~îýÑC°“lí §l¹iÐè ‹›nÔÔP‚f ¬ªZ¦@pµ ‚¡¸ þ†Å7CìD + òƒA0”@+¢¢?9µXÔõp­ˆª%¬$XH_•S‡A0KOÎpÛwÞ…&Ø0ClG ƒ`(òÑ´ëv(0 î¬ܾDkhR”°’`!?XNF~04ÁŠ'¦8\ëŒì* ®¶”ãÁ{rrê0òƒ¡Iž,¿2V¡ %8Åü‚`h(Á©æCC N¯ e›`äCÙ&‚@0‚!C†@0†@0`Á†  A ‚@0‚CêþíPÕ¬„C „ZCsÊ•n'Їà#™`7ñD•,êðÇ!;5k0J ñFu ÈÁPú>R]‚½ùÁ6ÈBüïáONl°n4òÛ‰6×íªî2©ª›: AôäÞ(.ÁÞüà3çÂj‡ºy<âµVKJ™pªv¬õ…ªÃš)Žà#Å%X"ØI¶¢„ Ö<8ü™Ð`–¼a>Þ³Š„àm»ªÞ´–ª:©Ã4ÁîEؽË×`;}§zÿž¦iÞv°”›ö핪ým\B;X·ŒXƒ  >ò_‚j‚ueOCql_„…KpÁZ«µz\ÁPº>ò]‚Åü`éL î/é JÁü",^‚ÕIÀ–¬o!ÁPj>ò^‚Åü`àöU'J¸ZÛ¤åº ̪Šk›;´®D°¯*†"!˜^„¥K°g¸Ö&XÌ&XªÆƒ…m<¶Çƒ‚}UA0 ÁGžK0e‹àê›7p Ê2ÁG¸C™&‚@0`Á†  A ‚@0‚!Ï–Œ¥ehb- xJjo­­Bkm« ‚§ðzŠDëíŸ*ÇnBl½¨´eÄL°Öý^ôz ZZËAǧÓÝÿÚÒ„óK7ÃÒG°)¤ªÞùÒÁDËíOimª²v=ü†ëÊâÎéÜ^žà +H¢†àj ÇNp‡ÃUîß½]¡êkeàºõ‚SAp“ÜÛÏë 8$Á»º5©ÛEÍÜ£EÁú,ÇRÆL0ewÁ4ÈOÖÀã(n¹FK‹ ÷¶†Vu‹Î憰¡ÏÎcí C±ƒ´,MS¶‹šÔÈp îÏr,eb7¨÷-Ž ¥E–œÐàX²¥À†Vu‹¤ê‡–Ÿg^•çãÜÜAÊ~Üív·ÁgôÛþ1»ÄÚEÍ<¤Å“–D°P ‚ã#øÚ-Ú©; ÷|^\ „ÕO;´ô`•öOþnßðªn±¾ð/6ÁGvUk­ßÁ‹§K#õ+§Cð·½^ï#øS6ÞÀòt¨ûÎiÿrM"˜\¹Éç+³7ÁµZmù‚PóúsRªÒ«éG÷IÉbm‡.ýõFP‹vxUq­'6Ál„ð—Rqõ‚K#3í`‘`'Ü:ˆàÁ# 8’±ˆ×¦ÛðÞïÍ@‚‡W M0kEÔ²Hpg¿EôbÁmZ¡…VDo¬®þ''øœ½=ÀÂõÕÕõÛuÆÚli=à¡UG ¸µºzs=ƒ»ÅÀVD•æ^âS;øó{uró~ýZ°[¡õþJݬn \u‚›ö±ä’`­ˆx{rœà»ïœÏhQ¾ÉŽìª:ÁÖ¸Hz ¾ÎÒ*5‰àÅ]ö5#ø -nÊ£iBB%Ž…àZçêZsy{Ÿh›µ±oÎZì°ëâÆ-ËCëŠkù ¶ÖúÙ¦v;é‹púd=9ŒOƒàfóùsq<˜wêZMÏ osÕ?\¢ªPô,Œô„{JCi#8-ú%™W~@0ÁÁ³¥<½á®Ò~Ÿ¨n$²¯‰ßp‡ÆPÞg-ß&j$³¯°³Œ H…™žQ)ôLO(b„1Û³í3Þ@â O ¨ ‚! 
> E( ‚A04*Áð r/ñDƒàÉ¥÷V|zÿöˆ›ÛûcVÖ{½îäÛýj C°6(7MRmX­J·9àSþò²ÞUN¯ëŽuz÷¯g“oך ‚G$¸zÖqÿx¢ƒÍÁ‚uAð´~xÆJÝ9Á#».Z‚‡êéèMtk•'¬ñux(„¥Ijš[îŸZ!d™æ‹X㋽ëˆÿɳUÖðà èOÍÜöä{0zê­¶ýà¡VñþebëÙãÇo×<¯.û‘*FבKðQt s•ݰ4)Bí•™·}{ÎY«åÌ““ªv¬­šŠyt¾­R„…ÉÐÒZöD=¹˜&‚%ë„k°õGÞ#¿~¬î0ëÔ~x¬ó¹,ÍFL “\„ßTc!ø½›Nnš¡¦u7Yx!¿bõ\6ÌÇ{vnš.¤­5‚¹j¾­Ò46‰`g-~,Ÿ½s˨e…`Û{ÿeî%ÃØêêj?\Ã-?è5˜GÒ]¡ ét)4a’‹ðQ<«ó"zkš¦{ÛÁæ·Wȯë†Ûk´ì¬Ê“P¦PH;k ÇrÿÙ~ÛÁA[~Üû7Ë5S ðã…"N¨*¤Ó¥Ð„ .Âoª ,4 $‚=m3>‚éÎ2D°íG‚ME:D°u:&¸%Kp¥ÕÚ>®ìF¬ÅE°Öj­W²B°ãG‚¿aásÍ`‚Ýtºô™0þEøM¤£i«}ÞFÐõ ‚‰¶¬J*‚¥ªñ\ƒI¥þRúÆ"ë‚#Að§öê­ˆªN—2Æ¿E<̱|ölÁöHåÔNÙаíºcÝ$ßy7˜`a­;ïòp~S˜àö%Œ¶µ`…¥uV$ou–ªfå¦ñªšD°±6œàïíè›ÛöV­âïòɶھêVÚv¬vŽ\&øº"gÎ5\0GÒ}DMÒéRhÂØᣨ ºãÁÂÕAÎMsƃÇéɩƃ…¡Pi-g«Ò×{¤‹`õSeeOnÂñàš0îEøM5r‚¡± +¹ÓBù ¶=:Á 8—^‚` ‚¡T|Ñï÷?ÔA0NÁµã>8>>^=/1Ó3ÍC 8e®C˜m‚! åH Á‚!C O4’rvÎ<ÁZ¯9¸ÂÈikjõôä ¾5(K­%Q£7x"ç°ÏÃkxÄ[tûJ€`ÃQÏáuÃo5=!X#'ý(í«2Á2†Bwù`À6<ïJt‡P£ø\egˆ“ Œx“"ñº"ØMQy–ÄQšµœô*ûp‚#²3ŒTYŠM`e´k.$h4׉—ËÏtœ`;¸eZÑ\ZÕ†V᪺K­Ü´Š¢ª&WeŸK‘aæºïÿŠfî)Ö"Å8Á|Z°³11¸Í]K×쪾ÃÞLŠ`w¢”îþ©¢Gì¡ٰ ‚§ª|ë·?°SŒ¡³‹Ê“$,œ/'Oy,’é%˜ òØ™‹eOË’æÉ ¹iž„3^uE—ª>ðÝWá_¾xÓØ<é+Δ:7¸ÍM¦žÞ×7CçËFI°R"xDÿéK‰#‡¨yªÊ—Hûó;Å:û$(O’Dð¡Ò#űȳ%ÓKp¿»â$ 9‰^æ¡pÆc¼¬Ü47L¨zv_ªÊ>—"ÃÌm_ø—°ÒŸsÂx†Û§4Wl­&¥‹ Ám¾D‚=‡}%)‚w»Ý—ül|a¡"¤)SâÈ~Ý: Â.u´®;+ì”bèœ“à‹¡³·*G¼qçH<å±H')ÅÿÔ«ØÉÿº"Ñ«Õ#K+þ›¥[õ¿¾¨ò(}!ñK¾*¶ƒy …f¯ÅYdÇBŠZ­W÷^Àœà6eÊ„:€! ‚¿íQñQ9C‚þ5Ç%Ü«ª¯_h9+ìÔÌËÔ¹_¯yOBU¿á¶"ìµäv°|,;SÞŠ: þD/é^¦˜/ôɄį‘vîeÂ±ÐÆÍ™·ªÐ JÁü¸T)(Ê?AM°bôx0Á¦Ý¼’NÂ0‚Å)þ9#Ø C“Ž}¡Õº±ê1ÇW•î$~@píx™¬sÝKðÿ;Bð˜ÝúHëÇ[t¥¿ç’ààcÁæ ¢Æ5¯¢v öm•%’½ú­ß¦W²Opp@šï$èúˆ¯xv†`ùî73û½l˜ìd¸éþŽ|öx\~jF'ø—k Ø–[&;Ëîb1^B˜¯*»¸‰_¢åÚæMÿòüÄPs"Ã<ndÅŽ"bÍ»/åa [M‚àëWƒ²Ô<ir–š}„?|(ÁŽB D°ç$Ù[µ[xÊcÉION_TŽJJOׄñ`ñRóxøxpp†›/bÍ»¯àhrãÁâŸãûn ùO@·«¯Á®Þ¯%©†æ=91®¥N5Ác(‚'¢ã+ßY•‰·+§z’@0Á ƒà´™³×'Òt ‚'´|¤°µxLT©‚à4Ÿ¤x†  A ‚@0‚!(«W´yhiدŸaÖçÊ%h\•çtØ£Ÿ!†ß›;ãós8Áp-ñ3ÁA׌Uë|‚ Ô{þO¡(ì.ttó˜.*Ì?­æ¢ Q~4ŒàrH;_¾VÚÉŠ¼P,äÝNË}Ð-¾ŽàŠ´ÒÁw>Ë©æèý¯î- +ïs&¹ ½ÿ¶”Ç­ˆï¤T%œ/M¥´X´w•s;öù‚`M2Ü4…~ËÝ“îúîuÚÙ½Å;E‡àÊI•,\:)x{>''+ó¹éÆ)6MÑs-œ/¿t<-Òâý&=¹¯äÝNJðÜIÁK°ÏÏÏK† –—JÏÏÏç …¢Ý»òó¹@tòV"xÕ,‘…‹þ;ÂCçrÓžuÌõ|~;ÙÕôås×NZ*üºGíüËerÉ=/æÙN~Oóìñs‚¹á’åDßø:Ïì¶gz 6ÕÃ3A°ëùü(vr‚M×NÖŠ0÷äöÛ ,ùš`Ûp¯å•FcoM$xãjƒè™—à]ÚðÞöŽVíÝül{>?Šœà?3ãjÔ¬uZú{0Áù²“ü‘…FÁ‚Ÿ“\´l|úÔ²œ-Ü .°ÏåÛ^AëÏ‘-](‚`…œ`ÇÙ‚Éžoþ-àœÙ©îÉC°tÛ³}TY®$Ø{4³ðDC¾ëiØ9”àœÙ9ˆ`ÁÏ‘FÓ¼]ööÕz­T_§… j1/~%Ì«^jrlyygçÔ¼¹³SÐ󨄳S Øu¶ÐîÐâï³B°šŸŸ#>цħÊÞñ`OON=œ/ËÕ½Uiô§ÒNñ>æ6g†`%3~?G}ª, ÁC!$ÀÏÃÎèýœèÍh´v†;£÷s¢·+¡Ñ‡qøçîÒpØ1À˜e”´á°3Z?1Ó3iÃagÄ~b¶}ìvc¶}¬~"ñ‰'ùO< ä¦ArÓЊ€›†žÜ öäà7FÓ2=šÇñD#ÓO4ô©åˆ¡‹áÉsÓÆ ™)Øù^¹ ú ™óO ]þì ›CInÚ8Wí—»sóJv¸œ¯˜bèòggȺhrÓ@pøœ¯˜bèrHp¸ºhrÓÆ úš{Ze';/A_as¾bŠ¡Ë›¡sè¢ÉM/èKšÖœS‚ÇÊM/†._v†Í¡‹(7m¼ ¯òÂI±› ¯9_ñÅÐåËΰ9t‘æ¦ôu^›?×ruÍšóg ]žì èÉÅ››6NÐ×—¹±;dÎW¼1t_–Jù'8¾Ü´‘ƒ¾òFp˜œ¯cèf‡à¸rÓF úÊÁ¥9_ñÅÐà¨sÓ@p)ÊÜ´Ò°ºlED—›6^ÐWÎ,—ó_ ]Îì ™CQnÚxA_y»h„ËùŠ-†.gv†Í¡CnZ¼BnZÜ~"7-Áû"rÓbð¹i :;ãð¹i‰;#³Œ’6vFë'fz&m8ìŒØO̶Ýn̶ÕO$ž ñ¹i„Ü4BnZhE '‡ž\{rð£i™Mƒãx¢‘é'a¿y~~þ™¯),‡Q#<þ›=;(¬ œœœÜòÕš÷§ÚØY_C‚²R®IsÓ"³sÎÙ^–í ëçDoW:1&s¦óÚ«PT«ä?u|BRiÙÌrjݤ¹iQÙy×~é8Ûv†ôs‚Ü´Réã=;( Oœ›ù 8¬Ÿä¦ÊÖÈ„§µb©Ø2W¬ /¡È3¿¬²ôÕïÒ¥Òž®°ßí§Ùµ|ÒÜ´èìì<°>˲¡ýŒ 7N®usÓÄ5gžœtGP†ŒüPâÓšsæøè¹iØ)ÌϽŸ“ä¦ –—×Í-+»Ù-NÚ,Ò¹è úê?Q=+¤¿’éŽÇ¤¹iÚYÔwî”jÙ¶3¬Ÿ“ä¦ –+n…óåB¡`U‚¾í`êºs™eÇ'ÊM‹ÔÎÍ/ óLÛÖÏIrÓ†YÎn{Š ¯™#8\nZÔvn”J¥ðs’Ô©á–—­#ž,})-¯’oÖ²íøD¹i‘Ú™‚Ãø9 ÁK}ÜU,<êþYc•¤ /•å…ý7äçÛÝÜõÃ¥9!8ŒŸ“ä¦qót+d&pÓÍMÓo{yž47-B;sOpT¹i¥Ö†Þ¥ìz°°6}Æ)}ѵìmÈÖèó Ÿ_ιåÉl;/œ¤|9EnÚû_ yŸ·¨oP º¥0'ïý+&>Yn÷H±Xeñü0‡\çԳél2¹iCöL®5Ÿ†9òFFßsŸ,7{ä[_:¥žZ¤tžxùÍG™t6Ü´Ðó¼¤rùË[¥ØsÓ‚ëë4»kcÑ)~ÅÜŸoïÌYG³ÏS¿Å8,g-/Á;k•ìÍ—ãkK°3j?C¼¼÷Ô¸êÔòÓ¿ž9ìÜ㜠û|Á_ÃX¨YÒˆõm/;Ûy·³Þp$^;™Ÿ© ¸c½þn°=9›hcG?þ¼a*ιÉ/Œf=OÛÎûo¬BÜv¦‰à¦Á‚ó`çL,þ¥Žåäðå á„H·+²Ýþë¬^ý<úÉà‹ë†á|n4xufù–³agCÏ·è£ó<O×ÎÝSþïØíL=Á Ó<³.‘L¾FGõ1-¼þŒþ¼ûŽYÎêÞ°o¦»;X0¹åTïT—š_n4›ù%89;­[Zìv†&8ËŸ}_öZ^_øÛò#Ú3y ÿWZ<]" ×Ok˶z1³e.[~…,öž¾ú"9‚ÅzNž²ÆÂ{ÿØlÅogš®Áÿ÷âââÈk¹STÿŸ|”,ÿÑÿ¹l¹ò ÕU)«OÝέ]¹ˆÝÎ4]ƒ•·=œe;·›Mœ åËÇ«««Ç- 
‚³iyýð{òóÝAƒà,Zž·ñ`œ:‚?Ù'êÈíÔzç=²p·CÖ]Þ¦Ÿÿ™YÞ¸DË t«‡öç¼êfy½³KÊÛ3MpÌv‚`¿åC0¥QIÃ?€É«:ÏYƒ0g†à˜íœ=‚‡Él’™ %CðÌØ ‚A0†å 8ý'ûJv®-Oþ ÷|ò ÷éO‹©ß6ÆYm¼µâT:fåÆÎ°³Œ051*a¦g~b¶=fÛç~¶=":x’ñÄBn!7 ­¹ièÉ!7 ÂhZ¶FÓô•³'ùò3O•GJûZ¼¨eÅñ)=UΫéÉMãOëÇMûjœ†~`?½Ä4®ß왊É%¦¹~¦‡àHÓ¾ÔêßöŸè„ÞmOœà©Ø™\ÞT: Ž0í OÅÎY'xü´/§À+ Uň¯fÿ¡oI%¦Mà)Ø™\bZ†“öÅ›²¯Ùç-¹ª8­VšÝå(™Ä´´³‰%¦FpR–“öEth[~F??¸-U•-ÿ'»o#^tIL›ÁÓ°3±Ä´t^ƒÇIû"Zµ-wR„ª²å·U;N$oj OÇ΄ÓÒy '§§ÏÎíf'hy2‰i ÇdyB‰i ÇcyŽÇƒApêV¥}XîT#¾š;tqÇf'h¹zSi¹PUµ\ ÀœE‚ã°s¦ ¦r:Æ‹ôÈÁ³`'Á –ƒàô¼´Ë#R:rÓòCpfrÓ˜FHûª§0â‹)E³Œò`'rÓ’fzÆá'fÛc¶=rÓ $ž 7 ‚›` `Á‚! A ‚2D°ÞëuÇ\õ«-«ðèNÁoLIoÜO4š8¤{îþõlÌcøåšU¸±5è¿H=üÿ&CÙÁÕ#™à#÷“ýwÅ»©Ç“`u[Ô¾É%˜V=H0ÁO4Þø.Á“ÜÙo½E°Öj­WܦÛji « >ò]‚««}q©ëcì®âL*õ—ô­²¤¢ã ‚ƒž*¿ñ^‚iÿŸu—ž=K†`{ÁhE€à@‚¼—`¢ö¥«D[ ÕÅ-tV$‚õÍmºt³D0_ëëwNÕße‚õÍ]²ð#Šeûª»ƒÚ&-×%‚yÕMO+BÛÜ¡«áì‚`~~ã]æ>U:<°''ÛóA^Þ=yuk/”­1ø,7˜1¢G?YåB¹èØÍ<<,ËU3Ü„(?Fp9Áù?÷wªSöò“ÒCÓ¬³Ås¦ù¥]™µ3ö< >ÓŠ®Å u·jQþ\*f”àï¤+je¨%ÛCËŸGÜÎBC²ó#w-ó­T5»b¤`ŸŸ!Ö$ÃMSØÅÓ““Ï‹ý“E]߽ś߯9Á…“6í™üÆ,¯“ÒÒ ; þ«ÃòòKZ kYUï7ÉATuës¡˜ ÇçN ^‚MSô\h§ówî^·<"Õ¦å"sþŠcç¼í,_KŸ—ªfº§ Øçg‚ç%Ã%ËÿrNE{ùÓ99qW>ã<´6鯿íy®ON;˜\¹‰ÞK«nˆ~Ý£o¥B¡hÁ9ÅÌ\3ü‹žÏ´Ó9]®1bÑmÜç/“Kî9ó†·H÷úá]RÊ{OÎççsÖ?}jùlk‚M÷¶Gn‹·w_È>Á®çó!ì EpAá--±ý–g‚%?ClÁÇ *Þ©o4®-•¼Å,\hl{>ÂÎÑž?Z ¾í³½¥£™ Xð35³ªN“áùªb{rá ^ê¬VÁ({öZè7 8 'o+©Ê%€ gz,B¾ëiì´qì‡`_Õ|·"´GÓ|]ýªõZÁíýøw/Á­ º¸ Ì«¶çJ¥2Ûæ¥vA*fAímÅÑÊ=Ê;mcˆõuZØXô<ߦ‹5¹Ìöz:ÛÚqÖr«fWä쟚7(ƒüñ‰†gøÇtFvU˘•¢!­õþ[iv»1Û>V?‘x‚Ä“ü'ž@rÓ ¹ihE@ÈMCOn{rð£i™Mƒãx¢‘é'Ñ8~j½VôüÜIQ;/Ï¢åIüÑÅóÅBxìÜ4;•$Ìþ úñ:ÿ}÷äÄ~‰õd1OOš›6¯Ê(„«OªÙ74þÜ4g–QÙ3FØZߨ/‚;/ÃçA“æ¦)ó]æÌù°û¡j6n4in_ß—›6Ã'”›Æ.òY¶üÒ¯NìZ~ZpOÔýmkéî‡å¼$ Oš›fÛ)ç¦,WxZÑNG ÌMsª–JÂÇÎç…r¦²ÔËMóN®æxIwÈ#ñf)´×üUsEð¤¹inš<õ{xnÚ†·m“±i‡‰å¦œü·D°ù@ð¼è•+‚'ÊMc]Ü‚]v<œ3·xšD°*7Í­Jl­Zóì9ö¹^—‰n\R¹i?ËÏi×ÉÛ`‚åQŸ< C°˜›6;Ç•›&Ä$ ³Hpi’Ü4Û2_Çx Áâ^Að¤¹i¾ /#‡ÎMs6ývÊ;Ñ}M9sÁî}[n'¬eš±ì™ûoçå '8ÚÜ´J£qñÔ ú XL÷šE‚Cæ¦9ÛÙs·X…‚„¥˜¶6Œ`u¸°ÙÁµ¥©<¯;7ŠG”›¶Ôç5ؤ‚³x4]‚íì¹@‚ %kž¬¯Cô~(‚ÕávòZÏ?°'âÆL0ŸìÉ, èÉEš›f‡¡àЭm ‚ß;˜`5–#üþÛ»ç+f‚•©{ Ž(7…¡y‚¾‚ÚÁBºW^2ç«¢'çdÏÉ?¢KÛs%š°O[›oÓ²VsÕ|U}ávd­9¯Bû«O¸û7Gã­˜¸ö\i°Ÿ“䦕„§ÊèÉ…Îù*MƃE‚ƃU=9â¼)ŒÄ«;}þ k¢O•=_RWŽiN&››6£Š.7ͺ­Æ*7/PTšÂí¦™›M–›–Á;›mß~Sn—xnžÈÎ$.•:¾ûvzÃíÊxÃ}J†—SLp1;ávÈM›žácÙY8ÂYð3=“6vFì'fÛÇn7fÛÇê'Ox‚Ü4Bnà,ÈXZ†&Ö’‚§¤öÖÚ*4±Ö¶Ú x:¯7¡H´ÞIð©Bqì&ÄЋJ[F<ë½^7ÜÙ¼Û-_îõVìr£§©WØûcæ ^Z›Æ¹>¾¸¸xüñ絑·Y¿¸8.ÁkKa ®.Ñ "ïh}÷¯gáÎæ«OÝòÊC³ãü£@ðw™'x¹ã)­-PÕ}¬-‘“{;x½»ÿjÖìªõПþõlº·—''¸EßBö 0?ƒàª.$$ëeÍK°F—ZÍýÜ!øOÂZ.Áºµ_Ýúœíà !¸ÂåÙU¡¢K¥Ù#˜²k_¬"e—íª3Jp³ùü9e‘¨EÿõÚtŠÍ#Zl8ULÁÌ;ÅV­ª}¡™b\3Yïô—ÒxUÓÌÁïݤÿÛö%‚µî&]zÒ¢Uïƪ§‡öêLXË%¸vÝ!xÁ°Öâ;øŒ|hoUØÑmV¯’W5j³Mðµ[´Sw@»oƶyÅîßÕéÒ«&UÁ7ÉÇë§µ‚ë§{‹§Kê~e: æ-Z»™jl²ñ±_®Ñªß^!Åž®h[k íà–C0­oÐ «[Uتf^Öˆ~%ÅVO׬w–îß#ËfµìüúóÑé;Oã¶N—þzcÁŠFòE½Þ¬-_°mÓõ׬µü×Ì{d­Z­‘îvðHë=¹Iæº&µ=h+â ›n{@À’ÝäÍÑ&íÛÍžÕt›*‚M{¬j !ø-*¶í…VëÆ*ßo«µz\™u‚ÏÙÛ- ËúíËtáßÇ'øv›¬8€àØnÙa´VWo®§šàÕ¾ÆçëS#xÅÚ?׋u{Œ¢¿4»cú'š¯?cãu Kµ‘þË­úëüMbÁk|Â~Øw¯ÞH1ÁäÎÍ:aÏžM—à_®9/¸OWîÌîXŒ|÷]$;Ÿ‡$˜K4…ªi%¸}é*ÑÖBu±C ‰`}s›.ݬü½°Ö`‚ù¾– vP¥…«;›dÅöUñXfŒàÃýýýÝÁly›”ö·×$,κôw:jÐy”x ®wvéjAËkY [µŠrá5hK6ž*V¬V,Ž ^aÎб´ÕÙ}ªìŽ+{rrŸlÕís=ñKŸ+zrÒV…gåî´ ¥ŠàÁ ÁY s4¢ÓtÞpϧ¿á‚£fE§°³Œ H…™žQ)ôLO(b„1Û³í3Þ@â O ¨ ‚! 
M*d°"}5ÃBv² ØPľI~ A†šB#?wÕ‰öà$¿ &b}µåYð¢×ÓfŒà(®œ«¾ðøæIèª9A8Áª|´ äΪ°tçK3.‚5£2æªäO®ð¿½2:Á5c°eÏëTáp›;±ëÍ _ú¶ù^èªéVA/FôˆAùhÑì¼Ô=Áê—ëìJþdþ†½ùlt‚…e'.àñ¾C½GÕ)«aüþ[ióÑ@pÐNžÕ*- ú¦×ÚÓù2ÕíüñÅ`‚C3x@>š.„š ñeÎ@'ûP’Ì„ª¿]³· ù Öô´üïÏjäOX~öóqwzx8b#bÕ:i„àE™ý³XvŠ|©U.—íÛl¿K—ùç|aÑ^!¨j±è~œÂ&DùÑ0‚Ëa V磉ÓÑýwLºä¸zî›Ew(e”8QjÁÏÒBðU:ûüŒñN+Ò™8øÎG0kºR§ê´ôÒtŠ¥VÞ³_Ûƒ¬õ%G€óß«¾ü„þóý·é$˜8¡hE|'Ý›*£,䣹¡gB|™@ðG†±Ñ]4Œ[W…µ¬ªôÌ“Îì(µœ¬ÿØíhi¿¦)t^îžX}®Ýë´K·{‹ü¥],œ´YO]>ç÷],ŸØý¿‚þZA°ªjº &Ý8Á¦)2¬N°ð &88ñeÁW-,¥4 9B‚¬´b]º‚ouÓCð‹Ú?݃`6>XÕ´ÊÐf0åW ¸Tz~~>W ß'ÿòyè„æ²V8_& ý]±qûÐ"˜T- gkAUÓM°º'GÍržŒ`!¾l‚ÅÕTöä„?aœñàóైy‰_‰`¢oøMžôÖÃiEl„"˜Tý¡Ô1‹¥¼ì2ÁB|Ù·ÙZZn n´Z×¶lóë%¸Òhì­•^þ¹AUó²Öhlg›`›á°«óÑ|Xê£LëWtß5X×SDðB,‚íù 5 .ZçìéS 0¶ÀÃÚZ¿P Cð•ï >f£Š|4%–#lø[iêÉ æpøNƒÖÔ‚ZöX„CðûoU¬ñîÙ0‚KÅ Žì·"´Ð«òÑ\,Åø2%ÁîZVÕßYèÙ®t3Á×Yš–$Áüc– MóöäÚÛ;TëµR}6åžût§=ç9Å­ º¸†`§*ßÁW)%¸¼³sjÞ´ÿ&uO®š`U>š<©–‘S6?àƒà¡O•Ë£iž‘]{<Ø%˜ÿ1¾¦A©á>‹B°[uÎé*¦r<ØT<_—FÓB?ÑÀەѯ9ðD#y¥¸¡ôDcO`=5og‹àqßìÁ‘ïGø‚0Îêî“"Ð'j20žÕYF…#¢ì_1ÓsZÂD¹ˆÁÓfÛ‹/fÛ§FHÀ©Á¦©dX˜È(„O‰9Tñ|øã­™5œN,¬dX$øO4Êï:ï&A0”‚ ‹³ Ÿ—U4iî§K¨q)¤U:i–ÁoãƒVØd Á—‡?9e!.SÈÀ4×+<-ÓNÍ”ªJÇÍÁ>†}×Ôó©ÂÖuY˜l78”aUi®¯r¢³0=Ú³UóU´ç<;U¥cfŠ`Ãæã.Õ‡!xÁލÓ*u'Í’¼ÐõtyÕ+”;!.³zø³ÕWâ2¥ Ls›”k¼S÷'X¨* 4sK o˜ßö¨~– ~A#,oþQq¹f-Z1­’–´®•â£m÷*Õ µøV»ï¤v°—iÝøÀ›Ø¶r'c‚ Y#X`8 ¡‹6å¦É ÖÍŽº«È 6ݦ@°—)çsƒ`<Áß´Z—ì¤Uvöiá#øùü [©é!؉ËÁÐ8­ÁºÖ_fE­¿¤hET­´J1 ðÿ÷û}Á|º‡`'.SÊÀÁ 8dOÎC°'ÍrÁ–NšåŽYBðw*‚}X‚`<òhÚ÷ž4ËúÖÕŠTõºQ)¤Uò4˯m‚+›WåçżêG”5!.³Zۤ店)f`j›;´®D°PÏ*ÁŸh˜ž4KÖ$«›ïF®êCƃ«ñ`7SÁ³HpÀÛ=!5<€‚â$x2~A04]‚'åCÓ%xr`(Û׎A0”e‚!C†@0`Á‚! A ‚@0`C† \Üè!Zš˜`õ8gœ! šœ`õ,#wþ3¸¥œ`Ã4Ãt×\¨¸§”^‚Õ×á–)dóL^]ˆçâ}Eî5;××Y«Âå¬#F W•[E04*Á*†‚«Êo!âÄmWÏ=!š›è–à;ðmÕ¼†SJ°Ÿa›`!Þ÷Õ™ ìfö ÛúÈ06º‹†q몔|¨Œ÷­]· önõ7 A°—a›`enšï+l몵ֳgžüàª*ÖÌ¿ƒ xš¾:œ`Ó¿—V¥N`(=;ñ¾j‚Åü` ¥°áÄûª Vb ‚¡)ôäüË5 ¥}4Mˆ÷uY2{Õ‹ùÁOÜx_'jXI°µÕßA04"ÁŸhT=ùÁVÚºb<8 '§V]ä1 Cðäù•ƒ$|+mÈ@´" QŽ—_ ÅKpÜü‚`(^‚ã×Hïõ‰4¼¥ˆ`ýØ)^<&Â÷¸@i"‚@0‚A0‚!C†@0`Á‚! A ‚@0ÅDð­®w‰ÖkޱENpÀî ¶ß…¼|`tÚ;'¼ô(g-Šà€YFòœL¡z-<Á”ÁJ†7Ìz¥RY~¦ƒ`(ý+æó*ÏôªÌ«¯óTlÍÜsóƒu;òW#¿íô_g-!?ØÎ Æ,"(b‚} Û3Ú¬ä‡3©ºÁ’€yƒƒ¬ÕrÚöZ¾ªÞ˜ Š€`Ã"Áºñ…Kð6éàÕ¤P`£Û°C7ÌÇ{v>°³–¯ê ÅA°Ä°t ®>> h[ahš¦­P,7Ìo¯¥=¾îã3EÕª››A,0<ÁN3AžM‚¡ŒÜfQ†ÒÖŠ¨èONô"XU\ƒ¡ÔõälÖBl M•`ßhš˜ôë¬mîÐ,a K}s×®:Œ`^õ# EM°â‰†øTÙ½Wû¾ÞEH0ƃ¡XŽ?¿RZP¤'Ì/†"%8q~A0)Á ë¢ß糖`(«¯â„@™%‚@04Ãú? Ê4À&†2 0†² 0†² 0†² 0†² 0†² 0†² 0†² 0†².F0eX ʶþ¯¶­‹L-IEND®B`‚libplacebo-v7.349.0/demos/screenshots/plplay3.png000066400000000000000000000563011463457750100217420ustar00rootroot00000000000000‰PNG  IHDRÁõ1fÐeXIfII* Áõ†Œ”(1 œ2ªi‡¾HHGIMP 2.10.342023:07:30 21:28:07 |hâ„iCCPICC profilexœ}‘=HÃ@Å_S¥"‡v"’¡:ÙÅŠ8Ö*¡B¨Zu0¹ô š´$).Ž‚kÁÁŪƒ‹³®®‚ øâêâ¤è"%þ/)´ˆñà¸ïî=îÞB«Ê4³/hºedRI1—_¯` !D—™YŸ“¤4<Ç×=||½‹ñ,ïsŽ!µ`2À''Xݰˆ7ˆg6­:ç}â0+Ë*ñ9ñ¤A$~äºâòç’ÃÏ ÙÌ ¨=V„ pHYs  šœtIME磨ñbKGDÿ‡Ì¿LbIDATxÚíýsÛFžæïo¹") HP²(‰²^lË–bÙ–åUÏälË;öf“¯ì±³³>ÏlÄljV¹º²kg·Wµ“»¤Æ™Éø—îo7Aêy~p:`w£ñÅGî&úá¹AÓ¬ÿ‚@ÓNð<M«@0‚!C†@0`ÁTÁ­Añƒ*F°Þd Ùº8¬=ÁPÕZL< çöæ÷T]‚ÍМÿ~rCÓHpÏrÇ7m‚TÃ#xÍþwÎùä~xÎ;äe¢ÿÎ@P©»ØÚ2EÒzçì$?ò ¯ÓqLV*‡àFó—¼éÙî-6¥Û½ãb¹m§ÆÍùLO¿Î 6íc]VÐ-õ…MpãѺjRAÅÎä^  †= xõ¢aëÑ»˜qð"'˜ý·Éæ¯>gî2‚«vyÄÊO OsqÛ:uÖ"^Yb<0 ÁÎ(bQ‡ò“;\ .•… n-.~tqþÕ׋LíÑ ž_\ܾ3‡¸C¹õÁ‹®ÑÏyŸýÃ?x,ºF$x~þâ°„¡ ¬Ep‚ヒàùù?c-*‘à¹õ«Læ|gƒ%6—Þ{ý2;È’Ú:;ÜŒ"Ø-õ{6“s*ºº¾€Ce®Eˆo•ÉzðüüR7ß‹¼ ‚ÅzpÃtc•Kp."£Á‚!œRC[‹ø.šV‚wl!ªÐt" C†@0`Á‚! 
A%Ø€ i†@0•,1ˆÁ†  A Á‚!CPIëƒAÄ¢¿ÛòŸÝ ò#øì0ªfÚÒ÷ÿýíˆmø°ï%nmÅý‰´Óÿ5™î,6,+Šá.óài>›àX|Ÿ¾F«…; ‚ †©’á'ûÌ’ýÞ†*O°‚a­ùÌyØØ;¿9§ ÖØQ/¡‰ÏÅC^”r²þeßOkÞLºŸÕI?¸ÅÜÚýÌz“$ø¹¬‹~)èÜbxÃÒå ò$¸D~õM[×–qC ü.µÿuVvñN$”'Á¥ª©Ûjâ~@ÓJ0`ƒ`C†  `Á‚!C ‚@0`ÁÎEÚ`1úÃÁ8¾gwú¸µ ø,`ácgº¯ñ6›FÓt Øÿ‰d‚­ÕÈÏÌ ½k’°d|ùn->” :ž°Ýöb#¦´'³‚ÝW±ÅFgª.X"ø™ãéM¶?“$A0T‚U]°D°³ÃýÒgM-°ÙÝ'”zúr§_áìrf6­¦!Y ûþÁ1(¦µÓ`]˜S/ã¦ÂËšé™Ü™¢ lƒ¸®&Ø5æ#&^Òp={|q¿ˆ«áñ+Z=“â(¼+H[ sBð¡¢ f¦À}¦ÿHCð’oïKœ~‰0#x©˜ºY¯ ÿ`ר÷¦Å´Vb¬›¿Ux/™ /ch¦ ðÅòÛÓ2Á/™)ðí_æ¶fÄéWöÖ® ^j†öÒÏJÆÁšu™ýI³¹ otåáfZØ}®> wÁQ£ÕZ„D°% <ÿ`Ç?*š`‹T ‚-@!ÿ"Á¡o4ÎB]pÁ_w»—b æN¿Ô?øÅ'ñíZ ü‰sTÁP"Á‡¡.8L°® Wœ¤ïô1Š0BN¿šõ‡Ãm=†`÷z€àUÿ(©ƒ`å·ÊgÁ.8L°3}‹˜É8ý:€íXÍ‚ï½Wƒ`Á‡Á.X"ø;nïë²ÖÞÚmJYoúö¾ÄéWø;€57wå05(öýƒ©A1©U6 Žò2ÁçŽ`Ö ŸQ“o•Řf^VýŒ/åz°ïlD­SÓà(/c|þ> tÁYå€ Aålœ šb‚A04Í! M7Á­# M3Á‚!C ‚@0`Á†F–¹¼­eOH½­õ5hl­oõ@ðd¾¸墋½”?V Ž<„ØzyiË,˜`É–òå`›[Ëë³CÐÑãÉž}9-Á†úˆfºŠzuG²D»÷ÆÁ¶VzÞÒÖS;eîvúŠÛÊäέÉÜ[“`Éÿ!‰`£ ‚ 'xÛ¹¿J—yx7u½$k†RÓ@pÛ±ÒÓApe^´Ÿ‰½mœ’`ήN\!‰-åž0˜$ëçÙ@²`‚»K–iÿë ð\”M‘n±”ÇâSü¯Ä¬"ɫ"§”ÙrƦâU%X2œ¶”t!žgÉÒî°Øw]ÄÜá,é8't\,£À³Š¤õ­ ­{ŸÝ¬®?έÈTŒ`æ`¹åüVL[Êß5’,yM‚à¢Þ¿Ã&u7Ø3ßM.Ù„µo³Ô£5°¿ Oû’³Šd{éŸ|‚ý¬^©¿Ø_x¼œi^9‚™ƒå-‡`b¬#ÙRÜ Â'˜:X‚àÜ nµZ+ߨÔ|õ;ÕzÌzÓ‡ì”ÇÚ;úÓ­¨mrVZê™O°³Bøá–”\ûÆnKgjÆÁ’á”ÚÌD¿‚‚sY‹øÊãàóÞŠ$89kj‚QDk ¦¶”j‚¹ƒ%Î൵s >uÞpÂíµµ‹wÛk¿p޶# NÌšàîÚÚí‹SHpÀÔO5Š0¸×$Îü›ûmûáýÕ—ÎÒ‚? mWÛ°¶·.Ÿ5Á‹~[f’`£ˆbgr.Á¿|Ï?cIù!¹²—5ÁÞºHu f¶”»»šD0µ¥|æ»F‚©×$.‚àÖöîúâʕ붮0 6®ûIsû*Kn;ýâÆKËKÉYi©0Á^©ŸYÒ©êÚvµ×"øœ,b&‡õàI¼¸øâ]v'uÝÅÀ"ïâZx¹6EV’ LÖƒ3}Ã=!‚¡ª\}(ç• ‚A0‚§`ìÑÈO³ô†»Jׇ¶Úf)çJÿ†;ÎO³¾Ëhå®­N9çJ»ËÊUØé™—Rïô„rF»í±Û~Êp<ã  :g7µhiM„³Øx¦!XŸŸk@£jn^G8 Œg ‚ﱃŽpÏd‚ñ\CŽpæŒp"Á‰ÿôoRŸxñ´¦:¼yzzüïüi=áôôxVB>ùpJ'¨KYãj­,ÂI«#®sÍ5^}žú¼k–28~emÐÊ­ÀIëÇ|S•ÐÕ¼ë–/dÎ=¸ò „S:AýØHYkIÑ­“0×R!œ@°®¹Åõ<‡7êñ7O*CðšwÝ*܃ïb"®W%œI'˜$ÁÿÜó,íFRØ‹!دµVçê%¬º¿)°›«J8É Dõ^ìÿh­å!æ>ó vãùñ»4Èx‚›R¡P†Üâ=¾}â'~çƒuéúÏ]÷ü0ôœ!*Ñ”`¿Ö_úMY)¹ÇÈBðnÐÿkV$œä¬Ö YkO®µìÐvb Å3ÁšpËR†ü u_»Ã’Ãã n²vÜcG,Úg:6ü¹ÏšÕ¶ËÇvòÚM^J„ÜÍz)’`Q+#Ø™…”Oð“ããã‹A‚‡öÁã¹Ðð–Æ\«H8É uÃËêà/{R­¥OãÁ›…Å3Á RÀ#B.þ^^ýpjGëê—vk¬Íš­ŸöØêL£æ=ôÝ̘Ie~ÈÉçj‚-gíçÏ{.Á¬TÙ/ž2Õ+TÎÑ¥Zx‚&b¾P‘p*³ŠÈFß„grìéóFAp žvžrþ0­ysë=ïYV›‚#'›Š§ž?æ  çt,Å35Á~À3…ü¨ÃäüO«ÓÙ_ÎàöAgë°^}‚ý˜/T$œSBð‚žÏb v"¶/~Q«çIðíßÖîü©±mÕV‡å\¯ ‚ëbE¤V4Áù…sJNÏ 3¹{<"¤×Ê`ö\e[{åO˜yç­O=­"áœv‚I<3­¦¥›zˆ÷¶wlõæ9–عԫIÁi_dG7.ÈË?½+öÁûÑ‹ZkÖƒ•Æ\ï¯eì6ѹB°t53fE©&Ø=ÁÏ“$ؾ²ÇÖíZêxfüF#rùGr²€IÖålœõàp¯Tš†d –/.(¿•VæªN%ÁÒzð„¦_اŒgÖo•å%x(IÒ üÂ4„sÂXñýÍ(ó#RŸ†pV›àpuc—QÙ¯j8Û‡¶¦`ìô,;àgÎñÄnûÂÃÝö…ÆŽ'p<™}Ç‚oÁ7 £¾i˜ÉÙâÕ´©^MCÄÏÅ7S‹p©ß*Ë–\·ó3CkFy}±m’• yšp*£p8WµÓÖ <ò›= z’µ•ÈêY)›-l~”ÛÍGí 5ÍõÆ+̽Òú|Eú¦ÕÝJæ"£p8ûB¬ Õe”G9 aò3qÙÞ®Œx]™õPIp*‚`•¹WJŸ¯hß4÷­rëy©T™àN<+âíÊ‚£Ü[Ï—¢»7ÎÁ*s¯´>_áÁÑQŽg ‚£úXŸÅ»ŽÛVÝóôªÍÍ)Œ¾†}µÑ—û˜¨ÏÍ)<ÁèósNQkÝ>@Kùö@ÖÏò&Xmî•Öç+"œÖÿ|¾PkÔŸÿçs˜à…×üøÔØ'uß3Îþ·&ÝÒVb¦Ö[¬ùWÏ/˜D9qOp¤ÑïM öø\¨{Σó’%W’Ñ—¿ÙRd% }ñ}ruZ*âÁnåM°ÚÜ+¥ÏWT8­öœúúaãùs¥Ã;bgµ¤½ÊòM m%hðÅÚäöfˆr\þm™oŒá›G°fš.izÿßßú‹d²TsÆÖFß–6#÷Þ¬>±¶§…à¨×¥—§K$øÆŸ”‡;e<†oZÁ]Ëzä%Ÿ lŸ¦'X•ux”#!‚ }z^ˆ|í‰ùG->¯Ÿ?‚ÇñM+`ýäÿ€à̾iA¯²hK®Úœ"kÐ-dôEíÃnüÀÓ5‘S\£Na… !ÆòM‹!X_?99lê‘kM[Îʼ_@ÝêkÍÿ|Üt ØÅ5/iè¬*ͯR9š¡ˆ“õÙ{ZJ&XoŠâÎÿêÕ"8Á7MÜ.¦Þ‚ðMãY%´ÐÆ®Ð>9kÏKnÄ=Ø‹ÑX¾iq³ÞÒ´š‘°+³?¶v½''ôN½þBÑ]o8Ñzä&uÑɳ£K†qßþÏ÷ÆŽeíÓÈrjxOKÉÉ q•!8Ò7L¼Ê’,¹ˆ‘±XhÄÝ‹$©}ØuÏ.š;‰Q‚i[ ›Æî›6:ÁZÓ´uÜu fs3™`» ¬™úÜ›nXO÷LsÍ™Ô鬪ëGÆý?šf§É4·úº8ÕG·ÙÁ/ßóäõ£Á-Ó«µ¢Gú¦¹>\mæ¹õê…ïÕhÉE] ^ˆ¬ÜÍý|þ´.}ÑŽŽƒëa‚i[ ŸÉe÷M‹%ø¦­Fl9ËWö‚»Vˆ`‘µ©y’ÆÁÖ·Wíÿ8 k:û´ÿÞ¸ÿ¯‹ë–¦Y—ÙÑŸö@Ïîƒ_¿ôJɳƒN³+N°ÂñÄ’òÄ}5Ú’‹lö|‹ÏÉ ¬¬[…l±ƒ`Ùç+Ž`K<ÅÇ"Xó"· X d‚I)%ÁPÈ£«ò+\§\.ÇÏäÕ?;Ž\ìŒ$K.J0/E07úÊB°hK)gôM‹&¸u´ÒeÚßJAðkÝ[k‘ë]WzÁÛ×ÙÇ/ƒ"J) þÚù|Q"ØmöÍé$˜³ææ©%Yr…¼X©ÁŽ»—Òè+™`Ñ–RndóM‹&ØÌÅ2Žàá5ý?ŽŒÓÏôg³¬‚9–2Á«á‰à/Èç>ÁÛŠÏä"}Ó¢n¤Ú’+Ú½+ô¹2kJ‚?~W%‚©ÏW³›`ŲAØ'øÞû)#¸=“ã+½ÊÔ–\ÄÈ‹X¬‘/?„»IR<z¬˜E°Ú¿,ׯñM`}óÊ®­M'땖ѽ@¥·»û‡ÿµ»»‚à Û¬ªßË“ñ¬~r{U"Xß¼f|ÈÚB’UýFC±š¦þr5Ú’+j=XäP®Ëöaþz°Šà2ÖƒÇñM™`²\«î줉V<ÁÊ™Ü8ëÁ^1:­è·Êc}Ͳ‰ŒBaŸ¯™»²r£ˆœ¾ÎÁÙÞìÁ¥¾] ‚GzA—Op®Ÿ_‚³¼á>­úf8~ªWà<ž]#¯tÏ6Á7ŽŽŽÖ*÷veî‰s 04‚±Û~<|±Û~âp<ãÉ´ A ‚@0`C†  A 
Á¨3Ðpk q >³$¹GM®"¿mêƒ`hl‚e‚Ý£âÀ ¢U{q¦è‚ æÿvÍZ²ÿÅ +PÅ >TtÁŽÏ&ágè¤ð³nXK¾ ¢a´núký †2,:a© æ+-]”.ˆ’_ï(;ÑP„ð Ôž2O𡲠N"Xá‚(;.ú6 *˜`¿–»à$‚.ˆ”`î‚¡Â >TuÁI+\)ÁJ,A0TÁn'è‚SüaC øPÑsÀˆŸ¡`Mé‚H †ˆšõL82Þ´S;›ZÁ^­?ƒ`(Á¬vÁÊõ`b/«ZŽ˜É©×ƒU<Öƒ¡Ñ> wÁ¹ÉFÀ4ZE@Y 6ÎÎ  M1Á‡‡ šf‚ T&‚÷†¶4¼Uˆ`]ünuë(‰Í G¶àáU‰`ÁÁ†  A Á‚¡H™Ë+ÐØZ6Að„ÔÛZ_ƒÆÖúVOà‹‹P.ºØKIðÌÿŒj¹Cˆ- ——¶Ì‚ Ö‹â^àEÉ´¼>;=žìù×—Ól¨hžeÔû8äµ3ø÷ÆÁ¶VzÞÒÖS;eîvúŠÛÊäέÉÜ[“à KìJ$˜lKÁEiÛ¹¿J—yx7u½$k†RÓ@pÛqâÓApe^´Ÿ‰½mœ’`Î.5˜ä¶”šµG,ÁúyöŸ,˜`Æî’eÚÿ:<eS¤[,å±øÄÿã+1«Hòê;&©È)e¶œñ†©8AU ¦“*[J‰à!Ù‘ ‚‹"¸Ãbßus7†³¤ãœÐq±tŽKÌ*’vÖ·.´î}v³ºþ8·"OP1‚Ÿöûý-‡`n0©¶¥$›ýŽp¨ÁE¼‡Mên°g¾›\² k?Þf©Gk`žö%gÉöÒ?ùúY½R± ¾ðx9Ó¼r2; n9S•-%!Øî¹íÏW­&.‚àV«µòMÍW¿±S­Ç¬7}øÀNy¬í°£?ÝŠÑ&g¥¥žù;+„nIɵoì¶t¦fL VÙRJǯ`€à\Ö"¾²Äx ø¼·" NΚš`gÑšF‚•¶”Á=Ç×£ˆ"ÞX[û7—àSçíg Ü^[»x·í°ö çh;’àĬݾ8…G9õ‘Q„ýoSG\Ð8ø7÷ÛöÃû«/¥Ú®¶9`m3n],>k‚ý¶Ì$Á&FÅÎä\‚ùžÆ’òC>re7.k6‚½u‘ê̼&ww5‰`¥-%!Xß¼F*Apþ·¶w×W®\·u…µqÝOšÛWYrÛé7vXZ^*HÎJK… öJýÌ’NU×¶«½Áçd39¬O‚àÅÅ/èz°;©ë.y×Â˵)²’d˜`²œéî  UàªèC9¯ü€` ‚!< cF~š¥7ÜUºÎìEÛf)çJÿ†;ÎO³¾Ëhå®­N9çJ»ËÊUØé™—Rïô„rF»í±Û~Êp<ã  ÁPöõ`(G` ‚¡¬#¹Çƒ`œ£îôÓg ¿.òSLp£ºŽÇžµÖΤð²¬Åù£Ijý‹›—o¤Ì¨›·úýþ‘‘µY©OP\Ô53u@#/>Ÿ Fu­ ;ŽŽýA¶ZûSDp7õîâáQyW`µRg-³Y)¢înxTèŧ·ù"ÁɱÖJüdŸÙGÝ;Á¹Ì,çVNtœNg×Yv‚µæ3ǘçà@˜¢9ÿÕuÇöÏsAsÿ>šÒ¬¬2ÏJMVkšìºænºò›Ð ™­é=ë¢wLw«Ñ¥ˆA›h–.Z§j‹ûÅ,¶»rŸ9všß MqR3ÙŇ¢O-éT¬ƒî&þóq¨Öp©P­ºPWÜœ¼u(>ÌN°ô›óž)š³ «¥‡M†!;šÕþ3ð?&Ré™jYoݰÑ]vN)k_jñj£û£I)²!4Ë?º-Ew3a‚ zO}÷¹Åeâd©£w$Žê‹÷k —RmwÔ‚7¢„NøÌ`ß­c]4Í[›æb¿-Çðóà…fõ*`VjÖ’ïŸF’öç¿õ æ®kZ“%ÿ"Üܶ®˜î̃ÌKIm¢Yân­¶”NðG·¹åÜëaÕ³œm#—!5“]¼Ù ÜŠ½ ¯V3rÍüÓçüŽùµ†KìïìsÝô &¦yÅw‡£ÌÌÐ4aŠÖaÇž¾e\— ]ÍêÕóÓ¾te ‰§>Á¼VÙxB1TzTxxî*mVør ]%âÛa¸ÀÚöú¥ï3g¼þa`·`ï×¼m¿Û’|æäžÛ»x)8±¤SáÝÜ˃ô¥F:Au6¹ŠüZ·ŸÝä+9«>Ê JŠº¦ðMK´-Sgmö³7dõ‰µ]è ªC°pK9©B³3øGUÆjJõ®Ê7-ƒV†¬ˆÆ<þ *C0³ºf-)܈@0.Iãù¦do 1ù"nYÔX‹p¯ù&e¼±5“LÀ”\*³4bò%ÌÒ¢ÚB<ÜtÉ…,Ø£`³4ÁúúÉÉ!k¹ j[æ_†Gê‹&e¥^f¡ˆ=x£Á:u­Ky­³4¢ñ|Ó(ÁdSÙu6geGŽ˜OŠd FvjI›¾”\ªÍqÔ*„lòU·…ìã'P¶Å(Ï“Ñ:{>8ó¤¾Â¶Ì¿ 9ŽC•÷1‘wÆïƒ7JW•";ÞâOƒìhð(¾iäþR“/î–%kùQ{hšý ¦ygW¸­Q[3ÉLáÁ¥6K#&_”`u[ˆ‡?¸Ò–ê웡ñËâH}фÙ¾è&9Åq¹W,×J &¥ÈíM:l5Wb'|8Á!s×£á²o†FÞõJœ·µds¥óƒzpÑbòEÇÁI'(kµüÀs8“ ™@Hq¤¾hd˜ê³(9Åq=¸ïØŸÁEJiR[ö~mÄž@áñQ^'<’oZ2ÁVøy/G^±¾P‚Ã3¹ô'(‰`“‰`j"I°b!ÚZA‚-¹-á™\Ç7-™àO-=Š`î¶‚½¨»gÝîþV‚%_´ôkÝîšg–Fo”(ÅÙªLð8¾iÉSc-ÁɦPy¬ëY .Ô,-uŸ5)8‰+]§B€©®D.ëÁ¥(E<ܪGðX¾i©þ°_‚ÃnI'(w&W$ÁjªÂW¬(¥lKUË7ßßÉ»ZÙXKA0µ5{&¼Ê¨YZ°(³4Ž¥¨5ª-Ô,-|!˜\FÈ mSŽ#õE£g>`’SWo׿bùF)J‘Û›t‚ <¦oZx=˜ü½ªÖƒ#frVüoW„jµö£¿h XG‡ÛÒU¹ˆ‰õàJ,ÿvÅÓÐzpÔLŽ8œé±ëÁQ ÷IëÁñ'˜Á•ðM“œÏQ*êPöNøƒà™Œ%Á ¸‚gÇ, Ï Áº˜†'³4<3C O{Ô!ì¶Á†fH Á‚!C C³L°>ôSWÞ=© o¦Ïn){›,…^^E¬r!˜›|éý›¾f/6÷à’¶±#½]ýˆkæ]g3uÓtÓäeÞöû¸éØ3ušÁÝÐwgS`÷L‘:ø¾ú'¿©Ý2¼ŸL§ëå;±€`<>ÁÚÉš‚ !˜›|Q‚Uv\Ìa+l†&<¸4kOé&Τr@s*¥¾gš—Q§~b^©ƒ$k7-Ô,câ/lÚáôþÏwñº®»ÁG3NNN¼æ¶OL <>ÁÜ"Iêƒv\–Þ ›¡©öÉÉ~`\ÊM]vÏ­Kfhö :ìÖîïäRº­ÑM]ã>EòÝnÍ$ØyŒðWûOÑBv=¶¾|/ÙqmXO÷ÂfhÄŒ{•É~`‚à·Â¹K÷kÕ¬·¸ß—gÖ±.šæ­Ms±ß–ü¾~di-`òEšUY‚/ìu· 8‚p».~ÿ#L ¾eË?žsÙœà¦$&»Ž5ßäK³þø[Ã7Có¬Ã:l‹‡û_¿ôÙÈ8˜øÉͺӯÁwžiºG0ÿË:øÔ†çA0ÝäO°®˜°ŒH0ÈVRn[d‚-2¢ó!KÊ=y“ ˜ BK6 xl‚‰ÉW‰ëG[줕 æÖa2Á_; \ ÌýÀ¦`í袻ì]xwÛÛ†‚Ç&X²ç(`Rkh¨$˜ø}ÑQ„áùÉÍÒõ*¬ —Ý>8¸â¸îEŸ‚ï½Wl†G•šÉ±v[ x ¿/bÇA0÷àJ&ø»€É×Ï2ÁÜ:L"˜ú}µ6Yº­0ùª Á½K»ZàÞÕMME°¾»eÑBŽZVöÁ¾W2ÁªßÑHœÉI~_ºr=¸J]â,þ ŒïärY‹ˆ–÷E~n’<˜gV ,£.ƒà J14è­Ä8 \)¥žUñGg†à&³5[ÖAðH½°žo><ÁZ¶fÛ¸P™£Á‚! ‚! 
A xÆÇT¸¯‚`O’`Ä ÷X‚` ‚GÖ@ÁãJŒà#sy¾ÔH'¨Ášé~wÜl:†^®×M&G 2_è‡yõ7øàQçVt£]<ÍÚì৸ú„ÿXr1'¨Áä wò:}=Qþšd‚£³ÎÁÝðoxgºø±ã¤Ç<7b$‚ÿ‘Y:Ýd¡'I ‚ Ö™Åu6ÁÎË—>kjÒîJþ´‘Ìиm™ðM#fhîç›.ÁIYià¹]›0KÓ]w4]m±HNÖ7Í:·¢mW^|ºjFø2“B*¥ˆÿ\ 4bpW‚Á‡9ÚÌÿl%34¾MM¹¥NòQIÊJDJ‰ Ýá²X $'»OÎ:7ò’â¾øt!Ý6—)ß–pHuU)22Œ?ë'ó¨üNøl¤QÄÓ>Ó$LÌЈm™ðM#fhîçWÝx¶b³ÒQ‡SÊ™³4J0·X# ÚRi‚}9nE'ûË©ã´è&_¦/7Nr­”`RŠúÏ%œ`ÃZRœ«„Nøp$‚¿0ý(ìx•Ý–lóédb[FÇTÜ Ø9p3´ˆ¬DÝ]«Û‰[O ækÞ£ƒ-×Im™¬o'Ø·¢“ NðÕPƉ³H®˜èÁ}»WëeRRJ“ÚâýBä ˆGxSt'|6îLެX‹ ¶ÈóžGƒo¡—B0ŒÍEhq¯›mr[ª1“ãWœ‰`eœ‚+Fo½Z-RQÊ’ÛžÉUàÃP<6Á_w»—b æ¶e"Ä „€›¡Ed•´ÔíÞZËL0iK&VtVÆ)™`­Û];r}˜?qΪJñàT™`·>3r!X׆+¾ç—3Š0<Û2Õ‰Œ†"kP/?Õ›i &¾i^[&»{Òz(¤©VÆ)˜™ÄC×+~•\¿¢õŸ«Á‡Á.x ‚ÔÄ処@1+Â]™\‘«© XÑ©K)ÛR‚Y'|6þ7ÜÖÌ»ÚöÖnSM0±-¾iÄ ; IIeV*vÌ­•˜¥ ·Åš“•¶¥j‹¶‡/~SÒ@œ`òeúêí¬è6¨RÔ.á ø0Ðý­²+ ¬ÁJ+’ÂÖÌ Íä”Y yN½x²X«æz°dE'^ƒß4¢gr"«ze—KùÓ$)ÖƒãO0‚³3c4‚¡Ö" :áC ‚g2– ƒ`ƒ` ` ‚ApÁQ‡°ÛC š!`è\ÜÔ Q¤5Îb㙆`}~®ª¹yá,0ž)F¼Ç:ÂY\<“ FÄs 9™3‰#⹆|ªÂùéßLÂIGG¼¦ÛªE~\×R7'CÖÊjÁFŠOE8ê^âÕçŠÜœîh!  ëÑ!ï°¢£?¾ñ§Ô×!keÅÞþ~G°>=á–I°*r¯Ü½Éï’♂àè¿‚_íÍÙº{Ûj=_°ƒ±ø¼ÓÕLO8Ë$X¹W_±hÄŽ¸RUImî³u'Z7XÚ–óü«ÏñdãÆ^º6W¯ùí¿?YmäY¥ˆRân²!'í¬ä_Þ–zë î—-î>¬Y¬r=¦u{ §›uÎ)kV÷²ªÂiÜw²ú …ÚB²zµ“YVE.ÅŸÎ\:‚›ò÷·ÌÕ<ëðÛ¢óo»™ôž“µÃŸâ‘1/ž<«ÌDGÊJj­ËIŠiËç'n²f]`þ¼W6ÁzÔæ$Ãéfµ‡)Îf¸…:ÍZFÖË*5 Ô–@V)™YéÅ3ÁšpËR…üÚM6è¾v‡ø O6nüÈÒu–õÉU]_>®ùÃv'é–rÇê<«Ä/%NP;î±äƒE)Ið¡m_ðJ•Bðüq-x,‹Æ\›d8›Ç†“¡fX«º~㎮·Ž5u8mŸy³JÒ‘$Yy­R2«æ¯ÊÙ7ÏVð¯!Ï/HW‡Üý{ùøœ£±5ë_.ÛÍq\<3­¦¦FwcÇÖÅV£}‘%6.N¼Ðc‡5yàÖ»b»ÏšÛÝá¥DV5ÁâµÞ6Köæ¥$!8Ô‡`7ëÏì^Ø¥^-fæÑœd8çœÏíJG„ÓmK-Š`‘•×*%GŽœ ˜!.ž¿Ñ–è× Ò¬øÓ©‹LÅÔƒ®/ò¬j‚“Öƒ Á¡¶4Ñ+¤…7*­þÌM8œ–Û@JpL8½Z#žb%Þò/›$GŽœ ˜Þò¸xfýV™,ÁO¡þ¼Wö¥ø… çŽçxoö€à,J|³!žã½] ‚3Eo[Ïóò†ûÕ¡­¿1Ê9à@8g€ÏÇ.£ö¡­z5„ó'vz–p„3çxb·}ááÆnûBã Ç8žÌ¾ã Á7 ‚à›†Qß4ÌäÎáLñÆjÚT¯¦!âøFcª¿ÑÈ9âÍÓÚ¹y×O/œ#„G|³§&vs{9êúÕ¹¢)?žv‚¹ÏWÀÜ+~§yÒ›=Zk$²Np=¯]ã1Žþ[¨iY×ìÞ&Ú$¹|ÓB›€5kÕâZ™á?þF¶lîõ$üF¶ñ„·+µ^Ò÷·¸®ˆ[Ïs¸úZ{•B«~Ã=Ï1|ÓT_`®C/þEiy43">_ÔÜ«þ¼“pÕ±¾i¡Z¯?Œ ˜¹?]¶Z#ø;)¥ ÖÁ\C{ûˆ!6žcø¦©v:Þ'oT0Ô\Ëý¯ëÃrïš‚ùÎD²©n&öˆ±¾i¡Z7ÏÕ”³|…1CM 'õR#¶fábuk£Š¡½ý;ðñ»4Èñ}Ó’ –͵¼]ÅVÀè«>M=qà”>_‘á ×Ê™…`Ù7mûI–lkö°Ò“ ¸ \”oZÁ’£ÖÂu¯ ;aùâ]À²k:tÃñù"æ^)}¾"ÃÙîa£L}Óꆇ¥3Ò/1$Ü›°šFWŽà¾…ÚæññêBBeÿ5*BpfŸ¯j…³f]¸c]mî3N°¢z§§z1¢o𫲾*òØ ÇÀU¢ÏWåÂiM|ŸÜšUKí “«oZc"F_•#ØÓ¸dŸ¯Ê…óÆÄëìi½/ˆsK̇®Pß´ÆDŒ¾*G°Ö¶…S­Œxµ}Ó*âÔA.Çb­™Ê‡nß´\{™¾ª<ŠPX½TÝ7­zûALéC7ŽëÔø!Å諺×Wõ‚«ë›V5‚ESºxCðdŒ¾jÃåš8û¤EbPÓ† Ë¡ªû¦UŒ`Äü®œÑ×?‰Z'­P âGÕóM«Á!OüB}Ó&eôµÐµN\Á° žyTÏ7­ Ïíì<¶n;m$A,Á7mRF_ýVYþa.åêO}Óª@0i#½\ø¦UBðM+:žðM+ñi ß´â ß´#ŽpOø¦•p„3g€±Ë¨ì€#œùÆ;=Ë8™s<±Û¾ðpc·}¡ñ„ã Oà›AðMƒ  ` `ªÁBÐrˆ6ÂYY‚ïôc>ìô´YmuZNWyy0X£ÙÓAðÁ`ðëÀ±—C’Á†ar×ç.߈9Û†¥§ÍÊÔ‡\or•«O¬íðÑ–ž¡-ŬyÑÔG®Z7oõûý£ÀÑ{o,-œe´`R3Û‘·v¦ö]'YÎ&<оíþªUf±áEs h¬–ò©'°í†Ö¬Õ`rÝñÖNÁM[׬%û_çBp›…´©Ož`m´[;ã`?"ºfØWí¥Åu³H4½„ó‰îߨè¬2uþAݹ¹n©ƒxšžÀoͪ‹$!X‡}h"š-²O°.]·w5$²$Fº¢]zϺèõ>órŒ@°2ëLüú ã©e-y=õÖï-ý§¢{wL–v–"³Ê]¤åŸÀ)µ$²ZûÁt½ZiVƒ$ÁCÑ€¡xp+›MÚZ&Á¢Ý4²$FCE»Ä…{,~ØÁ© öWMóú‘;!ø­‹‚Ößd“ãn€à%ûàš3CSe•'Z7ý¼eŸ»'8ø‘¥µÀ žîyµ’¬Ý¥Á-Þcø¹?yR7[´µ‚ŸÚó°-Cn·ˆ,i—Ùï(ÚÕܶ®8qÁ#üÃÀ†sÏ[Èyú–Fäw[‚yR™U=†³»"6—~/ƒ5ë²fë§}Vë·Wí#l¥Žd¥¥Á¬ÐªÛ:–Ûòa?ØÖ þv0Ü \­ˆ¬Nq Šq0eaD¡`© Žž…Dlù òN&˜ *Epô%„©^¶ÁSGð']&=Á­£»ÌÍé&¸ç\¸‚Ë X×S¬ë œ ÁR¦`qµM}p)Pˆ#Ø yF‚?ìŸ'‚ãÚ‚ó%Xß¼²kkÓI^³S]‚oÚÉM-"k2Á­M–·-•¢·ÿ;vp›Ý Û<)&m1ŒÞ%–#ª-“"˜´[D6N?†a‚½¬?§"øÙ®ÎÝ+›:/òòÙ“¼Æú4~=XI°Í¢j=8¼,·쟬+’Oã–±'¸Ì•°L¿“#kæI[åõp©Y&¸Š’ÿš*m¼] ‚A0‚A0Á ç¨í  ‚!C†  `Á‚!Cç`,Ám„ªÎíÃÒ„óÁÝ‘ªÖƒ¾HWÂ%®ê·Æ²ùŠR«x£˜~% nù¾i#¾Ú¡÷ÿÝÅÙhöã3—ãWu‚‡¹û8µ3¬„p:¯°ürI£ˆt!ׯYK·॓–cruï—7æ¾iMjôeKsF*[3?³nȶf„`îe&ìÁ¨šÂbÍýPÖ*J5]‰ˆš¨ÅZ Nw‰»Œl‚ÿ1lý¦W7yì@bÈ –½ÖˆÉ\бN L>qSB0Ù›&í*VØš¼Êø66B01$±fhŠ-uìÈ‘q¬•:‰YÒ&3̓Fòp³õ~òê­~÷ÿÕûp=°m±íp;x‰JÇ:)0tŸÜy#Xx• ‚Õ¶f®ç—3a‘lÍÈ-ñ½Ìˆ=˜8ÒbÍzhšý ¦yg—9 ñ¶ˆRÖ(ÅÝØˆ/šÛ–/'H°fþ·~?N#F°3=“ –-Ô8Áú"'˜_¢Ú±NÆ‹ì_Î'Á¢ã}p_Ó›Úí_GÙšuºæý*ãë—ìóþû¨q0Ù*Lþ.D­„à]¯-''lã×JK©”'˜ÜLîå``8±QY^0‚íÖ2Á^Ïíý´ƒ+Ü1’œ:"M;ÎÁlÏ·NŸÜ[³¥ª••`+ü¬“ã)#ØÖ©eDph&‚s!XekfOº»Ý[kN©¯Ï3ÌkU¼}}ür: ît»û[Q_¸Ó½rÔÁEÜšÍaLj²5sŸ—ŸêœšYZÁ ‹5J°Òuj ö×A".áoÞèGß;–¶> 
ì[¨à\úàH2:‰¡&Áög`?FÑ—`1‚KJ,ApZ‚/}fÝYŽ$Xmkf8Þ\Žç—dk&Ľ̈=˜8ÒbìÖú{çös 5ÏlÓˆ"˜”šÁîÕîîn-ÑpŠiÖñºÑÜü« ˜l¡¦"˜ÛÔ)ë¶²ÛÚù X,תVÛš…Ö`CëÁÄËLyÕzpÄLN½¬"xòëÁ¡o•ýi…òÇ1Öƒ Áêõ`¥kè9\®¸”öÑqšøL.wI}Gªo?€` ‚A0Á 8ÁÍ¡­eý\¼Çb é ¸2jzâ›Ú‘­¦1k·ŽtE2BJŠAÉC†  A Á‚!C†ÎÁ0ú‚ÎÁé}¸^„mÙ,¹wASF°écèúe¤÷áº÷FìôJ[J¸|èÞy5S3¹š¸‹ 83Áì­rGÎ[åY4ÂN‡ßÑY·þ›Å¥á.‚àé#x¾Ùl¶¬;Ͱ ‚S|rÕGgVu‡‹y\ÂÂL tï g#Øñžñ6Ó@ 8ûL.°ËŒoÑò7œI[°èn.ÑÜ»¬PV •C0÷áòŒ¾ÌV€àq÷}°_Jw'd̲K6ÁPq34C»°ìzp_Ó4ÊšK0ó2[õö “q05¼Õµßm9$+†Š#Øñ&9 oôCƒÀ(Š'xÇÛ˜/mŒÁPÙkÝîš³U…Ü:Zév»7ãn]êºc“žã‹†QT<ÁßÜÖ_¢èM5gËdÈuÊŒ!X׆ÉèÚ7ÁP‘GB^eÉ|ï%ä¬ *›àÞ®oôE|¸\w¯‡‘ë»ÿôËÛq‚¿s«åµî^ÙÔA0TÁ¡_¼°RÌä’׃‰Y6u÷ÁÞ®„@0`ÁÁÐÌ A ‚@0`CД| AU†ÎÁsTE`C†  ` `Á‚! ‚!œZ‹§õ¤,S[½ä²wðÅi\Áúi ÷®ëLy7¤g%ÜpÞÎXtÓÇ>™wÿ)®`ݺ€{ ‚÷¯‘‡¥|óOê΃à¬kö¿KÏ šR‚2óÏÛ¹F}ÎN»Õý’É&YyY÷ï¢ÈDÆs~ ŸÕC×¥êÖ5å©áfCçŒà¹¹çÏç^}>÷IJÚÎÿγ±EÛ¡ÆQ È$«ÓEþù£ÀØd¦HµÞ´¬Á¼Ö¨R¼Vû ý¨JAç‡àáñ²®ïÞ±ÿïÚM6¿cɺå$,F“f%·íƒÝc»îæ±á'I¬?þ‘O ­0Á¢V‰`ÞRkÏzrÕK*KAç‡àNççæ®þ½ývwÌVÞIXªåfe¥$‚Y­ºåœ`®^¯/[1ãàz˜àW/ìBõãwªq°{^kÏúöòÜÜÂi#¢tžF2–Fv•‘`·P Á¼V2kT–‚@ð˜¯,2Õ³üÏN¡V4Á¼VJ°ªtnÔŸþ*Áõ†”ÕYˆ Ø=A#Á¼Ö(‚y­”`RŠ”%ç®7@ÉLì°–‰àáa\ÖÁzÖQ¯5Ž`=8Š ¥È x[¥$4+_¿zõê¥^]"ØÜ°^ÝXfÔÅ“B”Ò,¿å´éî¸Ìë佫¸ÿ¯^áuK#¥>šŽNøÌÈ·Þ¿Ãæ^7ØHÁM.Ùܶo³Ô#§äðïÂÓ¾jü9{Vè2ÁoÙ±ë65Z“%»E|Å>K+x¾‚ü¬NÓoÝt¯bÉ>¸Ö·úÑmï*ÁÍþ¢Gðt‚Š|î‚Çëƒ[­ÖÊ76t_ýÆNµ³Ž÷á;åa¹ÃŽþt+Ë8xòI¥piÖeÍÖOûN–Î ‚hÚj˜`olØÙ>ì§!˜gµ›®kw+ê*Áºµí,Ÿ âûðYÞk_Ybè`"ÞzXºš-‚]íç6“㣖| ö³Fða¨ «ÞX[û7—àSçåg Ü^[»x·í`ù çh{¦þ¤Ë¤çDpëhÅ©o+_‚ï='øÂî•£æìvÂgFNÛ¸ýæ~»ã³ÈÀÚÃÕ6DzmÎÁÎ ×Ý ›ú¸k~ÓNNò%ø‡nUWñ7oô£ïK[NÁ‡Á.8Ÿµ—Å_¾çŸ±¤‡å‡[³G°wÃÇŸÉE°î³5b³±ÐÔÌ:á3#O‚[Û»ë‹+W®ÛºÂ0ܸî'Íí«,¹í,§mì°t`-â¸yyÖëW·ÍB¾¹ËdãvaÛI]ÛÔ£î]ò²ê›×ìÄCçÞÇŽm¯²ƒWXrÓÈŸ`·i¿wÖ6wØ ƒç“ZéžZ” ÜÝfYÁnVvšu¼n47ÿ"Xß­þëS‡.xl‚_¼ ëÁî¤Î]&ëÁ6–áõ`+°tìNú ˜,zŠ•V5Áì;2/«˜d¶L4-~=˜œÚ/$Ø_æI¾L³æ÷P)¯>3r"¸lõ&ñ>"yþæ«1ßM+†µ© øðÏÁ…<ïùØdª‚§’àBæ\e½Õ4‚Ïãîß ‡ÃOõª¬çÿ*¯®Ï0Áçq—Ñ£££5–ØÞ¨ ÁP6‚±ÓO9ÁØmŸwÜ¡’ý"àx‚§œ`ª´@0‚!C†@0†@0`*‹à;ýàm°8B)Êà³Ce ÍôÌA]¾á%¸ñ¢÷þ¬x)*Ž`ËR2,9Ñì­ôCP)+Þ°ÚÍfsåDÁPõ V0ìî"lž,¹öŽdßxQ³ö„/¢î[(jö‰›¢ç¼@-]«H·Ê—àöo¥ì²›¢»ñ×w0è7Å·^wȪڟ Ac`˜¬›¿ûÆ‹ÂÑìw| Å ëéž—¥BY¯‚`¨‚%†¥>˜˜/( g쬚çѸa}Ë~pÄû±’§oY îpAyLÎD°%;KûÁДÜs<5 UmÑÔŸ=N3Š0<¯G Uk&ç³–‚`£h¢‡VÓ˜'äΦ&Ì –®¤“5‰`â Ay¬øFƒ~«LŒ Ÿ†ÜåõàX‚± BpÄÛ=E £(W‚KæC¹\:¿ Ê•à’Å=#!h* æž‘4•C†@0"`Á‚! A ‚@0`C†  A'øå` áÖ@ãý†»&üƒ¹¸p”ZéMýî½±@046ÁÑ»ŒºqþÁQ¥oUCyÁð“ý8ÿ` Uˆ`ÃZóٖ笻ÖÀ.˾01Öœÿè®)ðð±ÂXSX S‚Sa–v쇛á=HÜuØMhr©È@ç‡à8ÿà×_8£Ö÷"»ì,ñ(ý VÃRÌ>_2k ]e'Ák}mùYI©È@ç‰àhÿ`™à°°Lðçôdܲ¿©°&t›}~ýˆÕúö—•È-åÔúú­Ÿ•Í*ýRQ'€ÎÁQþÁ2Á±¾iÊq°—õw[Aß4Ÿ`qÍúão½¬t¼<Ð5­Èj¬Ù꿞À÷2†Î!Áj÷Õœ6Œ4«_Æh!‚Ɉ†œÀ‚­®ÁKÝî­µÁÛ×™iñË ÁÜË:ߣˆµ![|püƒs"X×G'ØÖKÇ'…´%TJœ  KÌä\Ör"8윉àP[B¥`PŒÕ4I½Kž)ð…í][¿w&þÁÏØÑM#` ÌJíîJq}óŠœÕw%ö vO°½I0ûØ-õú;?+iVä  óC°ê[9úËpéiÄïÉY¼co=XÑzâeZv›…õàsNðþ•úUZwè`JðHþ« ªÁ#úƒ`¨"( M7ÁúÑD.d{7cŸ‚!C†@0`Á‚! A ‚@0`C–ÔD›³îý÷ àXÿ`…ƒ0—ÞÎÞß…B%o34e#8Ö?8Îüæà{ U‚àÿàfóÞ†*Op¤°ÍéäÌ|íO~àþ½:·ÿeÿÕÕþÁ,džÕö¾ºìüì=Ïj+arZgò6"Üö<kŸ%}vàÈ8UûEXû’óƒäì p~¬„‡°óÁ#úË»N¿¦ãØFk¸¬g$ØÖ–S!˜$•NÀAK×?Á™üƒÃî"™à½oÁö¦@0ÁÙýƒww·–¨po—Û÷­M–nSÏ^5ÁºSȱ÷%Ø×a¥0%XøC 8“°·Ë—k%û^=¼¬^‹Hþé9õz°X±` ‚sö† Ê ~¡Êkž+/ÿ`ªÁ‚!C†@0`Á‚! 
A ‚@0‚A0‚!C†@ðÈ?¸‹XB#8â wMéóäî*†  ±ËH½»CCÐú{ M5À†¦` M7À šn€A04݃`hºÁÐt ‚¡éCÓ 0†¦]Á4ÅÁÐtëÿE~&=ؾ‹IEND®B`‚libplacebo-v7.349.0/demos/screenshots/plplay4.png000066400000000000000000000534661463457750100217540ustar00rootroot00000000000000‰PNG  IHDRÁõ1fÐeXIfII* Áõ†Œ”(1 œ2ªi‡¾HHGIMP 2.10.342023:07:30 21:28:25 Rbê„iCCPICC profilexœ}‘=HÃ@Å_S¥"‡v"’¡:ÙÅŠ8Ö*¡B¨Zu0¹ô š´$).Ž‚kÁÁŪƒ‹³®®‚ øâêâ¤è"%þ/)´ˆñà¸ïî=îÞB«Ê4³/hºedRI1—_¯` !D—™YŸ“¤4<Ç×=||½‹ñ,ïsŽ!µ`2À''Xݰˆ7ˆg6­:ç}â0+Ë*ñ9ñ¤A$~äºâòç’ÃÏ ÙÌ ¹mMA pHYs  šœtIMEçY•’bKGDÿ‡Ì¿F×IDATxÚíwÛÆ½æï‹Ii !ʦ$J¢dK²d˲LWu|ãÚ–Y»vݤÑU\;ÍÍuº³9­ºÄÛty÷ì¶9ÇÙ&iÎ=x‹;3ø5 H€€|ž?¤8f¾óÑ`0À<ü—+”gý BåàyÊ«@0‚!C†@0`Á4‚Ëí"âMÁÔà ‘Y 6i°™<ÁФܱ¹BylŸÛÃo2hr ¶þˆßï‚`(×mg\pÈ©¢Kð*ûYŸ¼ó7—ÜMn&ùg)¼‚FJ°ƒ-“$í¯Á"yÝpݽ¸Nºd… Ñ\4ÞìÞžíÝâ·t{w,·XZ 0¿óïôè Ÿ`‹m«ñ½>`¯én !h¸wr·Û Å"¼ú¸Èôøë.ãàªO0ÿmðû¿W/ø†»œàö Ûñ…²SÑU©ÁLçb.╌ú!XŒ"ˆ:”œájxª,Jp¹Z½¾>ÿê‹*W¥‚ç«Õ­;%Äʬ®:*Æ\r?û·sYt6ôIðüüz§„¡1ÌEøßýz ‚ççÿ¹h„—Övv¹6¬ùÅ žh\½÷ÚU¾‘'ÉßlÄììõ~''´»¶€C£œ‹ž*KóÁóóO½›ºùb0É«!8˜.:ƒn " Ñœ‰¤Q`ÁN¨SÏâ ¼\¼Ã„¨BùE@†  `Á‚!C ‚&”`‚ò* MÁ¶¯‹¦š³fñûúm‹éÆ1K¾~ã%I«Á“'5yû‘em´.YÖ=Ó¤~VûÈ˺a/±ÔjK¸|èž rÔÐN¢`Ñ…à0ÃÁ¯?à?ï½U’ľJ˜~PX³÷ܽÎÎÁüó–ȺÂ?üþ€LYʲ˜HY!(=Á*Ã=vO°sd M*ÁïÕ¸hÁ[7øÇ/A04©£Á%X‹%††HpÜ\7‚¿?ÁÐdæ7ìÒÖÓÖŠÂmló­ 3Ž`g¯?,Ÿ{Y7ìC–Úi‚Ý£þ‚¡”Gù sò,…ºcóˆo5Ì$wr¶—uC¤ÇwòÎQm ¥"XÇovr‡¦é"z £( ÁÃåCÃ%xØü‚`h¸_©Þç«÷ E“A“C0=ö“åã^l^:f2ÐbÐ A Á Á‚!C ‚@0`Á†  A ‚†DðVx iWûØ ‚2'ø"dáÊÝŸ,ï]È«7ݵܵsÒK¿±ò÷‚ áÜT öVl¨k2ÙåäCÐ(Fº.˜\1 cùŒ‚`h nêº`w=q¶Ä~QÃðA¶× ñ±÷YÂ]LÁ2btAØo7ìe8 ²J‡ƒ Áïä.4]°¿¢Íu~xã50ÍŽ7à`{Õü±‡·W$kØf‚"¸©é‚‚©õi@ð6»Á++¦ÀVkÑ3Þ°Ÿí{þÀþ^‘¬» Ê’à º`µ6Ÿ½‰»fh„Žå†ýÕ.ÛÚvö}öF“Õ |Ó (‚›Ñ.8Áþ0A]M‚¡ÑìuÂRœŽàº°& ÁÍHìÍEÐçO’Œ"DVôÁиv:a¹ ±–€` £h|7Ã]pÈé×'˜4v¸—°‚%m\ó²ö"ØÉúCÌ;a¥ =Uú`óYäë]Ôùà®c>ÁÍPÁP^ ¾y||¼ŠrK0`#"†  A Á‚!YÖåeh`]¶@ð˜Tß\[…ÖÚfàõ*”‰Öë ~¢@ì{± ô²Ò¦5d‚Ë—í6¿¦yymz:~2Þó¯]NJ°©ßâ8X–‚e÷´{ŸÙ ˜i¹>Ä&-/qUæ®$?pE›Ü¹5^‚ëË,YAô&جࡼ%Úã—É2wî&>®”5Å^y ¸ÒÕ©žà*»&Ö?®€à„ûì:¦“$H3ä`)ì›Y‚àÌ æì.Ùû)xÊV.ó”ËâS+øÃSϬAÒ?ü¢%Hìe•ÅxÃÒœ`R ~-._KÎ^R±¥”îhݳAp¶/òØ×ÄOQžÎ ‹–bk°žYƒ$ËúÆÖig'«ãs+öFð³V«µ)~ÃÿÛn‹.ÖKJ¶”Á’™%ÁwøMÝM~Íw’KŒ°Ê“-žz¼*ûMô¶¯wÖ YYúwগÕÝëGFð¥'—SÝWއà¯Úíö-Aðb¾á­ ˜0µÂ®>Á’™%Ξàr¹¼ü%£æ“ß±Tù ïM=d)—µ¾õ‡[q#ÚÞY彞{‹Âïo)ÉÕ/YYs3– –|°õÛ³ìk2ª¹ˆOì`<¾ÞÛ±÷Κ˜`1Š(ç‘à­ܪòe‚}3Kœ9Á««q>oˆpeuuýnE°ö¯bk%–àžYS\[]½½žC‚ƒdì(ÂtÍ,Að0ÆÁ¿{PaïO>S Þ(´ÒY©ø€U¬nóbݳ¦ ¸ê•e* ¶0ŠîœCðûoýÏxR½ÈÇÎìvËšŽ`w^dr æ–{{D!øÒßöAðsžl¨³i’™% Áå­½µêòö ¦mÔÆ /imíòä–è7vxZ*èUÞ+J°»×O<)umk²ç"ü{²˜;9̃àjõãåù`禮V MòVW£Óµ ²JÉ(ÁÒ|pª'Üc"š4‚'Eßæ• ‚A0‚ó@0Öhd§izÃ]§¦Š5’s%Ãg§i_e´|—iq4çJºÊÊTX陕¯ô„2F«í±Ú>ç 8žÀñ‚L `(ý|0”¡@0ÁPZ‚ƒÌc ‚Að,|§Þ¢ø£ ¨‡wã?û|s* ¦ívk°cÒö îF0QÜÑ®Þôâf¹/üªÞ<}©ìÝyÑX/w±Ç4D])mýõÍ`Ç4ZUÜ`çMöè;év93‚;dzE°ÒgoLh¸s´å3 ‚³#X)î© Û×E?‹ÕÅgKTxžù ÛëŽíŸâ曢ö;ìFD>o³äŸÖyâZ¾þ}`ÀF‚£Šä^z –—»zS'p4Ty5|aw:Ó”ÂL+Æér§k7û%Ø4ÏÎ8¶¶ßah×Ãù‹àØ^µðØãûð[sǶ”¬NÒYp'°GI[]ô<}Ë« 4®0ÊØC‰Ó–ŠãÔ­F¼ˆvÁýL­O‚·¹K¡â&™¢mØÏöÃþh¾±¬ÅÖËÚlQÅ?­ó½³ ØH«áÕMþ8Ms˹W±K•WbÉ ÙõÛ’Q]Õ%Ø ©ˆãÔ¹Ó5£]pŸ}°4h ƒù0U2EÛ°¿Úe[ÛòèùÁŸX¼y¸×l¢ø§…ÆÁ!· Õxb±= s˹vÛ™ÎÑl_åîs?hŠ,§.Áµ6 ©áe2wº‹H<$‚ý…ä‡C°4øHCðtÜÉéB*WÞ–ÆMI C‡iu§kFºà!웢õ&XòOÁáÊ¿'‚CS¼T«ÝZu²N;ÝE¸ N=AŸ?I2Š0]S´ÞKþiiηåZR‚EiZ‚™^Þ¢?UîtÍpœ–àðs‚-;1ÁVúQÄôÌEô$øûƒôËÑŸ.wº‹Pœ†àCÉéÌ'˜4vØæ-%Ü’)Z/‚ÿ´ú•ˆml{®knò§i"رœÛ\’CXÎI•×ìdÝZQ Ç!Fwºf¨ î÷©²ô陯éZžîJ°:cYÓ°Mõ|°|£õ,2Å7,u¬Ò—x+ 5…îtfC™ õÕ 7A0žÊX‚` ‚! ‚A0‚A0ÁCŽ:„Õö ÁÐ C Â(£ `̦a6 ƒ`ì*ßk2AðÔL,ÿ=Uׄðæ-_](màØ¨“ÀŠŽ‰–à Z¹ (b×7ÁÒ»ê®)±×•5„²Ÿ¨«v0îz!ITyÃ}ºÖ MÁ+ÜÝèåWÆTY2êO¤È‘³¿† ^ÿ@vzÿ‹'8 ß´PÌéëBÎ]Ò6#â–o´A&ÆsÉ ™,:¾i†±ÿkésãñ¬œoZ"‚Uç.§—qÌТë¿ò½âm‚ÕE„,ŽbY«ì8ÈÌ"2ñMK@°äÜešåC—PClû0â6Ësß4§¦­wl,·(i­ +º7ìó–Cí½}‹Î<Á™ø¦% Xrî jÛ…ýÀfq¶MöM»ËCEÛ†w'GÛKåvEøn«FøºyòzwrÙø¦%EÁT, ×ûÍê("ê]â…To"ƒNx@ß´DûÎ]*Ák‡ÂÂKï6ÛÿÏc¦¯ÞÇ|}þÂ~²9ãì›–`Ósî’ 6èÉGr˸~`SeëÕ/Á/:\ÿø¯Î²ˆÓ™ˆ‰Dðªøüïöÿy‚Nx0ß´äû—¼ ÖX¬a.B¸£»ì¶B¾iE î›&lÍ»®½k CpàÜ%Z¿o?p,ÖT?°Ù%8bk&®ï6iìli|Ó@ðà¾iv`×e«·‘¬Cã`ÌG£²5 ÍG|Ó@° ß´É"꣆ožÎX‚` ‚! 
‚A0‚A0ÁCŽ:„Õö ÁÐ C ‚@0`G~¢‚`UWÛ하ÚÎ öwZáH»Úó¸Ñ½ ì8§eñêêÊSå Ú%y¯Æ^½én `{¥÷ÆM40Ö¸¨œeQ Ú‹`_Gߦ Áqsû¨kö’g'‚¡ŽƒýuBÔõ¿)T;niFÁ±c¿=Ó;*9j æ‚`ü£½sw#ö¾æ”˜ò ÜeÑá²dôM ÁbL±dš‹üw™ŠEqDZ^¬ Û°i`<êîKpG^ ç­Ê;òLSô'xýùÌ?ª·W$«…g X"øúm~Gw㘼nY·~nYÕV…ØGžIšã§¶Ê±7ìgûnÒ¤î^1;{í:}°õ©Oðw®ñ¶þ¯;­ï¨þ^‘¬» K¿~ÉíÐZoÁÔYÃLì5½uÒ†ýÕ®7EFݽbV]ü•Ñáq°{v¨a‚óŽ‘í;+°qÁÉöÖÓ'!X€Jk÷%Ø&¸®,&Á)þBØ¡UÓ¼uƒïô2‚뢃à¾ÔPšŽ`ŶqàQ„é±`Ü/Á÷ÞŽ™` £ÜÁŽŸÚÖJˆàçžù—ä&ììõ‡X‚½… .7ønKé½V ààžLÌËÇÌ'½“Sgn—6êÏKò|pW‚1 ‚¾›6Ñ}1ŠÁ Á Áéõe§Ó¹OA0î!z<¡¾y||¼ŠvŸ‚!C†  `Á³,8¦Â}C xœ#™Çƒ`œ@îÚKø£-¶™h8©m¯LÁ¿º•zං‰k6 Eˆ»Ãhyv}å %õ öš–¨ë™0`¿’|Çô½} F-é V¾|`‚uŽuÉY‰º´Ì»lŠIvŸÅ/SÏ‚àJ6M ÷­‹Àgò¢‚}v%O2ÙžŒxžd²)š<ŠåŸ?ëåp÷ùÛßJÍORgõ˜­IÇ"þAýÍ„æ‹`ºvæü·³ËÔ%Ø÷‘Ó[Î1Uά™D¸Üˆ`É“L¶';òŠ)š¤`=˺%M‡’òòeßMÚK^gK®kÒ*º|ì_uxßx}°W#ýC¡óÇ³Ý _ô5ŠxÆn³Äm‡äI$I«áy’ɦhWµ–´ê²H¬¿¿pï¥d@°{T§Ëñ÷’ýÑÞH®kSA°ï>ã7Ë7£]p‚¿j·ÛbêGò$ ’ŠC`Š(ÆB7–n¯‘`h@ýž;ä á®óòm:‚ï<'”,U^ë7Ë{ðEwr!’P2â&V_+C‡ÞçíNN©1µ—º¬ÎÍ*ÁÍH<ùµÚö±‚}G¸™&Øé„/Ì<Ì´Ùq= ¦4Ç“Îån£Óu„›m‚›á.8 XZÀ9i{.;½ Îó¯†Ý`kæG¢¾0û$ø•í‘‚icÛóGÓã¦#8ÈJÅ9ÅQ¥½"þhy%¸~e„ öÝçbáxý÷6­Y%¸ê‚Ó¬s—–íÉäù`íɵþhúgr^VeJTÞK7uœ?‚öŸvh>XçgÎøSå‹ ³?‚¡aœN3Mp³ ‚'ˆà¾ÆþØÁãŽz_÷d³óÎÁ}LR¼ ‚'‡`ƒ`Ä<êPæ«íA0Á4>`C†  ` `Á”k‚_¶Ûx˜`É®*´R”h¼,i/G¤ròW«î};hp‚›*ÁÒ;òzs©r÷3¥y½.P£ˆ‹˜.øéa,ŸQ M8ÁMmLŒç›¦iœ-QÅlҴדVÉz‘ˆ_Ô±ní<Ѹ¸½K£O°dè(e•/ž7¬ìfIƒdÌ  Ù¸“Ó·zK’ÏÎÔÚuÆŠÝAGç¤}¤wi ú`i/ÕqûGUÜ,ƒäQœ $4 k[c Þ¶,«¬X/ª¿ˆ|›d|©º4zˆŽoºY%9{‰£Ên–Ôß+îÐl¬5n%ØÇYÎhÆÁnÖÏ7Ãî4!?ѳó©›U/·)!F(+·Òfj½ Ÿ@±„f‚`qkö›f‚õßgĆ$B°4¢‘NÉ‚@ù"XgÜ:Y/Õj·VCoÝà~/ë6Ðl¬1n]–’}þ$+‚ù2ò~ fzyŸª^€‘½‚6Ðl¬1nuXŒ:HöOpÔ¥1Á‘²$²„f„à¨q«°?rüe_JÒØáÞ‹œšçžï¤c½øÈ¡†ïµ§ú&IÆ—ªK£GpÄÐ1,aqé¸YþÙË*{`Æš‚£Æ­¦üTYò¥äßnñ”'kÎkJ/Rx>Ø~ÛåÐì1ní¥n×û!J6„…@°Ü _˜ Ê1ÁÍ&†òLpZ`(ßÓñØÜnm 1A0ÖÉA ‚@0`C†  A xbd]Æ× ®Ë“ê›k«ÐÀZÛ¬ƒàñ¼^…2Ñz=!ÁO4ˆ}!6^Vò¿˜zX“v5ø^”Ž.¯MAÇOÆ{þµËI 6õ[ˆeY™`ù 5xQ:Z®±IËK\•„¹+É\Ñ&wn—àúò€÷²_Pß±„“ßð ÞíñËd™;wWÊšb¯<\14N~ xŒWÙ5±þq'$ØgWëIì}Ék2 x¦ $‡L0gwɶØO1ÀsP¶‚t™§\ŸZÁžzf ’þáß:7«ØË*‹ñ†¥9Á¤¬uTV,KÏô‚ᑼÈc_ssÖmó¤pNXt°[C€õÌ$YÖ7´N;;Yœ[±'˜0‚ŸµZ-áçw ,%‚gÜ@rTÜá7u7ù5ßI.1Â*O¶xêñªì7ÑÛ¾ÞYƒdeéß=‚›^Vw¯Á—ž\Nu_9‚¿j·Û·Á]\#¹[ŽDðŒH›àr¹¼ü%£æ“ß±Tù ïM=d)—µ¾õ‡[q#ÚÞY彞{‹Âïo)ÉÕ/YYs3îæ!x¦ $G5ñ‰ŒÂ×{;–àÞY,Få<qŒ<Ó’C&xcuõ/Áçâí1®¬®®ß­ÖþUl­ÄÜ3k ‚k««·×sH°ÖsOE˜3l 9üqðïTØÅû“ÅÔ‚7 ­tV*>`«Û¼X÷¬)®ze™J‚-Œ"†{'çüþ[ÿ3žT/ò±3»Ý²¦#Ø™\‚÷\/J‰àÀ5Rr°”žqÉ\ÞÚ[«.oß`Úæ@mÜð’ÖÖ.On‰~qc‡§Õ©‚ÞYå½¢»{ýÄ“âP×¶&{.¿'ëõF˜!ÁÕêÇËóÁÎM]­šä­®F§kd•’Q‚¥ùàTO¸ÇD04iOоÍ+? ƒ`ç`¬ÑÈNÓô†»N7:Lk$çJþ†;ÎNÓ¾Êhù.ÓâhΕt•”©°Ò3+%^é eŒ0VÛcµ}Îp<ã ™ Á9›Š…¦M Á¹"×ÛélP Í(ÁwZÃÈ:¹ÿêÖÍJ·»,1 íäë†cˆ÷r7K‚Iœ;ÚÕ›‰K”"ëð &šÚP«W;ë•‰¸¿ÄrÚÜPšÞ°Ü¬ê™„R>hé¬Ñ£:'+íUÖ6am· ©Ö„× heÚJþ¬ýâ)q1ÔpºGݵãëƒ`çMöü¿“îW¸¦«]î±{ðоkéòÀöE¼Wggò.ŠŒÏb_kµƒFÎz=ª-–tIkôË ¨½åîå ºÆk ·ò³7‰KÖ‡˜ê;nAD&s´å3 ‚/¿â¶Y„E¥l߆[1ÿžvø8D°0ãúm=%Á^#ð¯ÙK!Ã:ÃÑ …¥³²×^R²7Ár+™`).ëðt㌪q õ"ø¿½è‡`m&%¼Ý|4Q["ÌѼ;)ßÎêçö¶úÍ@ cdÓµ³³&?/u ê”À^7” Eº­µ3?¦Aûz(Ä,/¾ –ÀÒ¨Ý\ÁòÙjˆh)k̲Zbž]Ž,ÖŸ-‰Š\ØÕÚN¶¾“þ³!ô2q1Œý_óô"'X^\9SF_Í€àæ@³S?“üÑDCò‹é·æNøÆö ú:7+ÓQp¥èHWQ8[zîµ1#«û”"JêHßvžÁÒE^3àHKðŽø¸7ÁGߪe ök@»”E›¨£«®!Œ.«”üMU‚Móü±¶¾0#¸ÓZ üÑ>uþƲ[W,k³E;aжa?Û÷üѼ¬&i5<+5Ç?Í»w˜‚·¹ç¢RÄ¡,yÈ]¿8Ïé æ&u¯’Ü" ~Óâú<%ÁeÉúÎ+‹SƒøJ C°(Ëé nF»àTB.µù©ßñy–ýä†|ð'·!×xUˆgжaÅgDÚÊ¸ŠØWy†X)kmvT?²‹íüÐ-¢JpYߤ&XøÆQ·×/yâöG!‚E >ßT<䜲ìO07©k·k«ž`²Ý6¼«ŸC°rÔ¤N"øqÔ¿¦$˜HGõÊòðÛØ%„RƒqYEÄ ¯,ûn&•`¯¾p.‰³Ú*Áþ•BòC‘ Vï˜Gjá¬÷ÈŠ`Ù„ Ç\DįÀì1ŠðÓ,•Å#Xªe;Šˆ½±bl½â"²îˆé©,:‚›‘.8 ÁÜéÌLBpùx™e=ìBð{Â4M|´T«ÝZ9ÁNkµƒÍaõÁµšg ÷ú‹ZíJ”`ƒ_ºûrÙLjÔ%xÉk –ZY. 
©ÕV.!ô<ˆ‹Ó×jï÷$Øé„/úœM ¹Kʉˆ,JI½^ÞwýÓFá£æTXj©á“Îeovøˆä!ç”Å Œ.1KY)é¬knúØÕMJ°AŸ?QŽ/‹î®Ò©l\ýâB:«Ô-@/‚›á.xœu³F>1j‚»ÝÉ©³<Ѳ$胃¬>–a‚Ï#“ßñû{ù}plYôM5ÌS©¬S–óNøÂ6ÁŽ)Ú£X‚icÛ·RžkžÚX,ÞXÔ;,½¥QVýÊžwµÜûãÿÚ£*ÁNÅ÷ö6—¢W6÷Œ(ÁëWö¨ä!ç”ek%žšCé] Vœé®°]"O°Ë= Á‡r{ìzÆvš²H­,•¥¾çÕ &„AƒËqaeaÚnP×f'ù9¢Ån†ºà¡ÜëNNš)TŸbŽ…`yÓäÝÝ&3£O•e‚õ·Ln¸ˆ½%8Wt>XK°­¿{Š~ªÜeȜੲöY¥îNN¾mÕ…0:!àÄ%ôTÙ>ˆââ¢Ï§ÊÓ¢ßM›¦¯ þñbÔƒàáKífÔêšËq^eì]mt¹ufC*÷ªÁ7cù¿ ÆlI'ÔùyÆît}9Îïx{Mò Kã4 ‚“öÂtz‚ѵ.ÁÚ¸ªœè¼ š’ÁÎG…!¬¶Á† l‚!ŒQ„Q†@0fÓ0›‚!ìj‘/Ü£ÓCpß´Úðë™ÈJ kÖût%Pyƒ¯ž&“Wá}ÓtR߆îGÔ[«.¹Š¥·Á‚õ>] ”èý¢qTxPß´¡¼m/¯äMk%‚5ë}º@p¶ß¹+,™Êg‹’«XW+±™Ò`¾iRðB¾iNÒ53ƒ­œubüí‰&«ì¨5 ³4 Áú¦{Iw¦NňQlã Í Üq)'X$¯üÊ ]­ÄfJƒù¦I‡|Ó4×:iœ]Æš„Ží²˜‘ošl™$y—H‹´£çk …Äš\ ‰ô*y°Š¬»•ØlvÂýø¦É+¾i®Ñ±4fhÄúû ÷Ž)ð#ö›‡£ÖdœÔ7M&د¸ øÖ¡%L –"fhœ`£Uu >ÒœÀXnµn‚àÞps@‚%ß4ávÕRí3´ Í_Y¿ù”mø|ÓmÁƒù¦Elë\çJ~sË4õ&òzî˜Ðvû¹c<&ì[¬Í:Áù¦©£ˆèÍsØN”DÚÜeïÜ€¾iq{1ê“`S¬;"XpÌ2Áƒø¦é ÞºÁ¿^† –ÌЂ6ÿB¸ƒU'‚à}Ób¾wÚàKwjÛÇFw‚kµ•c•`ßbmæ Ä7MO°¶!…<3´P›S&xfi‘ ì:Cðv:ÎòDÁ?ûŒs—p²Ö‰!Ø]ÚÈM@Üp‘βd±‚ðMKK°ç¨ßæIfXóF0õÓš¡‘Àç^ÉÆ&>²3Mð¾iz‚Ó19βšßæ'ØdÜŸoš¶ââ¨5a",WüdÍ4ÿŒ%ØÉºw­AÍ×…‹[¬™Óe¹Òg'ÜÌ’`íœ2*=“S '†àA|Óì·q.”гc¨âßÄwòڧʲãÌ?U†oZÎ}Ófž`ø¦åÛ7-‘•ØL §Õxo¤&íE)œ?‚Çë›Fñv0F£ƒ` àñUÂj{ `ÊV Á‚!C 8Bð4h– VWx:ëFÖËvâu£)²BSN°Î,ZáoC׋¸Nm†ç%&æ³Ä/¦È M9ÁZÿ¦”Š'X^ã%&)Åë³p ÁŽžÆòÁ1^b êŸ`b<ßd—ø³%ª8éLÒˆH»ØPÕdíù[7GÄK,²(3ê%¦ÃÒ/€R,ùÁ¾Í4ƒÇ®úš¤ CýœYq¦_œ9ΉIÓ±zPŠu¤õ1íÄ I Ù%Xk’¶a?Ûw½ÄL³|èädýðmœYÛ‹{h·þw*‚ƒÈëÖ•xY%›7h¶ ~ø€PƒÜæóbZ“´ û+î¡ïZõ«+ŸcaöÃAˆà¯ø÷´¿;ëâ%!8(€T,õ^VÅæ še‚¹au¯Ü:“4Õ ZG°£ƒ$£ˆsA¤b©'PF¸©Á*Á:“´Þ¿'ö¢IV½Ä¢Gõ  ,Ÿ X²yƒfŽàUaèeÐçìO£cÅËIZo‚W¼½º¬ó‹9ªs(¿Xê ”Ù4Ïæ š9‚%/i¦goL=–)þ¾Ç(">«ö¨R±Ô½Ôù` s3KpýŠo’æ¬5IÓ8éÍÊd‚c¼Äbºµ¢¬žÀ'X¶yƒf`ù©²G°Ö$MOpšùà/±Ø£.…‹¥›VlÞ Y$‚@0`ƒ`CÐ$ A ‚@0`CP^ 6ÈÔôïÐ#œÙÅ3 Át¾T„úUiž"œCŒg‚p/ž½ FÄ3 9™1Â= îñû?“ÿº}2XÙŒó¹ä™kçÝæÝuI®ê9“w‚¯Ž!ä >?ïÃjšeN}dÿc->²ç¥"Ü‹`}Y¨WÖR©øê…üI㺛(¾Š6 ùÕ6CnžhCþó¯uIÑ 'Lï³]Z裄 ”R¯\Lsád×Ç4­ØÝ=jáä›ÏºhÕžq8õ‘ýÇ~ld '—J#Šgo‚©¾(öŽ›8= ìëæß3û‡›·´!·7zÆ9D°ÐÜÄb*Ûðwr·,Míe/¥Â×YþxS)Ƈ3ŽàyMŒÔ¬Ù‡“Ö,~a‹„¯ë¶B°’Ul¡ë «x& ˜(·m‰`vt¼.þŒº¯ÝqàŸøÇ7øßd‰Ë¡[Š`¯¹“:O>¬Škà ?ä¯y{“¥â–Npíê·æ;¼}ÿ/”' J×N挓þÇÉÉú ß¼ÁÌ"Îî[½07NNVBE´m9æ$>œ¬¶OÙÐï"}°&F‚`§C §ý€ÒÕ–ûhÇ §hZ%²ÎPÖPdÓÜo<¼ \!˜ý˱ö÷ÇÁÞ‰ýèû€­Úʹ¹KáNÃßkξ$Ý,ú!ºýÀM:h>¤+ËûtÇxËöœ˜ó9¿9Áss¿¾éVÁ;Ùíóó¥¹pÄå˜/ć“Uá¿‹’9\ú1,°ÀÍ1â3X¬¶^†NåòúŸBS"ë™å¬¡È¦!¸ÿx¦ Ø x¿;2Á¡£ê ޽yN#¯ˆjÄ5W=§Þ^ÌâÃéW…32аƒ¹Y5w Ã$8Y Á‘ÛÂÊ♘`/àAÈ7šÿüçæ2I@pm‘knÄÏ5Wè0¾~uÑøíq/æ ñáŒ!xcÇí,LðÝߎ™àÊÑb!;‚ˆç³ˆüç~ñã¿Eà:‚çı0\‚Å ä²x{eKpaîËýh£ÎÍ ƒàHŒxYä;âÜ .Ì…Öeí—àâ™âN.æ²G“L‡=Šˆ”e8ë;üÈm´æªGz†³'ÁÑMÿô"˜QÓÇ|°6ä쨚 Lé!’n>ØÃÅÜÏ39ù¾´×SPeö§Ô=œòÝnôN®ßùà>©`©›Æäñ'&7X<Ó>U–¦àV̳èTCÒ®Aœ`qà´Ý¥ÌÀ/ä;œÒä—}©gž4ïE Ïþß왌ï4êñŸÖ–zí_ _Ї©îoöL~8%‚wzæ¨_YM<û»r"Bî>Ûì[êæaG|êÃ(æÕ¡aÄ3“7ÜÇòÂ@}è\aäOs8åãFðØVm¬gH%„sXñÄJÏQáÌ8žXm?ôpcµýPã Ç8žL¿ã Á7 ‚@ð Z†¦M Á¹"×ÛélP à™ øW·zåhÓqTlÿ£øÏ^¶Û]¿5úê§3ApÙâŠoZÉU…‰¦.Ôêõõàĺ»É"ÚÜ–mÄïoÄî6¨î½ Tj­{ŸÙ]ϹòÐ"3@pÇÖ|o·¤£osUášîÛÀírݽ¯¾¤<Á”ïv0‚Õï;¯u'˜kkê ^:+\÷Ž@0'ø÷<‡S¼þß­qFG@p…iÙ=Ót|aûºHOð†íÄâèȤ"ÄyU[`-’Gïx’x[yÂÙ@Ý î.“A0];;kòrɵ1íu¯6„—=ÚÒkg.ÁÔHYyűÆþ¯ùïÅ3–/´ñSA½­r¼Dú\ä}þVÓ^Þ“G°\Y¹†ôìŠ1ù7‚›ýj=SO$lÿ Ùq/`|ñyî=ž½™‚;Çnûv«M¤Uù^‘HY»´ˆ½"%©²—Û›ûc“˜aŠŸU%ÅîApL±™vl#GðE£bý¢ÕŠ„áúmqà ®½Gß¹w)Vk‘%VYnû‘em´.YÖ½œ¼íÔ†´¼2'µÞKYŠï&#øÍCe/.7œ7øI^¿ñ“áÛi‘uÃ^R÷ú0Lð³V«õ*–àp sAp3Ú§¹“{Ùn›„¨mþð!$<fÿþlë ‰½çïìŒp§5A?tŠ"¸¬°öýA,Á/YÉíxÖ«<ùÃA¸ß‹!˜Ç†Äþ†Ï`}¾)‡Ë=jëmøB'ÂË*K›uÃþªÍåL2hŠ‚½Nø¢¿¹&g<ºî¾‰l»ÓÁ“t'ç_ïû$ؿȷ¶I ÞᙉÔárjw!Ø/vo‚QŸyÁ¶-Ï‹äƒàf¤ NIðb­v°©ÞÅÔj«ÇF˜àz‹L.ÁåãeQÄPmRüE­vÅ%ø=q(š´®ÕÞì…K•©G°SìÃlö‹#‚Nø¢¯Ù4w:G$1Ò¹Lã±C¤¦3!µoŸàUÜÍJÃk*K:«bãZˆ`/\nYÄž]°´²!xE)eNn†»àôsQ‚ý{Ûo#q<9wr™¬ ð}xÑ9Ž3×fqDŸ¯Q„è„/̾¦k{\›Kæ¥-žøX]lÛÁ(7xºâeÝi\,ÕÆ$^^…m¾µÞ»~eOF¡²¹gHYŠ?êB°£í•Âå–E„S‹¥t‰`¹Á‡^{IYÕ{Wù ¸ê‚û|ªÜoȳ–4:œ‚ÕÙÔgÝçƒCëÉÖâæƒµû1’Ž™N~'3lKsÓÁ39ýŒwŽž*_\˜ýR$¸ùЀï¦õz<«päA9"¸ÙÁ5õÞ¦•(_ãÚb~‚Â7f^‚Óêüq’\ÒCø(/eeH°ÑaºLg“àdS‚F®¢CéŒLŽ™ s6 †¦ea‚`£ÂVÛƒ`CP¶ÁÆ(Â(C ³i˜MÁÐLÜ˧+ÏwõM“*ŠA7[³üëåîäL¬‘{ùtMÁYø¦YåøŠ‡bÀ_„¢ÎNî–áY¨ ¢²¥‹[T·<áç 
Y¨Ž"á¤:_5ù`q¾kFü«Ÿz¿€;7ƒ«ÈоK«œ`@”™oZÌj.yÝUÐz²“ØÔù¦™fxÑoR‚•ur¾ØQŸùËCWîèB<íõ>ÉxÀ~›Ž`y´wQNöæFp?¾iRew¯.¯õO0ïÞ`š|Ó:-¦¤_ý¹è¡zÜi­xfiÔ 2±4Æl’ÛšVúϵk¡VnÁ¤µVvo¼Þ†é³mŽ”à|ÓdïÊÀÝëáB°}˜Ôzµ6%Á·/N“oÚ;nJVIH°û¯ëƈ›¥Åü®mxoÔ ²ÞBr[‹cQóyÌ hy/VlqâÊ HÙ©¬ðbkó­5‘ºë·î(È7M&X¾·Ë4|—ELÔM]V¾i©F>Áþ v½ÞÇlwuzÕž‚`u¶E½|q ºë‘<ˆošLpàî%,ûp­·T«ÝZ8‚÷MKA09^·Ü¬"Ff‚·nð¬/»컭ۨû<Á¤FÅ ¬ZÐÊ~ ÆMð ¾ilgu Ñ±ŒÎ¢öáR&W^Þ§N5§Ì7-q:ªHºjê´€Dp/[+ÉmÍý©k/÷?ðreu­¡¿“£•N0—áϦÐqœoZäÙ¿BÆ4¤TÍ©óMëŸ`ÉBm‚=ÏžrwõÖóß&%X[ÙpÖÑ<€ošçÃ¥º{Xª>\~ë‰}„'˜9…¾iRÅŠ%¸¾Û ‚} 5=ÁA±$j$ß4ÉmMK°ò¹O°\YÁÊ^A±þìØã)qQ³&u‚É¢nöK°­ú¦-™¡1¾n>XýN¨éóM *nÇéÁü9âÈSå^wrqóÁÚG‰Òç’–4ãýmÒYdù¦.d—ôë¾2ì„3óMË©F曦Ç2 E§k{> >›ÈX£#8;ß´Ù$¸«e}yȳ¡3ÁþèjŒ ‚jDw,)NÝÉgOð:Apê^x¬S‚_v:û¡É³4%’å݃‚ó¥›ÇÇǃ< ʳåž‚! aµ=†@0e, `Œ" Œ"@0‚1›†Ù4 M3Á´ÝެÙ$íêlÜÅ7­ÖžÐoµÒøØÅµ×ІO°ï4F[¼é«_ø5ÁÌ7M«‰ýD]\{Ñmø׺½«ž;‚sÊÁšbǶ×ä†`œs‚óM“œÆ$‚©ççy›½8ØD‚ôM ö’lÍ‚)?ñ8 ù¶qÄ034ýÍFà›¦+‹îÑ–1í%Ÿ`$hýi0ß4ɧI"ØYA¥u÷z<™ÿÓ™ú¦…,U#†$Þ26'Fþäî¼fÒݯDÚ‹÷–4šUw‚PY5Ó^jeÇÿÏN¸ß´‚ßhܽeϲ+_'öM“ö+.¾uÈo…6ì%/¾Í'ØhU]‚´'ð˜ÓûÍÃhVÝ Té!¦½¨R–É%x ß4ÉiL&XçjàxnyYÛ“Ið€¾i1v”üæ–©vÇ~VN0µ·\‚õ'ð%Ÿà›OYòóM͘;ú?(3¤FP×MkÛ˳¼›d‚òMÓû¨ÆD¤Óó9~‚³ðM‹Ø dHðJì]c7‚;‹õ˜ö’ý&šàA|Ó$§±^;Y'™àŒ|Ó"ß;íNð¥;µícc«Ð•`z¼ÉcñÏœ…ošðéN°êG¤3õ;Á¹NE*þŸŽ³èQØÏ>£Çßš;6Yë¨ëÖIJ°Ú] Wv’ ÎÀ7-5Áy'7,‚©×þZÀØý?'ØS£[«‚c;á¾}Ó´ÿÙ³PÓ»{å€àþ|Ó´¶fâ¨5á?"ØÌûdÍ4ÿLB°¼W‚U;o6XÛ1í%;¿Ì­ïN¸™)ÁÒübÌܤÜŸoZoƒjÙZ2£žGöŠ'¸Ë|päNNß^1Îo“Ù gë›–¹'ØÈæ"úÔPÚwtŽ7 ÚkÒ ÎØ7mæÂ5–]ï'‡à1:  Ø  8¡2¿Ïåû@½ÛkŒh x4gnfŒð…¶ÞíEél¼µ1sçZùk¯aœã C 8§†°ÚC ‚²†@p®d(¿"ÆlLçKE(ß*ÍÓÙ%øN ijJ0žF„Ó|§Õ›“$&bcqÀS‰p/‚ _í[j]½é (cí1¬.ëähÅMŒÃå&à9J5ÛæÀɵ@5¢E8Ù7ƒ§ðiêF°ÿ¥Ôc ˜Æ¼hÛÂÛæí`4Fñ÷ôu#˜&'¸bÆòÍ;Á]€Á“¦òéB©TªžºtÒÉ VµP#x#0p“û9¶­áì»wQÉèëè›vþ/Fj±Öe¼|zz«Ä‚U›s ^a1t%/H«6oj— #ÒlÚ¶ï])/2ó%-ÑòÝ»‚%ZÒº,ul2Š.†Rý›”/XM7X¯^ð?þµC0—HÞä©°;Á¼)ÿe$&øY«ÕzåôÁÖ§Á®Ó˜<~h-²m»<«“î]e#ø;ž&Šû׈& ¿¶Ý›àMv/ÁFs'u– «àjtš£ïŸœœÜÝMÛ¶Ì0ILðWm.gî!ðÖŒƒÕ%»DãO[#´X[PøU þÅÜÜ¥(Áîsö%þëûàj„Ͻ:???ž+„–^H5Н¶ïM°­ó'‹!x”7r¿*ÁbžøQ„ÓrÃÃ&¸.üÉÈ„ìñ+¼Ð\ZäÚ_Á¹ ØcxØó¤°NK@ð(,Öb v‚U,žž‚`¬³®I@ð(ïä"£œÓQDò;¹Cn®µ·¹¤ì;ɳÁe—“ÜiÙ²Ë4Ë ž®Œƒ`#îNN"¸²¾Ãô¹ x®~…¥×yb‹o­Ïƒ«ª¾Íb~¥>×åN.ùlš­qÀ6%§±óÁ&õçƒGnsYŠ™M“–'‹Å§˜£tO••Ù´ÄO4Ö¤˜£,Ä>Ñ€r"副ÂÌLñ`xš”üÍž©!¯OÁæì „§àás1ÅR°›Á¦¿îõ|ýK>ø“ûÑ_ç/šc{Õ|¿´8ë5" SBK툽ô`¾ ›"iUž²@o+8¦,RÏ®õvëØJ±%¸ÂÒbA©šH••ö ­+ÔÛ­ÕÆÔÌýÔÄõõÛ¾3ÚëNkE$9ÁF«ê\öLÒØ^ûnÒÙëÃ0ÁåCŸà¥¨õšJ°çÂ&I*‹” P ݤ²Hd«aé¼ÝüGu³žÔ¿pïp•„NàôÁ~eb‰½dŸ9‰`)„ 8#‚¹ŸÚ-·ãõF´¯ßµ 1EÆ áÏDíµk:_T ûHȪùk,'T‚5ã`'«˜£SNà¡ xZøe‘k¯w¤ BT8~b_å[8ˆkk¨ÔÀ¯l­M‰ûÖ•¶ØJApÖ£™`oI‘J°‰Ôï•9Áñ(D\YLÍE~+JpGµsX´µÃ˜~–1‡B‚G@ð¥;µícƒ\>^®Õj‡9!بñb«Y¥È¿'¼áh&/Õj·Vc‹ !6Á?ûŒkîØd­CôÔ “`Jû'˜i³ÃÝL´ø ܲÐlfzy?ì3_lY3Æ??µÈ^HD°o½FÛ<ù“s'ï¹°I’Ê"%$C7-ÁT˜Äí5LÏ1Nd•jÕ-K#|ˆú¾™„ àŸ@®A@°8kÄg.TlÉ’îÚ|°®¯ÑÝÉÅÌk Ö}‡¸°õ=  ž•Gæƒ#“¼úù`Vƒ¤óÁReퟹ­è,s'‹Çù x”iÊ8¾µ1qe1ŠÁ ƒ` ‚ApúÊ~ÙétîSœ#‚Çc½V> Á½+{óøøxä&$‚@0`Á† œ|A“* ÍÁ%šD`C†  ` `Á‚! 
‚!<$Ï™ÖEÒUGÚë)Kíü?ðƒ]Oy¨Ã¿k7?Á 8Á„ý\:-¦=×¶]q *ž.ò>ùÔ,•V_ˆCž@04:‚9 ó§•b©X(±´CFÁûKvÕe¨hм§f±tí¹Ë*KÇ‹œ@l,=|§Œ)"”üò?^!tí  Y ¸T:=-½zÁÇñç<T¼qmc ~õ‡Ⱦ¤í-ýaŠ|‚Cï¨Ê ØØ¤!Ø/K)fðÓézhvîœ\¦tïûëÚ!¿%ãÉ‚-’«ý\aû×øZp‚ÂIÝ;j‘>ùο+$7¢eÑÕ81»žš‚ßÏ—J»¿õ±¼ûµå?®÷O0ÏGmžÕ?rTy\üêãÓÉ×qG­Û%öùåð 6ù^?\³5Šð4$‚ÃY“ì 3ºì(tG ŸàÿVå*Ǽ,2Ôü\l3Bp±ðì—}ü¼U÷K…b¡Sóf2%øEô *ÁN ŠšxE ¦,üd±€Û¼)"8X‚KóÎÄ–½ì±¸¬ÌÕfDðݯ»Üí¦¿_tŠ MÁ7vww¯Ô•kpÉÚ`w7.sžñd}!tªÆûö¢Ký ˰]g]ÚÂÏ»U/–zœ Pß ŽÚƒà ,z‚Kõí'ÁÓM°<][ Ú·Ç|pâ§Ê1'¦k{3¬ÌëN‚§™à^Òßž¥žÿB Á Á Ê#ÁÍùÁÊDš˜w…ÆH0`ÁÁ†  A Á‚!C†@0`ÁÁ†  A Á šN‚!hB‚¡ ‚&] Á‚!C C ‚@0åàZ›Æ~¶ÿÚ êƒà [ÑE𠱘≣•ô娰ãwï-Ú êƒà¦JpSê0ùßFì1¾ÁÐ$Œ".bºà§Ó½# M6ÁMmLŒç›‚Ó#“±A°G9Õ†H½ãIâmå gu3HãÃͺaW¼}‚½Ü£>ëg5űü¤rZ‡ïä.t]°Ô[¾þ@ê"E6‘<Éžä_àöØýðØ<Œ=ü¬"ñØÙÚ ’¡£ò݉½BýQLÇî>¢fšà¦n¬'øúm~wg•kßñ$ëD­Ö"K¬¶ˆi?²¬Ö%˺³'÷À­†åfݰ—ܬLe/éõ÷~Ö“'øÍC7霠Û]%4Óð…Ùƒà‡!áq°“Õb]¤½ÇïÿX†³3™àöqÆ RV¾i…'ƒû*ßúÃO~ó)Ûúù¦˜ƒ#0Ž%¸©™ˆˆE¼IK°lE ö‡2Á¶7à`£ˆà,+A+‚àØ'‘.8Ž`R«­i 6jµm±—Dpùx¹V«† ~¯ÆEC/Õj·VÑŒ 8Žàf¤ 6W;îäUû`†Vç2MI0Ó¦8žD°”” ^qOªÌôò>Å@Ç=U¾wÁüÊ}Ì2C{Ét›æ÷‰ þþ J°7ö€@°†àf¸ fª_ÙcÚ\2/mñÄ8¶užÚÛl•<]a=fãKì4H ÁTì´×PGÎ^x28ml{Ye‚ÅþüŽy³ç"ÒËO•ýéÚ×"µä’ÖÏEØþl®þN.n>8“K³ÈÖÜŒtÁ”'‚Í‹ D Ê3ÁMtÁP® †  A ‚@0‚!C†  `šl‚auá{4 hœãkŸ |Ÿ`Á†  A Á Á‚!(7ÿâgÝ?¯žÔ »¿Ïüñy87×ù¹®Àæ9SÑMºfí£ØP^.P®ÎùêE÷ÏëvˆÉ»_Çg¾ûïv”àâÉ7ŸE³’UþÝɽàþŽÉÚO±¡¼\O&„àRUCp©ôTƒe§©=ÀS  e•:^–,~}3v˜ªžÀ#ø÷Ù^‰ÇÁÕ(Áö&;@á‡ë|:¯X(»ÛÏZ°ÿò{¶õ?Öy±KlãePåkÑ˾ ®j.ò=棈Ï"ØÆ&ò(CWl?«[–`˜bƒ`\·ï~äßž5W„ªÕZs‚^åTªÕƒZ—bûY‚—ÅVŒ"òAp±ðì—éö¯Ë½ þÏN§Xp;VâßöºÌ?é,ú'ø’RÊÿ54ˆ›®ìÄ¥èÿt‹]ŒÔš‚k©öJ0Š(•¾x&øðï= ~ôMpPlj'¸tDvö’Š­«4f‚oìîî^©‚­ ¶qwã2oÔgh(7ç‡]j«É€¡ÖÉA ‚@0`ƒ`C†  ` `Á‚! ‚! A ‚@0‚!C† ÄCЄ C3@0Mº@0‚!C†@0†@0`Á†  A ‚@0‚!C†  `Á‚! ‚! A ‚@0‚C†  `Á‚!C ‚@0`ÁVõð.b MÁMíÄ⢡­¯?@,¡ #ضµ oØ\A04ùkÞ°+†a,ŸQ M>Á†7lήq¶Ä~QƲAÁ¿÷’îç§È`PÄÁ†‚MóìÌͳ$¶½$±Wø†ïøÏŽØl ÎÐ 1,|ý6¿©»qÌ ~ã%%‚­Ö"Û¸ ‚¡1¬0,üú%aj½õÇÁ÷Þ*;Y- `‰a™`ç# å•à/j\U ånaÐçO\l)ÁPîîä–¶ š\‚#³i‡{{{; bš—¶Xjo‹ûúÏ^’6¶yò§ž¼ÆR@04F‚5O4¤§Êšù`Ó<iÌC@pÌÛ=©„Q4.‚³àCã"8~A04.‚³Ð—Nç>ÁP^ ¾y||¼Š(C¹%‚@0`C†  A Á4!š÷‚¡\ë· Ê5À6†r 0†ò 0†ò 0†ò 0†ò 0†ò 0†ò 0†ò 0†ò.A0åX Ê·þ?¨þÏÞïQk‘IEND®B`‚libplacebo-v7.349.0/demos/screenshots/plplay5.png000066400000000000000000000546571463457750100217600ustar00rootroot00000000000000‰PNG  IHDRÁõ1fÐeXIfII* Áõ†Œ”(1 œ2ªi‡¾HHGIMP 2.10.342023:07:30 21:28:41 ê²Œ„iCCPICC profilexœ}‘=HÃ@Å_S¥"‡v"’¡:ÙÅŠ8Ö*¡B¨Zu0¹ô š´$).Ž‚kÁÁŪƒ‹³®®‚ øâêâ¤è"%þ/)´ˆñà¸ïî=îÞB«Ê4³/hºedRI1—_¯` !D—™YŸ“¤4<Ç×=||½‹ñ,ïsŽ!µ`2À''Xݰˆ7ˆg6­:ç}â0+Ë*ñ9ñ¤A$~äºâòç’ÃÏ ÙÌ Mºa£ pHYs  šœtIMEç)Ö¥>bKGDÿ‡Ì¿IPIDATxÚí[sÜFš¦çǸ% AR*ñ ‘ÔÉ”EQli«e­=’íÙÔxìö [cu{´§UnÏ´|ÓŠuoÅnÄŒ#ܱòáq‘8d~™ù%ê„:¿ï… U%€Dâ!Ȫ|êï® È<çïÐȼ|Aæ5 Á‚# A@0‚€`™Áδ2c‹PÆ"óŒ~áŒÐ/ïƒ`dÖî&2V™×Åíá÷h2dv Žÿ%ÈþÿÖ!Fæ‘à­$ï¦gKg ‚7ÓÿžÍÞùA½|¶x©(Dÿ{Ö~A&JpŽmšX/&ÉÎß,:gеdÞ«(Š “!øLøöûÅãÙþùH·/Çr/]Î:‡SOzâ–"8N_kÊóµ>N >óþ6÷Pˆ ã}’û]§óú™3i/à³ßIóþ_*úÁkŠ`ùÿP>ÿ}ö‘|á¾$¸³‘®öEêË™"g«Nó<‹ø,ÑýaÎz;hu¤¾äÝU{¨Ì%¸±¶öæ¥sŸýqMfex‚Ï­­íÝ;‹vGj»¯å9ã'ølñÞ¿üKÁbþŸ;w©{#S‹PßÿËHŸ;÷ÿ0Là³Û×oÈ\ŽÏ­^– ;²«÷öUù¢\ ¶åË¡à|­¯ä“\¶¡Û¯£‰‘IŽEèO•Éxð¹s”uçÎèA^†`=|&ït£L–àZBz‚# ¸ÏtÓ¬á³8d^ >s/ Z™ï^‚€`ÁFŒ A@0‚dF Žd^‚Œ ŽîD€`#FŒ€`Y.‚ÝG["3Fð‹ÄÈ‹âå –Övž}Œ¶DfŒà»&Áw‹—/gÿz#3ß‹xÁ]‚S‚WÂ0¼x"@02ãßå.Á)Á’Ýðd=ý_ö{GAFðÊÅâ…ü¿ù" ´32•'¹Ì%¸ 8ŠNNÒÿÈ·Ö3‚“r1H6ä ¯äsuUˆvF¦Bð]æL ~ó×ò¡îVK|Z.‚ãöjúâ ŒL‰`}Ö—`Jð³§AšöKÕ~ðÒ 8/ƒ`dZßu/ÁÁù› ™U‚Ë‹0¹ÿ±)³‚‘Y%ø®s .Ç"ēǶB€`dV Î/ÂôL±ÔØ‚`dF ¾k_‚S,÷÷÷¯ïQt~/]Úß“Ä>û¶\;×äâ/rñté]ŒL`y6.Áæ§ÊÌxpeËFf‚à»Ö%xð L“àèÅ‹#sLðÝ» ™g‚GÊ×Ýn÷‚‘y%øv«ÕÚD+#sK0‚€`ÁFŒ A@0‚#F@p5Á™ Ñ‚‡%Á¼ÖŒì@ÙØHt]xošq . 
HŽÊ¢¹Wí^†k°šuüèa”¥€Ì˜J¬¨ÑŠ5oßTåµfdÊÆF¢ëÂ{ÓÌ}¹“¡Íj#ËA0íP(ûª‡à„ß‹`¢5³v1½ËÈé€`l4››Ù$ÑÚ•þ±ŸýÅÚ+­™³'ª.¼7 ƒ`i{òؼ§d-½(˜Xº‚²+­™³bZË›‚A°>¿”`G@VEðƒ—Ã,·ÚÁ}Ö/%Á‡¥uŒ¼µ¯¼i¹€ì—»hIV¬ @°a@#;àvêbyÓzlE`:È[ìŒÜæZó—k{<Øÿ$çEîûIãÁ‹Mð²½ ‚ ‚!X!2ïÛfí2gÆGf‚®&ØÙÁûÞ'9s<¸Ü€g”™;‚#F@0F@0‚Ì(Á‚#F@0‚€`Á‚‘å Ÿh x¸P…šètú¡+¸Þ´ ³† ‚Yoš6§Õ:/B´ÿÚ{ve9CyÓD9¿˜/‚€`çîcüÝlsfÏqÿë$  ‚«½icÔ/`dÌ“›¼²Ž‰ R‚1åM£‚2µV˜Ç*jÚÖôt~]ÔÚÖN®÷!TÉ¥üO,Hn2; 24d¹ Ö3Þž}óärlº¬¨µ\+ ý27›ˆG·JB¦Ô17߈ YB‚Ûíö®\$´gÝöF¾´wJo”é赂ä¨,E í*£¶µ‚`²UÚÈvpÃQ¬%×rÃÙ/CC–’ào:Îâ“‚ŸýÐI1ºù©ê{~¹kȈáL{ÓˆZ"·­mx}Arµ´±9]fW-aõƒ w#CC–»A þ¸|‘<=2µ¨½iÁ]ÓUæœ0ÊÕæeh®"X ȈáL{Ó`ÖÅôµÃ ‚ßÊÖÃÌÊÐì!Xzш€ŒD{ÓxA•‡`µÕ¡{/CC@0G°ëMsלàW££‚Éù'4MpáMÛ‰ ‰^‹Fdi¬˜l•Ä£X v®Ë=0;°ehÈrlŽ« 0Ì?É 7L?¾;ö<*¾ï²t#FŒ€`Œ€`™Q‚#FŒ€`Á‚#ËA0>Ñ@–‚à{UZ3j@‹žv:s09øËlv´êÖõÑ}0³†5M¹Ê¸_Ä}ðE2—“@ÔTüFyôÐÈ<l~½2æWÉ›óD°J·‚Ž`)…ºx"@02§g“~OÖ…á*Z¨ g¦M¬mk4d…á,{EdûÈ7hñ™«P+·*Ê*eÿRE Y«sËŠþtP¾_ü¸¨Ö³?èj«­f CÐ4GÑÉIĺÊÈŒ6³Ã¡¯Ájvq±Ó3ÞŠm«ò…†[}˜.}]/Œ'ü”¹b««å…?H¶ºY;}ïH+UÒ÷»Z¹¨Å2ÃɆ§‰3ÁÚU–ËÐâl–»(]e¦­$˜ØÖt¨­T¨­&—âøÎoâx­ýð»8^m_‰ãݶ ;0®ÁÅV3‚åfF0©‹’¥±:·BÌ–_ƒ‡šàŠ×g§J÷¦nArúˆ© 27×`5?øÑà (OzPÎLCI°ö¦9]æ¢h¡PËX”;’÷ÿœÂ˜ìEѶ܈Ú±±åËéVÉZÛF]”,(ÔÁ†Bß/¬~p®¹P· ùîs=‡Ì1Á´C¡nÇ‚Ýáà .îÜ^‚Éj_ͶjlÕ…Œ¦$#¬nPl/ÁA³¹ÙÊ®…­]é'ûÙO°ò¦ñ+…š‡`²bcÛ»%žf­Ki]R‚ºûR µ! V7<¿câÉcóœžÎîQaR+oO°ZôÌ ¨`q7]7}þÛººG0ü5X9Ü@ðÜìºÊ¬óÛ‹à/ÇDpÖMÉG0¢*‚ãdD‚_¡1¯3Z³hk¿”¡†³_ì¢%ĶFF´M-úzz$ùV¿Êþf'v>iÕE˜û²tnæV5Á[WäË%˜8Ü@ð\ÌiÍœÑRý 5Üxp_OrÎxpòÒºÈsOr½Çƒéç‡Ír<ØÞj‚ç“àq&ÆgF@0F@ðÀùºÛí¾#@02¯ßnµZ›8-H]#FŒ Á‚#F–ƒ`xÓ¥ x&¼iö•‘'bv†ùÛ›ŸV¼¹:¹%x<Þ´|«ÙvÍ„qÜë\÷ü>#â0Ã}áÈš…b¥ ‚§Eðx¼idþ°õzæËð‹@02E‚ÇáMË·šéèÄÉ…0<¬þsyûn(@0ž²7Ÿˆ)ÎKޝ|¨ h!óGðÜ\X†3)â ²* CÌVÖÚ’¡^5¹¸clÈÒ¨˜M­•möÉK£.ÆZJÌæ¬  9‚ëö¦ñ¾HÓ]Ò›àˆ³g)U²µXš1®ËÊQ´,?Xât)w`¬¥Äl¾µÉ\Ÿ7írrÜn·wõâg&Á…«ŒyªkŸw®ZÂ2œýdÜ(«ÅÊЈWÍйi‚•,¬E6?ÂO^k(Ö”˜_ ™Ø5¸FoÚ7NçŽ^ìtRZ Áí àn°\ïºT¨Q³C³#‚(8Ÿ‹¥7Í‘@È7âUó¨%”,(Ôœ[Gî®xªëBkME0·2y‚ëñ¦Ù;°z{}¬ndžƤÛR­Tµ vÆEür”|1aÖ´¯˜u±ª ‚g†àZ¼iU‡Íæµ–3&pû¦seV 5ÁJÌæ%XyÕz¬j‚ÕÁ‚à‹‹7Í&ø£Çrꧺ³Wv»NG¢ÛrŒõCEÑ׿ÏÞòRCdhī֛`k-‡`u° x¶7ìhEý?sÑ›àjjøj÷Ið«ƒj‚ 1SÛf|˜mlw==í;׋­f/e®2šý†{dZ¡ÆËЈ£•¡¯)ÊLÖ",‡›>Øž“µñ<ošWP}\ަ&Ìg˜ú¹Or>ó$7Øx°þý¡}¹æÇƒ« Æxð„ž¥øžã ±TGû ãî #u$즹€oö€àyMÐJõa]#FŒ Á‚#F–ƒ`|¢€`OD§Óî¯ä£ûÆj½¶ÒUV¦ÙÁ'·ËL01œ Kpû¯§ý•,æÉå ÛåÔ:±â[¡ê{1²Æìw2EqX-n F9Td 6¿€>\އ!XçèûaV’X‚YÇæóû“Ä!óCðÊÈv¤™$øf)€2Ìpô)‰Cæ‡`}:µ LêDªd,ÁeQUÈœÂüì†Ö¬,tô£Pã]e¡®K÷±¡ l‚Él{5ó'ŸìI´f‹F°žñ–^-«d,ÁeÑd¿x!·P(‚ÍÙw{t-G¡ÆÎMK̺0z×J‚-I¦±-ÁÊpF hϺí*K°.š,ËYŸjÅšXÓÿÍU¨y\eG±r•u?*ÐL›"ø4=¬vâ6B05Ã!óM°2œQ/Â0Ìâd<Áʶ Φ令dÖ³4¶BAh‚¿÷u™W™éìq®±”à郎-Œ q!˜šáéE8fçv춨$˜¬5$Á‰Ï›VI°Õ‹(ê«f‘e!X Èz|ûæÍìÜl–k I°í*«‡`e†C–‚`K@Öƒàîâÿ´¢ç66fƒl»Ê†'x33µ•’¸Â ‡,Á®7­Š`ÙuH ¶;£ü Ž^D¾)ŠÉð E01 i‚yK°a»Öˆš÷m‚¿%Š5MpcG¾l~´ìq•=Ѷµ­+r9è‡`-nËÖÊŽ@›áVb<Øs¹¦ãÁ„àH 3,?îKkm>€6™ŸÜ@æ™àyŠiÛC@0F@0F@0F–ƒ`ÑÂy^Z‚#FŒ€`Á‚#ËA0¼iÈR|¯Ê€fÊžv:Óœ(Ù§,íËݾ·XíGç†L…àˆÅ´Š]½]± ã«Ä¾Hê"ØïM+Þƒêºøóê ïZEµÎ ™lÈ¥u ‚Y_H“æ5~ëT‘¤19‚‘ù#˜XÅ@02‡gK•XɱŠ)oš)(S‹³©ÅšZ¡áM+—‰7-%—ÙQáÕ½ýtà­–°ê’ ˜úA±6»=ML~35¦)êL8#SêÔ¢åM+'Ç‘E¶(g83ê¢×²uoÎì;R-_]öÜ@±6'‹øó‚`j¥«Ì”•ÛšQ¬‘EíM+kù#ñ¦5Ü¢ kŽą́‹^ËÖ½Y×`Rbc£E•Îì€8Ü¿«YÇÔ*–YÏ~t|%ÁZkævS;ÂX‹˜:ÛA Ü~pPŠÙ µ„·\ÔE¯eëÞdç–ˆÛHµ< 5…š­627ÓÅ‘–¡³'ìM^0¬Q¨}õ{ã&Ÿ L°µVµa_IŒ‰ùQÑáÁ‹L°²Š‰Ö®´–ýì'XiÍ"8l6¯Ú2Ep£u1ÝÒá€;kYq^ܱˆB%FO ·Šy­SŠ`¥5sËk~v»yGBì+ZM°³–E0_-¼X—Xš¿ˆaß*‚¼ì0R4§fÒÛ}r<çZI°¶Š2´_ì¢%ÁĶF†]µb,jD¶ý€¦¼iyÑwmj‚ër>µò ¨jóÕÒÕ6‹2‡2‹ÓAÞ’`vävôñ`rÝ£4aŒ»Orr7y9ГܰãÁÁži‚‘>ƒ^ÁÁËK0æh °›?X‚ç6A+ ~4fº#FŒ Á‚#F–ƒ`|¢€à ç^§Óù´X,unW;К`›àñ¦˜lb~±h[¾ ßi·ÛÅ~¥sÛø ÙùÁ&ÁÝ~½i½sÄüp·¹Ø˜©E‘ñ{â. 
x#Y¯I4’{©HSÓƒ#s#cè›´ÌæΊ…Àu•±€á“LÇ{tDuÂ<ù¢Ú5 mIYšÒ– —`K̯ŽÈ|0gž\îMMwòK°ã‹°/Özž\lÕÉðyÓN]‚­‰xƒwS…#˜øÁÃYîM;¾éÊІ$ø¨Üjœ>*ÉVÃ=)K+®ÁJç¦wàˆÙj{Eæ—`2?˜µ)\N¾¹)Zé*’`µÕ ùîó(ÿs~0éŸ2;(+ÐìˆLІ€àž,öÎ=ÁfÑ> &H{Áƒ¼UºÈ¦E0©Àz³yg€àV2–àÍB‰&]¬•`³OßèH€àA ޽½ˆôΞ}œ–1ÉbÍ“ Ä‹XZ‚¯|˜Ü» ˆŒÎr«ØŽÙ 2šø ùçýœ¤­+rµÝusQü¤Ü*!ØØj5Á´h¦`sê‚, Á]æ÷",WµÖdǃûþTÙf·ZM0):Àï‰!‹Ù‹037-‚€`ƒ`dî žœ #ã xr~0ÑÂC"A@0‚€`ÁFŒ A@0²Û†€àaó´ÓÉ¿»ûèþ «)Y›7íË]@4§׿MˬiÊ›¦óà cˆ7qùõÜ¢di4a{m¤:6¼Gúê ï¢Èl\›7-H<æ‘f'ûÅ‚1!©Þt[}ÜÅw7æ„àÚ¼iArSÚŸ~»‚‘‰öƒëò¦_™üd_§ï+‚ÿPnµxÏÜÂÉl?"“YÑ¢éý뤮ª*YËÒ¤.ÝÇÆ!’#üéÀ8XOQd¦ Ñ›V~éw[n¾¯V[%s•I̹ÊZ–¦ª%ëÒ×<¹‡é+ßGדäÀª+Q¬YG˜%3ýØ¢È,<ª7l¾¯Ö[íI°–¥åÕŠ³‰ŸbÍõ¦]NÖº<ü.ŽWÛWâx·-Œºt?*kÂ:Âü,ʃe‹"³Mð¨Þ4B°©spwÀ, hQp¾“wÊ Ê¦»Rã®/‚sW<üsQ4¯‹®«îÜʃ¶]ùbðã§(2ïW{ÓF&8£æyÙ8&½ˆÓ‘¦Š5}%-û&F]H‡‚)Š,ÁŒ7m\Íæf9ùi(‚I]–{·äkOͺˆÖ®|õçOQdf¼iEQ±Ýšà¯-ž½å\ƒÓòåÔázº® K¶.Fµ˜¢È‚³½]tp‚ɸˆA°hh‚ã/*ÁµyÓˆ Í|_ü­|oCmõ—~ÞÚ/׊XoZ/‚ºä·€,©‹Q-¦(2“׿M3>U®6™zlü¤ëM«&Ø® äM^rãÁEµ¸¢È,ö"ÌLònéö" ‚ ‚‘¹$xrÞ´(º)÷à+¸HOΛ–>àOp_ÈŒ A@0‚€`#F"9Ñïud¨¸¿å‚'qîìdèœ='@ðT|G‡x‚áMÀ3ŽðÄ ^íÐ/ßü´Gñbú&+CÓQŠ5¼t@p0œ7͘ÞÞëû@å×wYÙÌI¹£MV<(¢RÝÖ˜À¯‰<¯¥ç¦\P/¿¾<`¦Läµ¾i–ÑèÞ´~ î•fŸ“ ȆñdBßE‚7‹j¥o–-¬d/ŸË–ÿ²<¯&É»úÈWª£üÁA^<óFðJé¢Ê³#¸w?{±q²:f‚}—ÓÍ$8+ó«”àßË…[òDþº•½øú?GKð?Ü<{6»ðæG~ëÝŠ²¯Jp>Ù¢ðd]D"×e, b8ózÓôRøäeQ"tI•vr–¡)ÛY˜Ç&X»ÊhµÁܬÓB-(ÆNðY/ÁêŽùÙGò¿¿ù‹Z<óÚ’üÚÙ·ÝFèÙ–à…“sN.1œ 7ͱ­Që‰! +' « i¦³Õt¸e“ãØ©Â•ÿƒ ãÜþÏþ^–ð@òŸ·é¿Â±œ»ÊrÃÙé#×›ECǶFk´ÿiËn˜2´¢¨t^ÉYK_ƒ•«ÌCðq»ÝÞµ/¶Û·'@p`ð›$ää}ðÞ{ï]²NÞÙõ÷Þ{s™¦°ó¦Cp’P†ƒú~ô0aðk9.öì©¶Š}÷y”;ú‰7-â%Wµu,b.‘‘š€t¶ƒ¢_`ª#4ÁÊUæ!ø›N§s§ØAZ4·­‰Nç ±¬¿,ù5þÓóçϳÓóÙï^KsøIöúóç¦ÿX‚e#´^ûUFð_0S†_¯àôÎÝÅ;±¬bÖÛCpÂL¡g &UÁü̾aØWÅÆ"^7ø5Ö7ÐÄx ÿÕòŒEä ²/Ášá1üÇL5¶6 Áoek‰~›ÍkÙ,#ž`â*ëM°ô¢éçÆfs£5‚K~=ÿûjõÏ×VWÏß=·„¿.<— Jðf6Ý2OË™žqØ]ÕX 1 ÁåZ½ N³›íš'˜5`U=ÉEd×A²=ëÇ·¢ÿðZrq fžäê%8÷ƒÙ€9X@ð«ƒ> f\e#\ŒEŒà Ÿ^„&¸MÁ\/"™`é+ g%`¹*LºÊzìÈv¬ÑàL[ö®I°È\hYQb[c·ÊkËx‚ó¢ûoìŒyè’ã^¹t]æÚÖ¯–£#¼u-=Ú+[¯•G~ù|Õ“\82ÁôSåcã·+È/^ø hÑxO#_®¬)u|³ãëÁS$8Z‚'©sÂÓx~ =?~fR±„ë3=ð| ‚§p÷Àlû‘ðÅlûŒ'0ž F–‚àøÂEdÔ\ˆÑœãiÏÞoíno"£f{w Í9ŽöìMðÖ¥5¤Ž\ÚBsÖßž½ ŽwÑV5e7FsÖÝž}|a»žÝ­|ýõ㊷7¿Ž‡Þôê×+ãj¤QªådûB}Í9Kùòêtö»}¡/‚/n¿‹Æz ØÊ㿞VÝK†§p=‰ÇÕH£TËÉÖÅ´9·êÛ^C5îЗ•õ:ïÕé,ÛsÌﳚrÀž€àz Λ÷FØÂj²‚«›x-÷D‚àñ,›wëw+ x|ÓÆ- ^É^läÿͧåÙhØw¹•µ•8Î7Ò(ÿVÊuÊ\ÉÎ^•EWã•Õò¯†®µºâî@¯EŠæ[mhHvÑY!xEýý’Êñ ±b¶Y£ x·\½n~Òfd«ú¢”¯õÓc«ô¤ñu™0Á[uüûOäß~)ÿ{7»ÞñÝÓ¢O’¤¹¦Š®Ú7ϮꦬÊY´‘lÇEQc­•Mæ–«Ö"Eó­þƒ}£&Eg`R9¾!ºf›‘CJ^ZGOw°é}#ï’æMî[¥'¯Ë<]ƒŸ¤[½ê%xåñ^ù0²—4Ó…KÍ‹äï»·××oßWEßß4Šž|!]Ü—çðàž|_m$§ïEµžÜrw ×’5EÑ|«YQ²/Ztæ&•c‚’\¥\\Mn¤Ë«FC™çï–sôäîºÙ¼òLv O_—ùºÿÇ×_}×Oð×—yo¸ž‚£Y·þY¶ÀõFšïEÉâïÿU¾ÿø¥,úÝ¿c<ÆZÿqÓxËë"wÖÆÜAV”ÝÁ œÖ袬'©œ>$ÒyÑí¼yjQõƒIC™ç¯l3Ú¼Ûeç—,’è“ÆŸ”ùï[½ˆkU¢FŽŠa ?ÁIyG,Ú•]kÍí¦¨Üeв;˜Á±R9ÚfÛì¨íy¹ ÅŸ?§y‚ÕH›²'e±úÁñææû«}ü?³ÃW¼?ÏÞoZ[kù ^¹U–üÙO°ÚÁì|ysóåëÊy¾\6ÄÚý­táÈ&X5”‡`»y‚Õ(ÁÜIY,‚Ó\é®Äý¼?ðz ΊÊç^ƒ`k-ž`²Ö+Ý‹XYõì`¦úÁÿúPÖ“TŽmþ(Áª¡<s Å·%˜«ËÂL[ 'Á¯îTüöKŽàW=zo¿h3ø$G*Ç6HÁ¯îTüöËj‚×í^W—ù'øâµ[iþ]>ÉÝʲ·Ú›àxïSteïÍôÅ÷åb¾ÕkÛFc9kÙÑkE¹Snõ=³Ç¢‹ÎÁ½ýmZ9¶!È!‘6K—ße†âÏŸ³U‰-ßf„`¶.óO°…$C‹½6GvuQ~<Ø»ÖZ¯ñ`klÓ3ä:K¯­ýîwÞñ`]¸Ë¡¯É‘ÛÊñ`·¡ŒËµg<ػּŒŒ•`´'Á Á3Nð"~%{JYÔo¸O³=1Ëh’Á,£úÛ3='ÌôG{b¶=fÛ/ül{(:`<™sã ‚À›† A@0‚#FŒ€`’G÷ýï}¹;»ùi=ÛYíÌ«÷Ú@gž~‘y¡ÞȽQ"Šž}ìßÅ«ƒi˜ûs´¡¬kŠcPVZ.æ?š¿#VÈ?ÚÁWoy"ø®Ið]õF>e%œ‚…¬kZ—ËY¥ß—/]Nòß&;91J}0¬ñ‚½¯Ÿ4B™G3Gpp²iÿ ò¥e]wNÄåd%]¸x"@ðò|—½—çÿ(%øü~+ôb :(_ Š×‚ò}}‡ÏEFÑb[¡0–ò­Fz¡{Ÿɺ½ƒ›ÿ˜uk%Ár+±¼uøþAmU»Ê_ª.­¶¯.È,<ɽàzÁåùOó,{« F/%Å;ïp”wîf±H®™IÙ¡EI7%ßjòRm5´wPMpl˜Õ®$بvPî½=Wu9u«í« 2 ßåzÁAü÷íâ9çÙ©|:º•ýÞ¼(ƒöNf˜MOjÜ–ÙMYúrr|³X¤€É¢ï5ËçÃìý|­ì©ëÍ_çâ`µUY´XŒëÞÕߨ¯ FðßÔV‡ÁïÆñåöù8¾·/ðóS»Úä`‘YM{á^‚Ó<ít¢ POrùÓ“Ò´_*j^eOOòÕœšon¤¯vXÞeʢ͎ŠÒ}ô0}±(zUø1-t¶ƒ@0µ=>u:ÆrYͼ.ç;ýõƒ›ÁûÅb^ôøÔ®¶qÈì|×½çyNÆ"Šçÿò~o¬î·¤ïá!˜tÊ›¸{çÎo×é{¯‚¯Ëõ‚ÁÆ"@ð‚\^„_د¯6›»”à½[Í4Om‚·ä«Í 7ÁÖÅ´àaNðz³yg3/Ñln¶Â¬è[Ù¦²­„Íæµ–ýsçAëRÌ_ƒ›Í·%Á².^2‚ﺗàâ\žTB°^4{Ù }\ƒI75塚#ʇýŠr+»]§#‘4ÜtóáµmI°°ŸEAðœ_„K09ÿý'CL­­q¨a.±ÁËKð]§,vÞØ—Ù]§Ÿß“¯}õR¾M.þrP½¾ãïE<‘EwÔVßͰÍ6Ÿ­µ•-îm¨­fE³³E’­;Ü lcG–½¶#HȸEW"˜,2›Ë‹°Ý 
¶>U¶žä¼ãÁÁÖxpñЧž¹è€³6ÖRñ}>¨?ṴE ãÁ³Oð]w ¢¶áÚ‘Sõ ÷؃^Ä ½xÍÁÊ ‚A°u¾;O5 x– cà›Ý4ìG„Èâ,Zóß”ç[iB µœ#F@0F@0‚€`ÁFŒ A@0‚#Èì,:ÒDzµÓá¾a¶ÚIƒ¯l!Ó&8—–ÚRBpû¯¥œaãNÞиÜn·ÛЉ!Ó&˜HKÌh½ˆàî¶ÐÊÈl¼¢õ‘ ™C‚õ$õ\£*ú 8ÿëq)2 ¢ÀвF–Ô”hW‰k5‚Ñ©›`cn}5ÁtÂüÇѱ=aÞ”šíªž±Ï_Ùdp‚Ó'²Ý âÿþ¨xú{Ömo†Ö÷š†Ô”Zµk#õüM§Ó¹3 Á¤üì‡NXþÆPPT©H„ZµkUnµ³ó„L«A &&’ÒocœPcû)N2«‹Ö®ô§þl¬ ­ÚµŠ 3H°ÖY½ˆ¨0´f…2×j>0ó„Ì Á–kOrÈÖ‚Õ!{ÊTJ &†VíZÁÈ6†kk{’#ãÁÆ{`¤‚#FŒ€`Á‚ ‚Œ A@0‚Ñ"FŒ Á‚dÁ ÕÌúåî,7ðjggÖÓN¦­ñüâ®ç†ëe-Ì<^d‹æS;IQ~«zÑ.úêÀÚ¿N]&aij¾H@ðxNÃDyf·EinѸTœ¨‰qìV-§ÚºMðk†%EÙºÌjš xl³ ¯Ÿ4Â0¼xb^…ïÝ—ÊÔ»àä’\üí–[ôí»RQ©‹ò[%‹VÑæ‰T[’°àë‚—“`†aGõEk%´ Â)úü}³(¿U²h½žm‰ì€­_Ã[­~u‹y¹B(nÙB9[¾(ôkÆÊu(Ád¡ÞÖÔ²ÛÁ½vž{‚M l¥úÕ-ºžW@¨¾IéÈ;?9j†@€Ê ÔÁ&¤w¥¯Áz Ûë&³ß'šE‚-†%`í€!øêí’š°½¦&EÛç…Y”ß*Y4Іí+Ù3Ù[¾.丈¶‡úÕ-*…²Arú(ÊFA{G¾úS¶ƒÆaIðiY´Xëû`×K÷¬q Ö;8*ŒµTS›/2Ñ ¸'ÁÃ)`i›3Ÿ–ÔH ‚IÑòLé¢&ÁeQ²h-#;`+À×…1¯ôP¿Ò¢ÏžJ‹l;ÛêwŸ#{æEž¡{?ðÞÆ4Ázô6Vjj›tq¹µr#L^(‚{¨_ ‚“²?PE5Y;È›ô(T7HpÐ ÁXݹ›B]@TÑÛo4…UÔ<©eQ²H‹ž¿W¬OvÀV€¯‹‡àjõ«Að3_ìÚØÞ»%·ÿÔ&Xkj£õfóÎ&®¡¡š_éSûx’³œjùI¢ï'¹£ï9jÔx’3®Á•êWƒàËý‰àŠ&Ôíüô'¹žäôgÑVXK0·Õ ®P¿º?x9 ‚É/ÆXĨ£iDœJÎ%­Ò¡ZèVw¬¢û;nQ‚%)JÉV÷¯l«¢j„`²U¶.<–=Ô¯´¨Þ*!X»e=›òZEð¡nBµ–.J6špŸ¬‚‡þDÃüзa÷ªìø\‹¢ßs[e>U&Ô˜;h1[­þTÙ}<óª_í‡>_]/Á'ö­ƒ4aÂ~œ‚«ö~»gЃš™‹õTé„íE ã'¸6~å-p´“&öC ‚kúv%‚ç‚`¤ŸÜì¦ üD†òý ƒàÍùVšŠNÀ¼ßh`Œ€`Œ€`Á‚Œ A@0‚€`#FŒ ÁõeÑüÁAg­ŸB+ƒn÷êçµUQt:mõ{íAŽ­ªÚT{<ÈVuµÂ± °¿ùËsÛÏä§ðBŸ'Xè#Ýx×5¡S´ÿª.Wo÷½^jSíq­RBubÚc'þ`oì™ÿ•9>íw³dÞ¬;ëtøô_AÖê_K!8Ö§sˆjÁ ‚'O0…@œ\ÐÖÙɼäþàˆè{E ½¿y‰£ ÕoÜdM¿týÖ}ºu­Î‚LF\¬•\Ru'Wì;GíGÈTPLdÄ_…ŠÐGj Š+±ª6Ù_{ÌlUÕeýDNqž^÷Þ|“ x©ýÁô(}‚FßDJ"|¦_æÂê“@ˆ¦ÑeJ̃囈j.ˆ°ÂÓ}Ó»RsèTÛíiéûÚ_sÁlÕ꾑ÓIî3ë“!x‰ýÁTßû¬ÛÞ(¤ÀÑÑߊgEÞôÛ‹`Rôrr|S­•\SuéA°ã¬.ù¢¬KþôTÖE_ƒãÏKÖ¬jßðLvàÕ»[5ê’žùö¡|–K›0Í4^Z°AÍrüçæ§\?˜5œÙ(t¶Ó¾K ljøròÍ 5ÊHúÁ=Öw™t« uɃrf‡óøt˜j;ðIc™­ÉUYƒó†_ítîy¦¹Lƒà%õ»ÔðOrý ­•cI¤À¦kp‚³ÛõžûTI»#Lv0Á ±qy:…S$xiüÁc#XIG'8l6¯µB“àFëbºùÃzV;ˆà·²#3Cð’úƒë%øæ·âzW£5Þè§ÙÍ”+E]d‹˜u‘`µƒVuÉ*¦LðÒúƒë%XíÀ9Ø 6¨që2:Á\Ñ~¶ÖšÁKì¦u¡7väj+^Ó/Åݹ¾mLŠR‚Ó¢²2ù­{ÿêNÈÕåÝ …ou»ìëÖ¶þÝ~6‹ºÕ&;àµÇž­rÐ3ŸnàÆÎDƃ—ÚlÓ¡Ìr<˜äõ}¸¥wÀ·‹pî56˜ãÁ¾1ؾŸä¼¿Rwì òúƃ٭òcÓôKSûTy¸,¶?¸þu«?U¶>=±—àÏáèíðT2ÄÝRvS¢)ìV»V‚iïj\Ã\Sb¡½«iìV»Þk°õ¨8‚‘šþZBLŽà¯»Ý.÷_Nµk%8!^Þì]®qc·[­V_¿³8}í1Fæ; Á‚# # A@0‚€`#FŒ ÁcËûƒû)ʉxŸÞ¨­¶T%Ìè]üÙª’ý½Õ2ýË’á® …!üÁýåD¼nÆu}©‘ª„¥«Ï„ñµvXËVÓ©TÂýË’á®+ÃØW‡žé׿¨7ê·.‚ßË;€_–Îô¬žb[ Áðƒà: ¦§“ª„ÇI0üÁµùƒù¢Ôl‹x‹—WNœ›¬Èל³×çG訄F?ìQ ÑÉ…ÈzßT »[uÅÙË÷n«sd¨„YrÃ\›?8á=Ôl8”7ï¹½Uz„¶/"9¨èˆ)ÖˆÎ/1»oN§Ërz„ ¡¿]È9R¹Þ_Oþàü¸¦åæŠR°ñfBæÍz…¤5Ü¿ñÔ¼ V›`­‰ùUsóC{îþ`¼tÃ\›?¸xÏÒûRp”\Rua•ºr­¼#V#(ä.ä]•°¦F”+Q)0kŽÄöIÀ5aYNµK±±üÌ®ôpS‚Yò¨Ã\›?Øã‹h²fN%FtE?é់·¶“À©KâôÙxGƒÖù%¼eBW Å7!ñ—š Õ}s¶º^E0#(`øƒkò+‚=þà(¹¦êÂLô¾yµÓºH‚³__ɶê’=«¢‘©ÖÛ8T+)p¾Ö'^‚Ijí1m—Üe,ëâlÕqO€`øƒëðw¶ƒB L”º†?˜ôƒ½3ÜÍ{“$8k—Œ`UbúÍ‹æÕ¦*a¦ïé¿ù9›ê9W÷VÔ¥8XµÕ§Õ.ãq ðÈæ¿l­ëäÆš8æ¿Ú&]C–Åw™ÆG°½Õ¤Ú€59‚á•`¥Ôà´]®µB›`eú%Õ6TÂS!øÙa¯M¿Á< Á7¿×»ZPŠÑ¯Áÿã Ñ’Áv7°67ýæE³f2°œÁB0[-OÙTŸäà†`¿³wh‚å`ˆ$ØñhFQgïDÐe<.‚á®ÅLvàñ§Eeeò×¶Gô¾úƒä½í(ÜùÙ"X›~vùª‚Më°Ji!ô¬Ú…4¼³Õ¢µy‚YríŸhÀ å¶Ç`Ypï±ik—þ–‘ýë+¼³·¯'9ÏxpYÔG°jcD|AföŒ¨Î Xf‘S u¨?X×…;nøƒëÊ0öÕ¡gúy§ŽÕDð“߆±zöÌôdv0^:‚á®ÍÌ¥þ`¡+•/§Ôͪýäe±…l«T%ì1}o ëBPQ*aZE+TÂYAQÖ\«©JXì³?ôrSl™ã†?X×TüÁ ïÑ þàSZ490ÚÍ=9/i?‰ª„}&`¾w¥©Ñ¾ÀÄ*šØí’ÏÉ\•ï5„®KÙ„d-Ý„I/—±yáuŽþàü¸¦åæŠR°ñfŸqü“—`­÷Õ¢^ªvœ½Ù:*aYƒ¥~vZ-vð“—àKq|ç7q¼Ö–ªž5MpÙD´ ÕVM•ðø †?¸0[Ôðr”\5ÌŸIî.CE¬dä©JXŠ|ƒ|}Ï·1J°È«$Û‹–/;BzŸ|Zí2®`øƒG6ÿùîCé2L09BãH×`"Û=½¤Ú€5f‚á;ÁaëJ3”`¥¦uÑG¨@´v¥È÷ç)LU“ïEDð™`t/EƒLUÂV]Š#Ôú^³]&Ap(8lqö¦ð$ð˜ 6GSû"˜ª„ºT¶Ë$Ôe\+Áð×â拲‹ýû±ØAÄÚö´Þ—!Q jÈ:íòK›Eu GŸ—`¢¶š0ÃÖP ó3Ç °CðýÁÞß“ãŠ&üƒVÕx°Ï›žøÛ¥Ÿ'9ßxpQÔK0Q [MøàeãÁcûTy¸À<á}kΫƒ‰W«‚á®åL ávo×3Gð4TÂðO'CøƒÝG¦™#x*aÌјN†ðSþ§­¾)UÂÁL¨„A02DÎÏŽJ#óŒ€`Á‚ ‚Œ A@0‚#FŒ€à±eAüÁÄxÛC™;élufd&ÀÕϧO0üÁÞã-Qæö §Ô­7a|­]ΰ/³g»°§³gØsD¶ºñ(¦M0üÁ0r“B{…’Ž­Zz–Y¯vñLPíöÑÖª‰ê&þà¹%øƒ?÷jþtö {ŽÌÖž‚áV¨R—œž€Óì θË*u{¬eÔ•¸ŒY¥oD'Šº<ù“;o¾¯sätL´gØ ˜9GÁILZ[œ\©º÷‘†/]Æô`ƒìŠÑ †?¸¸3zÔJ™ë8{~§Ôí±–YW^ÄË\äsyb-g& 
O¸sDZûze÷Í5^Ѓå«p~\ûƒ³g"¿¥E•2×qöÞ²'ÛrJÝkѺæOO¶ˆ—%8ˆCBpe»ð§Óx@^m·ÛÜÛwcᜣv[=ÁU¬ ÈÚeL¶f‚—×Üìˆ #çÖa¸iö컈!Çå/‘Ü ‡YËt¼É÷7ØëžÛÑfZ»s¤®¼EŠvét‚²4ÚÁ?ÝqNg^´‚é”.cz°µ¼¬þ`*æ€`*ÇíŸ`f-³®ºG3n‚º¼JlƒŽ÷töÕ‹à´V$xyüÁé3K³ygsP‚©·‚™µh]­‹éÛ‡ƒ\Ù.ü錛¤.a«ÕJÿa>TùOç€k—ñX ^Vp–§ïˆp0‚?6ªÖ/ÁÌZž“:Á#=É¥ ºÝ®õv(þtà6|Þ~Áœ»„«Ë ^^°ï™»‚¼†à/g‹`~,‚³Û­}Ýõ‹N‡àeöGÙû¹üö[¾(C°!Ç%Ä2JÝkѺæk½›ü­oªÚ&÷ö÷ƒÞíâѓӽ³Í|˜µLàoípÿꎟ`Ç€üËï`kÿDc©üÁd¸ÖW”!Ø3Ì)u{¬å{’K|; cÓ‰³ÕÚ>Uöì€ÿT™wZãÁÖ“9ØÚ?U.óïöý¤ê,ÕeŸäŽþ ·ß¾9èïê¼èë;÷Ô£z4i`uJÉ9=¨Ù?Á2 ÁpJNŒÌÁAø$팆'ë"¹$4g9¹”3ÃÖ.SEðM"ðÔ³&ÑÌÃùÓA±è±š–ÛN)ö¤œÛ$ç£S¹F9C:Ðs­M«‰;Cš5fM¢ÄOê·š–6¯ãSœR<(Á×rUiÆL‹àbûGHQB0ñj“hÐÞ‘‹ù5Øo5ýƒà¡ n[¿YÊéÛ’`é›sL¢fÑž6§{mœR<=‚×$: Áž&Á[™Þ3ÁÈ8ÞìÊýPø=ηïJïä½ûòýCûo@¥;P[ÍíD:,00vpéc-Î\ ‚ÍvÁ~‚†¹ÃšŸóšà¸¼ZáƒàçÎäd}ÔE×OÔôV³\Ïvjj%Ü D÷n—;ÛÿèÞË3™«È-®ÔçZ:^jý×ãÒXFXõÿ€xdÍ-d[Å®ŒÕ#e¬埘£¡uª#¶Oû`éq½÷ƒV↱6`ê¢þ²WN*Ýü¤(=Bý×TV5Û)s½`E»Â:X]t'GK0›Š`‡ažàK÷~ŸmöI°]T)NŽû$Ø©q¨¸ÇÕŠž'Îüýr~¿Q­„XŽ KlºúÃôß§ÕÊnïjU–kˆbD(kkóãöhh¹ûs°†×´%¬3u!½«çïWppdŠÞw®ÁeµÓÖfûl¬h7²Ö17¨v`‹aÙüíÀ&8ý{Ñm¦ÏòÏ:ˆCŠeû¼(šÿêmçlmÆÂÙê•8?Sò‘È"Ø(]m··3Kì©|¶1«š¼Ç—ÛçãøÞ~ás>‚ƒø¿?*ž*ŸuÛ¹%¶xT|¯=ü.ŽWÓjÅ»mA„²„àKq|ç7q¼Ö^!BÙ þûÍèÎa¾ÝÆaI0¯¡å6V=¿~ø·â±õrr|³¬‹(¶JëRìà½fo‚IQóËÓ•7aVíäôÝjq)âD»t-{7r´T»ÔA°Áp~µd:+ºMå Nؽ¥r1¿€¸3ŽIÑÕNç~Ú[-Ï¿üSßc~¬€œÔ{ÎEù'”|×Ñu)k¿Ø×ÉIôèa ßd»ôuï‡NZ£›ŸfUùñ zøç¢ZÛI@„²„`‘W+H¶?‘§w‚ºã¶KÄé·ø>yå“Üåä›å°”¬UÐ~iÔ%H®GЛà¼.rÓ¼ûrÕþîswē틲5}3®£=Á„á>&Í?Áä8†#¸ J°<ÊÓþžäò6Œ›¼Ipâì½{Ú¥>‚…um‚£›Ò›`·ÿè­6ÛKX‚£hJM18Á·ßhŠòñàŸ¿W¬/{éb=Íæfåìg†`ÑÚ•æØŸm‚•Pv–Þ»%+õÔ&ø­¬®b"3¢ÝÉlö"Ôn2}ªÝ¦ùkÁ 9ÁÔ ZÍ–‚Õé1Î/Ý€ª€àœR öE°ÞªÕ‹°[ÄO°¸ØU5/Áj«&ÁFã• ïïÊw†%˜í‹r«!Øy’ÓŸM4œ6åu«‚é&cfíM0wR{ `¬ÆE˜'¹ ŽûëE¤[}þþÔ ~e÷"ªí¹£³½ë`ÇE°5šFÄ©„à­ýýÿüß™™µp¨fºÕýý“{¹Žu§,š;T÷6L‚uQ²²Õý+ÛªzºÍuQ²£(GðÖ¾®KßkK,%Øh—ËÆhîíL ­ÎÖ•br°½VÆZZ³]Äþnì%˜5°¬¶nmž`V´ËŸ#Ãä[Á̧rdTPœõYc¤ï<[¬ÛmžCt1±ÿ¾éb‹©?ÖÈlŒPöK0ÿ$dz÷º˜ùƃuQz°C<Éù>ùg?€d<¸ú[Î/»U×D°÷Û=ƒÆhÓ!2U×à”òê`2Ÿäö x†R×7{†Êþh‹ý“û}Q}“ô&¾ºP×É/R Á=>ÿ.êÂ;ê}ræF¦—›R3Ëýpô~ÀIm2œy­-ž›œ‡f# A@0‚€`ÁFŒ Á Á‚# A@pí¹×®~u0yíìø‘±ìù^{ç©±Â½Ú Û¾z»ÇZ}¹E˜»¦D~Tùæ…,ßœ V¼‘ý¿ÈöÌ-âŒ4C‡·„öš¤5¤[”'ø¸œ@JmyÙ;j~Ù 3g³ _NVˆ§t¾ÎµÅÆ…õäBz,“Õ ¹YºˆAðìµç§ŸˆtK l/‘nd˜z~C·h÷qY@è¿"ÒµÖŠŒ ˜;0ãÌ-ç ½öª£Û`¾Zþ_÷`Éþ{Ô™Ákp*Ò-gý÷éª-Û6R”¸´Ž5ײeÆz\!íb¦ÕÎ|ÎÁºþ_o]‰츦ŽÛívæÕtDºéb/‘nd˜zþ¦~ª¥¡‹v?*Eüya°ÊݲyQºÖ¡#¯5UÀƃÖy1Á×Ê­–þ_z°¬ÿ·¨Ë' ¸æœSé‹`Ë÷÷M§ÓÉ”·T¤[Êi{‰t#ÃÔKz´.ÊXÁ‚Îv fìrŽ5­¡5UÀ‘[ÔCp&ÒÕvÜÊ~páÜ1M=œs‡ÕÔ#“'Øt®ºç_ß-{‰t­®f‘jç9¯2÷Wìì`‚‹Á G°’ÓöéF†©W±Øh]L V6›×ìµ<ó*`¥-öœ‰t#¼½Ž`u¦z‰t ^ S¯e¨‹+N³[¨?ü+g‡VWŽÑ}ôX?—ãøAðÌXƒ ˜ÿU0ËÜÁêw½TjøQæ K-ÒÁ HpåhZ%§í%Ò SocG.¯XÂØ­+òÕ€,²• Ï­Z+â䵦 XEk‹›ÁÎõ´ì;‚œ×ew)º7Áª]i\õ‰F¯'9~<Ø2õÒñ`-ŒU‹ÊOŸ õZŒ¼ÖÜSm3ÇŒ–¸Ën~lÿà\o‚1 tZu pHYs  šœtIMEç:ûhäàbKGDÿ‡Ì¿Q†IDATxÚík—ÛÆ§÷Ãxd» `£»%öM}ÑÍl«Õ-«—‘µÎH–8-·¢Øq¦­‰oŽœµh{3΋MÏ8ÃsfÎÆçÈg'g_qQU@Õ¿€ðò÷{!Ad(…"ëỂ uÎC u'ø-©k@0‚#F@0‚€`Á2‚Ý h?dÆæžHŠÌ æ… Ü¼Ì`dÖî…"©2o‡šÛÃïÑdÈìü#“¿‚‘:¼ª~ÁaD°\º¼ýù¦|ç•~ùÍø¥¸ýóÍô 2Q‚¶Q³þQ,ß;âµDEÉ|ÁûàQüxÖ:t­c…å^´,;‡?è'=~SD¯5ÅŠj­#‚/<Úr="ÈxŸä>ëvß¾p!êüê³ Qý± ¼¢ {âùïWOÅ wÁÝõh}´/2º\ˆófÁQ^ʱˆ_…¦?0Á²±ƒVGFÕ]M•e n¬¬¼{ù­_}¹"²48Áo­¬ì¿‰vGFv ^Q¹Oð›ñ{ÿø1‹ê… ~ë­Ë½ @™ÂX„&øî‡"ø­·þcÈ ~sëú ‘íà­åm±°sI^½·®ŠÅ"Û/{y«µ~'žää†nl½&F&9a>U&ãÁo½õ8y¨{ë‚äulƃ/¨N7:Èd IH/A@0‚€`—L/Ê >‹CêJð…ã(hU¤Þ½Á‚Œ A@0‚€`#ÈŒì#H]‚‘ù#øÄǪǂí6»ÜüãY·Ë¦xŒV]òóÅné-ÆEywM%¦‘C°ï~…" Woìb;$ÔÜûm8*‚ùRŸ÷V\—ü¼>(]‹¸¨×YLµ"x[λxTf65Í‘|ûû>ÂÆäFêGð’çykg¢š,PðÎV£¿¸ç%\(é°üKv–&XÍÐÌÍ‹zQmT½ÊÌòíW-‘)ꇗ=S3ßÏÔŬÅÄvÉ~<È­OÕEeŽúÑÆ@f”`ß?;¨„/ôuOä\&–¼L‡Ã\ƒÅ««éÍ÷LQ½ø<4EåVÃԢ³(© !8tíàcÿÔ±wµòê²—Ý£È,̃Ïc‚ß}O>à5ÔƒT”‡MQt5ZÚè0‹`UôfÛÚxÐYNŠ’Åç/’¢¬³#w ®Á?èÅF¶¨èZRB0©‹Y«×Y·vº“º˜j[EùŠ&xÕ.ú ®Á5Ø? 
~pŸ±äËDþr äõØüü™x¿sîꦊ!/²Vt‰£QQÖÝbŒgûÁbSëvÑ¢~p\³Ö«nôçþ§ÑKázÒ¹åLųªEêbŠ „5ÁŽj#µ!˜v(ô=6‡àÐy“çÖ( úvm¬ÜU N­%¡6XnÇr9Ϫžs‚Y³¹Ñxðön3Ê_ó þR¼ß\©H°×l^k{6ÁöZ´¥ÃŠgÖJÌ›* ÏóXrb_ƒ£3Û»ÄS3'ÁòürîÌã^ »=Õ‘Ðç-&8³VŠ`wµ@ð|œ`I Μß"‚ï—ŒUÔLšàtŸלàÃV«u}‡Yo¶Dö¢SËw®‰Å¿¤‹&_ÜKŠÒa×w’¢dÑ Àåö[;ªó°#–—’¢¦©a;×ÅòX̬¥* «í®–©¶]ÔA°*ú;<«ÓAÞ„`çÈíðãÁ亗<\)jèxpöIN ò†ç•žävŒñà™&)ô"@0F@0^\‚K}ÃqÅîÃKøf®kX;Їv˜*Á‚#F@0‚€`Á‚‘Å Ÿh xÂ9îv»ŸÆ‹‰ÎíjZ3œ&¸A¼iCFNÌ—™ñ±± ú.xpÔétâ9üZç¶þ8ÜùÁ6Á½²Þ´þ¹M¾¶Þ4Ëd±|S‹|ótk‚ù;áêˆh¬Ǥ>>¦¦{·íEŒŒ¡lФ,c8‹XÖUæŒyO¤Ž÷ömºh⩨E½j@Û²4­-ãY‚Sb6xÍ@°OæƒeæÉ)oof'¿9 Îø"Òk3OŽ…ë\w2ò¼i/²§&âUï¦ sG0ñƒe gÊ›vºŸ•¡ Hðíd«,|ñ ^$[õö„,-¾k›ÙAFÌ6²§Q¤¾“ùÁN›ÂvøÍ _ËÐWـ뭲ð»Ï}õ#öü`Ò>}áØARf—K)‚ûL° wî^·‹–$˜T êE0‚«¼™¸È¦E0©Àj³y´@p%‚µ€ÌIðF¬Dãœ.Ž”`»Ï~ÊÑ‘ÁU r{Ñ]~œ&˜dqÄ“ ‹XX‚¯|_ ÄF gÊ*¶c÷ˆ€Œ&8ÙR$m^«í®Ú‹†à'ÉV ÁÖV‹ ¦E¥‚-Sdaî9~/"åªÖãÁfƒÎñàÒŸ*ÛãÁέLŠVø=1d>{v&⦱°EŒ€`ŒÔžàÉùÁ@02‚'çãmœ1¤Á‚#F@0‚€`Á‚‘Å Þ4šgÝ®úîUVÓ²4>6oÚ»€¨¦Ì›&­iÚ›frï·Ö$ÜÉwÔ•ÿDËÒh¼ÎÊPuläéëƒÒE‘Ù"xdÞ4æ˜Gšå[ñ‚5!i´éµKÜÃw7jBðȼi,Üö§_l‚`d¢ýàQyÓâ¯L~ÒÅéûšàß$[ß³·pvCî‡KI ‘¥-šÙ¿Ù#uÕU‘uNJ“ºôN¬C$Gøãu°9E‘™&xHoZò¥ß-±=ú¾&Xo•ÌU&±ç*Yš®–¨K©yr÷£W¾÷¯‡áAª®D±–:BU”ÌôsEf™àa½i„`û}M°Ùj_‚,MU+?ùJÖ›¶®fêrÿ» Xî\ ‚Ý·êÒ{š(ÖxêÕ5˜'ë,ŠÌ6ÁÃzÓÁ¶Î!»7Á€泋]Õ}H&(?¸íJwd}.wÅýŽ‹ªº˜ºšÎ­x‘uÒu/²¿äEêNp±7mh‚%5/“þÀ)éE¼Š`ªX3WÒ¤obÕ…t(E‘9"ØáMÁ¬ÙÜH&? D0©«Ærï¦xí™]ÞÞ¯þõ §(2G;¼iqQ¾Õ˜à¯ÞãÏßÏ\ƒ£òÉÔÁz¦®Kg]¬j9Š"sFpàìE˜¢Õ &ã"Á  B<¯Ì›Fdhöûšà?ˆ÷÷ÖõVÿV†àÍV²–ïô¦õ#ت‹r¸1z°¤.VµE‘™$xdÞ4ëSåâñ`ë‘©ÁÖOj8½iŧëByÃs×xp\-WQd{v&y·Ìö" ‚ ‚‘Z<9ošïï‹}1|%Á“ó¦EøÜ2#FŒ Á‚_Lô{(ÙßrÁëÍ ÈÀyó-‚§à;<Ć7 Ï8Â'x¹K¿¼ÿiŸâñôM§ ÍD+ÖðÂ!<Ál0oš5½½ß÷’¯ï:ehd3¿ ËÌ”£MTœ% ~¢nkL à7¸ÊѹIôËo/˜Ñ“y£ÂCÍ2Þ›V–à~i–$˜L@¶Œ'ú.:Ï!x#®Vôö¯äÂ’|ù-¹üÇÅ!x9 ?4G¾TD0žàÇž·vÆëFðRâ¢R‹òŽïÊgËc&8ïrº²7Eþ."ø×bá¦8‘ïµå‹oÿÒ_€ÿaÿÍ7å…WùÍ ʾ=,ÁÌ{²ëûÞÙ*÷¹ÒI–91œåzÓÌ’÷ä<.áeIv”ˆÇÈдíÌSIl\e´Z„`׬ÓX-ÈÇNð›¹ë;毞Š?òG½xá!ø7?ÚÊ6BßnÄ '(œÙsr‰áŒçxÓ2¶5j#±dÉd=!Ív¶Ú799Î9U¸àñaƒ[¿/Gð¢ÄÝ$¿¿Eÿå…`å*S†³²Þ4ßoflkD±FûR[vÖ¡ÅE…óš…·ÉZæ¬]e9Ÿv:ÝÁÞZ§sk3‹ß0$'ïñÇ/§NÞ›«¾»HÓFØy7CpR†Ùè~pŸq½'ÆÅž?3V±ï>÷•£ŸxÓ|·⪱ŽùŽK¤¯' ³î‹û¶:¬]e9ÓívâDE•mw»OˆemüÝ`Á¯Eð×/_¾”§çWŸ½åðùúË—EÿX‚E#´ßø;Iðãß:¦ ¿=:‚£;wƒÇwî0eKݹsSèjÁî™ÿî^„e_å‹xÛâ×"ØÜ@Cë1üïg,B5—2¹†ÇDð—R5¶R‘à÷åZ¼ Á^³yMÎ2rL\eý ^4óÜØl®·'BpÂoÁÿk9ŠþçËËï¼µ€¿ÍsN–à 9ÝÒãONÄLÏÀë-,9¯Hðz²V‚£ìÊ]» v°Šžä|²knÍÁæñ-î?¼®- ÁŽ'¹Ñ¬ü`iÀ2XV øõAI‚®²! 
ŽÇ"ÆN0+Ó‹0'£i ØÕ‹`C,ü`‰á,L©Â„«¬/ÁÙNj4XjË>´ æÒ…&‹Ûšs«nm™›`U´õÎΘgñ{ùOr.‚—._¹¶ùw‹ÑÞ¼í•Í7’#ß¾Xô$ç M0ýTùÔúí ò‹ùWÖcf¸h<8<Ïû¨ØMðÄ>U~3w4ÍEðÂ}ª,ÎxÁ§ÊÖhÚПhL=ä¿È0[O6oç|¢”‰õ‰ÆÛ xáøråˆ2Šoö€`|=xŠû @ð$un@xZ×—`Þ÷ã6“Š5 Œ‹Zk=¹™ÝYK<ÖÄEÕVeQ²/Ztæ&•s69$±J²¸Þˆ–—­†²ÏßÍÌÑ7Â;«vóŠk0Ù9iîºÔëü¿¿úê«;ùµÝP½ ëzB~%F³nþR´ÀõF”¿YEÉâ¯ÿI¼r.Š~÷?ã1k­ÿ½ŸxSu;ˆjcï@uî`†Žj´&êI*g‰4„*º¥š·¡u?˜4”}þ’6£Í»•t~É"Ù9iî“Rÿ~pªq­ˆàOôÈQ<¬‘Op˜Üãvu®µ’í¦èÜquî`Ç"Håh›m9G…hÏ+ÛPîó—iÞ Áz¤M'e¾úÁÁÆÆ»Ë¥þòãð¥\‚_Ê÷›)‚Skå¼t÷ª(ù×|‚õf‡àíU›Êå¼4ÄÊÝÍhávš`ÝP9§›7C°Þ%ØuRæ‹à(WzKA‚·Ôo.Á²¨xîµN­å&˜¬õÚô"––sv0Sýàº/êI*çl÷!Q‚uCåìj(w›Q‚]u™;‚i ô%øõQ1Áœ»~ݧñÁy¥Ìà“©œ³!܇”!øõQ1Áœ¼šîE¸êR‚׮݌ò¿Ä“ÜM™½åþ{7E—öÞ^|$ÕV¯mY•Y+³V\ôoGÉVßÙ³{,¦èlÜØkmÑÊ9‚i³hùQÖj(÷ùËlU`ën3B°³.õ'ØŒB’¡Å~Û#»¦¨{<8w­•~ãÁ©±Íœ!×Y"xeå³ÏrǃMážk }EŒÜŽgʺ\çŒç®U7‚‘±ŒöÁ # xÆ žÇ¯dO)óú ÷i¶'fM2˜e4úöÄLÏ 3=ÇÑž˜mÙös?ÛŠOjnnãi¯=ÈZ|š¨†ôQåÌÆwOçGf¸ñ­ëì+ob"­ÁÜIW›Ÿ]ŠŽ¥q¶ÌÂ}qT¿ØÁóCð×%˜N[çÌgi°Òü2"âÍÜY‰8åì‹öNˆ?8Ùõ:L¿–À˜çý{ù(]•‹gr7‚à”}ÅAðeâ2fºNÖÁ’ý÷© 2'¹o—`Ëü±¸ÛþàûÑ_ß Ûží‹°Q(TK¤üÁ‰Îï¶eþK›~}*0&;èC°™£_†`Zm©Ìll¥ KÔ™Áw—`aît:ò—Šž÷:ëiðýï‚`¹s%v;œøƒIŒØvö𢽧ɣ¢qöRi°ÃôoUÖ…îÀ~кȇ"øZ²U¾L–Ô…x•U]>ÁSMû6{ &àn·{$ÏÔ«®—üVâ¾ÿÏB~³—¸Ê)0‰‘[=ZfŠÒ~pâÉ¡Ò`‡á,fQօf^¥KA±ø±ý`L6S—”O™Áw²—àT/"sµ v[®a‘úƒ³[Òà|‚3;¨@p•±\‚“‹0¹ç¬ýÁ6ÁÚL£¥À„EËì$˜HƒûLv@rë&/"X Š}<_ßÉ\‚sÖg*Õ‹ðcp*J LX´ì«N‚}# Î'Xû…“ô£{z¢*ǽÌOn€à¹ X]„é%¸:ÁóÃuR«¬}‘¹T2Ô¸G™%–,ÜÁsJðô%8‡`í¦'•øƒIŒ˜8{©?Øß¼"^e”`Kì0ýÆ[¥;0ií¸>`;×£²ïìpBpŽ_XÝ«F°ñ*#Ó!X\„­Kp•'9÷x°5ZÊ­ñà䡯iO¿áæ0ýÒñ`kŽjÛ9µ~À&óTé(Z…`ŒOà;©Kp]“Kð$‚^Ä ö¿ýv>޹‚”à;wÐ^ ¸Î#CÅ~áKøf®kü A@0‚#FŒ€`Á‚ ‚Œ AÆD0ÎTI•Ö&å6/ñGΫŒ%ȵ¯….’4ìOVôEAp ‚›Cš*Â<%’ÓŒèÌŸó9Í‚é4ü”´„K‚çìlEíøuvèñ-½˜×8Ì{$ Ž-›HU‚33“ B|Kcá:× }÷3½w× àøƒvLÍE¢#.k¶È4Ž\´š`-­dá‹ÑsÇÃfÔæÆT¹®¥Q>ö!Ø[ëtnÍ)Á,ðÜ_ít¶ çÞ~ÀÁ½N ;*‚µ´’…ß}½+ý¤uœý`Qôbü³,ú‡w»Oí¤û·VæáINª5…æSõŠ»Ý5»¿Ôìêväìù¸£&ïfŠ‚à!zɵ…Êš´•¤?Á¤—a9%ù¢ŒE„E~,G;B_5 ‚µ©²?Á›±'2Cp³¹Þžs‚Y¬ùÌ%x5Ñxn‡÷ÚíöÓðdJðF¬‹ä¼ÁÚTY¢á8µ±x’…[j`cn¯Á¢•Ú_è#VXrk¨¯ô¸h}‘?‡ÿq‚%8Q>ž•$8&8Þª"xΞä2¥ÕôCýM/ÓŸh€à V"ÉÝUªatLL•”`ª|,&Xí@ˆ$çàVë£ðX 9Y«uCù<‰p“µøn"˜EýTY?8 ¶ÆƒéhÙiÎxpá#Î܇®h"Ÿ*Û?È@ÇÓúøTy‚#FŒ€`Á‚ ‚Œ A@0‚#FI<øœÌ. ȸ f±Õ,Å•¡y•BÀse_AÊeå‰7M Ô‚dN¹XÝÒpÙçÒ µ‚¢È`[ßÌ& Ëûùr‹¦ ~&ßpgt§·åòAÝÛ<%CKÇ|Ã=ÌšcQ¼$ R|ÿ&Ù ÷ÅÒ/6£Å³ËòÕzK—Ò24ãM‹S±»(20Á¦!9aÙÈÐbš¯Li<~•»¼i)ÅÚNšàôÜ¥×fÓ²<£Ì«é f{Þ•}Ä÷έ#vEF@0Æf£»ß^òJÛß {.oZêFV!X¥2ŒT#ø´ÓéH}å»ïAµ‘¡ñ•˜àƒ`»s1Ž[ñÃHÊ›Æ:;¥X»‘>ŸZ FÎ'Zó¨Xk‚;ÌAðÕ[ Áâ‰8!8[˜àoºÝî‘X”¾/Gç6žU¶bÀ΄˚%ê/S”…WÅ«ɽÌ(5Ø’)ú…üŸsµÛ½’ìy¹[_‚ ¥Mðé‹„`ÑŽ1ÁŽ¢Èhz/ÊLz †à0U(º§’¢úí뵋Á³@0k67Ú^‚íµDýE ~_zÕx‚eQ³‹fs¯ÍkN0krÇ7Ý‹ˆZ&&ØQÅ58jèÞ%^‚`§u*îÜòR¯“ù×–ì{|Æ"ÌAàInâks×à¿>¨Dp²¨ž±Ýx xJ™YÎf˘»¨ ÍA°R¨}h{ÓøÎ5±ß·ÞïCp¼Vëš´©Õ–`"•#Z¦qâ¢Èðë‡*õ¡obî:MÎrŸäLѪãÁ*ôSåÚl7ÈÃ1>U/Á‚#F@0‚€`ÁÁFŒ Áh#F) o2ÃOÒ›6®4fã? 
‹Ñèá’JK´7¶>Ÿ {Üä ž¤7m\éµg¢Ñ“oç=\œ{çé ›_X·~O¿ >Á“ó¦Í7Á¿Ž Vò®' ¶ììO6ÁÄ›FZß(ÖV½i\®ÀLQ±…d댼–Ô%uO^°w¿ß;IêM×ʬ>Þ.„÷äk2CŠkÙrš`2Ó“´¾™Šø³íM»gІ©I¼¡½ÕÔ=UZCsÍzÎ;q<½oBQC°™oø"%1Áô¦™¹çª¨xp±^M¶ªêr³"øÅƒxVzOã§!k­ÉÌÏE0ï¬û…ëÖ÷Ö„Tn«x+<9oZ´î±÷>Šv·˜êMäˆìûŸ¦þîóè/e[#ÐýàçÏÄ«ó!‡|’•kH=\Ô…èz‚›]žÔ½)Drݻ̓E£¬a0bÈ^ÄX½i¢(7sÏ÷úìgzöó|˜&8= 0-‚O_v R],rÄèE Eð¸½i”`¯Ù¼&÷5ÁV Á_Ê ¬ÌÁÿÒŽòÍ«Ô`ɪ©;é5xÌÞ4ßë^o9ù×nñb‚9Ï#ØÚ!ØZkÒ 6M°ÇÆÝŸ§=‘ÿü¯ÞZú`øJ‹’ xÄÕ›F.N*¯Š NÏ’ß;ŸÂ“œu NÇ£/ßÓ×^>J˜àÉyÓ Á\î*.*×ú›M°޹ ¶v°y%>k­ Üj}ËZô'8ªïn`C*ÔœG ‚gË›fvÿ—}ewŽ»Ÿ†š³0fŸmóŽêÎÈa䀃àú|»òõ΂ ‚\1ûbĉáËðH] ¾(Fý¡äGêÝ‹@Œ€`Á‚ ‚Œ A@0‚g©n©é›#*Š,*Á9Þ´1¦Â4„ZÍXhVÔŸ‘š\œN“Ï5ÁyÞ4\5ôç¾Íd*Ç/ƒO¾Éçœ`·7 W ‘¡ùüìßc‚ 7—÷‹ÍD®µ‚GE°f—3íHÓ2%*K^&®2žåÞ­5ËÈДOÍ‘¡qjÏ«ÁW/WÏô )~Ñ|+ùÈÎdB š+Ø‘Qüüc1ãmÕºûÝþúÞ¿.§±å¹Êt—Ú©5ËÈÐRSêòdhÔ]bÏpžá$³Ží “[‰&ø%$•##X›»ž÷:ë‰uŒÇ²ûßÁrçJìv¸’¡IW™ZL=Œ­)š‘¡}rn)ÖÜk™¢5"¸Ó9-ÝPH…¯jš5´V\ƒàѬ½iÏ_ É—Áâ±€ìþ?Ç“~·B]®ï+ZL÷ãˆÖŒu*ˆbͽ–mO8îÔ¡Ñ—cšý_îXËÐxÔб˜Ž³ŸÒqô"L⛸M0y†&ÆI/Â1‘Cp˜²¯¦Öª©ÿ#q¼ôëE4CþÆKðÞMá{–&xSjÉdã¯6›Gë¢9kÅš{- ‚‡"X”êEˆÞœÖ‘=û)÷r ÖEsÖŠ5÷Z6Á|öÛ‰¬*.Cµ•"˜¯õ@ðÔRÎÅ‚ƒ°Á¯òÖ² ®Á“œÝ8Yû*9â™ùéšy&XI¼~gL$_¾Ôž©EÁ¤hF†m•*ÖÜk™¢5!˜Ô}³Õúý¿)霑¡‘#ÁÓz’ËäulMÉвãÁ޵j8¬ëny¸Ÿ*ƒà‘Œ A@0‚€`#FŒ€`Œ€`Á‚ŒA@0‚€`™>Á]ׄ‰¬ál¹Jdܳª ¯ÀéšÉÌhklw¢,Ô\Ú4v~W}/à—¸1}sJ/iß&¿ÁÓ!XÎ>[åF–æ[‹)­Yl¬Ë1œ%7TïÿžoZ“´ް á=Á#̉NÙyrgg>Ô•3¿Ë2ž¸f´eû&ÑšVьͽƒšE&ž.ÁÊU&½if‘ΔÖì†,×p¦jþü4~>ÜO÷ã¢NõTC³ÀÁÓ&øù³X––L[CdÄs`K ܆³l?x;üæ†Á¹llD¡æ´±áI—&ØÌ7ïóîg8#“Kªc?Q¨9ml —&øKi-[)Ep?Ù‹`§(Ôœ66 ‚ËŒEð''1‹Rões{¾ÓpVH°ÓeEj2iÁ%N»Êœ¾¯~2´¡Öróœg ‚ó>´]eBòåö})5؇ò§"ò g…çØØŒPÌec«KZ­ÂãV OžàÔÏ\Œd<¸Â“\ægªê©çÇOlM‹`Á‚# A@0‚€`ƒ`#FŒ€`´‚#ÈŒ o2%‚Ùhæ·Ã›æ7jê ¨;Á#úV9¬Sô;ÿÈD ®èMÁάž5FÑŽÈkog~Æpæ{*²Ãá™Ú¼i¬V0¹¯ÈV¾ˆýÓÌ<9f¦YŽñ{ÓN_€`\à^g=ëM o'Z³íp5‘¥M›V?‚; O—àW]/+á‹»7í¸S3‚›!žz/"I‚áMÁõ&Þ´L;²&ÁS‹0Þ´’ß›Æ1‚ñ¦U x¼Þ4ŒE€àŠÞ4B°ñ¦±ðI¢5#E'áM«Á¤E ¬Gn ÁÃŽ›V/‚ñ©ò´î—ø&ŸaA@0‚€`ƒ`dž ææ[’6FjG0‚€`ÁFŒ A@0‚#Ȭ\qve‘m¹‹¯ÊʰîÒ·~õó'8ñÍ\¦þò¯Þª²—ÌŒNÏUii_š‹6Ïñ¦y—â«‚T ¬%ßgEY°…&8 ãÙ1áYõ½d¾ýý •l­™KÎ7Üõõo;8¼çú]öÛ~c\ÎÁ xØäzÓb‚ÙÙe!ÔúÅfÞZÇwÅû‡òŠ‚à‚ÿÏYƒù|íìÏgÒZ–œ"0#º4n{Óv2¿²ŠÊ›¨|!Þ27â5²ÕÔZ,.ÇéJÙµÄò µxÎLOîµïªÚ¦'´¦×rN®%‘L;ÆçŠŸ]ñšà–˜Ñöò‘ðEˆ+ò rqŽ'¿Ý¶„$tž\æNw;ë./D;ˆ‹¦n”Ä›fí`Y¼ÐàªÏCçZ~s¦ä(ys•Æ?‚³í¨§&^ŸÇ Òƒ̃Ïc‚• -ˆZ?Öš¶,M-ÞÈüƒ~iÆÛ‹ApÜ¢66³U{-µƒåðrý$V:Kþóεf`—7ìuV\ÇkEGÏÑ1ÁÙ¢Ùv  XüìˆýüH]ƒõcǃûŒÅ÷êî‹=¨D––sÅ¡=Ú°hjQHžÅÞ4ºUk-¦v°,6/êÂÂ-zq¢kÍØp]žuŠ,z¬Y‚“µ„9ôEBp¶(mGsv@°Õ0ôÁ™t(¢;÷^ÜÜzæý€‡Vßd/»VÜ7É!ØZkÆ2I‚­îv̚Ͷj¯Ù¼ÖV:-Kà/åú+ÖVÉZöZôöaÁd­Ù#ØéM£½ˆ&wœ¬f#нˆLQëD‘³‚£†Yí9®ÁQ3õ.%wìÝž¸yYÚ€K¹½U²–Þj.Ád-U‘z>Éé&¨ð$g_jèÙÁ2Y‚-Z¾7­2Á©­V%XŸÚE¤„FƒLZ;Þl%Þ4.—¤ÖŒ¨ÁÔâ‡é¦k숲KyÙ*YKoÕ&øεfŒàoÛ¹¾¿oôp†àŒN&-ê"Øœq+j]ÝÁ‚Í,ñ¦õöyf¸6ýâØ*YËù$ç®ËŒ\éSeòCOe?UÎ>¯&·ØŸÉÕ –°~·Àƒàñ¦yc¬FÖ¨ æƒ`<Þ‹äê·>'_ìÁ3œñŽúq‚g={Û>²pÁ<9#FŒ€`Œ€`Á‚Œ óKpEoZ¹<ë&s2y·›·ƒ/vçõ\À›6V‚¼i™¸ h÷~›|“—wþ”÷]ÃÔ²Þ´©êœ`·9_”"ßE?_‚áM› ÁÂ^ôN¸êß(ð¦M±¬K¼i–¶L¹Êt,ëäŬ-—`²U¹øãA²\χ7mND–¶Ìšßåœ×O±æ$˜luvehà oÚd¯Á±7jËbWÙÃf"KK?®P‰WÖ€–C0Ùj¼øã oÚÔ¯Á1`tª0 ¯ ™Ù_¢ælv9c™Æ¢¯¬-`³UûŠSÛß.€7mv ³JÕt/âE–`Úáp¬·ê¾½‚`xÓFFðûRv&»«ÍæÑFzu"ñÊÐr Ö[‚áM›Q‚%`zB᳟:&.&¯¬?*—`½U›àÙ’¡ëIÞ´Élš3p¶W,G­HðëL/b1Æ"àM› ÁTâ%u]i5•xe h9“­Æ‹«9Áð¦Í*ÁdäV°Óœ¦$^)ZÁó7 oÚ FÆxÓ@p½o®yàMÁõ¼i Y¼€`#FŒ€`Œ€`Á‚Œ ‹Kpm'b"U3^݉ÛüV‰`æô}¹Ó)G0‹elÉõ.=ÉéØ[eñZ=‘?‡ÿqâó¥^¦ª6Á¾Ó…§ æ‡ßº%ÍùÖG˜þÇùùükp?óÛËçÖÊß+Ó‹P sï<°¾±Î/¹—ö¦ ´k«©‰÷Aªg˜OpÝ[¦Ä:äèp¸°Ì\l~ˆ`cîŠwž§5#„’µr>”š¯ÝUiÓEçŒ`Ë›æ ¸¯7lª¥²ÔÖVÿA9Ör vºðŒî…OZ– /.ê 8ªöAøË¸ÚA+­s³îo~ˆ`çx°[kFí?L>C%Ÿ*ÏÝh5 5W¨¾Þ4Ó¹ 3ÏA¹; þ9ÁŽçlJ°Û…ç»TåV[E£j¥N¯EpóÛh¾›–ê ”qëÞŠ[A02lƬ{££aNóF†¾2NèK„nóF†œÐh§Ûü6‚›7q"‘éóäŒ A@0‚A0‚#F@0‚, ÁûŸ¢%6ãõ¦ñ®7*‚Y 6å9¶˜ù^DƒãÌæ&_kÆÌb¦èȼi¹†3odÞ4g]¨˜Í+-KãõQl}ýÁ½6@ÍÛ€æü¹oúUñQyÓr g§#ó¦¹ë’úax¯ê@ðlÄm3Ú2·€l„Þ´ÙTÕ± vÐȨ˜íòÇžçùS$X¹¬ˆ MùÜ”fK|¢mWÜs4 K¶Àä¶¥Y»z÷AáYl®r˜?OÎÜœŠ¬¡zJ=¯5)J§5Ó›ÓlÖÙoÿh·!)JΧ*ª¶:7g­bÑ_½•œ_¯³bP82е£”m­‚7l•,’¢Ï{«–"¸‚7-]mW]®v¤˜…/ľì? 
‚íÎÅ 8n¥eiã Øå‹ˆjfXÔsÀbÛšó‚@&Eéé*k̓BÍídŠŽ8êïÅ‹ÿ¨‰]'ö¦Ù‹z«´7l•,’¢Ï_ ¯Úšu¥©âM³«í®Ë±Ú ¿£·x;­x‚{ÄWJ–69‚Él{Bp˜½ùäÚb6u>ÝÊ…à¦q•5]Ú² Þ´>;|4U¼ieÎë'¥ NÉÒ&E0oï!xSÚÒX‚uQJðûòE>O;¬bÑ7Ý‹ˆWß[ïè#'‹} vzÓ’­’ER´?ÁÅÞ4«Úùu)G°-K›ÁE¶¤ÔdÀÜ^DR4³Õd’ß(ÔœZ3w;züëä,“E›àêÞ´¢'¹"‚ýBoš-fãÙº¨×J^ƒ-YÚ,„¥ B/ÿÉ|îÆ"´RÇyĩܠ; Áý½iù§¿`´ÅE0©Ë¨V6,©5SŽ­"oÚæ±Ìò|_¦¨zÿCùŸÔÍlug~FÓÜÞ4#•#Gœ˜ä˜µH657ÍZL5¹mµ«âMë«{s¡›`K–ÖO©2˜;,áM+7œ7tìež…ZŽ7-û©ræ§Ê2OÄzÓœŸ*g­vU¼i%toÎÏÍ][|î3¹ÌðûÐy=? œ±{Ó@02֌ٛ¦») WÇ~¼Þ´ÔÏ}`dÔ¯7­ïÖ"xo{$•»³‚Ó Ì“C@0‚€`ÁÁFŒ Á²€±[ªsývÑþsyÓ‚2æ®r_vàËb¾us1OyŽÍÌ #26Voš:§ª0ÕšU‰ö¦ùžku]/ Sè½Ò{©7-,cî*G°ûç$½ò_3ÆèM³¾7ïøÝö21_ßuÎ?0úÓãU¦*ŒÐ:µä4wàjÉ1 ñ³wÈÄʧñxÓÔ9•~'ª5ŒàËϲX’º(“”ò“¹ŠN‚`3koU¨IÍÖÉË,~elgž÷ðU,C³Þ×’Ûä±BkA¼iü¢!8H®Ì{²«Q7Íì€ÌuÍô¬âMó¼ýg}ëÂågщyÓ‚Ô‡NiJM~{¯ÕÌÜÒ˜¹wYïÇk-‹7|NçÉåzÓìkM 0•‘ìÞs®r5oZÿº¨/Ã;‹NÌ›&+£¼VÒ›fjD†F hÛáé~ƛƃ~ˆŸ%¬÷j­åðrý$V:K‹âM‹®u†`/zÊ TãEKzĦÓ‰7M]ÓÓ;p\Á›Ææ8;,øû©¶bÑYt2Þ4ÆØÅnT™çτˬ“ë‹ ´íð›Ñ«]žÓ¶Þgj­eñEœOn-Š7Í~$Š­bQãDKwc]Ù²YÔãXƒxÓä?¯ÉAª¦ÓT[•®2uN Á¼iîë gÏɰF2ðæ xÞ´ÐñX™C0™%Î Ÿä¬÷ãû”MðbxÓ2O,¯ý²½ˆ~ÛkŽ;w†à~Þ´2=š¢a‹‰xÓ6›ÍU)]f+ùkZ‚íµhÃ4Á‹áM-Á…Þ4ß¿xlµ§*ÊD3?Ø›6R‚ÇèMSâ,…­aØ’ä½ 룵 ^ o9¿êµÁêÁÜApuošÏ¿M:ÜöV=Þþ"Ÿ`¿Ð›faéò¦E[øiÙkð½iiÇVÁA8:‚çÞ›fî+•hYoZîPAÁÞ4ë층y…—ë xÓTeØÎuêØ"öÕ´ MÚ²*¬j”àEñ¦m¶Z¿ÿ·¨Mó´fnÿÜ@Þ4>IÖ¢EY«•v—Tò¦µZá/㺸¼i¤.´¨‹à1yÓ¬ß22ãÁ9l2<Ô“Ü¢xÓèg®¥k0ošµ–¥5KøUÅ›foµ]Tgµ'ëMCœÐÉï#Ã¥–Þ4);\;tÆâMC“:zÓdf‚Œ A@0‚A0‚#F@0‚€`_ÌPä›ÇÝn÷SœŸdÞ¼iŽt²ó>̓£N§Óž×sžmÆ”U,1œÚ8¼ivÑÁ¾Þ`¼i"KÌÁ © o¤*Зà‰{ÓÊ¥¯uªß7’jfvÒoÊ*Vîîy)íM³‹öMßìÕôÁÒºÜËXl*î`(‚KyÓ@pß<þÚAðoŒhéòDZg‹XÇÆãM£Eµ¸m‚›gÿ•&X›ßT-þtž2ÃM”`9ùl•ûœi™–V¨±¤Á˜—2‚%Ú3ªX»ýÊ2¯¥k|3¼lÙØüô¢^Ë'»®GÂ{â"XÿЯçíÿÌjò™ž£ð¦É¢FÜF«ZÞ›&k~-M01¿‰-ÇÙËõļijÓÇþ©žÇOisªd¶R§G‹2*(Ö2EÇp÷›V”$Ÿ`÷TâLî÷öüóL;Vñ¦ÅuIõ"ìi’шÖÊ.:1oš"¸×Y½iªS- ‚…™+Cpã0!X+Ö"ˆ}ËnÅš·^KtÌ<ñ¦…Y«v³ÀsÜJpÅÚ ±kšËòÞ4´:±Í7b6¢5³Äm Á¼i¾¿þ Úd†`c~“ÏdšàÕŽ-ÁžŒ7MüJ qìËÁ.ë¶²nôs‹5­™ÌUv*ÖLðě_FÄϵÄÀ[§þOr¤ÁlMP~×ÕÞ4í’Ú2}Äz«´7ÍcÉV}#f³µf§Es•ûyÓ¢|ž}’#æ7¶, IŽÅmd;“ð¦Ñ^„}ù‡ Ø­XKÍÓ¦Ž0¬ò8;û›'uÛxbXtßÄ+xÓŠ{ýîçMËî Õ£±ä˜Sò¦¹Þ»ë¶¢£½xܼÖöªìV¬i;F{W¼ý×4Áz­9'ØÐV•T®ÁNoÚ°{ÓXøY;Êß~׿9ÿ‹xÿ›W¢™·e£7Í÷ø“J°Æò¿ÿ–·E¯Ÿmõ*ö"|—b-ß–$œ\d-ŸXºjG°™Æ¨Ž!ßÞ¥ ­ô2£iƒxÓ¼ö ½i¬'óÿ~§ßåM{*ßÿÏÿê­q[Þ5x2Þ´–Œ>·V$8Ë\¢åës 6ΞKü²Ô'%½iƒ\ìM3y·7-u„ÅÅ›v˜(°ÁJ¡ö;Y¯‡[¾·ó×R7v²æ.w/‚ÚØ\B±úÜj}·Z¶7ͨèòóJ«µ›ú$v0oš{FÜ–Cpoš›àŒîMü$[í‰xÓŒÛñ$—Ó•Ï!X~pZ¤Xs<ÉÍÍx0ý¬†ÕŽ«ÃÞŽ©Á¼iîXk;îçMsœù„;¾[Û†ÌLàMCêxÓšÞ4¤Þ7 A†F@0‚€`ÁÁFŒ Á²ónw¼-¯v»ë8O3œºxÓôÜè´Ð¤ó§âo¿5†œ´þx.~L™¶H®ÖŒø2EÇíM#¨íMsËМ;˜Š7M|YFÏIÒçû›½aý}|ÞvÐJ|Ã}¼Þ4ú­ò ѧß-Csî`*Þ4<¢¸ hÇw…géðQžÖlìÞ4SÁvËМ;˜’7íì†ì>pïÞm#K£{ˆ_5bHó{'‰K¯%iª7Ââ­ft\FœevyÓ¨­ 5·?ªßLÏQzÓœ;°æ€Yí]ΛVÁá,:1oZ`¼V©yLVÄöÝ/´×ŠŠ&‚Ѽ3c¹H÷²÷©Ôü®’sª@p%‚«xÓ†%xbÞ4Y嵺ÙÎ'xÕhÍzOÝV¢Xj°±Í}f,qVr’ÜÞ4jc« Á­™8¿Wo%çWøçÀˆ7-³VoÙª{¦„à Þ4²Uw]œGH¶3~ošgùì¢ÐšÙÿ_],M?8Q¬ 5˜plÅ4Á–8K^MjÄ›f×¥ 5§*VGÆç7‘@’¢n•TyoÙª{¦„à Þ4²Uw]œGh?gÙ›&Y|ùȾåU"8=1ß/ìE¼È¬'i“뮻ߢìWò¦ Gp oÚÈ“7mh‚µb­ÁTœe~_ÊÒ¸åM«#ÁY­™8ŒF ï±Ñ1¦¼iîµ*yÓ’­ºw`*Gp?ošÞª».Î#Ì!x<Þ4ßÿê=þüýÁ Îtå‹®ÁDœevyÓìºðzE˜iŒ?ÉU÷¦Ux’«âM£g.oZ¿'¹ñ{ÓüûθΘ:Ë´|ÝÆ"J{ÓŠÚ©ÌFCp7ÍÂÒåM«Hð¼i„`#ù"ÔÙՆ›WÄËÌV¬i‚•XëÃTÃX⬄`·ïëùu™å8µf}½i™ÜÔ½iö9uÜÇ›F¶êö¦9ÐMð˜¼i¾å\tÁÞ6Zô‚Ѵƃô¼Ð5Ìh òÎÙxpŽÖlO•GéMs}ª\Í›F·êô¦~ª\soZ0¸Ý²"“¼i ¸Þ7 ×< ïMûª×ëý”ƒàÚfá½i·ÚíöÆà«ïmƒ¡…æÉ! 
A@0‚€`ƒ`#FŒ€`™‚Üu¿¾Ü-þºïvÓ3‘z¦.Þ4_)°ÒÓ­s¿–ÐIvzÓør§“ž%¾0)ö¦É…Ä06ošïiA-:Ø×J{ÓÄn“ýz¥ m£ó¦ù9ßEïûŧuêö÷ |Ñ*ö¦Y_[7LþÖË{ÓüSǬ… ;–`¥Àò¤u ž~Þ4OåǃqyÓ|ÿò³ Z”Ÿýû0÷õ¦‰ª&z0]‰¬¦EðÝö¹¬‡t”=ÿM¬-Ë:¶‚O2º-Ï{øÊxÓ˜ÖžÅ24iÍâ±B‹ÊÒ|Ïcµ'¸ŸuJñ‹ùÆaò¼}C°.Ê/f Þ›fï€'gq?KðļiéæÖSš2Ž-–ß÷`ÚW¢”©å²x³Ácq–ûî7ç_'oÁ›f¯E„$v†÷¦Ù;0uÉV{Þ4ü}§“™m¯µeÔ±õß ÁO3Ï<øè‡¸×¿žîkÅZCÉЖÃËApô“ Xé,YÚ\äMKÜéŠÆ“­¯É)È›Æfµã‘Üjt±Í<´7ìàyO<¼«ÓÏß›æûϺ]_º³(ÁÉbÊ-`¦5÷ƒ·ÃonDuU‡AÉЖÅED4' ·ˆ,M­¬ÏÁ…Ö)Ù×’1¤ån÷®hrΞwãE:Ž5ˆ7;pöó£¼çðÞ4²ƒç¯Äê˹MÄ›åeÚU6<Á<3ØaìžP>×;nòÕz6'ª˜àÁ¼i~Ž&§˜à±yÓÄ%¡Ù<ØÁöZ³ÙjoÈã_ [ ïM#;¨t ‹7-am*gï~ó1áð¦egÔÞ4ÒŽV®ÁzÓ!x Þ4¥ÀjµvW‰ÍhËr ÖÞ´~k…%ØvlÍÁý¼i¤qÆäMkµÂ_ʵ¶ÃÃV|v6[­ßÿ[Tƒ\‚ò¦‘oš©€›à±yÓR¿ˆÚãÁyoÚOró7\›ö½«èȼi9&ñЃÍ<˜7ì çssÁ5ô¦!õ ¼iH½oR÷Žý¢{Óšgá½i2L@0‚#F@0F@0‚€`ÁFÜìºçoÚ²Xä8uMͽiýbÏ0õ2޴ƶXd tÊ‹½i–€l<Þ4çH*¥´7îÀsu€žošsœ™ûÌ7 /fæÇ›‚4sãMÁ š¹ñ¦ñ6N&2ù`ž‚#F@0F@0‚€`ÁFlç¸3ª-=ëvÎÞ¬e³ëÍ.ÁZ©å QÉ«·²¯5’þÝûmX_‚™Ó€fæ…Ì›æÖ´õ¢Т*Ô¼à™BÏLmåbf®vᤂÌ/U—Ò‹o ËèY #Ê€Ö©f n–ÿªøpÞ4×Ï}çìÀ*:˜BÍžž Úçîö9%èW<ù<>pسøÙ¿gdC{Ó”½é‰M°{¤è  5‹à¾ÔˆªE³·b-j—¯ãsJ*п.¥ >»!»\šÿŒ×ŠÊЈ7MЬ#LTXVÑÞ‰vd9¼iñ êOFÄl„`V+ù0óžì:ÎþEMð°Þ´TQž>ÿùÖ©¸¨kÒ§¥5s+Ô˜oþ71ïQB0YŒwàžµKB0©€Kçf×¥ê\å åµrÊÐò£ÉIJ—Þ4ëhé͇<öy.#Ûæ©Œ'Å€•$8U4óô‚UQÁ¶ÖÌ©P#s}¢9¡Jž=§´]šY‚ã§ž‚º D0ñZQ‚µ7ÐìˆÏ““Dk½§±ŽËíM³4DÌVc‚O;Nú2Ý[âÃàAKXÅÄÓKšà£RÞ4¢X+"Xºð2E½5Q[¡Fµf9 5Bð½ý ñ¨Ådä­Ç8/9§v®æ×¥ÁBœå³‹â÷ˆ×ªÀsàgZÀ¥êïM‹:_È3.mì/i‚G7F7‚¿éf h¤qÉíu|’K ÎS¨ªœçpû>û ßçSå!ÎͽsYøLÇË‚‘QÅ‹û& ©k®‡5$ØÃ%—ðÃÞˆ?qÍ¡ fí(N2¥`ž‚#F@0F@0‚€`ÁFÅ%xydZ³«Ý.~æ«&™ioš}©Öj!ˆ}VÜiæòýN–` Bõúã¹ø1åTSNÆ›–góôk΢cñ¦Y[õ\Þ´Ôò†.;š_×ßk^uÄï…:Í\îôûyFwøÜ<1oZžuŒ;¿á^ÞUVæD9¾án¶Ê]Þ´Ô&_x<‘7-"xI9¶¤rèÙ7Êý£¼V^ãl—Íä¼i9Ö±ËÏâ&u7nõòÇ^Fš:Bvö§óø­5 Áù3³£ýØó¤†Ru–Ô ¼œØ¶´MèѼ7,W.­õ¡&xÕ(ÔÂkê¡uvÚvxº¿Ow {ý7ÛóIðä¼i.šx ¶ÚQm5Båê­‚‡ö¦Q‚_ˆj¹†-â#äõäSOM< 7MiÍ.Æ24ýØÁ»Ý',_å4°ðjbîÚ¿¹á+ųùbW¬sž!xÖ&éMsÐÒMoUÝ$5R‹—´~BÍÐÞ4³U~çö¦%u‰ªí%¿*r¸ 8a¬,Ï+Úæ®¤ïAÅlz„cΞä&éMsö"쵈e$!Ø5¾4 ošÓËà‚fLÆ¥5ó]7›ëíj¿/ i<ཛâíg xHoZ‚õV£¢ .ÊHë; È›¦·šG°©Ë¿ˆ©kß¼jš`çö194\žàõd]k«ZÌfÕ›ž$îÍÁãö¦Ñv¤³0“&%[M¡âñöùûƒxÓôVÝÞ4R—§r­ÿü¯ÞÚŽŸ"8«¹tÊ çr,b2Þ4ÚŽt¨ iG²Õ*¼ÀÙ“3.Ò×›æt¸§ú~êñ¬¼V­wvx‚ÙÎõVZ æ&ØØØæàIzÓ¨uÌS4bñó~´pù¤a¯µ´ú?ÁñVUQºÖí˜`g]@0®ÁN‚ÆÚW@¿þ§h©q".v>ˆ–ÔÝ>¼.^ýËQ^Öµz´b¥-u¹¦k=‰ þj»ÑXrôƒ7b‚ý‰øóæ/S;Á¸ŒEü:4·k}¿oÄ3¯s ¶º†EÒáp,»×úœÙÆ58KðöÆÆ¿*‚_ʯ ÈÎçÒÆÆå»K’àÿ!_]Ê%X¥,Þ݌ֹ]@p°±qãîr)‚ÉF• <_Orÿti9¡F?ô/õÖ—Á[j h\L%,ÒçC7ÁQ®ôÔVó NÕƒàÂ'9EÍæ¡_,Æ¿>*ÙUkU#8Ùj>Á©º€`œGpc¯µµ²víf”k‚Øí›Éb°wC,îÉþöu±l_ MѨÇzE,¯F×̽w£…GŠ`ºVò$'WŠ·ª×";ë‚ApÁ++Ÿ}FÇ`ÕC]s%5–¦EWk<8)JÖzb=.§×ÒçÔƒ`ƒ`Œ€`¼¸×úî3|Ã}šc–ÑðÁ,£iŒ™žC3=§K0fÛc¶}Í †ñÆ“šŒ A@0‚¼¥‚‘Zç ©5À!Fj 0Fê 0Fê 0Fê 0Fê 0Fê 0Fê 0Fê 0FêI0‚Ô8 ©wþ?£^™Áì+üIEND®B`‚libplacebo-v7.349.0/demos/sdlimage.c000066400000000000000000000164671463457750100172520ustar00rootroot00000000000000/* Simple image viewer that opens an image using SDL2_image and presents it * to the screen. 
 *
 * License: CC0 / Public Domain
 */

#include <stdio.h>

#include "common.h"
#include "window.h"

#include <SDL_image.h>

#include <libplacebo/renderer.h>
#include <libplacebo/utils/upload.h>

// Static configuration, done in the file to keep things simple
static const char *icc_profile = ""; // path to ICC profile
static const char *lut_file = "";    // path to .cube lut

// Program state
static pl_log logger;
static struct window *win;

// For rendering
static pl_tex img_tex;
static pl_tex osd_tex;
static struct pl_plane img_plane;
static struct pl_plane osd_plane;
static pl_renderer renderer;
static struct pl_custom_lut *lut;

struct file {
    void *data;
    size_t size;
};

static struct file icc_file;

static bool open_file(const char *path, struct file *out)
{
    if (!path || !path[0]) {
        *out = (struct file) {0};
        return true;
    }

    FILE *fp = NULL;
    bool success = false;

    fp = fopen(path, "rb");
    if (!fp)
        goto done;

    if (fseeko(fp, 0, SEEK_END))
        goto done;
    off_t size = ftello(fp);
    if (size < 0)
        goto done;
    if (fseeko(fp, 0, SEEK_SET))
        goto done;

    void *data = malloc(size);
    if (!fread(data, size, 1, fp))
        goto done;

    *out = (struct file) {
        .data = data,
        .size = size,
    };

    success = true;

done:
    if (fp)
        fclose(fp);
    return success;
}

static void close_file(struct file *file)
{
    if (!file->data)
        return;

    free(file->data);
    *file = (struct file) {0};
}

SDL_NORETURN static void uninit(int ret)
{
    pl_renderer_destroy(&renderer);
    pl_tex_destroy(win->gpu, &img_tex);
    pl_tex_destroy(win->gpu, &osd_tex);
    close_file(&icc_file);
    pl_lut_free(&lut);

    window_destroy(&win);
    pl_log_destroy(&logger);
    exit(ret);
}

static bool upload_plane(const SDL_Surface *img, pl_tex *tex,
                         struct pl_plane *plane)
{
    if (!img)
        return false;

    SDL_Surface *fixed = NULL;
    const SDL_PixelFormat *fmt = img->format;
    if (SDL_ISPIXELFORMAT_INDEXED(fmt->format)) {
        // libplacebo doesn't handle indexed formats yet
        fixed = SDL_CreateRGBSurfaceWithFormat(0, img->w, img->h, 32,
                                               SDL_PIXELFORMAT_ABGR8888);
        SDL_BlitSurface((SDL_Surface *) img, NULL, fixed, NULL);
        img = fixed;
        fmt = img->format;
    }

    struct pl_plane_data data = {
        .type = PL_FMT_UNORM,
        .width = img->w,
        .height = img->h,
        .pixel_stride = fmt->BytesPerPixel,
        .row_stride = img->pitch,
        .pixels = img->pixels,
    };

    uint64_t masks[4] = { fmt->Rmask, fmt->Gmask, fmt->Bmask, fmt->Amask };
    pl_plane_data_from_mask(&data, masks);

    bool ok = pl_upload_plane(win->gpu, plane, tex, &data);
    SDL_FreeSurface(fixed);
    return ok;
}

static bool render_frame(const struct pl_swapchain_frame *frame)
{
    pl_tex img = img_plane.texture;
    struct pl_frame image = {
        .num_planes = 1,
        .planes = { img_plane },
        .repr = pl_color_repr_unknown,
        .color = pl_color_space_unknown,
        .crop = {0, 0, img->params.w, img->params.h},
    };

    // This seems to be the case for SDL2_image
    image.repr.alpha = PL_ALPHA_INDEPENDENT;

    struct pl_frame target;
    pl_frame_from_swapchain(&target, frame);
    target.profile = (struct pl_icc_profile) {
        .data = icc_file.data,
        .len = icc_file.size,
    };

    image.rotation = PL_ROTATION_0; // for testing
    pl_rect2df_aspect_copy_rot(&target.crop, &image.crop, 0.0, image.rotation);

    struct pl_overlay osd;
    struct pl_overlay_part osd_part;
    if (osd_tex) {
        osd_part = (struct pl_overlay_part) {
            .src = { 0, 0, osd_tex->params.w, osd_tex->params.h },
            .dst = { 0, 0, osd_tex->params.w, osd_tex->params.h },
        };
        osd = (struct pl_overlay) {
            .tex = osd_tex,
            .mode = PL_OVERLAY_NORMAL,
            .repr = image.repr,
            .color = image.color,
            .coords = PL_OVERLAY_COORDS_DST_FRAME,
            .parts = &osd_part,
            .num_parts = 1,
        };
        target.overlays = &osd;
        target.num_overlays = 1;
    }

    // Use the heaviest preset purely for demonstration/testing purposes
    struct pl_render_params params =
pl_render_high_quality_params; params.lut = lut; return pl_render_image(renderer, &image, &target, ¶ms); } int main(int argc, char **argv) { if (argc < 2 || argc > 3) { fprintf(stderr, "Usage: %s []\n", argv[0]); return 255; } const char *file = argv[1]; const char *overlay = argc > 2 ? argv[2] : NULL; logger = pl_log_create(PL_API_VER, pl_log_params( .log_cb = pl_log_color, .log_level = PL_LOG_INFO, )); // Load image, do this first so we can use it for the window size SDL_Surface *img = IMG_Load(file); if (!img) { fprintf(stderr, "Failed loading '%s': %s\n", file, SDL_GetError()); uninit(1); } // Create window unsigned int start = SDL_GetTicks(); win = window_create(logger, &(struct window_params) { .title = "SDL2_image demo", .width = img->w, .height = img->h, }); if (!win) uninit(1); // Initialize rendering state if (!upload_plane(img, &img_tex, &img_plane)) { fprintf(stderr, "Failed uploading image plane!\n"); uninit(2); } SDL_FreeSurface(img); if (overlay) { SDL_Surface *osd = IMG_Load(overlay); if (!upload_plane(osd, &osd_tex, &osd_plane)) fprintf(stderr, "Failed uploading OSD plane.. continuing anyway\n"); SDL_FreeSurface(osd); } if (!open_file(icc_profile, &icc_file)) fprintf(stderr, "Failed opening ICC profile.. continuing anyway\n"); struct file lutf; if (open_file(lut_file, &lutf) && lutf.size) { if (!(lut = pl_lut_parse_cube(logger, lutf.data, lutf.size))) fprintf(stderr, "Failed parsing LUT.. continuing anyway\n"); close_file(&lutf); } renderer = pl_renderer_create(logger, win->gpu); unsigned int last = SDL_GetTicks(), frames = 0; printf("Took %u ms for initialization\n", last - start); // Render loop while (!win->window_lost) { struct pl_swapchain_frame frame; bool ok = pl_swapchain_start_frame(win->swapchain, &frame); if (!ok) { window_poll(win, true); continue; } if (!render_frame(&frame)) { fprintf(stderr, "libplacebo: Failed rendering frame!\n"); uninit(3); } ok = pl_swapchain_submit_frame(win->swapchain); if (!ok) { fprintf(stderr, "libplacebo: Failed submitting frame!\n"); uninit(3); } pl_swapchain_swap_buffers(win->swapchain); frames++; unsigned int now = SDL_GetTicks(); if (now - last > 5000) { printf("%u frames in %u ms = %f FPS\n", frames, now - last, 1000.0f * frames / (now - last)); last = now; frames = 0; } window_poll(win, false); } uninit(0); } libplacebo-v7.349.0/demos/settings.c000066400000000000000000001560301463457750100173140ustar00rootroot00000000000000#include #include #include #include "plplay.h" #ifdef PL_HAVE_WIN32 #include #define PL_BASENAME PathFindFileNameA #define strdup _strdup #else #include #define PL_BASENAME basename #endif #ifdef HAVE_NUKLEAR #include "ui.h" bool parse_args(struct plplay_args *args, int argc, char *argv[]) { static struct option long_options[] = { {"verbose", no_argument, NULL, 'v'}, {"quiet", no_argument, NULL, 'q'}, {"preset", required_argument, NULL, 'p'}, {"hwdec", no_argument, NULL, 'H'}, {"window", required_argument, NULL, 'w'}, {0} }; int option; while ((option = getopt_long(argc, argv, "vqp:Hw:", long_options, NULL)) != -1) { switch (option) { case 'v': if (args->verbosity < PL_LOG_TRACE) args->verbosity++; break; case 'q': if (args->verbosity > PL_LOG_NONE) args->verbosity--; break; case 'p': if (!strcmp(optarg, "default")) { args->preset = &pl_render_default_params; } else if (!strcmp(optarg, "fast")) { args->preset = &pl_render_fast_params; } else if (!strcmp(optarg, "highquality") || !strcmp(optarg, "hq")) { args->preset = &pl_render_high_quality_params; } else { fprintf(stderr, "Invalid value for -p/--preset: 
'%s'\n", optarg); goto error; } break; case 'H': args->hwdec = true; break; case 'w': args->window_impl = optarg; break; case '?': default: goto error; } } // Check for the required filename argument if (optind < argc) { args->filename = argv[optind++]; } else { fprintf(stderr, "Missing filename!\n"); goto error; } if (optind != argc) { fprintf(stderr, "Superfluous argument: %s\n", argv[optind]); goto error; } return true; error: fprintf(stderr, "Usage: %s [-v/--verbose] [-q/--quiet] [-p/--preset ] [--hwdec] [-w/--window ] \n", argv[0]); fprintf(stderr, "Options:\n"); fprintf(stderr, " -v, --verbose Increase verbosity\n"); fprintf(stderr, " -q, --quiet Decrease verbosity\n"); fprintf(stderr, " -p, --preset Set the rendering preset (default|fast|hq|highquality)\n"); fprintf(stderr, " -H, --hwdec Enable hardware decoding\n"); fprintf(stderr, " -w, --window Specify the windowing API\n"); return false; } static void add_hook(struct plplay *p, const struct pl_hook *hook, const char *path) { if (!hook) return; if (p->shader_num == p->shader_size) { // Grow array if needed size_t new_size = p->shader_size ? p->shader_size * 2 : 16; void *new_hooks = realloc(p->shader_hooks, new_size * sizeof(void *)); if (!new_hooks) goto error; p->shader_hooks = new_hooks; char **new_paths = realloc(p->shader_paths, new_size * sizeof(char *)); if (!new_paths) goto error; p->shader_paths = new_paths; p->shader_size = new_size; } // strip leading path while (true) { const char *fname = strchr(path, '/'); if (!fname) break; path = fname + 1; } char *path_copy = strdup(path); if (!path_copy) goto error; p->shader_hooks[p->shader_num] = hook; p->shader_paths[p->shader_num] = path_copy; p->shader_num++; return; error: pl_mpv_user_shader_destroy(&hook); } static void auto_property_int(struct nk_context *nk, int auto_val, int min, int *val, int max, int step, float inc_per_pixel) { int value = *val; if (!value) value = auto_val; // Auto label will be delayed 1 frame nk_property_int(nk, *val ? "" : "Auto", min, &value, max, step, inc_per_pixel); if (*val || value != auto_val) *val = value; } static void draw_shader_pass(struct nk_context *nk, const struct pl_dispatch_info *info) { pl_shader_info shader = info->shader; char label[128]; int count = snprintf(label, sizeof(label), "%.3f/%.3f/%.3f ms: %s", info->last / 1e6, info->average / 1e6, info->peak / 1e6, shader->description); if (count >= sizeof(label)) { label[sizeof(label) - 4] = '.'; label[sizeof(label) - 3] = '.'; label[sizeof(label) - 2] = '.'; } int id = (unsigned int) (uintptr_t) info; // pointer into `struct plplay` if (nk_tree_push_id(nk, NK_TREE_NODE, label, NK_MINIMIZED, id)) { nk_layout_row_dynamic(nk, 32, 1); if (nk_chart_begin(nk, NK_CHART_LINES, info->num_samples, 0.0f, info->peak)) { for (int k = 0; k < info->num_samples; k++) nk_chart_push(nk, info->samples[k]); nk_chart_end(nk); } nk_layout_row_dynamic(nk, 24, 1); for (int n = 0; n < shader->num_steps; n++) nk_labelf(nk, NK_TEXT_LEFT, "%d. %s", n + 1, shader->steps[n]); nk_tree_pop(nk); } } static void draw_timing(struct nk_context *nk, const char *label, const struct timing *t) { const double avg = t->count ? t->sum / t->count : 0.0; const double stddev = t->count ? 
sqrt(t->sum2 / t->count - avg * avg) : 0.0; nk_label(nk, label, NK_TEXT_LEFT); nk_labelf(nk, NK_TEXT_LEFT, "%.4f ± %.4f ms (%.3f ms)", avg * 1e3, stddev * 1e3, t->peak * 1e3); } static void draw_opt_data(void *priv, pl_opt_data data) { struct nk_context *nk = priv; pl_opt opt = data->opt; if (opt->type == PL_OPT_FLOAT) { // Print floats less verbosely than the libplacebo built-in printf nk_labelf(nk, NK_TEXT_LEFT, "%s = %f", opt->key, *(const float *) data->value); } else { nk_labelf(nk, NK_TEXT_LEFT, "%s = %s", opt->key, data->text); } } static void draw_cache_line(void *priv, pl_cache_obj obj) { struct nk_context *nk = priv; nk_labelf(nk, NK_TEXT_LEFT, " - 0x%016"PRIx64": %zu bytes", obj.key, obj.size); } void update_settings(struct plplay *p, const struct pl_frame *target) { struct nk_context *nk = ui_get_context(p->ui); enum nk_panel_flags win_flags = NK_WINDOW_BORDER | NK_WINDOW_MOVABLE | NK_WINDOW_SCALABLE | NK_WINDOW_MINIMIZABLE | NK_WINDOW_TITLE; ui_update_input(p->ui, p->win); const char *dropped_file = window_get_file(p->win); pl_options opts = p->opts; struct pl_render_params *par = &opts->params; if (nk_begin(nk, "Settings", nk_rect(100, 100, 600, 600), win_flags)) { if (nk_tree_push(nk, NK_TREE_NODE, "Window settings", NK_MAXIMIZED)) { nk_layout_row_dynamic(nk, 24, 2); bool fullscreen = window_is_fullscreen(p->win); p->toggle_fullscreen = nk_checkbox_label(nk, "Fullscreen", &fullscreen); nk_property_float(nk, "Corner rounding", 0.0, &par->corner_rounding, 1.0, 0.1, 0.01); struct nk_colorf bg = { par->background_color[0], par->background_color[1], par->background_color[2], 1.0 - par->background_transparency, }; static const char *clear_modes[PL_CLEAR_MODE_COUNT] = { [PL_CLEAR_COLOR] = "Solid color", [PL_CLEAR_TILES] = "Tiled pattern", [PL_CLEAR_SKIP] = "Skip clearing", }; nk_label(nk, "Background:", NK_TEXT_LEFT); par->background = nk_combo(nk, clear_modes, PL_CLEAR_MODE_COUNT, par->background, 16, nk_vec2(nk_widget_width(nk), 300)); nk_label(nk, "Borders:", NK_TEXT_LEFT); par->border = nk_combo(nk, clear_modes, PL_CLEAR_MODE_COUNT, par->border, 16, nk_vec2(nk_widget_width(nk), 300)); if (nk_combo_begin_color(nk, nk_rgb_cf(bg), nk_vec2(nk_widget_width(nk), 300))) { nk_layout_row_dynamic(nk, 200, 1); nk_color_pick(nk, &bg, NK_RGBA); nk_combo_end(nk); par->background_color[0] = bg.r; par->background_color[1] = bg.g; par->background_color[2] = bg.b; par->background_transparency = 1.0 - bg.a; } nk_property_int(nk, "Tile size", 2, &par->tile_size, 256, 1, 1); nk_layout_row(nk, NK_DYNAMIC, 24, 3, (float[]){ 0.4, 0.3, 0.3 }); nk_label(nk, "Tile colors:", NK_TEXT_LEFT); for (int i = 0; i < 2; i++) { bg = (struct nk_colorf) { par->tile_colors[i][0], par->tile_colors[i][1], par->tile_colors[i][2], }; if (nk_combo_begin_color(nk, nk_rgb_cf(bg), nk_vec2(nk_widget_width(nk), 300))) { nk_layout_row_dynamic(nk, 200, 1); nk_color_pick(nk, &bg, NK_RGB); nk_combo_end(nk); par->tile_colors[i][0] = bg.r; par->tile_colors[i][1] = bg.g; par->tile_colors[i][2] = bg.b; } } static const char *rotations[4] = { [PL_ROTATION_0] = "0°", [PL_ROTATION_90] = "90°", [PL_ROTATION_180] = "180°", [PL_ROTATION_270] = "270°", }; nk_layout_row_dynamic(nk, 24, 2); nk_label(nk, "Display orientation:", NK_TEXT_LEFT); p->target_rot = nk_combo(nk, rotations, 4, p->target_rot, 16, nk_vec2(nk_widget_width(nk), 100)); nk_tree_pop(nk); } if (nk_tree_push(nk, NK_TREE_NODE, "Image scaling", NK_MAXIMIZED)) { const struct pl_filter_config *f; static const char *scale_none = "None (Built-in sampling)"; static const char 
*pscale_none = "None (Use regular upscaler)"; static const char *tscale_none = "None (No frame mixing)"; #define SCALE_DESC(scaler, fallback) (par->scaler ? par->scaler->description : fallback) static const char *zoom_modes[ZOOM_COUNT] = { [ZOOM_PAD] = "Pad to window", [ZOOM_CROP] = "Crop to window", [ZOOM_STRETCH] = "Stretch to window", [ZOOM_FIT] = "Fit inside window", [ZOOM_RAW] = "Unscaled (raw)", [ZOOM_400] = "400% zoom", [ZOOM_200] = "200% zoom", [ZOOM_100] = "100% zoom", [ZOOM_50] = " 50% zoom", [ZOOM_25] = " 25% zoom", }; nk_layout_row(nk, NK_DYNAMIC, 24, 2, (float[]){ 0.3, 0.7 }); nk_label(nk, "Zoom mode:", NK_TEXT_LEFT); int zoom = nk_combo(nk, zoom_modes, ZOOM_COUNT, p->target_zoom, 16, nk_vec2(nk_widget_width(nk), 500)); if (zoom != p->target_zoom) { // Image crop may change pl_renderer_flush_cache(p->renderer); p->target_zoom = zoom; } nk_label(nk, "Upscaler:", NK_TEXT_LEFT); if (nk_combo_begin_label(nk, SCALE_DESC(upscaler, scale_none), nk_vec2(nk_widget_width(nk), 500))) { nk_layout_row_dynamic(nk, 16, 1); if (nk_combo_item_label(nk, scale_none, NK_TEXT_LEFT)) par->upscaler = NULL; for (int i = 0; i < pl_num_filter_configs; i++) { f = pl_filter_configs[i]; if (!f->description) continue; if (!(f->allowed & PL_FILTER_UPSCALING)) continue; if (!p->advanced_scalers && !(f->recommended & PL_FILTER_UPSCALING)) continue; if (nk_combo_item_label(nk, f->description, NK_TEXT_LEFT)) par->upscaler = f; } nk_combo_end(nk); } nk_label(nk, "Downscaler:", NK_TEXT_LEFT); if (nk_combo_begin_label(nk, SCALE_DESC(downscaler, scale_none), nk_vec2(nk_widget_width(nk), 500))) { nk_layout_row_dynamic(nk, 16, 1); if (nk_combo_item_label(nk, scale_none, NK_TEXT_LEFT)) par->downscaler = NULL; for (int i = 0; i < pl_num_filter_configs; i++) { f = pl_filter_configs[i]; if (!f->description) continue; if (!(f->allowed & PL_FILTER_DOWNSCALING)) continue; if (!p->advanced_scalers && !(f->recommended & PL_FILTER_DOWNSCALING)) continue; if (nk_combo_item_label(nk, f->description, NK_TEXT_LEFT)) par->downscaler = f; } nk_combo_end(nk); } nk_label(nk, "Plane scaler:", NK_TEXT_LEFT); if (nk_combo_begin_label(nk, SCALE_DESC(plane_upscaler, pscale_none), nk_vec2(nk_widget_width(nk), 500))) { nk_layout_row_dynamic(nk, 16, 1); if (nk_combo_item_label(nk, pscale_none, NK_TEXT_LEFT)) par->downscaler = NULL; for (int i = 0; i < pl_num_filter_configs; i++) { f = pl_filter_configs[i]; if (!f->description) continue; if (!(f->allowed & PL_FILTER_UPSCALING)) continue; if (!p->advanced_scalers && !(f->recommended & PL_FILTER_UPSCALING)) continue; if (nk_combo_item_label(nk, f->description, NK_TEXT_LEFT)) par->plane_upscaler = f; } nk_combo_end(nk); } nk_label(nk, "Frame mixing:", NK_TEXT_LEFT); if (nk_combo_begin_label(nk, SCALE_DESC(frame_mixer, tscale_none), nk_vec2(nk_widget_width(nk), 300))) { nk_layout_row_dynamic(nk, 16, 1); if (nk_combo_item_label(nk, tscale_none, NK_TEXT_LEFT)) par->frame_mixer = NULL; for (int i = 0; i < pl_num_filter_configs; i++) { f = pl_filter_configs[i]; if (!f->description) continue; if (!(f->allowed & PL_FILTER_FRAME_MIXING)) continue; if (!p->advanced_scalers && !(f->recommended & PL_FILTER_FRAME_MIXING)) continue; if (nk_combo_item_label(nk, f->description, NK_TEXT_LEFT)) par->frame_mixer = f; } nk_combo_end(nk); } nk_layout_row_dynamic(nk, 24, 2); par->skip_anti_aliasing = !nk_check_label(nk, "Anti-aliasing", !par->skip_anti_aliasing); nk_property_float(nk, "Antiringing", 0, &par->antiringing_strength, 1.0, 0.05, 0.001); struct pl_sigmoid_params *spar = &opts->sigmoid_params; 
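            // Sigmoidization is only used when upscaling in linear light; the
            // checkbox below enables it by pointing par->sigmoid_params at
            // this struct (or NULL to disable it).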
nk_layout_row_dynamic(nk, 24, 2); par->sigmoid_params = nk_check_label(nk, "Sigmoidization", par->sigmoid_params) ? spar : NULL; if (nk_button_label(nk, "Default values")) *spar = pl_sigmoid_default_params; nk_property_float(nk, "Sigmoid center", 0, &spar->center, 1, 0.1, 0.01); nk_property_float(nk, "Sigmoid slope", 0, &spar->slope, 100, 1, 0.1); nk_tree_pop(nk); } if (nk_tree_push(nk, NK_TREE_NODE, "Deinterlacing", NK_MINIMIZED)) { struct pl_deinterlace_params *dpar = &opts->deinterlace_params; nk_layout_row_dynamic(nk, 24, 2); par->deinterlace_params = nk_check_label(nk, "Enable", par->deinterlace_params) ? dpar : NULL; if (nk_button_label(nk, "Reset settings")) *dpar = pl_deinterlace_default_params; static const char *deint_algos[PL_DEINTERLACE_ALGORITHM_COUNT] = { [PL_DEINTERLACE_WEAVE] = "Field weaving (no-op)", [PL_DEINTERLACE_BOB] = "Naive bob (line doubling)", [PL_DEINTERLACE_YADIF] = "Yadif (\"yet another deinterlacing filter\")", }; nk_label(nk, "Deinterlacing algorithm", NK_TEXT_LEFT); dpar->algo = nk_combo(nk, deint_algos, PL_DEINTERLACE_ALGORITHM_COUNT, dpar->algo, 16, nk_vec2(nk_widget_width(nk), 300)); switch (dpar->algo) { case PL_DEINTERLACE_WEAVE: case PL_DEINTERLACE_BOB: break; case PL_DEINTERLACE_YADIF: nk_checkbox_label(nk, "Skip spatial check", &dpar->skip_spatial_check); break; default: abort(); } nk_tree_pop(nk); } if (nk_tree_push(nk, NK_TREE_NODE, "Debanding", NK_MINIMIZED)) { struct pl_deband_params *dpar = &opts->deband_params; nk_layout_row_dynamic(nk, 24, 2); par->deband_params = nk_check_label(nk, "Enable", par->deband_params) ? dpar : NULL; if (nk_button_label(nk, "Reset settings")) *dpar = pl_deband_default_params; nk_property_int(nk, "Iterations", 0, &dpar->iterations, 8, 1, 0); nk_property_float(nk, "Threshold", 0, &dpar->threshold, 256, 1, 0.5); nk_property_float(nk, "Radius", 0, &dpar->radius, 256, 1, 0.2); nk_property_float(nk, "Grain", 0, &dpar->grain, 512, 1, 0.5); nk_tree_pop(nk); } if (nk_tree_push(nk, NK_TREE_NODE, "Distortion", NK_MINIMIZED)) { struct pl_distort_params *dpar = &opts->distort_params; nk_layout_row_dynamic(nk, 24, 2); par->distort_params = nk_check_label(nk, "Enable", par->distort_params) ? dpar : NULL; if (nk_button_label(nk, "Reset settings")) *dpar = pl_distort_default_params; static const char *address_modes[PL_TEX_ADDRESS_MODE_COUNT] = { [PL_TEX_ADDRESS_CLAMP] = "Clamp edges", [PL_TEX_ADDRESS_REPEAT] = "Repeat edges", [PL_TEX_ADDRESS_MIRROR] = "Mirror edges", }; nk_checkbox_label(nk, "Constrain bounds", &dpar->constrain); dpar->address_mode = nk_combo(nk, address_modes, PL_TEX_ADDRESS_MODE_COUNT, dpar->address_mode, 16, nk_vec2(nk_widget_width(nk), 100)); bool alpha = nk_check_label(nk, "Transparent background", dpar->alpha_mode); dpar->alpha_mode = alpha ? 
PL_ALPHA_INDEPENDENT : PL_ALPHA_NONE; nk_checkbox_label(nk, "Bicubic interpolation", &dpar->bicubic); struct pl_transform2x2 *tf = &dpar->transform; nk_property_float(nk, "Scale X", -10.0, &tf->mat.m[0][0], 10.0, 0.1, 0.005); nk_property_float(nk, "Shear X", -10.0, &tf->mat.m[0][1], 10.0, 0.1, 0.005); nk_property_float(nk, "Shear Y", -10.0, &tf->mat.m[1][0], 10.0, 0.1, 0.005); nk_property_float(nk, "Scale Y", -10.0, &tf->mat.m[1][1], 10.0, 0.1, 0.005); nk_property_float(nk, "Offset X", -10.0, &tf->c[0], 10.0, 0.1, 0.005); nk_property_float(nk, "Offset Y", -10.0, &tf->c[1], 10.0, 0.1, 0.005); float zoom_ref = fabsf(tf->mat.m[0][0] * tf->mat.m[1][1] - tf->mat.m[0][1] * tf->mat.m[1][0]); zoom_ref = logf(fmaxf(zoom_ref, 1e-4)); float zoom = zoom_ref; nk_property_float(nk, "log(Zoom)", -10.0, &zoom, 10.0, 0.1, 0.005); pl_transform2x2_scale(tf, expf(zoom - zoom_ref)); float angle_ref = (atan2f(tf->mat.m[1][0], tf->mat.m[1][1]) - atan2f(tf->mat.m[0][1], tf->mat.m[0][0])) / 2; angle_ref = fmodf(angle_ref * 180/M_PI + 540, 360) - 180; float angle = angle_ref; nk_property_float(nk, "Rotate (°)", -200, &angle, 200, -5, -0.2); float angle_delta = (angle - angle_ref) * M_PI / 180; const pl_matrix2x2 rot = pl_matrix2x2_rotation(angle_delta); pl_matrix2x2_rmul(&rot, &tf->mat); bool flip_ox = nk_button_label(nk, "Flip output X"); bool flip_oy = nk_button_label(nk, "Flip output Y"); bool flip_ix = nk_button_label(nk, "Flip input X"); bool flip_iy = nk_button_label(nk, "Flip input Y"); if (flip_ox ^ flip_ix) tf->mat.m[0][0] = -tf->mat.m[0][0]; if (flip_ox ^ flip_iy) tf->mat.m[0][1] = -tf->mat.m[0][1]; if (flip_oy ^ flip_ix) tf->mat.m[1][0] = -tf->mat.m[1][0]; if (flip_oy ^ flip_iy) tf->mat.m[1][1] = -tf->mat.m[1][1]; if (flip_ox) tf->c[0] = -tf->c[0]; if (flip_oy) tf->c[1] = -tf->c[1]; nk_tree_pop(nk); } if (nk_tree_push(nk, NK_TREE_NODE, "Color adjustment", NK_MINIMIZED)) { struct pl_color_adjustment *adj = &opts->color_adjustment; nk_layout_row_dynamic(nk, 24, 2); par->color_adjustment = nk_check_label(nk, "Enable", par->color_adjustment) ? adj : NULL; if (nk_button_label(nk, "Default values")) *adj = pl_color_adjustment_neutral; nk_property_float(nk, "Brightness", -1, &adj->brightness, 1, 0.1, 0.005); nk_property_float(nk, "Contrast", 0, &adj->contrast, 10, 0.1, 0.005); // Convert to (cyclical) degrees for display int deg = roundf(adj->hue * 180.0 / M_PI); nk_property_int(nk, "Hue (°)", -50, °, 400, 1, 1); adj->hue = ((deg + 360) % 360) * M_PI / 180.0; nk_property_float(nk, "Saturation", 0, &adj->saturation, 10, 0.1, 0.005); nk_property_float(nk, "Gamma", 0, &adj->gamma, 10, 0.1, 0.005); // Convert to human-friendly temperature values for display int temp = (int) roundf(adj->temperature * 3500) + 6500; nk_property_int(nk, "Temperature (K)", 3000, &temp, 10000, 10, 5); adj->temperature = (temp - 6500) / 3500.0; struct pl_cone_params *cpar = &opts->cone_params; nk_layout_row_dynamic(nk, 24, 2); par->cone_params = nk_check_label(nk, "Color blindness", par->cone_params) ? 
cpar : NULL; if (nk_button_label(nk, "Default values")) *cpar = pl_vision_normal; nk_layout_row(nk, NK_DYNAMIC, 24, 5, (float[]){ 0.25, 0.25/3, 0.25/3, 0.25/3, 0.5 }); nk_label(nk, "Cone model:", NK_TEXT_LEFT); unsigned int cones = cpar->cones; nk_checkbox_flags_label(nk, "L", &cones, PL_CONE_L); nk_checkbox_flags_label(nk, "M", &cones, PL_CONE_M); nk_checkbox_flags_label(nk, "S", &cones, PL_CONE_S); cpar->cones = cones; nk_property_float(nk, "Sensitivity", 0.0, &cpar->strength, 5.0, 0.1, 0.01); nk_tree_pop(nk); } if (nk_tree_push(nk, NK_TREE_NODE, "HDR peak detection", NK_MINIMIZED)) { struct pl_peak_detect_params *ppar = &opts->peak_detect_params; nk_layout_row_dynamic(nk, 24, 2); par->peak_detect_params = nk_check_label(nk, "Enable", par->peak_detect_params) ? ppar : NULL; if (nk_button_label(nk, "Reset settings")) *ppar = pl_peak_detect_default_params; nk_property_float(nk, "Threshold low", 0.0, &ppar->scene_threshold_low, 20.0, 0.5, 0.005); nk_property_float(nk, "Threshold high", 0.0, &ppar->scene_threshold_high, 20.0, 0.5, 0.005); nk_property_float(nk, "Smoothing period", 0.0, &ppar->smoothing_period, 1000.0, 5.0, 1.0); nk_property_float(nk, "Peak percentile", 95.0, &ppar->percentile, 100.0, 0.01, 0.001); nk_property_float(nk, "Black cutoff", 0.0, &ppar->black_cutoff, 100.0, 0.01, 0.001); nk_checkbox_label(nk, "Allow 1-frame delay", &ppar->allow_delayed); struct pl_hdr_metadata metadata; if (pl_renderer_get_hdr_metadata(p->renderer, &metadata)) { nk_layout_row_dynamic(nk, 24, 2); nk_label(nk, "Detected max luminance:", NK_TEXT_LEFT); nk_labelf(nk, NK_TEXT_LEFT, "%.2f cd/m² (%.2f%% PQ)", pl_hdr_rescale(PL_HDR_PQ, PL_HDR_NITS, metadata.max_pq_y), 100.0f * metadata.max_pq_y); nk_label(nk, "Detected avg luminance:", NK_TEXT_LEFT); nk_labelf(nk, NK_TEXT_LEFT, "%.2f cd/m² (%.2f%% PQ)", pl_hdr_rescale(PL_HDR_PQ, PL_HDR_NITS, metadata.avg_pq_y), 100.0f * metadata.avg_pq_y); } nk_tree_pop(nk); } if (nk_tree_push(nk, NK_TREE_NODE, "Tone mapping", NK_MINIMIZED)) { struct pl_color_map_params *cpar = &opts->color_map_params; static const struct pl_color_map_params null_settings = {0}; nk_layout_row_dynamic(nk, 24, 2); par->color_map_params = nk_check_label(nk, "Enable", par->color_map_params == cpar) ? 
cpar : &null_settings; if (nk_button_label(nk, "Reset settings")) *cpar = pl_color_map_default_params; nk_label(nk, "Gamut mapping function:", NK_TEXT_LEFT); if (nk_combo_begin_label(nk, cpar->gamut_mapping->description, nk_vec2(nk_widget_width(nk), 500))) { nk_layout_row_dynamic(nk, 16, 1); for (int i = 0; i < pl_num_gamut_map_functions; i++) { const struct pl_gamut_map_function *f = pl_gamut_map_functions[i]; if (nk_combo_item_label(nk, f->description, NK_TEXT_LEFT)) cpar->gamut_mapping = f; } nk_combo_end(nk); } nk_label(nk, "Tone mapping function:", NK_TEXT_LEFT); if (nk_combo_begin_label(nk, cpar->tone_mapping_function->description, nk_vec2(nk_widget_width(nk), 500))) { nk_layout_row_dynamic(nk, 16, 1); for (int i = 0; i < pl_num_tone_map_functions; i++) { const struct pl_tone_map_function *f = pl_tone_map_functions[i]; if (nk_combo_item_label(nk, f->description, NK_TEXT_LEFT)) cpar->tone_mapping_function = f; } nk_combo_end(nk); } static const char *metadata_types[PL_HDR_METADATA_TYPE_COUNT] = { [PL_HDR_METADATA_ANY] = "Automatic selection", [PL_HDR_METADATA_NONE] = "None (disabled)", [PL_HDR_METADATA_HDR10] = "HDR10 (static)", [PL_HDR_METADATA_HDR10PLUS] = "HDR10+ (MaxRGB)", [PL_HDR_METADATA_CIE_Y] = "Luminance (CIE Y)", }; nk_label(nk, "HDR metadata source:", NK_TEXT_LEFT); cpar->metadata = nk_combo(nk, metadata_types, PL_HDR_METADATA_TYPE_COUNT, cpar->metadata, 16, nk_vec2(nk_widget_width(nk), 300)); nk_property_float(nk, "Contrast recovery", 0.0, &cpar->contrast_recovery, 2.0, 0.05, 0.005); nk_property_float(nk, "Contrast smoothness", 1.0, &cpar->contrast_smoothness, 32.0, 0.1, 0.005); nk_property_int(nk, "LUT size", 16, &cpar->lut_size, 1024, 1, 1); nk_property_int(nk, "3DLUT size I", 7, &cpar->lut3d_size[0], 65, 1, 1); nk_property_int(nk, "3DLUT size C", 7, &cpar->lut3d_size[1], 256, 1, 1); nk_property_int(nk, "3DLUT size h", 7, &cpar->lut3d_size[2], 1024, 1, 1); nk_checkbox_label(nk, "Tricubic interpolation", &cpar->lut3d_tricubic); nk_checkbox_label(nk, "Force full LUT", &cpar->force_tone_mapping_lut); nk_checkbox_label(nk, "Inverse tone mapping", &cpar->inverse_tone_mapping); nk_checkbox_label(nk, "Gamut expansion", &cpar->gamut_expansion); nk_checkbox_label(nk, "Show clipping", &cpar->show_clipping); nk_checkbox_label(nk, "Visualize LUT", &cpar->visualize_lut); if (cpar->visualize_lut) { nk_layout_row_dynamic(nk, 24, 2); const float huerange = 2 * M_PI; nk_property_float(nk, "Hue", -1, &cpar->visualize_hue, huerange + 1.0, 0.1, 0.01); nk_property_float(nk, "Theta", 0.0, &cpar->visualize_theta, M_PI_2, 0.1, 0.01); cpar->visualize_hue = fmodf(cpar->visualize_hue + huerange, huerange); } if (nk_tree_push(nk, NK_TREE_NODE, "Fine-tune constants (advanced)", NK_MINIMIZED)) { struct pl_tone_map_constants *tc = &cpar->tone_constants; struct pl_gamut_map_constants *gc = &cpar->gamut_constants; nk_layout_row_dynamic(nk, 20, 2); nk_property_float(nk, "Perceptual deadzone", 0.0, &gc->perceptual_deadzone, 1.0, 0.05, 0.001); nk_property_float(nk, "Perceptual strength", 0.0, &gc->perceptual_strength, 1.0, 0.05, 0.001); nk_property_float(nk, "Colorimetric gamma", 0.0, &gc->colorimetric_gamma, 10.0, 0.05, 0.001); nk_property_float(nk, "Softclip knee", 0.0, &gc->softclip_knee, 1.0, 0.05, 0.001); nk_property_float(nk, "Softclip desaturation", 0.0, &gc->softclip_desat, 1.0, 0.05, 0.001); nk_property_float(nk, "Knee adaptation", 0.0, &tc->knee_adaptation, 1.0, 0.05, 0.001); nk_property_float(nk, "Knee minimum", 0.0, &tc->knee_minimum, 0.5, 0.05, 0.001); nk_property_float(nk, "Knee maximum", 
0.5, &tc->knee_maximum, 1.0, 0.05, 0.001); nk_property_float(nk, "Knee default", tc->knee_minimum, &tc->knee_default, tc->knee_maximum, 0.05, 0.001); nk_property_float(nk, "BT.2390 offset", 0.5, &tc->knee_offset, 2.0, 0.05, 0.001); nk_property_float(nk, "Spline slope tuning", 0.0, &tc->slope_tuning, 10.0, 0.05, 0.001); nk_property_float(nk, "Spline slope offset", 0.0, &tc->slope_offset, 1.0, 0.05, 0.001); nk_property_float(nk, "Spline contrast", 0.0, &tc->spline_contrast, 1.5, 0.05, 0.001); nk_property_float(nk, "Reinhard contrast", 0.0, &tc->reinhard_contrast, 1.0, 0.05, 0.001); nk_property_float(nk, "Linear knee point", 0.0, &tc->linear_knee, 1.0, 0.05, 0.001); nk_property_float(nk, "Linear exposure", 0.0, &tc->exposure, 10.0, 0.05, 0.001); nk_tree_pop(nk); } nk_layout_row_dynamic(nk, 50, 1); if (ui_widget_hover(nk, "Drop .cube file here...") && dropped_file) { uint8_t *buf; size_t size; int ret = av_file_map(dropped_file, &buf, &size, 0, NULL); if (ret < 0) { fprintf(stderr, "Failed opening '%s': %s\n", dropped_file, av_err2str(ret)); } else { pl_lut_free((struct pl_custom_lut **) &par->lut); par->lut = pl_lut_parse_cube(p->log, (char *) buf, size); av_file_unmap(buf, size); } } static const char *lut_types[] = { [PL_LUT_UNKNOWN] = "Auto (unknown)", [PL_LUT_NATIVE] = "Raw RGB (native)", [PL_LUT_NORMALIZED] = "Linear RGB (normalized)", [PL_LUT_CONVERSION] = "Gamut conversion (native)", }; nk_layout_row(nk, NK_DYNAMIC, 24, 3, (float[]){ 0.2, 0.3, 0.5 }); if (nk_button_label(nk, "Reset LUT")) { pl_lut_free((struct pl_custom_lut **) &par->lut); par->lut_type = PL_LUT_UNKNOWN; } nk_label(nk, "LUT type:", NK_TEXT_CENTERED); par->lut_type = nk_combo(nk, lut_types, 4, par->lut_type, 16, nk_vec2(nk_widget_width(nk), 100)); nk_tree_pop(nk); } if (nk_tree_push(nk, NK_TREE_NODE, "Dithering", NK_MINIMIZED)) { struct pl_dither_params *dpar = &opts->dither_params; nk_layout_row_dynamic(nk, 24, 2); par->dither_params = nk_check_label(nk, "Enable", par->dither_params) ? dpar : NULL; if (nk_button_label(nk, "Reset settings")) *dpar = pl_dither_default_params; static const char *dither_methods[PL_DITHER_METHOD_COUNT] = { [PL_DITHER_BLUE_NOISE] = "Blue noise", [PL_DITHER_ORDERED_LUT] = "Ordered (LUT)", [PL_DITHER_ORDERED_FIXED] = "Ordered (fixed size)", [PL_DITHER_WHITE_NOISE] = "White noise", }; nk_label(nk, "Dither method:", NK_TEXT_LEFT); dpar->method = nk_combo(nk, dither_methods, PL_DITHER_METHOD_COUNT, dpar->method, 16, nk_vec2(nk_widget_width(nk), 100)); static const char *lut_sizes[8] = { "2x2", "4x4", "8x8", "16x16", "32x32", "64x64", "128x128", "256x256", }; nk_label(nk, "LUT size:", NK_TEXT_LEFT); switch (dpar->method) { case PL_DITHER_BLUE_NOISE: case PL_DITHER_ORDERED_LUT: { int size = dpar->lut_size - 1; nk_combobox(nk, lut_sizes, 8, &size, 16, nk_vec2(nk_widget_width(nk), 200)); dpar->lut_size = size + 1; break; } case PL_DITHER_ORDERED_FIXED: nk_label(nk, "64x64", NK_TEXT_LEFT); break; default: nk_label(nk, "(N/A)", NK_TEXT_LEFT); break; } nk_checkbox_label(nk, "Temporal dithering", &dpar->temporal); nk_layout_row_dynamic(nk, 24, 2); nk_label(nk, "Error diffusion:", NK_TEXT_LEFT); const char *name = par->error_diffusion ? 
par->error_diffusion->description : "(None)"; if (nk_combo_begin_label(nk, name, nk_vec2(nk_widget_width(nk), 500))) { nk_layout_row_dynamic(nk, 16, 1); if (nk_combo_item_label(nk, "(None)", NK_TEXT_LEFT)) par->error_diffusion = NULL; for (int i = 0; i < pl_num_error_diffusion_kernels; i++) { const struct pl_error_diffusion_kernel *k = pl_error_diffusion_kernels[i]; if (nk_combo_item_label(nk, k->description, NK_TEXT_LEFT)) par->error_diffusion = k; } nk_combo_end(nk); } nk_tree_pop(nk); } if (nk_tree_push(nk, NK_TREE_NODE, "Output color space", NK_MINIMIZED)) { nk_layout_row_dynamic(nk, 24, 2); nk_checkbox_label(nk, "Enable", &p->target_override); bool reset = nk_button_label(nk, "Reset settings"); bool reset_icc = reset; char buf[64] = {0}; nk_layout_row(nk, NK_DYNAMIC, 24, 2, (float[]){ 0.3, 0.7 }); const char *primaries[PL_COLOR_PRIM_COUNT]; memcpy(primaries, pl_color_primaries_names, sizeof(primaries)); if (target->color.primaries) { snprintf(buf, sizeof(buf), "Auto (%s)", primaries[target->color.primaries]); primaries[PL_COLOR_PRIM_UNKNOWN] = buf; } nk_label(nk, "Primaries:", NK_TEXT_LEFT); p->force_prim = nk_combo(nk, primaries, PL_COLOR_PRIM_COUNT, p->force_prim, 16, nk_vec2(nk_widget_width(nk), 200)); const char *transfers[PL_COLOR_TRC_COUNT]; memcpy(transfers, pl_color_transfer_names, sizeof(transfers)); if (target->color.transfer) { snprintf(buf, sizeof(buf), "Auto (%s)", transfers[target->color.transfer]); transfers[PL_COLOR_TRC_UNKNOWN] = buf; } nk_label(nk, "Transfer:", NK_TEXT_LEFT); p->force_trc = nk_combo(nk, transfers, PL_COLOR_TRC_COUNT, p->force_trc, 16, nk_vec2(nk_widget_width(nk), 200)); nk_layout_row_dynamic(nk, 24, 2); nk_checkbox_label(nk, "Override HDR levels", &p->force_hdr_enable); // Ensure these values are always legal by going through // pl_color_space_infer nk_layout_row_dynamic(nk, 24, 2); struct pl_color_space fix = target->color; apply_csp_overrides(p, &fix); pl_color_space_infer(&fix); fix.hdr.min_luma *= 1000; // better value range nk_property_float(nk, "White point (cd/m²)", 10.0, &fix.hdr.max_luma, 10000.0, fix.hdr.max_luma / 100, fix.hdr.max_luma / 1000); nk_property_float(nk, "Black point (mcd/m²)", PL_COLOR_HDR_BLACK * 1000, &fix.hdr.min_luma, 100.0 * 1000, 5, 2); fix.hdr.min_luma /= 1000; pl_color_space_infer(&fix); p->force_hdr = fix.hdr; struct pl_color_repr *trepr = &p->force_repr; nk_layout_row(nk, NK_DYNAMIC, 24, 2, (float[]){ 0.3, 0.7 }); const char *systems[PL_COLOR_SYSTEM_COUNT]; memcpy(systems, pl_color_system_names, sizeof(systems)); if (target->repr.sys) { snprintf(buf, sizeof(buf), "Auto (%s)", systems[target->repr.sys]); systems[PL_COLOR_SYSTEM_UNKNOWN] = buf; } nk_label(nk, "System:", NK_TEXT_LEFT); trepr->sys = nk_combo(nk, systems, PL_COLOR_SYSTEM_COUNT, trepr->sys, 16, nk_vec2(nk_widget_width(nk), 200)); if (trepr->sys == PL_COLOR_SYSTEM_DOLBYVISION) trepr->sys = PL_COLOR_SYSTEM_UNKNOWN; const char *levels[PL_COLOR_LEVELS_COUNT] = { [PL_COLOR_LEVELS_UNKNOWN] = "Auto (unknown)", [PL_COLOR_LEVELS_LIMITED] = "Limited/TV range, e.g. 16-235", [PL_COLOR_LEVELS_FULL] = "Full/PC range, e.g. 
0-255", }; if (target->repr.levels) { snprintf(buf, sizeof(buf), "Auto (%s)", levels[target->repr.levels]); levels[PL_COLOR_LEVELS_UNKNOWN] = buf; } nk_label(nk, "Levels:", NK_TEXT_LEFT); trepr->levels = nk_combo(nk, levels, PL_COLOR_LEVELS_COUNT, trepr->levels, 16, nk_vec2(nk_widget_width(nk), 200)); const char *alphas[PL_ALPHA_MODE_COUNT] = { [PL_ALPHA_UNKNOWN] = "Auto (unknown)", [PL_ALPHA_INDEPENDENT] = "Independent", [PL_ALPHA_PREMULTIPLIED] = "Premultiplied", [PL_ALPHA_NONE] = "None", }; if (target->repr.alpha) { snprintf(buf, sizeof(buf), "Auto (%s)", alphas[target->repr.alpha]); alphas[PL_ALPHA_UNKNOWN] = buf; } nk_label(nk, "Alpha:", NK_TEXT_LEFT); trepr->alpha = nk_combo(nk, alphas, PL_ALPHA_MODE_COUNT, trepr->alpha, 16, nk_vec2(nk_widget_width(nk), 200)); const struct pl_bit_encoding *bits = &target->repr.bits; nk_label(nk, "Bit depth:", NK_TEXT_LEFT); auto_property_int(nk, bits->color_depth, 0, &trepr->bits.color_depth, 16, 1, 0); if (bits->color_depth != bits->sample_depth) { nk_label(nk, "Sample bit depth:", NK_TEXT_LEFT); auto_property_int(nk, bits->sample_depth, 0, &trepr->bits.sample_depth, 16, 1, 0); } else { // Adjust these two fields in unison trepr->bits.sample_depth = trepr->bits.color_depth; } if (bits->bit_shift) { nk_label(nk, "Bit shift:", NK_TEXT_LEFT); auto_property_int(nk, bits->bit_shift, 0, &trepr->bits.bit_shift, 16, 1, 0); } else { trepr->bits.bit_shift = 0; } nk_layout_row_dynamic(nk, 24, 1); nk_checkbox_label(nk, "Forward input color space to display", &p->colorspace_hint); if (p->colorspace_hint && !p->force_hdr_enable) { nk_checkbox_label(nk, "Forward dynamic brightness changes to display", &p->colorspace_hint_dynamic); } nk_layout_row_dynamic(nk, 50, 1); if (ui_widget_hover(nk, "Drop ICC profile here...") && dropped_file) { struct pl_icc_profile profile; int ret = av_file_map(dropped_file, (uint8_t **) &profile.data, &profile.len, 0, NULL); if (ret < 0) { fprintf(stderr, "Failed opening '%s': %s\n", dropped_file, av_err2str(ret)); } else { free(p->icc_name); pl_icc_profile_compute_signature(&profile); pl_icc_update(p->log, &p->icc, &profile, pl_icc_params( .force_bpc = p->force_bpc, .max_luma = p->use_icc_luma ? 0 : PL_COLOR_SDR_WHITE, )); av_file_unmap((void *) profile.data, profile.len); if (p->icc) p->icc_name = strdup(PL_BASENAME((char *) dropped_file)); } } if (p->icc) { nk_layout_row_dynamic(nk, 24, 2); nk_labelf(nk, NK_TEXT_LEFT, "Loaded: %s", p->icc_name ? 
p->icc_name : "(unknown)"); reset_icc |= nk_button_label(nk, "Reset ICC"); nk_checkbox_label(nk, "Force BPC", &p->force_bpc); nk_checkbox_label(nk, "Use detected luminance", &p->use_icc_luma); } // Apply the reset last to prevent the UI from flashing for a frame if (reset) { p->force_repr = (struct pl_color_repr) {0}; p->force_prim = PL_COLOR_PRIM_UNKNOWN; p->force_trc = PL_COLOR_TRC_UNKNOWN; p->force_hdr = (struct pl_hdr_metadata) {0}; p->force_hdr_enable = false; } if (reset_icc && p->icc) { pl_icc_close(&p->icc); free(p->icc_name); p->icc_name = NULL; } nk_tree_pop(nk); } if (nk_tree_push(nk, NK_TREE_NODE, "Custom shaders", NK_MINIMIZED)) { nk_layout_row_dynamic(nk, 50, 1); if (ui_widget_hover(nk, "Drop .hook/.glsl files here...") && dropped_file) { uint8_t *buf; size_t size; int ret = av_file_map(dropped_file, &buf, &size, 0, NULL); if (ret < 0) { fprintf(stderr, "Failed opening '%s': %s\n", dropped_file, av_err2str(ret)); } else { const struct pl_hook *hook; hook = pl_mpv_user_shader_parse(p->win->gpu, (char *) buf, size); av_file_unmap(buf, size); add_hook(p, hook, dropped_file); } } const float px = 24.0; nk_layout_row_template_begin(nk, px); nk_layout_row_template_push_static(nk, px); nk_layout_row_template_push_static(nk, px); nk_layout_row_template_push_static(nk, px); nk_layout_row_template_push_dynamic(nk); nk_layout_row_template_end(nk); for (int i = 0; i < p->shader_num; i++) { if (i == 0) { nk_label(nk, "·", NK_TEXT_CENTERED); } else if (nk_button_symbol(nk, NK_SYMBOL_TRIANGLE_UP)) { const struct pl_hook *prev_hook = p->shader_hooks[i - 1]; char *prev_path = p->shader_paths[i - 1]; p->shader_hooks[i - 1] = p->shader_hooks[i]; p->shader_paths[i - 1] = p->shader_paths[i]; p->shader_hooks[i] = prev_hook; p->shader_paths[i] = prev_path; } if (i == p->shader_num - 1) { nk_label(nk, "·", NK_TEXT_CENTERED); } else if (nk_button_symbol(nk, NK_SYMBOL_TRIANGLE_DOWN)) { const struct pl_hook *next_hook = p->shader_hooks[i + 1]; char *next_path = p->shader_paths[i + 1]; p->shader_hooks[i + 1] = p->shader_hooks[i]; p->shader_paths[i + 1] = p->shader_paths[i]; p->shader_hooks[i] = next_hook; p->shader_paths[i] = next_path; } if (nk_button_symbol(nk, NK_SYMBOL_X)) { pl_mpv_user_shader_destroy(&p->shader_hooks[i]); free(p->shader_paths[i]); p->shader_num--; memmove(&p->shader_hooks[i], &p->shader_hooks[i+1], (p->shader_num - i) * sizeof(void *)); memmove(&p->shader_paths[i], &p->shader_paths[i+1], (p->shader_num - i) * sizeof(char *)); if (i == p->shader_num) break; } if (p->shader_hooks[i]->num_parameters == 0) { nk_label(nk, p->shader_paths[i], NK_TEXT_LEFT); continue; } if (nk_combo_begin_label(nk, p->shader_paths[i], nk_vec2(nk_widget_width(nk), 500))) { nk_layout_row_dynamic(nk, 32, 1); for (int j = 0; j < p->shader_hooks[i]->num_parameters; j++) { const struct pl_hook_par *hp = &p->shader_hooks[i]->parameters[j]; const char *name = hp->description ? 
hp->description : hp->name; switch (hp->type) { case PL_VAR_FLOAT: nk_property_float(nk, name, hp->minimum.f, &hp->data->f, hp->maximum.f, hp->data->f / 100.0f, hp->data->f / 1000.0f); break; case PL_VAR_SINT: nk_property_int(nk, name, hp->minimum.i, &hp->data->i, hp->maximum.i, 1, 1.0f); break; case PL_VAR_UINT: { int min = FFMIN(hp->minimum.u, INT_MAX); int max = FFMIN(hp->maximum.u, INT_MAX); int val = FFMIN(hp->data->u, INT_MAX); nk_property_int(nk, name, min, &val, max, 1, 1); hp->data->u = val; break; } default: abort(); } } nk_combo_end(nk); } } par->hooks = p->shader_hooks; par->num_hooks = p->shader_num; nk_tree_pop(nk); } if (nk_tree_push(nk, NK_TREE_NODE, "Debug", NK_MINIMIZED)) { nk_layout_row_dynamic(nk, 24, 1); nk_checkbox_label(nk, "Preserve mixing cache", &par->preserve_mixing_cache); nk_checkbox_label(nk, "Bypass mixing cache", &par->skip_caching_single_frame); nk_checkbox_label(nk, "Show all scaler presets", &p->advanced_scalers); nk_checkbox_label(nk, "Disable linear scaling", &par->disable_linear_scaling); nk_checkbox_label(nk, "Disable built-in scalers", &par->disable_builtin_scalers); nk_checkbox_label(nk, "Correct subpixel offsets", &par->correct_subpixel_offsets); nk_checkbox_label(nk, "Force-enable dither", &par->force_dither); nk_checkbox_label(nk, "Disable gamma-aware dither", &par->disable_dither_gamma_correction); nk_checkbox_label(nk, "Disable FBOs / advanced rendering", &par->disable_fbos); nk_checkbox_label(nk, "Force low-bit depth FBOs", &par->force_low_bit_depth_fbos); nk_checkbox_label(nk, "Disable constant hard-coding", &par->dynamic_constants); if (nk_check_label(nk, "Ignore Dolby Vision metadata", p->ignore_dovi) != p->ignore_dovi) { // Flush the renderer cache on changes, since this can // drastically alter the subjective appearance of the stream pl_renderer_flush_cache(p->renderer); p->ignore_dovi = !p->ignore_dovi; } nk_layout_row_dynamic(nk, 24, 2); double prev_fps = p->fps; bool fps_changed = nk_checkbox_label(nk, "Override display FPS", &p->fps_override); nk_property_float(nk, "FPS", 10.0, &p->fps, 240.0, 5, 0.1); if (fps_changed || p->fps != prev_fps) p->stats.pts_interval = p->stats.vsync_interval = (struct timing) {0}; if (nk_button_label(nk, "Flush renderer cache")) pl_renderer_flush_cache(p->renderer); if (nk_button_label(nk, "Recreate renderer")) { pl_renderer_destroy(&p->renderer); p->renderer = pl_renderer_create(p->log, p->win->gpu); } if (nk_tree_push(nk, NK_TREE_NODE, "Shader passes / GPU timing", NK_MINIMIZED)) { nk_layout_row_dynamic(nk, 26, 1); nk_label(nk, "Full frames:", NK_TEXT_LEFT); for (int i = 0; i < p->num_frame_passes; i++) draw_shader_pass(nk, &p->frame_info[i]); nk_layout_row_dynamic(nk, 26, 1); nk_label(nk, "Output blending:", NK_TEXT_LEFT); for (int j = 0; j < MAX_BLEND_FRAMES; j++) { for (int i = 0; i < p->num_blend_passes[j]; i++) draw_shader_pass(nk, &p->blend_info[j][i]); } nk_tree_pop(nk); } if (nk_tree_push(nk, NK_TREE_NODE, "Frame statistics / CPU timing", NK_MINIMIZED)) { nk_layout_row_dynamic(nk, 24, 2); nk_label(nk, "Current PTS:", NK_TEXT_LEFT); nk_labelf(nk, NK_TEXT_LEFT, "%.3f", p->stats.current_pts); nk_label(nk, "Estimated FPS:", NK_TEXT_LEFT); nk_labelf(nk, NK_TEXT_LEFT, "%.3f", pl_queue_estimate_fps(p->queue)); nk_label(nk, "Estimated vsync rate:", NK_TEXT_LEFT); nk_labelf(nk, NK_TEXT_LEFT, "%.3f", pl_queue_estimate_vps(p->queue)); nk_label(nk, "PTS drift offset:", NK_TEXT_LEFT); nk_labelf(nk, NK_TEXT_LEFT, "%.3f ms", 1e3 * pl_queue_pts_offset(p->queue)); nk_label(nk, "Frames rendered:", NK_TEXT_LEFT); 
nk_labelf(nk, NK_TEXT_LEFT, "%"PRIu32, p->stats.rendered); nk_label(nk, "Decoded frames", NK_TEXT_LEFT); nk_labelf(nk, NK_TEXT_LEFT, "%"PRIu32, atomic_load(&p->stats.decoded)); nk_label(nk, "Dropped frames:", NK_TEXT_LEFT); nk_labelf(nk, NK_TEXT_LEFT, "%"PRIu32, p->stats.dropped); nk_label(nk, "Missed timestamps:", NK_TEXT_LEFT); nk_labelf(nk, NK_TEXT_LEFT, "%"PRIu32" (%.3f ms)", p->stats.missed, p->stats.missed_ms); nk_label(nk, "Times stalled:", NK_TEXT_LEFT); nk_labelf(nk, NK_TEXT_LEFT, "%"PRIu32" (%.3f ms)", p->stats.stalled, p->stats.stalled_ms); draw_timing(nk, "Acquire FBO:", &p->stats.acquire); draw_timing(nk, "Update queue:", &p->stats.update); draw_timing(nk, "Render frame:", &p->stats.render); draw_timing(nk, "Draw interface:", &p->stats.draw_ui); draw_timing(nk, "Voluntary sleep:", &p->stats.sleep); draw_timing(nk, "Submit frame:", &p->stats.submit); draw_timing(nk, "Swap buffers:", &p->stats.swap); draw_timing(nk, "Vsync interval:", &p->stats.vsync_interval); draw_timing(nk, "PTS interval:", &p->stats.pts_interval); if (nk_button_label(nk, "Reset statistics")) memset(&p->stats, 0, sizeof(p->stats)); nk_tree_pop(nk); } if (nk_tree_push(nk, NK_TREE_NODE, "Settings dump", NK_MINIMIZED)) { nk_layout_row_dynamic(nk, 24, 2); if (nk_button_label(nk, "Copy to clipboard")) window_set_clipboard(p->win, pl_options_save(opts)); if (nk_button_label(nk, "Load from clipboard")) pl_options_load(opts, window_get_clipboard(p->win)); nk_layout_row_dynamic(nk, 24, 1); pl_options_iterate(opts, draw_opt_data, nk); nk_tree_pop(nk); } if (nk_tree_push(nk, NK_TREE_NODE, "Cache statistics", NK_MINIMIZED)) { nk_layout_row_dynamic(nk, 24, 2); nk_label(nk, "Cached objects:", NK_TEXT_LEFT); nk_labelf(nk, NK_TEXT_LEFT, "%d", pl_cache_objects(p->cache)); nk_label(nk, "Total size:", NK_TEXT_LEFT); nk_labelf(nk, NK_TEXT_LEFT, "%zu", pl_cache_size(p->cache)); nk_label(nk, "Maximum total size:", NK_TEXT_LEFT); nk_labelf(nk, NK_TEXT_LEFT, "%zu", p->cache->params.max_total_size); nk_label(nk, "Maximum object size:", NK_TEXT_LEFT); nk_labelf(nk, NK_TEXT_LEFT, "%zu", p->cache->params.max_object_size); if (nk_button_label(nk, "Clear cache")) pl_cache_reset(p->cache); if (nk_button_label(nk, "Save cache")) { FILE *file = fopen(p->cache_file, "wb"); if (file) { pl_cache_save_file(p->cache, file); fclose(file); } } if (nk_tree_push(nk, NK_TREE_NODE, "Object list", NK_MINIMIZED)) { nk_layout_row_dynamic(nk, 24, 1); pl_cache_iterate(p->cache, draw_cache_line, nk); nk_tree_pop(nk); } nk_tree_pop(nk); } nk_tree_pop(nk); } } nk_end(nk); } #else void update_settings(struct plplay *p, const struct pl_frame *target) { } #endif // HAVE_NUKLEAR libplacebo-v7.349.0/demos/ui.c000066400000000000000000000154671463457750100161010ustar00rootroot00000000000000// License: CC0 / Public Domain #define NK_IMPLEMENTATION #include "ui.h" #include #include struct ui_vertex { float pos[2]; float coord[2]; uint8_t color[4]; }; #define NUM_VERTEX_ATTRIBS 3 struct ui { pl_gpu gpu; pl_dispatch dp; struct nk_context nk; struct nk_font_atlas atlas; struct nk_buffer cmds, verts, idx; pl_tex font_tex; struct pl_vertex_attrib attribs_pl[NUM_VERTEX_ATTRIBS]; struct nk_draw_vertex_layout_element attribs_nk[NUM_VERTEX_ATTRIBS+1]; struct nk_convert_config convert_cfg; }; struct ui *ui_create(pl_gpu gpu) { struct ui *ui = malloc(sizeof(struct ui)); if (!ui) return NULL; *ui = (struct ui) { .gpu = gpu, .dp = pl_dispatch_create(gpu->log, gpu), .attribs_pl = { { .name = "pos", .offset = offsetof(struct ui_vertex, pos), .fmt = pl_find_vertex_fmt(gpu, 
PL_FMT_FLOAT, 2), }, { .name = "coord", .offset = offsetof(struct ui_vertex, coord), .fmt = pl_find_vertex_fmt(gpu, PL_FMT_FLOAT, 2), }, { .name = "vcolor", .offset = offsetof(struct ui_vertex, color), .fmt = pl_find_named_fmt(gpu, "rgba8"), } }, .attribs_nk = { {NK_VERTEX_POSITION, NK_FORMAT_FLOAT, offsetof(struct ui_vertex, pos)}, {NK_VERTEX_TEXCOORD, NK_FORMAT_FLOAT, offsetof(struct ui_vertex, coord)}, {NK_VERTEX_COLOR, NK_FORMAT_R8G8B8A8, offsetof(struct ui_vertex, color)}, {NK_VERTEX_LAYOUT_END} }, .convert_cfg = { .vertex_layout = ui->attribs_nk, .vertex_size = sizeof(struct ui_vertex), .vertex_alignment = NK_ALIGNOF(struct ui_vertex), .shape_AA = NK_ANTI_ALIASING_ON, .line_AA = NK_ANTI_ALIASING_ON, .circle_segment_count = 22, .curve_segment_count = 22, .arc_segment_count = 22, .global_alpha = 1.0f, }, }; // Initialize font atlas using built-in font nk_font_atlas_init_default(&ui->atlas); nk_font_atlas_begin(&ui->atlas); struct nk_font *font = nk_font_atlas_add_default(&ui->atlas, 20, NULL); struct pl_tex_params tparams = { .format = pl_find_named_fmt(gpu, "r8"), .sampleable = true, .initial_data = nk_font_atlas_bake(&ui->atlas, &tparams.w, &tparams.h, NK_FONT_ATLAS_ALPHA8), .debug_tag = PL_DEBUG_TAG, }; ui->font_tex = pl_tex_create(gpu, &tparams); nk_font_atlas_end(&ui->atlas, nk_handle_ptr((void *) ui->font_tex), &ui->convert_cfg.tex_null); nk_font_atlas_cleanup(&ui->atlas); if (!ui->font_tex) goto error; // Initialize nuklear state if (!nk_init_default(&ui->nk, &font->handle)) { fprintf(stderr, "NK: failed initializing UI!\n"); goto error; } nk_buffer_init_default(&ui->cmds); nk_buffer_init_default(&ui->verts); nk_buffer_init_default(&ui->idx); return ui; error: ui_destroy(&ui); return NULL; } void ui_destroy(struct ui **ptr) { struct ui *ui = *ptr; if (!ui) return; nk_buffer_free(&ui->cmds); nk_buffer_free(&ui->verts); nk_buffer_free(&ui->idx); nk_free(&ui->nk); nk_font_atlas_clear(&ui->atlas); pl_tex_destroy(ui->gpu, &ui->font_tex); pl_dispatch_destroy(&ui->dp); free(ui); *ptr = NULL; } void ui_update_input(struct ui *ui, const struct window *win) { int x, y; window_get_cursor(win, &x, &y); nk_input_begin(&ui->nk); nk_input_motion(&ui->nk, x, y); nk_input_button(&ui->nk, NK_BUTTON_LEFT, x, y, window_get_button(win, BTN_LEFT)); nk_input_button(&ui->nk, NK_BUTTON_RIGHT, x, y, window_get_button(win, BTN_RIGHT)); nk_input_button(&ui->nk, NK_BUTTON_MIDDLE, x, y, window_get_button(win, BTN_MIDDLE)); struct nk_vec2 scroll; window_get_scroll(win, &scroll.x, &scroll.y); nk_input_scroll(&ui->nk, scroll); nk_input_end(&ui->nk); } struct nk_context *ui_get_context(struct ui *ui) { return &ui->nk; } bool ui_draw(struct ui *ui, const struct pl_swapchain_frame *frame) { if (nk_convert(&ui->nk, &ui->cmds, &ui->verts, &ui->idx, &ui->convert_cfg) != NK_CONVERT_SUCCESS) { fprintf(stderr, "NK: failed converting draw commands!\n"); return false; } const struct nk_draw_command *cmd = NULL; const uint8_t *vertices = nk_buffer_memory(&ui->verts); const nk_draw_index *indices = nk_buffer_memory(&ui->idx); nk_draw_foreach(cmd, &ui->nk, &ui->cmds) { if (!cmd->elem_count) continue; pl_shader sh = pl_dispatch_begin(ui->dp); pl_shader_custom(sh, &(struct pl_custom_shader) { .description = "nuklear UI", .body = "color = textureLod(ui_tex, coord, 0.0).r * vcolor;", .output = PL_SHADER_SIG_COLOR, .num_descriptors = 1, .descriptors = &(struct pl_shader_desc) { .desc = { .name = "ui_tex", .type = PL_DESC_SAMPLED_TEX, }, .binding = { .object = cmd->texture.ptr, .sample_mode = PL_TEX_SAMPLE_NEAREST, }, }, }); 
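        // The UI shader produces sRGB colors; map them to the swapchain's
        // color space and re-encode into its color representation, so the
        // overlay also looks correct on HDR / non-sRGB swapchains.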
struct pl_color_repr repr = frame->color_repr; pl_shader_color_map_ex(sh, NULL, pl_color_map_args( .src = pl_color_space_srgb, .dst = frame->color_space, )); pl_shader_encode_color(sh, &repr); bool ok = pl_dispatch_vertex(ui->dp, pl_dispatch_vertex_params( .shader = &sh, .target = frame->fbo, .blend_params = &pl_alpha_overlay, .scissors = { .x0 = cmd->clip_rect.x, .y0 = cmd->clip_rect.y, .x1 = cmd->clip_rect.x + cmd->clip_rect.w, .y1 = cmd->clip_rect.y + cmd->clip_rect.h, }, .vertex_attribs = ui->attribs_pl, .num_vertex_attribs = NUM_VERTEX_ATTRIBS, .vertex_stride = sizeof(struct ui_vertex), .vertex_position_idx = 0, .vertex_coords = PL_COORDS_ABSOLUTE, .vertex_flipped = frame->flipped, .vertex_type = PL_PRIM_TRIANGLE_LIST, .vertex_count = cmd->elem_count, .vertex_data = vertices, .index_data = indices, .index_fmt = PL_INDEX_UINT32, )); if (!ok) { fprintf(stderr, "placebo: failed rendering UI!\n"); return false; } indices += cmd->elem_count; } nk_clear(&ui->nk); nk_buffer_clear(&ui->cmds); nk_buffer_clear(&ui->verts); nk_buffer_clear(&ui->idx); return true; } libplacebo-v7.349.0/demos/ui.h000066400000000000000000000034121463457750100160710ustar00rootroot00000000000000// License: CC0 / Public Domain #pragma once #define NK_INCLUDE_FIXED_TYPES #define NK_INCLUDE_DEFAULT_ALLOCATOR #define NK_INCLUDE_STANDARD_IO #define NK_INCLUDE_STANDARD_BOOL #define NK_INCLUDE_STANDARD_VARARGS #define NK_INCLUDE_VERTEX_BUFFER_OUTPUT #define NK_INCLUDE_FONT_BAKING #define NK_INCLUDE_DEFAULT_FONT #define NK_BUTTON_TRIGGER_ON_RELEASE #define NK_UINT_DRAW_INDEX #include #include "common.h" #include "window.h" struct ui; struct ui *ui_create(pl_gpu gpu); void ui_destroy(struct ui **ui); // Update/Logic/Draw cycle void ui_update_input(struct ui *ui, const struct window *window); struct nk_context *ui_get_context(struct ui *ui); bool ui_draw(struct ui *ui, const struct pl_swapchain_frame *frame); // Helper function to draw a custom widget for drag&drop operations, returns // true if the widget is hovered static inline bool ui_widget_hover(struct nk_context *nk, const char *label) { struct nk_rect bounds; if (!nk_widget(&bounds, nk)) return false; struct nk_command_buffer *canvas = nk_window_get_canvas(nk); bool hover = nk_input_is_mouse_hovering_rect(&nk->input, bounds); float h, s, v; nk_color_hsv_f(&h, &s, &v, nk->style.window.background); struct nk_color background = nk_hsv_f(h, s, v + (hover ? 
0.1f : -0.02f)); struct nk_color border = nk_hsv_f(h, s, v + 0.20f); nk_fill_rect(canvas, bounds, 0.0f, background); nk_stroke_rect(canvas, bounds, 0.0f, 2.0f, border); const float pad = 10.0f; struct nk_rect text = { .x = bounds.x + pad, .y = bounds.y + pad, .w = bounds.w - 2 * pad, .h = bounds.h - 2 * pad, }; nk_draw_text(canvas, text, label, nk_strlen(label), nk->style.font, background, nk->style.text.color); return hover; } libplacebo-v7.349.0/demos/utils.c000066400000000000000000000020701463457750100166060ustar00rootroot00000000000000// License: CC0 / Public Domain #include #include #include #include "utils.h" #include "../src/os.h" #ifdef PL_HAVE_WIN32 #include #else #include #include #include #include #endif const char *get_cache_dir(char (*buf)[512]) { // Check if XDG_CACHE_HOME is set for Linux/BSD const char* xdg_cache_home = getenv("XDG_CACHE_HOME"); if (xdg_cache_home) return xdg_cache_home; #ifdef _WIN32 const char* local_app_data = getenv("LOCALAPPDATA"); if (local_app_data) return local_app_data; #endif #ifdef __APPLE__ struct passwd* pw = getpwuid(getuid()); if (pw) { int ret = snprintf(*buf, sizeof(*buf), "%s/%s", pw->pw_dir, "Library/Caches"); if (ret > 0 && ret < sizeof(*buf)) return *buf; } #endif const char* home = getenv("HOME"); if (home) { int ret = snprintf(*buf, sizeof(*buf), "%s/.cache", home); if (ret > 0 && ret < sizeof(*buf)) return *buf; } return NULL; } libplacebo-v7.349.0/demos/utils.h000066400000000000000000000001571463457750100166170ustar00rootroot00000000000000// License: CC0 / Public Domain #pragma once #include "common.h" const char *get_cache_dir(char (*buf)[512]); libplacebo-v7.349.0/demos/video-filtering.c000066400000000000000000000646721463457750100205550ustar00rootroot00000000000000/* Presented are two hypothetical scenarios of how one might use libplacebo * as something like an FFmpeg or mpv video filter. We examine two example * APIs (loosely modeled after real video filtering APIs) and how each style * would like to use libplacebo. * * For sake of a simple example, let's assume this is a debanding filter. * For those of you too lazy to compile/run this file but still want to see * results, these are from my machine (RX 5700 XT + 1950X, as of 2020-05-25): * * RADV+ACO: * api1: 10000 frames in 16.328440 s => 1.632844 ms/frame (612.43 fps) * render: 0.113524 ms, upload: 0.127551 ms, download: 0.146097 ms * api2: 10000 frames in 5.335634 s => 0.533563 ms/frame (1874.19 fps) * render: 0.064378 ms, upload: 0.000000 ms, download: 0.189719 ms * * AMDVLK: * api1: 10000 frames in 14.921859 s => 1.492186 ms/frame (670.16 fps) * render: 0.110603 ms, upload: 0.114412 ms, download: 0.115375 ms * api2: 10000 frames in 4.667386 s => 0.466739 ms/frame (2142.53 fps) * render: 0.030781 ms, upload: 0.000000 ms, download: 0.075237 ms * * You can see that AMDVLK is still better at doing texture streaming than * RADV - this is because as of writing RADV still does not support * asynchronous texture queues / DMA engine transfers. 
If we disable the * `async_transfer` option with AMDVLK we get this: * * api1: 10000 frames in 16.087723 s => 1.608772 ms/frame (621.59 fps) * render: 0.111154 ms, upload: 0.122476 ms, download: 0.133162 ms * api2: 10000 frames in 6.344959 s => 0.634496 ms/frame (1576.05 fps) * render: 0.031307 ms, upload: 0.000000 ms, download: 0.083520 ms * * License: CC0 / Public Domain */ #include #include #include #include #include #include "common.h" #include "pl_clock.h" #include "pl_thread.h" #ifdef _WIN32 #include #endif #include #include #include #include /////////////////////// /// API definitions /// /////////////////////// // Stuff that would be common to each API void *init(void); void uninit(void *priv); struct format { // For simplicity let's make a few assumptions here, since configuring the // texture format is not the point of this example. (In practice you can // go nuts with the `utils/upload.h` helpers) // // - All formats contain unsigned integers only // - All components have the same size in bits // - All components are in the "canonical" order // - All formats have power of two sizes only (2 or 4 components, not 3) // - All plane strides are a multiple of the pixel size int num_comps; int bitdepth; }; struct plane { int subx, suby; // subsampling shift struct format fmt; size_t stride; void *data; }; #define MAX_PLANES 4 struct image { int width, height; int num_planes; struct plane planes[MAX_PLANES]; // For API #2, the associated mapped buffer (if any) struct api2_buf *associated_buf; }; // Example API design #1: synchronous, blocking, double-copy (bad!) // // In this API, `api1_filter` must immediately return with the new data. // This prevents parallelism on the GPU and should be avoided if possible, // but sometimes that's what you have to work with. So this is what it // would look like. // // Also, let's assume this API design reconfigures the filter chain (using // a blank `proxy` image every time the image format or dimensions change, // and doesn't expect us to fail due to format mismatches or resource // exhaustion afterwards. bool api1_reconfig(void *priv, const struct image *proxy); bool api1_filter(void *priv, struct image *dst, struct image *src); // Example API design #2: asynchronous, streaming, queued, zero-copy (good!) // // In this API, `api2_process` will run by the calling code every so often // (e.g. when new data is available or expected). This function has access // to non-blocking functions `get_image` and `put_image` that interface // with the video filtering engine's internal queueing system. // // This API is also designed to feed multiple frames ahead of time, i.e. // it will feed us as many frames as it can while we're still returning // `API2_WANT_MORE`. To drain the filter chain, it would continue running // the process function until `API2_HAVE_MORE` is no longer present // in the output. // // This API is also designed to do zero-copy where possible. When it wants // to create a data buffer of a given size, it will call our function // `api2_alloc` which will return a buffer that we can process directly. // We can use this to do zero-copy uploading to the GPU, by creating // host-visible persistently mapped buffers. In order to prevent the video // filtering system from re-using our buffers while copies are happening, we // use special functions `image_lock` and `image_unlock` to increase a // refcount on the image's backing storage. 
(As is typical of such APIs) // // Finally, this API is designed to be fully dynamic: The image parameters // could change at any time, and we must be equipped to handle that. enum api2_status { // Negative values are used to signal error conditions API2_ERR_FMT = -2, // incompatible / unsupported format API2_ERR_UNKNOWN = -1, // some other error happened API2_OK = 0, // no error, no status - everything's good // Positive values represent a mask of status conditions API2_WANT_MORE = (1 << 0), // we want more frames, please feed some more! API2_HAVE_MORE = (1 << 1), // we have more frames but they're not ready }; enum api2_status api2_process(void *priv); // Functions for creating persistently mapped buffers struct api2_buf { void *data; size_t size; void *priv; }; bool api2_alloc(void *priv, size_t size, struct api2_buf *out); void api2_free(void *priv, const struct api2_buf *buf); // These functions are provided by the API. The exact details of how images // are enqueued, dequeued and locked are not really important here, so just // do something unrealistic but simple to demonstrate with. struct image *get_image(void); void put_image(struct image *img); void image_lock(struct image *img); void image_unlock(struct image *img); ///////////////////////////////// /// libplacebo implementation /// ///////////////////////////////// // For API #2: #define PARALLELISM 8 struct entry { pl_buf buf; // to stream the download pl_tex tex_in[MAX_PLANES]; pl_tex tex_out[MAX_PLANES]; struct image image; // For entries that are associated with a held image, so we can unlock them // as soon as possible struct image *held_image; pl_buf held_buf; }; // For both APIs: struct priv { pl_log log; pl_vulkan vk; pl_gpu gpu; pl_dispatch dp; pl_shader_obj dither_state; // Timer objects pl_timer render_timer; pl_timer upload_timer; pl_timer download_timer; uint64_t render_sum; uint64_t upload_sum; uint64_t download_sum; int render_count; int upload_count; int download_count; // API #1: A simple pair of input and output textures pl_tex tex_in[MAX_PLANES]; pl_tex tex_out[MAX_PLANES]; // API #2: A ring buffer of textures/buffers for streaming int idx_in; // points the next free entry int idx_out; // points to the first entry still in progress struct entry entries[PARALLELISM]; }; void *init(void) { struct priv *p = calloc(1, sizeof(struct priv)); if (!p) return NULL; p->log = pl_log_create(PL_API_VER, pl_log_params( .log_cb = pl_log_simple, .log_level = PL_LOG_WARN, )); p->vk = pl_vulkan_create(p->log, pl_vulkan_params( // Note: This is for API #2. In API #1 you could just pass params=NULL // and it wouldn't really matter much. 
.async_transfer = true, .async_compute = true, .queue_count = PARALLELISM, )); if (!p->vk) { fprintf(stderr, "Failed creating vulkan context\n"); goto error; } // Give this a shorter name for convenience p->gpu = p->vk->gpu; p->dp = pl_dispatch_create(p->log, p->gpu); if (!p->dp) { fprintf(stderr, "Failed creating shader dispatch object\n"); goto error; } p->render_timer = pl_timer_create(p->gpu); p->upload_timer = pl_timer_create(p->gpu); p->download_timer = pl_timer_create(p->gpu); return p; error: uninit(p); return NULL; } void uninit(void *priv) { struct priv *p = priv; // API #1 for (int i = 0; i < MAX_PLANES; i++) { pl_tex_destroy(p->gpu, &p->tex_in[i]); pl_tex_destroy(p->gpu, &p->tex_out[i]); } // API #2 for (int i = 0; i < PARALLELISM; i++) { pl_buf_destroy(p->gpu, &p->entries[i].buf); for (int j = 0; j < MAX_PLANES; j++) { pl_tex_destroy(p->gpu, &p->entries[i].tex_in[j]); pl_tex_destroy(p->gpu, &p->entries[i].tex_out[j]); } if (p->entries[i].held_image) image_unlock(p->entries[i].held_image); } pl_timer_destroy(p->gpu, &p->render_timer); pl_timer_destroy(p->gpu, &p->upload_timer); pl_timer_destroy(p->gpu, &p->download_timer); pl_shader_obj_destroy(&p->dither_state); pl_dispatch_destroy(&p->dp); pl_vulkan_destroy(&p->vk); pl_log_destroy(&p->log); free(p); } // Helper function to set up the `pl_plane_data` struct from the image params static void setup_plane_data(const struct image *img, struct pl_plane_data out[MAX_PLANES]) { for (int i = 0; i < img->num_planes; i++) { const struct plane *plane = &img->planes[i]; out[i] = (struct pl_plane_data) { .type = PL_FMT_UNORM, .width = img->width >> plane->subx, .height = img->height >> plane->suby, .pixel_stride = plane->fmt.num_comps * plane->fmt.bitdepth / 8, .row_stride = plane->stride, .pixels = plane->data, }; // For API 2 (direct rendering) if (img->associated_buf) { pl_buf buf = img->associated_buf->priv; out[i].pixels = NULL; out[i].buf = buf; out[i].buf_offset = (uintptr_t) plane->data - (uintptr_t) buf->data; } for (int c = 0; c < plane->fmt.num_comps; c++) { out[i].component_size[c] = plane->fmt.bitdepth; out[i].component_pad[c] = 0; out[i].component_map[c] = c; } } } static bool do_plane(struct priv *p, pl_tex dst, pl_tex src) { int new_depth = dst->params.format->component_depth[0]; // Do some debanding, and then also make sure to dither to the new depth // so that our debanded gradients are actually preserved well pl_shader sh = pl_dispatch_begin(p->dp); pl_shader_deband(sh, pl_sample_src( .tex = src ), NULL); pl_shader_dither(sh, new_depth, &p->dither_state, NULL); return pl_dispatch_finish(p->dp, pl_dispatch_params( .shader = &sh, .target = dst, .timer = p->render_timer, )); } static void check_timers(struct priv *p) { uint64_t ret; while ((ret = pl_timer_query(p->gpu, p->render_timer))) { p->render_sum += ret; p->render_count++; } while ((ret = pl_timer_query(p->gpu, p->upload_timer))) { p->upload_sum += ret; p->upload_count++; } while ((ret = pl_timer_query(p->gpu, p->download_timer))) { p->download_sum += ret; p->download_count++; } } // API #1 implementation: // // In this design, we will create all GPU resources inside `reconfig`, based on // the texture format configured from the proxy image. This will avoid failing // later on due to e.g. resource exhaustion or texture format mismatch, and // thereby falls within the intended semantics of this style of API. 
bool api1_reconfig(void *priv, const struct image *proxy) { struct priv *p = priv; struct pl_plane_data data[MAX_PLANES]; setup_plane_data(proxy, data); for (int i = 0; i < proxy->num_planes; i++) { pl_fmt fmt = pl_plane_find_fmt(p->gpu, NULL, &data[i]); if (!fmt) { fprintf(stderr, "Failed configuring filter: no good texture format!\n"); return false; } bool ok = true; ok &= pl_tex_recreate(p->gpu, &p->tex_in[i], pl_tex_params( .w = data[i].width, .h = data[i].height, .format = fmt, .sampleable = true, .host_writable = true, )); ok &= pl_tex_recreate(p->gpu, &p->tex_out[i], pl_tex_params( .w = data[i].width, .h = data[i].height, .format = fmt, .renderable = true, .host_readable = true, )); if (!ok) { fprintf(stderr, "Failed creating GPU textures!\n"); return false; } } return true; } bool api1_filter(void *priv, struct image *dst, struct image *src) { struct priv *p = priv; struct pl_plane_data data[MAX_PLANES]; setup_plane_data(src, data); // Upload planes for (int i = 0; i < src->num_planes; i++) { bool ok = pl_tex_upload(p->gpu, pl_tex_transfer_params( .tex = p->tex_in[i], .row_pitch = data[i].row_stride, .ptr = src->planes[i].data, .timer = p->upload_timer, )); if (!ok) { fprintf(stderr, "Failed uploading data to the GPU!\n"); return false; } } // Process planes for (int i = 0; i < src->num_planes; i++) { if (!do_plane(p, p->tex_out[i], p->tex_in[i])) { fprintf(stderr, "Failed processing planes!\n"); return false; } } // Download planes for (int i = 0; i < src->num_planes; i++) { bool ok = pl_tex_download(p->gpu, pl_tex_transfer_params( .tex = p->tex_out[i], .row_pitch = dst->planes[i].stride, .ptr = dst->planes[i].data, .timer = p->download_timer, )); if (!ok) { fprintf(stderr, "Failed downloading data from the GPU!\n"); return false; } } check_timers(p); return true; } // API #2 implementation: // // In this implementation we maintain a queue (implemented as ring buffer) // of "work entries", which are isolated structs that hold independent GPU // resources - so that the GPU has no cross-entry dependencies on any of the // textures or other resources. (Side note: It still has a dependency on the // dither state, but this is just a shared LUT anyway) // Align up to the nearest multiple of a power of two #define ALIGN2(x, align) (((x) + (align) - 1) & ~((align) - 1)) static enum api2_status submit_work(struct priv *p, struct entry *e, struct image *img) { // If the image comes from a mapped buffer, we have to take a lock // while our upload is in progress if (img->associated_buf) { assert(!e->held_image); image_lock(img); e->held_image = img; e->held_buf = img->associated_buf->priv; } // Upload this image's data struct pl_plane_data data[MAX_PLANES]; setup_plane_data(img, data); for (int i = 0; i < img->num_planes; i++) { pl_fmt fmt = pl_plane_find_fmt(p->gpu, NULL, &data[i]); if (!fmt) return API2_ERR_FMT; // FIXME: can we plumb a `pl_timer` in here somehow? if (!pl_upload_plane(p->gpu, NULL, &e->tex_in[i], &data[i])) return API2_ERR_UNKNOWN; // Re-create the target FBO as well with this format if necessary bool ok = pl_tex_recreate(p->gpu, &e->tex_out[i], pl_tex_params( .w = data[i].width, .h = data[i].height, .format = fmt, .renderable = true, .host_readable = true, )); if (!ok) return API2_ERR_UNKNOWN; } // Dispatch the work for this image for (int i = 0; i < img->num_planes; i++) { if (!do_plane(p, e->tex_out[i], e->tex_in[i])) return API2_ERR_UNKNOWN; } // Set up the resulting `struct image` that will hold our target // data. We just copy the format etc. 
from the source image memcpy(&e->image, img, sizeof(struct image)); size_t offset[MAX_PLANES], stride[MAX_PLANES], total_size = 0; for (int i = 0; i < img->num_planes; i++) { // For performance, we want to make sure we align the stride // to a multiple of the GPU's preferred texture transfer stride // (This is entirely optional) stride[i] = ALIGN2(img->planes[i].stride, p->gpu->limits.align_tex_xfer_pitch); int height = img->height >> img->planes[i].suby; // Round up the offset to the nearest multiple of the optimal // transfer alignment. (This is also entirely optional) offset[i] = ALIGN2(total_size, p->gpu->limits.align_tex_xfer_offset); total_size = offset[i] + stride[i] * height; } // Dispatch the asynchronous download into a mapped buffer bool ok = pl_buf_recreate(p->gpu, &e->buf, pl_buf_params( .size = total_size, .host_mapped = true, )); if (!ok) return API2_ERR_UNKNOWN; for (int i = 0; i < img->num_planes; i++) { ok = pl_tex_download(p->gpu, pl_tex_transfer_params( .tex = e->tex_out[i], .row_pitch = stride[i], .buf = e->buf, .buf_offset = offset[i], .timer = p->download_timer, )); if (!ok) return API2_ERR_UNKNOWN; // Update the output fields e->image.planes[i].data = e->buf->data + offset[i]; e->image.planes[i].stride = stride[i]; } // Make sure this work starts processing in the background, and especially // so we can move on to the next queue on the gPU pl_gpu_flush(p->gpu); return API2_OK; } enum api2_status api2_process(void *priv) { struct priv *p = priv; enum api2_status ret = 0; // Opportunistically release any held images. We do this across the ring // buffer, rather than doing this as part of the following loop, because // we want to release images ahead-of-time (no FIFO constraints) for (int i = 0; i < PARALLELISM; i++) { struct entry *e = &p->entries[i]; if (e->held_image && !pl_buf_poll(p->gpu, e->held_buf, 0)) { // upload buffer is no longer in use, release it image_unlock(e->held_image); e->held_image = NULL; e->held_buf = NULL; } } // Poll the status of existing entries and dequeue the ones that are done while (p->idx_out != p->idx_in) { struct entry *e = &p->entries[p->idx_out]; if (pl_buf_poll(p->gpu, e->buf, 0)) break; if (e->held_image) { image_unlock(e->held_image); e->held_image = NULL; e->held_buf = NULL; } // download buffer is no longer busy, dequeue the frame put_image(&e->image); p->idx_out = (p->idx_out + 1) % PARALLELISM; } // Fill up the queue with more work int last_free_idx = (p->idx_out ? 
p->idx_out : PARALLELISM) - 1; while (p->idx_in != last_free_idx) { struct image *img = get_image(); if (!img) { ret |= API2_WANT_MORE; break; } enum api2_status err = submit_work(p, &p->entries[p->idx_in], img); if (err < 0) return err; p->idx_in = (p->idx_in + 1) % PARALLELISM; } if (p->idx_out != p->idx_in) ret |= API2_HAVE_MORE; return ret; } bool api2_alloc(void *priv, size_t size, struct api2_buf *out) { struct priv *p = priv; if (!p->gpu->limits.buf_transfer || size > p->gpu->limits.max_mapped_size) return false; pl_buf buf = pl_buf_create(p->gpu, pl_buf_params( .size = size, .host_mapped = true, )); if (!buf) return false; *out = (struct api2_buf) { .data = buf->data, .size = size, .priv = (void *) buf, }; return true; } void api2_free(void *priv, const struct api2_buf *buf) { struct priv *p = priv; pl_buf plbuf = buf->priv; pl_buf_destroy(p->gpu, &plbuf); } //////////////////////////////////// /// Proof of Concept / Benchmark /// //////////////////////////////////// #define FRAMES 10000 // Let's say we're processing a 1920x1080 4:2:0 8-bit NV12 video, arbitrarily // with a stride aligned to 256 bytes. (For no particular reason) #define TEXELSZ sizeof(uint8_t) #define WIDTH 1920 #define HEIGHT 1080 #define STRIDE (ALIGN2(WIDTH, 256) * TEXELSZ) // Subsampled planes #define SWIDTH (WIDTH >> 1) #define SHEIGHT (HEIGHT >> 1) #define SSTRIDE (ALIGN2(SWIDTH, 256) * TEXELSZ) // Plane offsets / sizes #define SIZE0 (HEIGHT * STRIDE) #define SIZE1 (2 * SHEIGHT * SSTRIDE) #define OFFSET0 0 #define OFFSET1 SIZE0 #define BUFSIZE (OFFSET1 + SIZE1) // Skeleton of an example image static const struct image example_image = { .width = WIDTH, .height = HEIGHT, .num_planes = 2, .planes = { { .subx = 0, .suby = 0, .stride = STRIDE, .fmt = { .num_comps = 1, .bitdepth = 8 * TEXELSZ, }, }, { .subx = 1, .suby = 1, .stride = SSTRIDE * 2, .fmt = { .num_comps = 2, .bitdepth = 8 * TEXELSZ, }, }, }, }; // API #1: Nice and simple (but slow) static void api1_example(void) { struct priv *vf = init(); if (!vf) return; if (!api1_reconfig(vf, &example_image)) { fprintf(stderr, "api1: Failed configuring video filter!\n"); return; } // Allocate two buffers to hold the example data, and fill the source // buffer arbitrarily with a "simple" pattern. (Decoding the data into // the buffer is not meant to be part of this benchmark) uint8_t *srcbuf = malloc(BUFSIZE), *dstbuf = malloc(BUFSIZE); if (!srcbuf || !dstbuf) goto done; for (size_t i = 0; i < BUFSIZE; i++) srcbuf[i] = i; struct image src = example_image, dst = example_image; src.planes[0].data = srcbuf + OFFSET0; src.planes[1].data = srcbuf + OFFSET1; dst.planes[0].data = dstbuf + OFFSET0; dst.planes[1].data = dstbuf + OFFSET1; const pl_clock_t start = pl_clock_now(); // Process this dummy frame a bunch of times unsigned frames = 0; for (frames = 0; frames < FRAMES; frames++) { if (!api1_filter(vf, &dst, &src)) { fprintf(stderr, "api1: Failed filtering frame... aborting\n"); break; } } const pl_clock_t stop = pl_clock_now(); const float secs = pl_clock_diff(stop, start); printf("api1: %4u frames in %1.6f s => %2.6f ms/frame (%5.2f fps)\n", frames, secs, 1000 * secs / frames, frames / secs); if (vf->render_count) { printf(" render: %f ms, upload: %f ms, download: %f ms\n", 1e-6 * vf->render_sum / vf->render_count, vf->upload_count ? (1e-6 * vf->upload_sum / vf->upload_count) : 0.0, vf->download_count ? (1e-6 * vf->download_sum / vf->download_count) : 0.0); } done: free(srcbuf); free(dstbuf); uninit(vf); } // API #2: Pretend we have some fancy pool of images. 
#define POOLSIZE (PARALLELISM + 1) static struct api2_buf buffers[POOLSIZE] = {0}; static struct image images[POOLSIZE] = {0}; static int refcount[POOLSIZE] = {0}; static unsigned api2_frames_in = 0; static unsigned api2_frames_out = 0; static void api2_example(void) { struct priv *vf = init(); if (!vf) return; // Set up a bunch of dummy images for (int i = 0; i < POOLSIZE; i++) { uint8_t *data; images[i] = example_image; if (api2_alloc(vf, BUFSIZE, &buffers[i])) { data = buffers[i].data; images[i].associated_buf = &buffers[i]; } else { // Fall back in case mapped buffers are unsupported fprintf(stderr, "warning: falling back to malloc, may be slow\n"); data = malloc(BUFSIZE); } // Fill with some "data" (like in API #1) for (size_t n = 0; n < BUFSIZE; n++) data[i] = n; images[i].planes[0].data = data + OFFSET0; images[i].planes[1].data = data + OFFSET1; } const pl_clock_t start = pl_clock_now(); // Just keep driving the event loop regardless of the return status // until we reach the critical number of frames. (Good enough for this PoC) while (api2_frames_out < FRAMES) { enum api2_status ret = api2_process(vf); if (ret < 0) { fprintf(stderr, "api2: Failed processing... aborting\n"); break; } // Sleep a short time (100us) to prevent busy waiting the CPU pl_thread_sleep(1e-4); check_timers(vf); } const pl_clock_t stop = pl_clock_now(); const float secs = pl_clock_diff(stop, start); printf("api2: %4u frames in %1.6f s => %2.6f ms/frame (%5.2f fps)\n", api2_frames_out, secs, 1000 * secs / api2_frames_out, api2_frames_out / secs); if (vf->render_count) { printf(" render: %f ms, upload: %f ms, download: %f ms\n", 1e-6 * vf->render_sum / vf->render_count, vf->upload_count ? (1e-6 * vf->upload_sum / vf->upload_count) : 0.0, vf->download_count ? (1e-6 * vf->download_sum / vf->download_count) : 0.0); } for (int i = 0; i < POOLSIZE; i++) { if (images[i].associated_buf) { api2_free(vf, images[i].associated_buf); } else { // This is what we originally malloc'd free(images[i].planes[0].data); } } uninit(vf); } struct image *get_image(void) { if (api2_frames_in == FRAMES) return NULL; // simulate EOF, to avoid queueing up "extra" work // if we can find a free (unlocked) image, give it that for (int i = 0; i < POOLSIZE; i++) { if (refcount[i] == 0) { api2_frames_in++; return &images[i]; } } return NULL; // no free image available } void put_image(struct image *img) { (void)img; api2_frames_out++; } void image_lock(struct image *img) { int index = img - images; // cheat, for lack of having actual image management refcount[index]++; } void image_unlock(struct image *img) { int index = img - images; refcount[index]--; } int main(void) { printf("Running benchmarks...\n"); api1_example(); api2_example(); return 0; } libplacebo-v7.349.0/demos/window.c000066400000000000000000000054031463457750100167600ustar00rootroot00000000000000// License: CC0 / Public Domain #include #include "common.h" #include "window.h" #ifdef _WIN32 #include #include #endif extern const struct window_impl win_impl_glfw_vk; extern const struct window_impl win_impl_glfw_gl; extern const struct window_impl win_impl_glfw_d3d11; extern const struct window_impl win_impl_sdl_vk; extern const struct window_impl win_impl_sdl_gl; static const struct window_impl *win_impls[] = { #ifdef HAVE_GLFW_VULKAN &win_impl_glfw_vk, #endif #ifdef HAVE_GLFW_OPENGL &win_impl_glfw_gl, #endif #ifdef HAVE_GLFW_D3D11 &win_impl_glfw_d3d11, #endif #ifdef HAVE_SDL_VULKAN &win_impl_sdl_vk, #endif #ifdef HAVE_SDL_OPENGL &win_impl_sdl_gl, #endif NULL }; struct window 
*window_create(pl_log log, const struct window_params *params) { for (const struct window_impl **impl = win_impls; *impl; impl++) { if (params->forced_impl && strcmp((*impl)->tag, params->forced_impl) != 0) continue; printf("Attempting to initialize API: %s\n", (*impl)->name); struct window *win = (*impl)->create(log, params); if (win) { #ifdef _WIN32 if (timeBeginPeriod(1) != TIMERR_NOERROR) fprintf(stderr, "timeBeginPeriod failed!\n"); #endif return win; } } if (params->forced_impl) fprintf(stderr, "'%s' windowing system not compiled or supported!\n", params->forced_impl); else fprintf(stderr, "No windowing system / graphical API compiled or supported!\n"); exit(1); } void window_destroy(struct window **win) { if (!*win) return; (*win)->impl->destroy(win); #ifdef _WIN32 timeEndPeriod(1); #endif } void window_poll(struct window *win, bool block) { return win->impl->poll(win, block); } void window_get_cursor(const struct window *win, int *x, int *y) { return win->impl->get_cursor(win, x, y); } void window_get_scroll(const struct window *win, float *dx, float *dy) { return win->impl->get_scroll(win, dx, dy); } bool window_get_button(const struct window *win, enum button btn) { return win->impl->get_button(win, btn); } bool window_get_key(const struct window *win, enum key key) { return win->impl->get_key(win, key); } char *window_get_file(const struct window *win) { return win->impl->get_file(win); } bool window_toggle_fullscreen(const struct window *win, bool fullscreen) { return win->impl->toggle_fullscreen(win, fullscreen); } bool window_is_fullscreen(const struct window *win) { return win->impl->is_fullscreen(win); } const char *window_get_clipboard(const struct window *win) { return win->impl->get_clipboard(win); } void window_set_clipboard(const struct window *win, const char *text) { win->impl->set_clipboard(win, text); } libplacebo-v7.349.0/demos/window.h000066400000000000000000000036041463457750100167660ustar00rootroot00000000000000// License: CC0 / Public Domain #pragma once #include struct window { const struct window_impl *impl; pl_swapchain swapchain; pl_gpu gpu; bool window_lost; }; struct window_params { const char *title; int width; int height; const char *forced_impl; // initial color space struct pl_swapchain_colors colors; bool alpha; }; struct window *window_create(pl_log log, const struct window_params *params); void window_destroy(struct window **win); // Poll/wait for window events void window_poll(struct window *win, bool block); // Input handling enum button { BTN_LEFT, BTN_RIGHT, BTN_MIDDLE, }; enum key { KEY_ESC, }; void window_get_cursor(const struct window *win, int *x, int *y); void window_get_scroll(const struct window *win, float *dx, float *dy); bool window_get_button(const struct window *win, enum button); bool window_get_key(const struct window *win, enum key); char *window_get_file(const struct window *win); bool window_toggle_fullscreen(const struct window *win, bool fullscreen); bool window_is_fullscreen(const struct window *win); const char *window_get_clipboard(const struct window *win); void window_set_clipboard(const struct window *win, const char *text); // For implementations struct window_impl { const char *name; const char *tag; __typeof__(window_create) *create; __typeof__(window_destroy) *destroy; __typeof__(window_poll) *poll; __typeof__(window_get_cursor) *get_cursor; __typeof__(window_get_scroll) *get_scroll; __typeof__(window_get_button) *get_button; __typeof__(window_get_key) *get_key; __typeof__(window_get_file) *get_file; 
__typeof__(window_toggle_fullscreen) *toggle_fullscreen; __typeof__(window_is_fullscreen) *is_fullscreen; __typeof__(window_get_clipboard) *get_clipboard; __typeof__(window_set_clipboard) *set_clipboard; }; libplacebo-v7.349.0/demos/window_glfw.c000066400000000000000000000332351463457750100200030ustar00rootroot00000000000000// License: CC0 / Public Domain #if defined(USE_GL) + defined(USE_VK) + defined(USE_D3D11) != 1 #error Specify exactly one of -DUSE_GL, -DUSE_VK or -DUSE_D3D11 when compiling! #endif #include #include #include "common.h" #include "window.h" #ifdef USE_VK #define VK_NO_PROTOTYPES #include #define GLFW_INCLUDE_VULKAN #define IMPL win_impl_glfw_vk #define IMPL_NAME "GLFW (vulkan)" #define IMPL_TAG "glfw-vk" #endif #ifdef USE_GL #include #define IMPL win_impl_glfw_gl #define IMPL_NAME "GLFW (opengl)" #define IMPL_TAG "glfw-gl" #endif #ifdef USE_D3D11 #include #define IMPL win_impl_glfw_d3d11 #define IMPL_NAME "GLFW (D3D11)" #define IMPL_TAG "glfw-d3d11" #endif #include #if defined(USE_GL) && defined(HAVE_EGL) #define GLFW_EXPOSE_NATIVE_EGL #include #endif #ifdef USE_D3D11 #define GLFW_EXPOSE_NATIVE_WIN32 #include #endif #ifdef _WIN32 #define strdup _strdup #endif #ifdef NDEBUG #define DEBUG false #else #define DEBUG true #endif #define PL_ARRAY_SIZE(s) (sizeof(s) / sizeof((s)[0])) const struct window_impl IMPL; struct window_pos { int x; int y; int w; int h; }; struct priv { struct window w; GLFWwindow *win; #ifdef USE_VK VkSurfaceKHR surf; pl_vulkan vk; pl_vk_inst vk_inst; #endif #ifdef USE_GL pl_opengl gl; #endif #ifdef USE_D3D11 pl_d3d11 d3d11; #endif float scroll_dx, scroll_dy; char **files; size_t files_num; size_t files_size; bool file_seen; struct window_pos windowed_pos; }; static void err_cb(int code, const char *desc) { fprintf(stderr, "GLFW err %d: %s\n", code, desc); } static void close_cb(GLFWwindow *win) { struct priv *p = glfwGetWindowUserPointer(win); p->w.window_lost = true; } static void resize_cb(GLFWwindow *win, int width, int height) { struct priv *p = glfwGetWindowUserPointer(win); if (!pl_swapchain_resize(p->w.swapchain, &width, &height)) { fprintf(stderr, "libplacebo: Failed resizing swapchain? Exiting...\n"); p->w.window_lost = true; } } static void scroll_cb(GLFWwindow *win, double dx, double dy) { struct priv *p = glfwGetWindowUserPointer(win); p->scroll_dx += dx; p->scroll_dy += dy; } static void drop_cb(GLFWwindow *win, int num, const char *files[]) { struct priv *p = glfwGetWindowUserPointer(win); for (int i = 0; i < num; i++) { if (p->files_num == p->files_size) { size_t new_size = p->files_size ? 
p->files_size * 2 : 16; char **new_files = realloc(p->files, new_size * sizeof(char *)); if (!new_files) return; p->files = new_files; p->files_size = new_size; } char *file = strdup(files[i]); if (!file) return; p->files[p->files_num++] = file; } } #ifdef USE_GL static bool make_current(void *priv) { GLFWwindow *win = priv; glfwMakeContextCurrent(win); return true; } static void release_current(void *priv) { glfwMakeContextCurrent(NULL); } #endif #ifdef USE_VK static VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL get_vk_proc_addr(VkInstance instance, const char* pName) { return (PFN_vkVoidFunction) glfwGetInstanceProcAddress(instance, pName); } #endif static struct window *glfw_create(pl_log log, const struct window_params *params) { struct priv *p = calloc(1, sizeof(struct priv)); if (!p) return NULL; p->w.impl = &IMPL; if (!glfwInit()) { fprintf(stderr, "GLFW: Failed initializing?\n"); goto error; } glfwSetErrorCallback(&err_cb); #ifdef USE_VK if (!glfwVulkanSupported()) { fprintf(stderr, "GLFW: No vulkan support! Perhaps recompile with -DUSE_GL\n"); goto error; } glfwWindowHint(GLFW_CLIENT_API, GLFW_NO_API); #endif // USE_VK #ifdef USE_D3D11 glfwWindowHint(GLFW_CLIENT_API, GLFW_NO_API); #endif // USE_D3D11 #ifdef USE_GL struct { int api; int major, minor; int glsl_ver; int profile; } gl_vers[] = { { GLFW_OPENGL_API, 4, 6, 460, GLFW_OPENGL_CORE_PROFILE }, { GLFW_OPENGL_API, 4, 5, 450, GLFW_OPENGL_CORE_PROFILE }, { GLFW_OPENGL_API, 4, 4, 440, GLFW_OPENGL_CORE_PROFILE }, { GLFW_OPENGL_API, 4, 0, 400, GLFW_OPENGL_CORE_PROFILE }, { GLFW_OPENGL_API, 3, 3, 330, GLFW_OPENGL_CORE_PROFILE }, { GLFW_OPENGL_API, 3, 2, 150, GLFW_OPENGL_CORE_PROFILE }, { GLFW_OPENGL_ES_API, 3, 2, 320, }, { GLFW_OPENGL_API, 3, 1, 140, }, { GLFW_OPENGL_ES_API, 3, 1, 310, }, { GLFW_OPENGL_API, 3, 0, 130, }, { GLFW_OPENGL_ES_API, 3, 0, 300, }, { GLFW_OPENGL_ES_API, 2, 0, 100, }, { GLFW_OPENGL_API, 2, 1, 120, }, { GLFW_OPENGL_API, 2, 0, 110, }, }; for (int i = 0; i < PL_ARRAY_SIZE(gl_vers); i++) { glfwWindowHint(GLFW_CLIENT_API, gl_vers[i].api); #ifdef HAVE_EGL glfwWindowHint(GLFW_CONTEXT_CREATION_API, GLFW_EGL_CONTEXT_API); #endif glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, gl_vers[i].major); glfwWindowHint(GLFW_CONTEXT_VERSION_MINOR, gl_vers[i].minor); glfwWindowHint(GLFW_OPENGL_PROFILE, gl_vers[i].profile); #ifdef __APPLE__ if (gl_vers[i].profile == GLFW_OPENGL_CORE_PROFILE) glfwWindowHint(GLFW_OPENGL_FORWARD_COMPAT, GL_TRUE); #endif #endif // USE_GL if (params->alpha) glfwWindowHint(GLFW_TRANSPARENT_FRAMEBUFFER, GLFW_TRUE); printf("Creating %dx%d window%s...\n", params->width, params->height, params->alpha ? 
" (with alpha)" : ""); p->win = glfwCreateWindow(params->width, params->height, params->title, NULL, NULL); #ifdef USE_GL if (p->win) break; } #endif // USE_GL if (!p->win) { fprintf(stderr, "GLFW: Failed creating window\n"); goto error; } // Set up GLFW event callbacks glfwSetWindowUserPointer(p->win, p); glfwSetFramebufferSizeCallback(p->win, resize_cb); glfwSetWindowCloseCallback(p->win, close_cb); glfwSetScrollCallback(p->win, scroll_cb); glfwSetDropCallback(p->win, drop_cb); #ifdef USE_VK VkResult err; uint32_t num; p->vk_inst = pl_vk_inst_create(log, pl_vk_inst_params( .get_proc_addr = get_vk_proc_addr, .debug = DEBUG, .extensions = glfwGetRequiredInstanceExtensions(&num), .num_extensions = num, )); if (!p->vk_inst) { fprintf(stderr, "libplacebo: Failed creating vulkan instance\n"); goto error; } err = glfwCreateWindowSurface(p->vk_inst->instance, p->win, NULL, &p->surf); if (err != VK_SUCCESS) { fprintf(stderr, "GLFW: Failed creating vulkan surface\n"); goto error; } p->vk = pl_vulkan_create(log, pl_vulkan_params( .instance = p->vk_inst->instance, .get_proc_addr = p->vk_inst->get_proc_addr, .surface = p->surf, .allow_software = true, )); if (!p->vk) { fprintf(stderr, "libplacebo: Failed creating vulkan device\n"); goto error; } p->w.swapchain = pl_vulkan_create_swapchain(p->vk, pl_vulkan_swapchain_params( .surface = p->surf, .present_mode = VK_PRESENT_MODE_FIFO_KHR, )); if (!p->w.swapchain) { fprintf(stderr, "libplacebo: Failed creating vulkan swapchain\n"); goto error; } p->w.gpu = p->vk->gpu; #endif // USE_VK #ifdef USE_GL p->gl = pl_opengl_create(log, pl_opengl_params( .allow_software = true, .debug = DEBUG, #ifdef HAVE_EGL .egl_display = glfwGetEGLDisplay(), .egl_context = glfwGetEGLContext(p->win), #endif .make_current = make_current, .release_current = release_current, .get_proc_addr = glfwGetProcAddress, .priv = p->win, )); if (!p->gl) { fprintf(stderr, "libplacebo: Failed creating opengl device\n"); goto error; } p->w.swapchain = pl_opengl_create_swapchain(p->gl, pl_opengl_swapchain_params( .swap_buffers = (void (*)(void *)) glfwSwapBuffers, .priv = p->win, )); if (!p->w.swapchain) { fprintf(stderr, "libplacebo: Failed creating opengl swapchain\n"); goto error; } p->w.gpu = p->gl->gpu; #endif // USE_GL #ifdef USE_D3D11 p->d3d11 = pl_d3d11_create(log, pl_d3d11_params( .debug = DEBUG )); if (!p->d3d11) { fprintf(stderr, "libplacebo: Failed creating D3D11 device\n"); goto error; } p->w.swapchain = pl_d3d11_create_swapchain(p->d3d11, pl_d3d11_swapchain_params( .window = glfwGetWin32Window(p->win), )); if (!p->w.swapchain) { fprintf(stderr, "libplacebo: Failed creating D3D11 swapchain\n"); goto error; } p->w.gpu = p->d3d11->gpu; #endif // USE_D3D11 glfwGetWindowSize(p->win, &p->windowed_pos.w, &p->windowed_pos.h); glfwGetWindowPos(p->win, &p->windowed_pos.x, &p->windowed_pos.y); int w, h; glfwGetFramebufferSize(p->win, &w, &h); pl_swapchain_colorspace_hint(p->w.swapchain, ¶ms->colors); if (!pl_swapchain_resize(p->w.swapchain, &w, &h)) { fprintf(stderr, "libplacebo: Failed initializing swapchain\n"); goto error; } return &p->w; error: window_destroy((struct window **) &p); return NULL; } static void glfw_destroy(struct window **window) { struct priv *p = (struct priv *) *window; if (!p) return; pl_swapchain_destroy(&p->w.swapchain); #ifdef USE_VK pl_vulkan_destroy(&p->vk); if (p->surf) { PFN_vkDestroySurfaceKHR vkDestroySurfaceKHR = (PFN_vkDestroySurfaceKHR) p->vk_inst->get_proc_addr(p->vk_inst->instance, "vkDestroySurfaceKHR"); vkDestroySurfaceKHR(p->vk_inst->instance, p->surf, 
NULL); } pl_vk_inst_destroy(&p->vk_inst); #endif #ifdef USE_GL pl_opengl_destroy(&p->gl); #endif #ifdef USE_D3D11 pl_d3d11_destroy(&p->d3d11); #endif for (int i = 0; i < p->files_num; i++) free(p->files[i]); free(p->files); glfwTerminate(); free(p); *window = NULL; } static void glfw_poll(struct window *window, bool block) { if (block) { glfwWaitEvents(); } else { glfwPollEvents(); } } static void glfw_get_cursor(const struct window *window, int *x, int *y) { struct priv *p = (struct priv *) window; double dx, dy; int fw, fh, ww, wh; glfwGetCursorPos(p->win, &dx, &dy); glfwGetFramebufferSize(p->win, &fw, &fh); glfwGetWindowSize(p->win, &ww, &wh); *x = floor(dx * fw / ww); *y = floor(dy * fh / wh); } static bool glfw_get_button(const struct window *window, enum button btn) { static const int button_map[] = { [BTN_LEFT] = GLFW_MOUSE_BUTTON_LEFT, [BTN_RIGHT] = GLFW_MOUSE_BUTTON_RIGHT, [BTN_MIDDLE] = GLFW_MOUSE_BUTTON_MIDDLE, }; struct priv *p = (struct priv *) window; return glfwGetMouseButton(p->win, button_map[btn]) == GLFW_PRESS; } static bool glfw_get_key(const struct window *window, enum key key) { static const int key_map[] = { [KEY_ESC] = GLFW_KEY_ESCAPE, }; struct priv *p = (struct priv *) window; return glfwGetKey(p->win, key_map[key]) == GLFW_PRESS; } static void glfw_get_scroll(const struct window *window, float *dx, float *dy) { struct priv *p = (struct priv *) window; *dx = p->scroll_dx; *dy = p->scroll_dy; p->scroll_dx = p->scroll_dy = 0.0; } static char *glfw_get_file(const struct window *window) { struct priv *p = (struct priv *) window; if (p->file_seen) { assert(p->files_num); free(p->files[0]); memmove(&p->files[0], &p->files[1], --p->files_num * sizeof(char *)); p->file_seen = false; } if (!p->files_num) return NULL; p->file_seen = true; return p->files[0]; } static bool glfw_is_fullscreen(const struct window *window) { const struct priv *p = (const struct priv *) window; return glfwGetWindowMonitor(p->win); } static bool glfw_toggle_fullscreen(const struct window *window, bool fullscreen) { struct priv *p = (struct priv *) window; bool window_fullscreen = glfw_is_fullscreen(window); if (window_fullscreen == fullscreen) return true; if (window_fullscreen) { glfwSetWindowMonitor(p->win, NULL, p->windowed_pos.x, p->windowed_pos.y, p->windowed_pos.w, p->windowed_pos.h, GLFW_DONT_CARE); return true; } // For simplicity sake use primary monitor GLFWmonitor *monitor = glfwGetPrimaryMonitor(); if (!monitor) return false; const GLFWvidmode *mode = glfwGetVideoMode(monitor); if (!mode) return false; glfwGetWindowPos(p->win, &p->windowed_pos.x, &p->windowed_pos.y); glfwGetWindowSize(p->win, &p->windowed_pos.w, &p->windowed_pos.h); glfwSetWindowMonitor(p->win, monitor, 0, 0, mode->width, mode->height, mode->refreshRate); return true; } static const char *glfw_get_clipboard(const struct window *window) { struct priv *p = (struct priv *) window; return glfwGetClipboardString(p->win); } static void glfw_set_clipboard(const struct window *window, const char *text) { struct priv *p = (struct priv *) window; glfwSetClipboardString(p->win, text); } const struct window_impl IMPL = { .name = IMPL_NAME, .tag = IMPL_TAG, .create = glfw_create, .destroy = glfw_destroy, .poll = glfw_poll, .get_cursor = glfw_get_cursor, .get_button = glfw_get_button, .get_key = glfw_get_key, .get_scroll = glfw_get_scroll, .get_file = glfw_get_file, .toggle_fullscreen = glfw_toggle_fullscreen, .is_fullscreen = glfw_is_fullscreen, .get_clipboard = glfw_get_clipboard, .set_clipboard = glfw_set_clipboard, }; 
libplacebo-v7.349.0/demos/window_sdl.c000066400000000000000000000243631463457750100176300ustar00rootroot00000000000000// License: CC0 / Public Domain #if !defined(USE_GL) && !defined(USE_VK) || defined(USE_GL) && defined(USE_VK) #error Specify exactly one of -DUSE_GL or -DUSE_VK when compiling! #endif #include #include "common.h" #include "window.h" #ifdef USE_VK #define VK_NO_PROTOTYPES #include #include #define WINFLAG_API SDL_WINDOW_VULKAN #define IMPL win_impl_sdl_vk #define IMPL_NAME "SDL2 (vulkan)" #define IMPL_TAG "sdl2-vk" #endif #ifdef USE_GL #include #define WINFLAG_API SDL_WINDOW_OPENGL #define IMPL win_impl_sdl_gl #define IMPL_NAME "SDL2 (opengl)" #define IMPL_TAG "sdl2-gl" #endif #ifdef NDEBUG #define DEBUG false #else #define DEBUG true #endif const struct window_impl IMPL; struct priv { struct window w; SDL_Window *win; #ifdef USE_VK VkSurfaceKHR surf; pl_vulkan vk; pl_vk_inst vk_inst; #endif #ifdef USE_GL SDL_GLContext gl_ctx; pl_opengl gl; #endif int scroll_dx, scroll_dy; char **files; size_t files_num; size_t files_size; bool file_seen; char *clip_text; }; #ifdef USE_GL static bool make_current(void *priv) { struct priv *p = priv; return SDL_GL_MakeCurrent(p->win, p->gl_ctx) == 0; } static void release_current(void *priv) { struct priv *p = priv; SDL_GL_MakeCurrent(p->win, NULL); } #endif static struct window *sdl_create(pl_log log, const struct window_params *params) { struct priv *p = calloc(1, sizeof(struct priv)); if (!p) return NULL; p->w.impl = &IMPL; if (SDL_Init(SDL_INIT_VIDEO) < 0) { fprintf(stderr, "SDL2: Failed initializing: %s\n", SDL_GetError()); goto error; } uint32_t sdl_flags = SDL_WINDOW_SHOWN | SDL_WINDOW_RESIZABLE | WINFLAG_API; p->win = SDL_CreateWindow(params->title, SDL_WINDOWPOS_UNDEFINED, SDL_WINDOWPOS_UNDEFINED, params->width, params->height, sdl_flags); if (!p->win) { fprintf(stderr, "SDL2: Failed creating window: %s\n", SDL_GetError()); goto error; } int w, h; #ifdef USE_VK unsigned int num = 0; if (!SDL_Vulkan_GetInstanceExtensions(p->win, &num, NULL)) { fprintf(stderr, "SDL2: Failed enumerating vulkan extensions: %s\n", SDL_GetError()); goto error; } const char **exts = malloc(num * sizeof(const char *)); SDL_Vulkan_GetInstanceExtensions(p->win, &num, exts); p->vk_inst = pl_vk_inst_create(log, pl_vk_inst_params( .get_proc_addr = SDL_Vulkan_GetVkGetInstanceProcAddr(), .debug = DEBUG, .extensions = exts, .num_extensions = num, )); free(exts); if (!p->vk_inst) { fprintf(stderr, "libplacebo: Failed creating vulkan instance!\n"); goto error; } if (!SDL_Vulkan_CreateSurface(p->win, p->vk_inst->instance, &p->surf)) { fprintf(stderr, "SDL2: Failed creating surface: %s\n", SDL_GetError()); goto error; } p->vk = pl_vulkan_create(log, pl_vulkan_params( .instance = p->vk_inst->instance, .get_proc_addr = p->vk_inst->get_proc_addr, .surface = p->surf, .allow_software = true, )); if (!p->vk) { fprintf(stderr, "libplacebo: Failed creating vulkan device\n"); goto error; } p->w.swapchain = pl_vulkan_create_swapchain(p->vk, pl_vulkan_swapchain_params( .surface = p->surf, .present_mode = VK_PRESENT_MODE_FIFO_KHR, )); if (!p->w.swapchain) { fprintf(stderr, "libplacebo: Failed creating vulkan swapchain\n"); goto error; } p->w.gpu = p->vk->gpu; SDL_Vulkan_GetDrawableSize(p->win, &w, &h); #endif // USE_VK #ifdef USE_GL p->gl_ctx = SDL_GL_CreateContext(p->win); if (!p->gl_ctx) { fprintf(stderr, "SDL2: Failed creating GL context: %s\n", SDL_GetError()); goto error; } p->gl = pl_opengl_create(log, pl_opengl_params( .allow_software = true, .debug = DEBUG, .make_current = 
make_current, .release_current = release_current, .get_proc_addr = (void *) SDL_GL_GetProcAddress, .priv = p, )); if (!p->gl) { fprintf(stderr, "libplacebo: Failed creating opengl device\n"); goto error; } p->w.swapchain = pl_opengl_create_swapchain(p->gl, pl_opengl_swapchain_params( .swap_buffers = (void (*)(void *)) SDL_GL_SwapWindow, .priv = p->win, )); if (!p->w.swapchain) { fprintf(stderr, "libplacebo: Failed creating opengl swapchain\n"); goto error; } p->w.gpu = p->gl->gpu; SDL_GL_GetDrawableSize(p->win, &w, &h); #endif // USE_GL pl_swapchain_colorspace_hint(p->w.swapchain, ¶ms->colors); if (!pl_swapchain_resize(p->w.swapchain, &w, &h)) { fprintf(stderr, "libplacebo: Failed initializing swapchain\n"); goto error; } return &p->w; error: window_destroy((struct window **) &p); return NULL; } static void sdl_destroy(struct window **window) { struct priv *p = (struct priv *) *window; if (!p) return; pl_swapchain_destroy(&p->w.swapchain); #ifdef USE_VK pl_vulkan_destroy(&p->vk); if (p->surf) { PFN_vkDestroySurfaceKHR vkDestroySurfaceKHR = (PFN_vkDestroySurfaceKHR) p->vk_inst->get_proc_addr(p->vk_inst->instance, "vkDestroySurfaceKHR"); vkDestroySurfaceKHR(p->vk_inst->instance, p->surf, NULL); } pl_vk_inst_destroy(&p->vk_inst); #endif #ifdef USE_GL pl_opengl_destroy(&p->gl); SDL_GL_DeleteContext(p->gl_ctx); #endif for (int i = 0; i < p->files_num; i++) SDL_free(p->files[i]); free(p->files); SDL_free(p->clip_text); SDL_DestroyWindow(p->win); SDL_Quit(); free(p); *window = NULL; } static inline void handle_event(struct priv *p, SDL_Event *event) { switch (event->type) { case SDL_QUIT: p->w.window_lost = true; return; case SDL_WINDOWEVENT: if (event->window.windowID != SDL_GetWindowID(p->win)) return; if (event->window.event == SDL_WINDOWEVENT_SIZE_CHANGED) { int width = event->window.data1, height = event->window.data2; if (!pl_swapchain_resize(p->w.swapchain, &width, &height)) { fprintf(stderr, "libplacebo: Failed resizing swapchain? Exiting...\n"); p->w.window_lost = true; } } return; case SDL_MOUSEWHEEL: p->scroll_dx += event->wheel.x; p->scroll_dy += event->wheel.y; return; case SDL_DROPFILE: if (p->files_num == p->files_size) { size_t new_size = p->files_size ? p->files_size * 2 : 16; char **new_files = realloc(p->files, new_size * sizeof(char *)); if (!new_files) return; p->files = new_files; p->files_size = new_size; } p->files[p->files_num++] = event->drop.file; return; } } static void sdl_poll(struct window *window, bool block) { struct priv *p = (struct priv *) window; SDL_Event event; int ret; do { ret = block ? 
SDL_WaitEvent(&event) : SDL_PollEvent(&event); if (ret) handle_event(p, &event); // Only block on the first iteration block = false; } while (ret); } static void sdl_get_cursor(const struct window *window, int *x, int *y) { SDL_GetMouseState(x, y); } static bool sdl_get_button(const struct window *window, enum button btn) { static const uint32_t button_mask[] = { [BTN_LEFT] = SDL_BUTTON_LMASK, [BTN_RIGHT] = SDL_BUTTON_RMASK, [BTN_MIDDLE] = SDL_BUTTON_MMASK, }; return SDL_GetMouseState(NULL, NULL) & button_mask[btn]; } static bool sdl_get_key(const struct window *window, enum key key) { static const size_t key_map[] = { [KEY_ESC] = SDL_SCANCODE_ESCAPE, }; return SDL_GetKeyboardState(NULL)[key_map[key]]; } static void sdl_get_scroll(const struct window *window, float *dx, float *dy) { struct priv *p = (struct priv *) window; *dx = p->scroll_dx; *dy = p->scroll_dy; p->scroll_dx = p->scroll_dy = 0; } static char *sdl_get_file(const struct window *window) { struct priv *p = (struct priv *) window; if (p->file_seen) { assert(p->files_num); SDL_free(p->files[0]); memmove(&p->files[0], &p->files[1], --p->files_num * sizeof(char *)); p->file_seen = false; } if (!p->files_num) return NULL; p->file_seen = true; return p->files[0]; } static bool sdl_is_fullscreen(const struct window *window) { const struct priv *p = (const struct priv *) window; return SDL_GetWindowFlags(p->win) & SDL_WINDOW_FULLSCREEN; } static bool sdl_toggle_fullscreen(const struct window *window, bool fullscreen) { struct priv *p = (struct priv *) window; bool window_fullscreen = sdl_is_fullscreen(window); if (window_fullscreen == fullscreen) return true; SDL_DisplayMode mode; if (SDL_GetDesktopDisplayMode(0, &mode)) { fprintf(stderr, "SDL2: Failed to get display mode: %s\n", SDL_GetError()); SDL_ClearError(); return false; } if (SDL_SetWindowDisplayMode(p->win, &mode)) { fprintf(stderr, "SDL2: Failed to set window display mode: %s\n", SDL_GetError()); SDL_ClearError(); return false; } if (SDL_SetWindowFullscreen(p->win, fullscreen ? SDL_WINDOW_FULLSCREEN : 0)) { fprintf(stderr, "SDL2: SetWindowFullscreen failed: %s\n", SDL_GetError()); SDL_ClearError(); return false; } return true; } static const char *sdl_get_clipboard(const struct window *window) { struct priv *p = (struct priv *) window; SDL_free(p->clip_text); return p->clip_text = SDL_GetClipboardText(); } static void sdl_set_clipboard(const struct window *window, const char *text) { SDL_SetClipboardText(text); } const struct window_impl IMPL = { .name = IMPL_NAME, .tag = IMPL_TAG, .create = sdl_create, .destroy = sdl_destroy, .poll = sdl_poll, .get_cursor = sdl_get_cursor, .get_button = sdl_get_button, .get_key = sdl_get_key, .get_scroll = sdl_get_scroll, .get_file = sdl_get_file, .toggle_fullscreen = sdl_toggle_fullscreen, .is_fullscreen = sdl_is_fullscreen, .get_clipboard = sdl_get_clipboard, .set_clipboard = sdl_set_clipboard, }; libplacebo-v7.349.0/docs/000077500000000000000000000000001463457750100151245ustar00rootroot00000000000000libplacebo-v7.349.0/docs/CNAME000066400000000000000000000000171463457750100156700ustar00rootroot00000000000000libplacebo.org libplacebo-v7.349.0/docs/basic-rendering.md000066400000000000000000000313541463457750100205100ustar00rootroot00000000000000# Basic windowing / output example We will demonstrate the basics of the libplacebo GPU output API with a worked example. The goal is to show a simple color on screen. 
## Creating a `pl_log` Almost all major entry-points into libplacebo require providing a log callback (or `NULL` to disable logging). This is abstracted into the `pl_log` object type, which we can create with `pl_log_create`: ``` c linenums="1" #include pl_log pllog; int main() { pllog = pl_log_create(PL_API_VER, pl_log_params( .log_cb = pl_log_color, .log_level = PL_LOG_INFO, )); // ... pl_log_destroy(&pllog); return 0; } ``` !!! note "Compiling" You can compile this example with: ``` bash $ gcc example.c -o example `pkg-config --cflags --libs libplacebo` ``` The parameter `PL_API_VER` has no special significance and is merely included for historical reasons. Aside from that, this snippet introduces a number of core concepts of the libplacebo API: ### Parameter structs For extensibility, almost all libplacebo calls take a pointer to a `const struct pl_*_params`, into which all extensible parameters go. For convenience, libplacebo provides macros which create anonymous params structs on the stack (and also fill in default parameters). Note that this only works for C99 and above, users of C89 and C++ must initialize parameter structs manually. Under the hood, `pl_log_params(...)` just translates to `&((struct pl_log_params) { /* default params */, ... })`. This style of API allows libplacebo to effectively simulate optional named parameters. !!! note "On default parameters" Wherever possible, parameters are designed in such a way that `{0}` gives you a minimal parameter structure, with default behavior and no optional features enabled. This is done for forwards compatibility - as new features are introduced, old struct initializers will simply opt out of them. ### Destructors All libplacebo objects must be destroyed manually using the corresponding `pl_*_destroy` call, which takes a pointer to the variable the object is stored in. The resulting variable is written to `NULL`. This helps prevent use-after-free bugs. !!! note "NULL" As a general rule, all libplacebo destructors are safe to call on variables containing `NULL`. So, users need not explicitly `NULL`-test before calling destructors on variables. ## Creating a window While libplacebo can work in isolation, to render images offline, for the sake of this guide we want to provide something graphical on-screen. As such, we need to create some sort of window. Libplacebo provides no built-in mechanism for this, it assumes the API user will already have a windowing system in-place. Complete examples (based on GLFW and SDL) can be found [in the libplacebo demos](https://code.videolan.org/videolan/libplacebo/-/tree/master/demos). But for now, we will focus on getting a very simple window on-screen using GLFW: ``` c linenums="1" hl_lines="3 5 6 7 9 17 18 20 21 22 24 25 26 28 29" // ... #include const char * const title = "libplacebo demo"; int width = 800; int height = 600; GLFWwindow *window; int main() { pllog = pl_log_create(PL_API_VER, pl_log_params( .log_level = PL_LOG_INFO, )); if (!glfwInit()) return 1; window = glfwCreateWindow(width, height, title, NULL, NULL); if (!window) return 1; while (!glfwWindowShouldClose(window)) { glfwWaitEvents(); } glfwDestroyWindow(window); glfwTerminate(); pl_log_destroy(&pllog); return 0; } ``` !!! note "Compiling" We now also need to include the glfw3 library to compile this example. 
``` bash $ gcc example.c -o example `pkg-config --cflags --libs glfw3 libplacebo` ``` ## Creating the `pl_gpu` All GPU operations are abstracted into an internal `pl_gpu` object, which serves as the primary entry-point to any sort of GPU interaction. This object cannot be created directly, but must be obtained from some graphical API: currently there are Vulkan, OpenGL or D3D11. A `pl_gpu` can be accessed from an API-specific object like `pl_vulkan`, `pl_opengl` and `pl_d3d11`. In this guide, for simplicity, we will be using OpenGL, simply because that's what GLFW initializes by default. ``` c linenums="1" hl_lines="3 5-6 15-23 29 36-45" // ... pl_opengl opengl; static bool make_current(void *priv); static void release_current(void *priv); int main() { // ... window = glfwCreateWindow(width, height, title, NULL, NULL); if (!window) return 1; opengl = pl_opengl_create(pllog, pl_opengl_params( .get_proc_addr = glfwGetProcAddress, .allow_software = true, // allow software rasterers .debug = true, // enable error reporting .make_current = make_current, // (1) .release_current = release_current, )); if (!opengl) return 2; while (!glfwWindowShouldClose(window)) { glfwWaitEvents(); } pl_opengl_destroy(&opengl); glfwDestroyWindow(window); glfwTerminate(); pl_log_destroy(&pllog); return 0; } static bool make_current(void *priv) { glfwMakeContextCurrent(window); return true; } static void release_current(void *priv) { glfwMakeContextCurrent(NULL); } ``` 1. Setting this allows the resulting `pl_gpu` to be thread-safe, which enables asynchronous transfers to be used. The alternative is to simply call `glfwMakeContextCurrent` once after creating the window. This method of making the context current is generally preferred, however, so we've demonstrated it here for completeness' sake. ## Creating a swapchain All access to window-based rendering commands are abstracted into an object known as a "swapchain" (from Vulkan terminology), including the default backbuffers on D3D11 and OpenGL. If we want to present something to screen, we need to first create a `pl_swapchain`. We can use this swapchain to perform the equivalent of `gl*SwapBuffers`: ``` c linenums="1" hl_lines="2 4-9 17-22 24-27 30-31 34" // ... pl_swapchain swchain; static void resize_cb(GLFWwindow *win, int new_w, int new_h) { width = new_w; height = new_h; pl_swapchain_resize(swchain, &width, &height); } int main() { // ... if (!opengl) return 2; swchain = pl_opengl_create_swapchain(opengl, pl_opengl_swapchain_params( .swap_buffers = (void (*)(void *)) glfwSwapBuffers, .priv = window, )); if (!swchain) return 2; // (2) if (!pl_swapchain_resize(swchain, &width, &height)) return 2; glfwSetFramebufferSizeCallback(window, resize_cb); while (!glfwWindowShouldClose(window)) { pl_swapchain_swap_buffers(swchain); glfwPollEvents(); // (1) } pl_swapchain_destroy(&swchain); pl_opengl_destroy(&opengl); glfwDestroyWindow(window); glfwTerminate(); pl_log_destroy(&pllog); return 0; } ``` 1. We change this from `glfwWaitEvents` to `glfwPollEvents` because we now want to re-run our main loop once per vsync, rather than only when new events arrive. The `pl_swapchain_swap_buffers` call will ensure that this does not execute too quickly. 2. The swapchain needs to be resized to fit the size of the window, which in GLFW is handled by listening to a callback. In addition to setting this callback, we also need to inform the swapchain of the initial window size. 
Note that the `pl_swapchain_resize` function handles both resize requests and size queries - hence, the actual swapchain size is returned back to the passed variables. ## Getting pixels on the screen With a swapchain in hand, we're now equipped to start drawing pixels to the screen: ``` c linenums="1" hl_lines="3-8 15-20" // ... static void render_frame(struct pl_swapchain_frame frame) { pl_gpu gpu = opengl->gpu; pl_tex_clear(gpu, frame.fbo, (float[4]){ 1.0, 0.5, 0.0, 1.0 }); } int main() { // ... while (!glfwWindowShouldClose(window)) { struct pl_swapchain_frame frame; while (!pl_swapchain_start_frame(swchain, &frame)) glfwWaitEvents(); // (1) render_frame(frame); if (!pl_swapchain_submit_frame(swchain)) break; // (2) pl_swapchain_swap_buffers(swchain); glfwPollEvents(); } // ... } ``` 1. If `pl_swapchain_start_frame` fails, it typically means the window is hidden, minimized or blocked. This is not a fatal condition, and as such we simply want to process window events until we can resume rendering. 2. If `pl_swapchain_submit_frame` fails, it typically means the window has been lost, and further rendering commands are not expected to succeed. As such, in this case, we simply terminate the example program. Our main render loop has changed into a combination of `pl_swapchain_start_frame`, rendering, and `pl_swapchain_submit_frame`. To start with, we simply use the `pl_tex_clear` function to blit a constant orange color to the framebuffer. ### Interlude: Rendering commands The previous code snippet represented our first foray into the `pl_gpu` API. For more detail on this API, see the [GPU API](#TODO) section. But as a general rule of thumb, all `pl_gpu`-level operations are thread safe, asynchronous (except when returning something to the CPU), and internally refcounted (so you can destroy all objects as soon as you no longer need the reference). In the example loop, `pl_swapchain_swap_buffers` is the only operation that actually flushes commands to the GPU. You can force an early flush with `pl_gpu_flush()` or `pl_gpu_finish()`, but other than that, commands will "queue" internally and complete asynchronously at some unknown point in time, until forward progress is needed (e.g. `pl_tex_download`). ## Conclusion We have demonstrated how to create a window, how to initialize the libplacebo API, create a GPU instance based on OpenGL, and how to write a basic rendering loop that blits a single color to the framebuffer. Here is a complete transcript of the example we built in this section: ??? 
example "Basic rendering" ``` c linenums="1" #include #include #include #include const char * const title = "libplacebo demo"; int width = 800; int height = 600; GLFWwindow *window; pl_log pllog; pl_opengl opengl; pl_swapchain swchain; static bool make_current(void *priv); static void release_current(void *priv); static void resize_cb(GLFWwindow *win, int new_w, int new_h) { width = new_w; height = new_h; pl_swapchain_resize(swchain, &width, &height); } static void render_frame(struct pl_swapchain_frame frame) { pl_gpu gpu = opengl->gpu; pl_tex_clear(gpu, frame.fbo, (float[4]){ 1.0, 0.5, 0.0, 1.0 }); } int main() { pllog = pl_log_create(PL_API_VER, pl_log_params( .log_cb = pl_log_color, .log_level = PL_LOG_INFO, )); if (!glfwInit()) return 1; window = glfwCreateWindow(width, height, title, NULL, NULL); if (!window) return 1; opengl = pl_opengl_create(pllog, pl_opengl_params( .get_proc_addr = glfwGetProcAddress, .allow_software = true, // allow software rasterers .debug = true, // enable error reporting .make_current = make_current, .release_current = release_current, )); swchain = pl_opengl_create_swapchain(opengl, pl_opengl_swapchain_params( .swap_buffers = (void (*)(void *)) glfwSwapBuffers, .priv = window, )); if (!swchain) return 2; if (!pl_swapchain_resize(swchain, &width, &height)) return 2; glfwSetFramebufferSizeCallback(window, resize_cb); while (!glfwWindowShouldClose(window)) { struct pl_swapchain_frame frame; while (!pl_swapchain_start_frame(swchain, &frame)) glfwWaitEvents(); render_frame(frame); if (!pl_swapchain_submit_frame(swchain)) break; pl_swapchain_swap_buffers(swchain); glfwPollEvents(); } pl_swapchain_destroy(&swchain); pl_opengl_destroy(&opengl); glfwDestroyWindow(window); glfwTerminate(); pl_log_destroy(&pllog); return 0; } static bool make_current(void *priv) { glfwMakeContextCurrent(window); return true; } static void release_current(void *priv) { glfwMakeContextCurrent(NULL); } ``` libplacebo-v7.349.0/docs/custom-shaders.md000066400000000000000000000563031463457750100204160ustar00rootroot00000000000000# Custom Shaders (mpv .hook syntax) libplacebo supports the same [custom shader syntax used by mpv](https://mpv.io/manual/master/#options-glsl-shader), with some important changes. This document will serve as a complete reference for this syntax. ## Overview In general, user shaders are divided into distinct *blocks*. Each block can define a shader, a texture, a buffer, or a tunable parameter. Each block starts with a collection of header directives, which are lines starting with the syntax `//!`. As an example, here is a simple shader that simply inverts the video signal: ``` glsl linenums="1" //!HOOK LUMA //!HOOK RGB //!BIND HOOKED vec4 hook() { vec4 color = HOOKED_texOff(0); color.rgb = vec3(1.0) - color.rgb; return color; } ``` This shader defines one block - a shader block which hooks into the two texture stages `LUMA` and `RGB`, binds the hooked texture, inverts the value of the `rgb` channels, and then returns the modified color. ### Expressions In a few contexts, shader directives accept arithmetic expressions, denoted by `` in the listing below. For historical reasons, all expressions are given in [reverse polish notation (RPN)](https://en.wikipedia.org/wiki/Reverse_Polish_notation), and the only value type is a floating point number. The following value types and arithmetic operations are available: * `1.234`: Literal float constant, evaluates to itself. * `NAME.w`, `NAME.width`: Evaluates to the width of a texture with name `NAME`. 
* `NAME.h`, `NAME.height`: Evaluates to the height of a texture with name `NAME`. * `PAR`: Evaluates to the value of a tunable shader parameter with name `PAR`. * `+`: Evaluates to `X+Y`. * `-`: Evaluates to `X-Y`. * `*`: Evaluates to `X*Y`. * `/`: Evaluates to `X/Y`. * `%`: Evaluates to `fmod(X, Y)`. * `>`: Evaluates to `(X > Y) ? 1.0 : 0.0`. * `<`: Evaluates to `(X < Y) ? 1.0 : 0.0`. * `=`: Evaluates to `fuzzy_eq(X, Y) ? 1.0 : 0.0`, with some tolerance to allow for floating point inaccuracy. (Around 1 ppm) * `!`: Evaluates to `X ? 0.0 : 1.0`. Note that `+` and `*` can be used as suitable replacements for the otherwise absent boolean logic expressions (`||` and `&&`). ## Shaders Shaders are the default block type, and have no special syntax to indicate their presence. Shader stages contain raw GLSL code that will be (conditionally) executed. This GLSL snippet must define a single function `vec4 hook()`, or `void hook()` for compute shaders. During the execution of any shader, the following global variables are made available: * `int frame`: A raw counter tracking the number of executions of this shader stage. * `float random`: A pseudo-random float uniformly distributed in the range `[0,1)`. * `vec2 input_size`: The nominal size (in pixels) of the original input image. * `vec2 target_size`: The nominal size (in pixels) of the output rectangle. * `vec2 tex_offset`: The nominal offset (in pixels), of the original input crop. * `vec4 linearize(vec4 color)`: Linearize the input color according to the image's tagged gamma function. * `vec4 delinearize(vec4 color)`: Opposite counterpart to `linearize`. Shader stages accept the following directives: ### `HOOK ` A `HOOK` directive determines when a shader stage is run. During internal processing, libplacebo goes over a number of pre-defined *hook points* at set points in the processing pipeline. It is only possible to intercept the image, and run custom shaders, at these fixed hook points. Here is a current list of hook points: * `RGB`: Input plane containing RGB values * `LUMA`: Input plane containing a Y value * `CHROMA`: Input plane containing chroma values (one or both) * `ALPHA`: Input plane containing a single alpha value * `XYZ`: Input plane containing XYZ values * `CHROMA_SCALED`: Chroma plane, after merging and upscaling to luma size * `ALPHA_SCALED`: Alpha plane, after upscaling to luma size * `NATIVE`: Merged input planes, before any sort of color conversion (as-is) * `MAIN`: After conversion to RGB, before linearization/scaling * `LINEAR`: After conversion to linear light (for scaling purposes) * `SIGMOID`: After conversion to sigmoidized light (for scaling purposes) * `PREKERNEL`: Immediately before the execution of the main scaler kernel * `POSTKERNEL`: Immediately after the execution of the main scaler kernel * `SCALED`: After scaling, in either linear or non-linear light RGB * `PREOUTPUT`: After color conversion to target colorspace, before alpha blending * `OUTPUT`: After alpha blending, before dithering and final output pass !!! warning "`MAINPRESUB`" In mpv, `MAIN` and `MAINPRESUB` are separate shader stages, because the mpv option `--blend-subtitles=video` allows rendering overlays directly onto the pre-scaled video stage. libplacebo does not support this feature, and as such, the `MAINPRESUB` shader stage does not exist. It is still valid to refer to this name in shaders, but it is handled identically to `MAIN`. It's possible for a hook point to never fire. 
For example, `SIGMOID` will not fire when downscaling, as sigmoidization only happens when upscaling. Similarly, `LUMA`/`CHROMA` will not fire on an RGB video and vice versa. A single shader stage may hook multiple hook points simultaneously, for example, to cover both `LUMA` and `RGB` cases with the same logic. (See the example shader in the introduction) ### `BIND ` The `BIND` directive makes a texture available for use in the shader. This can be any of the previously named hook points, a custom texture define by a `TEXTURE` block, a custom texture saved by a `SAVE` directive, or the special value `HOOKED` which allows binding whatever texture hook dispatched this shader stage. A bound texture will define the following GLSL functions (as macros): * `sampler2D NAME_raw`: A reference to the raw texture sampler itself. * `vec2 NAME_pos`: The texel coordinates of the current pixel. * `vec2 NAME_map(ivec2 id)`: A function that maps from `gl_GlobalInvocationID` to texel coordinates. (Compute shaders) * `vec2 NAME_size`: The size (in pixels) of the texture. * `vec2 NAME_pt`: Convenience macro for `1.0 / NAME_size`. The size of a single pixel (in texel coordinates). * `vec2 NAME_off`: The sample offset of the texture. Basically, the pixel coordinates of the top-left corner of the sampled area. * `float NAME_mul`: The coefficient that must be multiplied into sampled values in order to rescale them to `[0,1]`. * `vec4 NAME_tex(vec2 pos)`: A wrapper around `NAME_mul * textureLod(NAME_raw, pos, 0.0)`. * `vec4 NAME_texOff(vec2 offset)`: A wrapper around `NAME_tex(NAME_pos + NAME_pt * offset)`. This can be used to easily access adjacent pixels, e.g. `NAME_texOff(-1,2)` samples a pixel one to the left and two to the bottom of the current location. * `vec4 NAME_gather(vec2 pos, int c)`: A wrapper around `NAME_mul * textureGather(pos, c)`, with appropriate scaling. (Only when supported[^ifdef]) !!! note "Rotation matrix" For compatibility with mpv, we also define a `mat2 NAME_rot` which is simply equal to a 2x2 identity matrix. libplacebo never rotates input planes - all rotation happens during the final output to the display. [^ifdef]: Because these are macros, their presence can be tested for using `#ifdef` inside the GLSL preprocessor. This same directive can also be used to bind buffer blocks (i.e. uniform/storage buffers), as defined by the [`BUFFER` directive](#buffer-name). ### `SAVE ` By default, after execution of a shader stage, the resulting output is captured back into the same hooked texture that triggered the shader. This behavior can be overridden using the explicit `SAVE` directive. For example, a shader might need access to a low-res version of the luma input texture in order to process chroma: ``` glsl linenums="1" //!HOOK CHROMA //!BIND CHROMA //!BIND LUMA //!SAVE LUMA_LOWRES //!WIDTH CHROMA.w //!HEIGHT CHROMA.h vec4 hook() { return LUMA_texOff(0); } ``` This shader binds both luma and chroma and resizes the luma plane down to the size of the chroma plane, saving the result as a new texture `LUMA_LOWRES`. In general, you can pick any name you want, here. ### `DESC ` This purely informative directive simply gives the shader stage a name. This is the name that will be reported to the shader stage and execution time metrics. ### `OFFSET ` This directive indicates a pixel shift (offset) introduced by this pass. These pixel offsets will be accumulated and corrected automatically as part of plane alignment / main scaling. 
A special value of `ALIGN` will attempt to counteract any existing offset of the hooked texture by aligning it with reference plane (i.e. luma). This can be used to e.g. introduce custom chroma scaling in a way that doesn't break chroma subtexel offsets. An example: ``` glsl linenums="1" //!HOOK LUMA //!BIND HOOKED //!OFFSET 100.5 100.5 vec4 hook() { // Constant offset by N pixels towards the bottom right return HOOKED_texOff(-vec2(100.5)); } ``` This (slightly silly) shader simply shifts the entire sampled region to the bottom right by 100.5 pixels, and propagates this shift to the main scaler using the `OFFSET` directive. As such, the end result of this is that there is no visible shift of the overall image, but some detail (~100 pixels) near the bottom-right border is lost due to falling outside the bounds of the texture. ### `WIDTH `, `HEIGHT ` These directives can be used to override the dimensions of the resulting texture. Note that not all textures can be resized this way. Currently, only `RGB`, `LUMA`, `CHROMA`, `XYZ`, `NATIVE` and `MAIN` are resizable. Trying to save a texture with an incompatible size to any other shader stage will result in an error. ### `WHEN ` This directive takes an expression that can be used to make shader stages conditionally executed. If this evaluates to 0, the shader stage will be skipped. Example: ``` glsl linenums="1" //!PARAM strength //!TYPE float //!MINIMUM 0 1.0 //!HOOK MAIN //!BIND HOOKED //!WHEN intensity 0 > //!DESC do something based on 'intensity' ... ``` This example defines a shader stage that only conditionally executes itself if the value of the `intensity` shader parameter is non-zero. ### `COMPONENTS ` This directive overrides the number of components present in a texture. For example, if you want to extract a one-dimensional feature map from the otherwise 3 or 4 dimensional `MAIN` texture, you can use this directive to save on memory bandwidth and consumption by having libplacebo only allocate a one-component texture to store the feature map in: ``` glsl linenums="1" //!HOOK MAIN //!BIND HOOKED //!SAVE featuremap //!COMPONENTS 1 ``` ### `COMPUTE [ ]` This directive specifies that the shader should be treated as a compute shader, with the block size `bw` and `bh`. The compute shader will be dispatched with however many blocks are necessary to completely tile over the output. Within each block, there will be `tw*th` threads, forming a single work group. In other words: `tw` and `th` specify the work group size, which can be different from the block size. So for example, a compute shader with `bw = bh = 32` and `tw = th = 8` running on a `500x500` texture would dispatch `16x16` blocks (rounded up), each with `8x8` threads. Instead of defining a `vec4 hook()`, compute shaders must define a `void hook()` which results directly to the output texture, a `writeonly image2D out_image` made available to the shader stage. 
For example, here is a shader executing a single-pass 41x41 convolution (average blur) on the luma plane, using a compute shader to share sampling work between adjacent threads in a work group: ``` glsl linenums="1" //!HOOK LUMA //!BIND HOOKED //!COMPUTE 32 32 //!DESC avg convolution // Kernel size, 41x41 as an example const ivec2 ksize = ivec2(41, 41); const ivec2 offset = ksize / 2; // We need to load extra source texels to account for padding due to kernel // overhang const ivec2 isize = ivec2(gl_WorkGroupSize) + ksize - 1; shared float inp[isize.y][isize.x]; void hook() { // load texels into shmem ivec2 base = ivec2(gl_WorkGroupID) * ivec2(gl_WorkGroupSize); for (uint y = gl_LocalInvocationID.y; y < isize.y; y += gl_WorkGroupSize.y) { for (uint x = gl_LocalInvocationID.x; x < isize.x; x += gl_WorkGroupSize.x) inp[y][x] = texelFetch(HOOKED_raw, base + ivec2(x,y) - offset, 0).x; } // synchronize threads barrier(); // do convolution float sum; for (uint y = 0; y < ksize.y; y++) { for (uint x = 0; x < ksize.x; x++) sum += inp[gl_LocalInvocationID.y+y][gl_LocalInvocationID.x+x]; } vec4 color = vec4(HOOKED_mul * sum / (ksize.x * ksize.y), 0, 0, 1); imageStore(out_image, ivec2(gl_GlobalInvocationID), color); } ``` ## Textures Custom textures can be defined and made available to shader stages using `TEXTURE` blocks. These can be used to provide e.g. LUTs or pre-trained weights. The data for a texture is provided as a raw hexadecimal string encoding the in-memory representation of a texture, according to its given texture format, for example: ``` glsl linenums="1" //!TEXTURE COLORS //!SIZE 3 3 //!FORMAT rgba32f //!FILTER NEAREST //!BORDER REPEAT 0000803f000000000000000000000000000000000000803f00000000000000000000000 0000000000000803f00000000000000000000803f0000803f000000000000803f000000 000000803f000000000000803f0000803f00000000000000009a99993e9a99993e9a999 93e000000009a99193F9A99193f9a99193f000000000000803f0000803f0000803f0000 0000 ``` Texture blocks accept the following directives: ### `TEXTURE ` This must be the first directive in a texture block, and marks it as such. The name given is the name that the texture will be referred to (via `BIND` directives). ### `SIZE [ []]` This directive gives the size of the texture, as integers. For example, `//!SIZE 512 512` marks a 512x512 texture block. Textures can be 1D, 2D or 3D depending on the number of coordinates specified. ### `FORMAT ` This directive specifies the texture format. A complete list of known textures is exposed as part of the `pl_gpu` struct metadata, but they follow the format convention `rgba8`, `rg16hf`, `rgba32f`, `r64i` and so on. ### `FILTER ` This directive specifies the texture magnification/minification filter. ### `BORDER ` This directive specifies the border clamping method of the texture. ### `STORAGE` If present, this directive marks the texture as a storage image. It will still be initialized with the initial values, but rather than being bound as a read-only and immutable `sampler2D`, it is bound as a `readwrite coherent image2D`. Such texture scan be used to, for example, store persistent state across invocations of the shader. ## Buffers Custom uniform / storage shader buffer blocks can be defined using `BUFFER` directives. 
The (initial) data for a buffer is provided as a raw hexadecimal string encoding the in-memory representation of a buffer in the corresponding GLSL packing layout (std140 or std430 for uniform and storage blocks, respectively): ``` glsl linenums="1" //!BUFFER buf_uniform //!VAR float foo //!VAR float bar 0000000000000000 //!BUFFER buf_storage //!VAR vec2 bat //!VAR int big[32]; //!STORAGE ``` Buffer blocks accept the following directives: ### `BUFFER ` This must be the first directive in a buffer block, and marks it as such. The name given is mostly cosmetic, as individual variables can be accessed directly using the names given in the corresponding `VAR` directives. ### `STORAGE` If present, this directive marks the buffer as a (readwrite coherent) shader storage block, instead of a readonly uniform buffer block. Such storage blocks can be used to track and evolve state across invocations of this shader. Storage blocks may also be initialized with default data, but this is optional. They can also be initialized as part of the first shader execution (e.g. by testing for `frame == 0`). ### `VAR ` This directive appends a new variable to the shader block, with GLSL type `` and shader name ``. For example, `VAR float foo` introduces a `float foo;` member into the buffer block, and `VAR mat4 transform` introduces a `mat4 transform;` member. It is also possible to introduce array variables, using `[N]` as part of the variable name. ## Tunable parameters Finally, the `PARAM` directive allows introducing tunable shader parameters, which are exposed programmatically as part of the C API (`pl_hook`).[^mpv] [^mpv]: In mpv using `--vo=gpu-next`, these can be set using the [`--glsl-shader-opts` option](https://mpv.io/manual/master/#options-glsl-shader-opts). The default value of a parameter is given as the block body, for example: ``` glsl linenums="1" //!PARAM contrast //!DESC Gain to apply to image brightness //!TYPE float //!MINIMUM 0.0 //!MAXIMUM 100.0 1.0 ``` Parameters accept the following directives: ### `PARAM ` This must be the first directive in a parameter block, and marks it as such. The name given is the name that will be used to refer to this parameter in GLSL code. ### `DESC ` This directive can be used to provide a friendlier description of the shader parameter, exposed as part of the C API to end users. ### `MINIMUM `, `MAXIMUM ` Provides the minimum/maximum value bound of this parameter. If absent, no minimum/maximum is enforced. ### `TYPE [ENUM] >` This gives the type of the parameter, which determines what type of values it can hold and how it will be made available to the shader. `` must be a scalar GLSL numeric type, such as `int`, `float` or `uint`. If a type is `ENUM`, it is treated as an enumeration type. To use this, `type` must either be `int` or `DEFINE`. Instead of providing a single default value, the param body should be a list of all possible enumeration values (as separate lines). These names will be made available inside the shader body (as a `#define`), as well as inside RPN expressions (e.g. `WHEN`). The qualifiers `MINIMUM` and `MAXIMUM` are ignored for `ENUM` parameters, with the value range instead being set implicitly from the list of options. The optional qualifiers `DYNAMIC` or `CONSTANT` mark the parameter as dynamically changing and compile-time constant, respectively. A `DYNAMIC` variable is assumed to change frequently, and will be grouped with other frequently-changing input parameters. 
A `CONSTANT` parameter will be introduced as a compile-time constant into the shader header, which means thy can be used in e.g. constant expressions such as array sizes.[^spec] [^spec]: On supported platforms, these are implemented using specialization constants, which can be updated at run-time without requiring a full shader recompilation. Finally, the special type `TYPE DEFINE` marks a variable as a preprocessor define, which can be used inside `#if` preprocessor expressions. For example: ``` glsl linenums="1" //!PARAM taps //!DESC Smoothing taps //!TYPE DEFINE //!MINIMUM 0 //!MAXIMUM 5 2 //!HOOK LUMA //!BIND HOOKED const uint row_size = 2 * taps + 1; const float weights[row_size] = { #if taps == 0 1.0, #endif #if taps == 1 0.10650697891920, 0.78698604216159, 0.10650697891920, #endif #if taps == 2 0.05448868454964, 0.24420134200323, 0.40261994689424, 0.24420134200323, 0.05448868454964, #endif // ... }; ``` An example of an enum parameter: ``` glsl linenums="1" //!PARAM csp //!DESC Colorspace //!TYPE ENUM int BT709 BT2020 DCIP3 //!HOOK MAIN //!BIND HOOKED const mat3 matrices[3] = { mat3(...), // BT709 mat3(...), // BT2020 mat3(...), // DCIP3 }; #define MAT matrices[csp] // ... ``` ## Full example A collection of full examples can be found in the [mpv user shaders wiki](https://github.com/mpv-player/mpv/wiki/User-Scripts#user-shaders), but here is an example of a parametrized Gaussian smoothed film grain compute shader: ``` glsl linenums="1" //!PARAM intensity //!DESC Film grain intensity //!TYPE float //!MINIMUM 0 0.1 //!PARAM taps //!DESC Film grain smoothing taps //!TYPE DEFINE //!MINIMUM 0 //!MAXIMUM 5 2 //!HOOK LUMA //!BIND HOOKED //!DESC Apply gaussian smoothed film grain //!WHEN intensity 0 > //!COMPUTE 32 32 const uint row_size = 2 * taps + 1; const float weights[row_size] = { #if taps == 0 1.0, #endif #if taps == 1 0.10650697891920, 0.78698604216159, 0.10650697891920, #endif #if taps == 2 0.05448868454964, 0.24420134200323, 0.40261994689424, 0.24420134200323, 0.05448868454964, #endif #if taps == 3 0.03663284536919, 0.11128075847888, 0.21674532140370, 0.27068214949642, 0.21674532140370, 0.11128075847888, 0.03663284536919, #endif #if taps == 4 0.02763055063889, 0.06628224528636, 0.12383153680577, 0.18017382291138, 0.20416368871516, 0.18017382291138, 0.12383153680577, 0.06628224528636, 0.02763055063889, #endif #if taps == 5 0.02219054849244, 0.04558899978527, 0.07981140824009, 0.11906462996609, 0.15136080967773, 0.16396720767670, 0.15136080967773, 0.11906462996609, 0.07981140824009, 0.04558899978527, 0.02219054849244, #endif }; const uvec2 isize = uvec2(gl_WorkGroupSize) + uvec2(2 * taps); shared float grain[isize.y][isize.x]; // PRNG float permute(float x) { x = (34.0 * x + 1.0) * x; return fract(x * 1.0/289.0) * 289.0; } float seed(uvec2 pos) { const float phi = 1.61803398874989; vec3 m = vec3(fract(phi * vec2(pos)), random) + vec3(1.0); return permute(permute(m.x) + m.y) + m.z; } float rand(inout float state) { state = permute(state); return fract(state * 1.0/41.0); } // Turns uniform white noise into gaussian white noise by passing it // through an approximation of the gaussian quantile function float rand_gaussian(inout float state) { const float a0 = 0.151015505647689; const float a1 = -0.5303572634357367; const float a2 = 1.365020122861334; const float b0 = 0.132089632343748; const float b1 = -0.7607324991323768; float p = 0.95 * rand(state) + 0.025; float q = p - 0.5; float r = q * q; float g = q * (a2 + (a1 * r + a0) / (r*r + b1*r + b0)); g *= 0.255121822830526; // 
normalize to [-1,1) return g; } void hook() { // generate grain in `grain` uint num_threads = gl_WorkGroupSize.x * gl_WorkGroupSize.y; for (uint i = gl_LocalInvocationIndex; i < isize.y * isize.x; i += num_threads) { uvec2 pos = uvec2(i % isize.y, i / isize.y); float state = seed(gl_WorkGroupID.xy * gl_WorkGroupSize.xy + pos); grain[pos.y][pos.x] = rand_gaussian(state); } // make writes visible barrier(); // convolve horizontally for (uint y = gl_LocalInvocationID.y; y < isize.y; y += gl_WorkGroupSize.y) { float hsum = 0; for (uint x = 0; x < row_size; x++) { float g = grain[y][gl_LocalInvocationID.x + x]; hsum += weights[x] * g; } // update grain LUT grain[y][gl_LocalInvocationID.x + taps] = hsum; } barrier(); // convolve vertically float vsum = 0.0; for (uint y = 0; y < row_size; y++) { float g = grain[gl_LocalInvocationID.y + y][gl_LocalInvocationID.x + taps]; vsum += weights[y] * g; } vec4 color = HOOKED_tex(HOOKED_pos); color.rgb += vec3(intensity * vsum); imageStore(out_image, ivec2(gl_GlobalInvocationID), color); } ``` libplacebo-v7.349.0/docs/glsl.md000066400000000000000000000442771463457750100164250ustar00rootroot00000000000000# GLSL shader system ## Overall design Shaders in libplacebo are all written in GLSL, and built up incrementally, on demand. Generally, all shaders for each frame are generated *per frame*. So functions like `pl_shader_color_map` etc. are run anew for every frame. This makes the renderer very stateless and allows us to directly embed relevant constants, uniforms etc. as part of the same code that generates the actual GLSL shader. To avoid this from becoming wasteful, libplacebo uses an internal string building abstraction ([`pl_str_builder`](https://code.videolan.org/videolan/libplacebo/-/blob/master/src/pl_string.h#L263)). Rather than building up a string directly, a `pl_str_builder` is like a list of string building functions/callbacks to execute in order to generate the actual shader. Combined with an efficient `pl_str_builder_hash`, this allows us to avoid the bulk of the string templating work for already-cached shaders. ## Legacy API For the vast majority of libplacebo's history, the main entry-point into the shader building mechanism was the `GLSL()` macro ([and variants](#shader-sections-glsl-glslh-glslf)), which works like a `printf`-append: ```c linenums="1" void pl_shader_extract_features(pl_shader sh, struct pl_color_space csp) { if (!sh_require(sh, PL_SHADER_SIG_COLOR, 0, 0)) return; sh_describe(sh, "feature extraction"); pl_shader_linearize(sh, &csp); GLSL("// pl_shader_extract_features \n" "{ \n" "vec3 lms = %f * "$" * color.rgb; \n" "lms = pow(max(lms, 0.0), vec3(%f)); \n" "lms = (vec3(%f) + %f * lms) \n" " / (vec3(1.0) + %f * lms); \n" "lms = pow(lms, vec3(%f)); \n" "float I = dot(vec3(%f, %f, %f), lms); \n" "color = vec4(I, 0.0, 0.0, 1.0); \n" "} \n", PL_COLOR_SDR_WHITE / 10000, SH_MAT3(pl_ipt_rgb2lms(pl_raw_primaries_get(csp.primaries))), PQ_M1, PQ_C1, PQ_C2, PQ_C3, PQ_M2, pl_ipt_lms2ipt.m[0][0], pl_ipt_lms2ipt.m[0][1], pl_ipt_lms2ipt.m[0][2]); } ``` The special macro `$` is a stand-in for an *identifier* (`ident_t`), which is the internal type used to pass references to loaded uniforms, descriptors and so on: ```c typedef unsigned short ident_t; #define $ "_%hx" #define NULL_IDENT 0u // ... 
ident_t sh_var_mat3(pl_shader sh, const char *name, pl_matrix3x3 val); #define SH_MAT3(val) sh_var_mat3(sh, "mat", val) ``` In general, constants in libplacebo are divided into three categories: ### Literal shader constants These are values that are expected to change very infrequently (or never), or for which we want to generate a different shader variant per value. Such values should be directly formatted as numbers into the shader text: `%d`, `%f` and so on. This is commonly used for array sizes, constants that depend only on hardware limits, constants that never change (but which have a friendly name, like `PQ_C2` above), and so on. As an example, the debanding iterations weights are hard-coded like this, because the debanding shader is expected to change as a result of a different number of iterations anyway: ```c linenums="1" // For each iteration, compute the average at a given distance and // pick it instead of the color if the difference is below the threshold. for (int i = 1; i <= params->iterations; i++) { GLSL(// Compute a random angle and distance "d = "$".xy * vec2(%d.0 * "$", %f); \n" // (1) "d = d.x * vec2(cos(d.y), sin(d.y)); \n" // Sample at quarter-turn intervals around the source pixel "avg = T(0.0); \n" "avg += GET(+d.x, +d.y); \n" "avg += GET(-d.x, +d.y); \n" "avg += GET(-d.x, -d.y); \n" "avg += GET(+d.x, -d.y); \n" "avg *= 0.25; \n" // Compare the (normalized) average against the pixel "diff = abs(res - avg); \n" "bound = T("$" / %d.0); \n", prng, i, radius, M_PI * 2, threshold, i); if (num_comps > 1) { GLSL("res = mix(avg, res, greaterThan(diff, bound)); \n"); } else { GLSL("res = mix(avg, res, diff > bound); \n"); } } ``` 1. The `%d.0` here corresponds to the iteration index `i`, while the `%f` corresponds to the fixed constant `M_PI * 2`. ### Specializable shader constants These are used for tunable parameters that are expected to change infrequently during normal playback. These constitute by far the biggest category, and most parameters coming from the various `_params` structs should be loaded like this. They are loaded using the `sh_const_*()` functions, which generate a specialization constant on supported platforms, falling back to a literal shader `#define` otherwise. For anoymous parameters, you can use the short-hands `SH_FLOAT`, `SH_INT` etc.: ```c ident_t sh_const_int(pl_shader sh, const char *name, int val); ident_t sh_const_uint(pl_shader sh, const char *name, unsigned int val); ident_t sh_const_float(pl_shader sh, const char *name, float val); #define SH_INT(val) sh_const_int(sh, "const", val) #define SH_UINT(val) sh_const_uint(sh, "const", val) #define SH_FLOAT(val) sh_const_float(sh, "const", val) ``` Here is an example of them in action: ```c linenums="1" void pl_shader_sigmoidize(pl_shader sh, const struct pl_sigmoid_params *params) { if (!sh_require(sh, PL_SHADER_SIG_COLOR, 0, 0)) return; params = PL_DEF(params, &pl_sigmoid_default_params); float center = PL_DEF(params->center, 0.75); float slope = PL_DEF(params->slope, 6.5); // This function needs to go through (0,0) and (1,1), so we compute the // values at 1 and 0, and then scale/shift them, respectively. 
float offset = 1.0 / (1 + expf(slope * center)); float scale = 1.0 / (1 + expf(slope * (center - 1))) - offset; GLSL("// pl_shader_sigmoidize \n" "color = clamp(color, 0.0, 1.0); \n" "color = vec4("$") - vec4("$") * \n" " log(vec4(1.0) / (color * vec4("$") + vec4("$")) \n" " - vec4(1.0)); \n", SH_FLOAT(center), SH_FLOAT(1.0 / slope), SH_FLOAT(scale), SH_FLOAT(offset)); } ``` The advantage of this type of shader constant is that they will be transparently replaced by dynamic uniforms whenever `pl_render_params.dynamic_constants` is true, which allows the renderer to respond more instantly to changes in the parameters (e.g. as a result of a user dragging a slider around). During "normal" playback, they will then be "promoted" to actual shader constants to prevent them from taking up registers. ### Dynamic variables For anything else, e.g. variables which are expected to change very frequently, you can use the generic `sh_var()` mechanism, which sends constants either as elements of a uniform buffer, or directly as push constants: ```c ident_t sh_var_int(pl_shader sh, const char *name, int val, bool dynamic); ident_t sh_var_uint(pl_shader sh, const char *name, unsigned int val, bool dynamic); ident_t sh_var_float(pl_shader sh, const char *name, float val, bool dynamic); #define SH_INT_DYN(val) sh_var_int(sh, "const", val, true) #define SH_UINT_DYN(val) sh_var_uint(sh, "const", val, true) #define SH_FLOAT_DYN(val) sh_var_float(sh, "const", val, true) ``` These are used primarily when a variable is expected to change very frequently, e.g. as a result of randomness, or for constants which depend on dynamically computed, source-dependent variables (e.g. input frame characteristics): ```c linenums="1" if (params->show_clipping) { const float eps = 1e-6f; GLSL("bool clip_hi, clip_lo; \n" "clip_hi = any(greaterThan(color.rgb, vec3("$"))); \n" "clip_lo = any(lessThan(color.rgb, vec3("$"))); \n" "clip_hi = clip_hi || ipt.x > "$"; \n" "clip_lo = clip_lo || ipt.x < "$"; \n", SH_FLOAT_DYN(pl_hdr_rescale(PL_HDR_PQ, PL_HDR_NORM, tone.input_max) + eps), SH_FLOAT(pl_hdr_rescale(PL_HDR_PQ, PL_HDR_NORM, tone.input_min) - eps), SH_FLOAT_DYN(tone.input_max + eps), SH_FLOAT(tone.input_min - eps)); } ``` ### Shader sections (GLSL, GLSLH, GLSLF) Shader macros come in three main flavors, depending on where the resulting text should be formatted: - `GLSL`: Expanded in the scope of the current `main` function, and is related to code directly processing the current pixel value. - `GLSLH`: Printed to the 'header', before the first function, but after variables, uniforms etc. This is used for global definitions, helper functions, shared memory variables, and so on. - `GLSLF`: Printed to the `footer`, which is always at the end of the current `main` function, but before returning to the caller / writing to the framebuffer. Used to e.g. update SSBO state in preparation for the next frame. Finally, there is a fourth category `GLSLP` (prelude), which is currently only used internally to generate preambles during e.g. compute shader translation. ## New #pragma GLSL macro Starting with libplacebo v6, the internal shader system has been augmented by a custom macro preprocessor, which is designed to ease the boilerplate of writing shaders (and also strip redundant whitespace from generated shaders). The code for this is found in the [tools/glsl_preproc](https://code.videolan.org/videolan/libplacebo/-/tree/master/tools/glsl_preproc) directory. 
In a nutshell, this allows us to embed GLSL snippets directly as `#pragma GLSL` macros (resp. `#pragma GLSLH`, `#pragma GLSLF`): ```c linenums="1" bool pl_shader_sample_bicubic(pl_shader sh, const struct pl_sample_src *src) { ident_t tex, pos, pt; float rx, ry, scale; if (!setup_src(sh, src, &tex, &pos, &pt, &rx, &ry, NULL, &scale, true, LINEAR)) return false; if (rx < 1 || ry < 1) { PL_TRACE(sh, "Using fast bicubic sampling when downscaling. This " "will most likely result in nasty aliasing!"); } // Explanation of how bicubic scaling with only 4 texel fetches is done: // http://www.mate.tue.nl/mate/pdfs/10318.pdf // 'Efficient GPU-Based Texture Interpolation using Uniform B-Splines' sh_describe(sh, "bicubic"); #pragma GLSL /* pl_shader_sample_bicubic */ \ vec4 color; \ { \ vec2 pos = $pos; \ vec2 size = vec2(textureSize($tex, 0)); \ vec2 frac = fract(pos * size + vec2(0.5)); \ vec2 frac2 = frac * frac; \ vec2 inv = vec2(1.0) - frac; \ vec2 inv2 = inv * inv; \ /* compute basis spline */ \ vec2 w0 = 1.0/6.0 * inv2 * inv; \ vec2 w1 = 2.0/3.0 - 0.5 * frac2 * (2.0 - frac); \ vec2 w2 = 2.0/3.0 - 0.5 * inv2 * (2.0 - inv); \ vec2 w3 = 1.0/6.0 * frac2 * frac; \ vec4 g = vec4(w0 + w1, w2 + w3); \ vec4 h = vec4(w1, w3) / g + inv.xyxy; \ h.xy -= vec2(2.0); \ /* sample four corners, then interpolate */ \ vec4 p = pos.xyxy + $pt.xyxy * h; \ vec4 c00 = textureLod($tex, p.xy, 0.0); \ vec4 c01 = textureLod($tex, p.xw, 0.0); \ vec4 c0 = mix(c01, c00, g.y); \ vec4 c10 = textureLod($tex, p.zy, 0.0); \ vec4 c11 = textureLod($tex, p.zw, 0.0); \ vec4 c1 = mix(c11, c10, g.y); \ color = ${float:scale} * mix(c1, c0, g.x); \ } return true; } ``` This gets transformed, by the GLSL macro preprocessor, into an optimized shader template invocation like the following: ```c linenums="1" { // ... sh_describe(sh, "bicubic"); const struct __attribute__((__packed__)) { ident_t pos; ident_t tex; ident_t pt; ident_t scale; } _glsl_330_args = { .pos = pos, .tex = tex, .pt = pt, .scale = sh_const_float(sh, "scale", scale), }; size_t _glsl_330_fn(void *, pl_str *, const uint8_t *); pl_str_builder_append(sh->buffers[SH_BUF_BODY], _glsl_330_fn, &_glsl_330_args, sizeof(_glsl_330_args)); // ... 
} size_t _glsl_330_fn(void *alloc, pl_str *buf, const uint8_t *ptr) { struct __attribute__((__packed__)) { ident_t pos; ident_t tex; ident_t pt; ident_t scale; } vars; memcpy(&vars, ptr, sizeof(vars)); pl_str_append_asprintf_c(alloc, buf, "/* pl_shader_sample_bicubic */\n" " vec4 color;\n" " {\n" " vec2 pos = /*pos*/_%hx;\n" " vec2 size = vec2(textureSize(/*tex*/_%hx, 0));\n" " vec2 frac = fract(pos * size + vec2(0.5));\n" " vec2 frac2 = frac * frac;\n" " vec2 inv = vec2(1.0) - frac;\n" " vec2 inv2 = inv * inv;\n" " /* compute basis spline */\n" " vec2 w0 = 1.0/6.0 * inv2 * inv;\n" " vec2 w1 = 2.0/3.0 - 0.5 * frac2 * (2.0 - frac);\n" " vec2 w2 = 2.0/3.0 - 0.5 * inv2 * (2.0 - inv);\n" " vec2 w3 = 1.0/6.0 * frac2 * frac;\n" " vec4 g = vec4(w0 + w1, w2 + w3);\n" " vec4 h = vec4(w1, w3) / g + inv.xyxy;\n" " h.xy -= vec2(2.0);\n" " /* sample four corners, then interpolate */\n" " vec4 p = pos.xyxy + /*pt*/_%hx.xyxy * h;\n" " vec4 c00 = textureLod(/*tex*/_%hx, p.xy, 0.0);\n" " vec4 c01 = textureLod(/*tex*/_%hx, p.xw, 0.0);\n" " vec4 c0 = mix(c01, c00, g.y);\n" " vec4 c10 = textureLod(/*tex*/_%hx, p.zy, 0.0);\n" " vec4 c11 = textureLod(/*tex*/_%hx, p.zw, 0.0);\n" " vec4 c1 = mix(c11, c10, g.y);\n" " color = /*scale*/_%hx * mix(c1, c0, g.x);\n" " }\n", vars.pos, vars.tex, vars.pt, vars.tex, vars.tex, vars.tex, vars.tex, vars.scale ); return sizeof(vars); } ``` To support this style of shader programming, special syntax was invented: ### Shader variables Instead of being formatted with `"$"`, `%f` etc. and supplied in a big list, printf style, GLSL macros may directly embed shader variables: ```c ident_t pos, tex = sh_bind(sh, texture, ..., &pos, ...); #pragma GLSL vec4 color = texture($tex, $pos); ``` The simplest possible shader variable is just `$name`, which corresponds to any variable of type `ident_t`. More complicated expression are also possible: ```glsl #define RAND3 ${sh_prng(sh, false, NULL)} color.rgb += ${float:params->noise} * RAND3; ``` In the expression `${float:params->noise}`, the `float:` prefix here transforms the shader variable into the equivalent of `SH_FLOAT()` in the legacy API, that is, a generic float (specialization) constant. Other possible types are: ```glsl TYPE i = ${ident: sh_desc(...)}; float f = ${float: M_PI}; int i = ${int: params->width}; uint u = ${uint: sizeof(ssbo)}; ``` In addition to a type specifier, the optional qualifiers `dynamic` and `const` will modify the variable, turning it into (respectively) a dynamically loaded uniform (`SH_FLOAT_DYN` etc.), or a hard-coded shader literal (`%d`, `%f` etc.): ```glsl const float base = ${const float: M_LOG10E}; int seed = ${dynamic int: rand()}; ``` For sampling from component masks, the special types `swizzle` and `(u|i)vecType` can be used to generate the appropriate texture swizzle and corresponding vector type: ```glsl ${vecType: comp_mask} tmp = color.${swizzle: comp_mask}; ``` ### Macro directives Lines beginning with `@` are not included in the GLSL as-is, but instead parsed as macro directives, to control the code flow inside the macro expansion: #### @if / @else Standard-purpose conditional. Example: ```glsl float alpha = ...; @if (repr.alpha == PL_ALPHA_INDEPENDENT) color.a *= alpha; @else color.rgba *= alpha; ``` The condition is evaluated outside the macro (in the enclosing scope) and the resulting boolean variable is directly passed to the template. 
An `@if` block can also enclose multiple lines: ```glsl @if (threshold > 0) { float thresh = ${float:threshold}; coeff = mix(coeff, vec2(0.0), lessThan(coeff, vec2(thresh))); coeff = mix(coeff, vec2(1.0), greaterThan(coeff, vec2(1.0 - thresh))); @} ``` #### @for This can be used to generate (unrolled) loops: ```glsl int offset = ${const int: params->kernel_width / 2}; float sum = 0.0; @for (x < params->kernel_width) sum += textureLodOffset($luma, $pos, 0.0, int(@sum - offset)).r; ``` This introduces a local variable, `@x`, which expands to an integer containing the current loop index. Loop indices always start at 0. Valid terminating conditions include `<` and `<=`, and the loop stop condition is also evaluated as an integer. Alternatively, this can be used to iterate over a bitmask (as commonly used for e.g. components in a color mask): ```glsl float weight = /* ... */; vec4 color = textureLod($tex, $pos, 0.0); @for (c : params->component_mask) sum[@c] += weight * color[@c]; ``` Finally, to combine loops with conditionals, the special syntax `@if @(cond)` may be used to evaluate expressions inside the template loop: ```glsl @for (i < 10) { float weight = /* ... */; @if @(i < 5) weight = -weight; sum += weight * texture(...); @} ``` In this case, the `@if` conditional may only reference local (loop) variables. #### @switch / @case This corresponds fairly straightforwardly to a normal switch/case from C: ```glsl @switch (color->transfer) { @case PL_COLOR_TRC_SRGB: color.rgb = mix(color.rgb * 1.0/12.92, pow((color.rgb + vec3(0.055)) / 1.055, vec3(2.4)), lessThan(vec3(0.04045), color.rgb)); @break; @case PL_COLOR_TRC_GAMMA18: color.rgb = pow(color.rgb, vec3(1.8)); @break; @case PL_COLOR_TRC_GAMMA20: color.rgb = pow(color.rgb, vec3(2.0)); @break; @case PL_COLOR_TRC_GAMMA22: color.rgb = pow(color.rgb, vec3(2.2)); @break; /* ... */ @} ``` The switch body is always evaluated as an `unsigned int`. libplacebo-v7.349.0/docs/index.md000066400000000000000000000027601463457750100165620ustar00rootroot00000000000000# Introduction ## Overview This document will serve as an introduction to and usage example for the [libplacebo](https://code.videolan.org/videolan/libplacebo) API. This is not intended as a full API reference, for that you should see the repository of [header files](https://code.videolan.org/videolan/libplacebo/-/tree/master/src/include/libplacebo), which are written to be (hopefully) understandable as-is. libplacebo exposes large parts of its internal abstractions publicly. This guide will take the general approach of starting as high level as possible and diving into the details in later chapters. A full listing of currently available APIs and their corresponding header files can be seen [here](https://code.videolan.org/videolan/libplacebo#api-overview). ## Getting Started To get started using libplacebo, you need to install it (and its development headers) somehow onto your system. On most distributions, this should be as simple as installing the corresponding `libplacebo-devel` package, or the appropriate variants. You can see a fill list of libplacebo packages and their names [on repology](https://repology.org/project/libplacebo/versions). !!! note "API versions" This document is targeting the "v4 API" overhaul, and as such, examples provided will generally fail to compile on libplacebo versions below v4.x. Alternatively, you can install it from the source code. For that, see the build instructions [located here](https://code.videolan.org/videolan/libplacebo#installing). 
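As a quick sanity check that the development headers and library are picked up correctly, you can build a trivial program against libplacebo. This is only a sketch; the `pkg-config` invocation in the comment is illustrative and should be adapted to your build system:

``` c
// check.c: verify that libplacebo links and report its API version.
// Illustrative compile command (adapt to your build system):
//   cc check.c $(pkg-config --cflags --libs libplacebo) -o check
#include <stdio.h>
#include <libplacebo/log.h>

int main(void)
{
    // PL_API_VER is provided by the installed libplacebo headers
    printf("libplacebo API version: %d\n", PL_API_VER);

    // Creating and destroying a log context exercises the linked library
    pl_log log = pl_log_create(PL_API_VER, pl_log_params(
        .log_cb    = pl_log_color,
        .log_level = PL_LOG_INFO,
    ));
    pl_log_destroy(&log);
    return 0;
}
```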
libplacebo-v7.349.0/docs/options.md000066400000000000000000001135511463457750100171470ustar00rootroot00000000000000# Options The following provides an overview of all options available via the built-in `pl_options` system. ## Global preset ### `preset=` Override all options from all sections by the values from the given preset. The following presets are available: - `default`: Default settings, tuned to provide a balance of performance and quality. Should be fine on almost all systems. - `fast`: Disable all advanced rendering, equivalent to passing `no` to every option. Increases performance on very slow / old integrated GPUs. - `high_quality`: Reset all structs to their `high_quality` presets (where available), set the upscaler to `ewa_lanczossharp`, and enable `deband=yes`. Suitable for use on machines with a discrete GPU. ## Scaling ### `upscaler=` Sets the filter used for upscaling. Defaults to `lanczos`. Pass `upscaler=help` to see a full list of filters. The most relevant options, roughly ordered from fastest to slowest: - `none`: No filter, only use basic GPU texture sampling - `nearest`: Nearest-neighbour (box) sampling (very fast) - `bilinear`: Bilinear sampling (very fast) - `oversample`: Aspect-ratio preserving nearest neighbour sampling (very fast) - `bicubic`: Bicubic interpolation (fast) - `gaussian`: Gaussian smoothing (fast) - `catmull_rom`: Catmull-Rom cubic spline - `lanczos`: Lanczos reconstruction - `ewa_lanczos`: EWA Lanczos ("Jinc") reconstruction (slow) - `ewa_lanczossharp`: Sharpened version of `ewa_lanczos` (slow) - `ewa_lanczos4sharpest`: Very sharp version of `ewa_lanczos`, with anti-ringing (very slow) ### `downscaler=` Sets the filter used for downscaling. Defaults to `hermite`. Pass `downscaler=help` to see a full list of filters. The most relevant options, roughly ordered from fastest to slowest: - `none`: Use the same filter as specified for `upscaler` - `box`: Box averaging (very fast) - `hermite`: Hermite-weighted averaging (fast) - `bilinear`: Bilinear (triangle) averaging (fast) - `bicubic`: Bicubic interpolation (fast) - `gaussian`: Gaussian smoothing (fast) - `catmull_rom`: Catmull-Rom cubic spline - `mitchell`: Mitchell-Netravalia cubic spline - `lanczos`: Lanczos reconstruction ### `plane_upscaler=`, `plane_downscaler=` Override the filter used for upscaling/downscaling planes, e.g. chroma/alpha. If set to `none`, use the same setting as `upscaler` and `downscaler`, respectively. Defaults to `none` for both. ### `frame_mixer=` Sets the filter used for frame mixing (temporal interpolation). Defaults to `oversample`. Pass `frame_mixer=help` to see a full list of filters. The most relevant options, roughly ordered from fastest to slowest: - `none`: Disable frame mixing, show nearest frame to target PTS - `oversample`: Oversampling, only mix "edge" frames while preserving FPS - `hermite`: Hermite-weighted frame mixing - `linear`: Linear frame mixing - `cubic`: Cubic B-spline frame mixing ### `antiringing_strength=<0.0..1.0>` Antiringing strength to use for all filters. A value of `0.0` disables antiringing, and a value of `1.0` enables full-strength antiringing. Defaults to `0.0`. !!! note Specific filter presets may override this option. ### Custom scalers Custom filter kernels can be created by setting the filter to `custom`, in addition to setting the respective options, replacing `` by the corresponding scaler (`upscaler`, `downscaler`, etc.) 
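For illustration, here is a rough sketch of how a custom polar upscaler could be configured through the `pl_options` string API that this document describes. The `upscaler_*` key spellings are assumed from the per-scaler option pattern documented below, and the `jinc` kernel/window names are only examples (`pllog` is an existing `pl_log`, or `NULL`):

``` c
// Sketch: build a custom EWA (polar) upscaler via the pl_options string API.
// Key spellings and the "jinc" kernel/window names are assumptions; pass
// "help" to the kernel option (see below) to list what your build supports.
pl_options opts = pl_options_alloc(pllog);
pl_options_set_str(opts, "upscaler",          "custom");
pl_options_set_str(opts, "upscaler_polar",    "yes");
pl_options_set_str(opts, "upscaler_kernel",   "jinc");
pl_options_set_str(opts, "upscaler_window",   "jinc");
pl_options_set_str(opts, "upscaler_radius",   "3.0");
pl_options_set_str(opts, "upscaler_antiring", "0.8");
```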
#### `_preset=` Overrides the value of all options in this section by their default values from the given filter preset. #### `_kernel=`, `_window=` Choose the filter kernel and window function, rspectively. Pass `help` to get a full list of filter kernels. Defaults to `none`. #### `_radius=<0.0..16.0>` Override the filter kernel radius. Has no effect if the filter kernel is not resizeable. Defaults to `0.0`, meaning "no override". #### `_clamp=<0.0..1.0>` Represents an extra weighting/clamping coefficient for negative weights. A value of `0.0` represents no clamping. A value of `1.0` represents full clamping, i.e. all negative lobes will be removed. Defaults to `0.0`. #### `_blur=<0.0..100.0>` Additional blur coefficient. This effectively stretches the kernel, without changing the effective radius of the filter radius. Setting this to a value of `0.0` is equivalent to disabling it. Values significantly below `1.0` may seriously degrade the visual output, and should be used with care. Defaults to `0.0`. #### `_taper=<0.0..1.0>` Additional taper coefficient. This essentially flattens the function's center. The values within `[-taper, taper]` will return `1.0`, with the actual function being squished into the remainder of `[taper, radius]`. Defaults to `0.0`. #### `_antiring=<0.0..1.0>` Antiringing override for this filter. Defaults to `0.0`, which infers the value from `antiringing_strength`. #### `_param1`, `_param2` `_wparam1`, `_wparam2` Parameters for the respective filter function. Ignored if not tunable. Defaults to `0.0`. #### `_polar=` If true, this filter is a polar/2D filter (EWA), instead of a separable/1D (orthogonal) filter. Defaults to `no`. ## Debanding These options control the optional debanding step. Debanding can be used to reduce the prevalence of quantization artefacts in low quality sources, but can be heavy to compute on weaker devices. !!! note This can also be used as a pure grain generator, by setting `deband_iterations=0`. ### `deband=` Enables debanding. Defaults to `no`. ### `deband_preset=` Overrides the value of all options in this section by their default values from the given preset. ### `deband_iterations=<0..16>` The number of debanding steps to perform per sample. Each step reduces a bit more banding, but takes time to compute. Note that the strength of each step falls off very quickly, so high numbers (>4) are practically useless. Defaults to `1`. ### `deband_threshold=<0.0..1000.0>` The debanding filter's cut-off threshold. Higher numbers increase the debanding strength dramatically, but progressively diminish image details. Defaults to `3.0`. ### `deband_radius=<0.0..1000.0>` The debanding filter's initial radius. The radius increases linearly for each iteration. A higher radius will find more gradients, but a lower radius will smooth more aggressively. Defaults to `16.0`. ### `deband_grain=<0.0..1000.0>` Add some extra noise to the image. This significantly helps cover up remaining quantization artifacts. Higher numbers add more noise. Defaults to `4.0`, which is very mild. ### `deband_grain_neutral_r, deband_grain_neutral_g, deband_grain_neutral_b` 'Neutral' grain value for each channel being debanded. Grain application will be modulated to avoid disturbing colors close to this value. Set this to a value corresponding to black in the relevant colorspace. !!! note This is done automatically by `pl_renderer` and should not need to be touched by the user. This is purely a debug option. 
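As a concrete example, the debanding options above can be set together through the same `pl_options` string interface (a sketch, reusing an `opts` handle created with `pl_options_alloc()`):

``` c
// Sketch: enable debanding and strengthen it slightly relative to the defaults.
pl_options_set_str(opts, "deband",            "yes");
pl_options_set_str(opts, "deband_iterations", "2");
pl_options_set_str(opts, "deband_threshold",  "5.0");
pl_options_set_str(opts, "deband_radius",     "16.0");
pl_options_set_str(opts, "deband_grain",      "6.0");
```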
## Sigmoidization These options control the sigmoidization parameters. Sigmoidization is an optional step during upscaling which reduces the prominence of ringing artifacts. ### `sigmoid=` Enables sigmoidization. Defaults to `yes`. ### `sigmoid_preset=` Overrides the value of all options in this section by their default values from the given preset. ### `sigmoid_center=<0.0..1.0>` The center (bias) of the sigmoid curve. Defaults to `0.75`. ### `sigmoid_slope=<1.0..20.0>` The slope (steepness) of the sigmoid curve. Defaults to `6.5`. ## Color adjustment These options affect the decoding of the source color values, and can be used to subjectively alter the appearance of the video. ### `color_adjustment=` Enables color adjustment. Defaults to `yes`. ### `color_adjustment_preset=` Overrides the value of all options in this section by their default values from the given preset. ### `brightness=<-1.0..1.0>` Brightness boost. Adds a constant bias onto the source luminance signal. `0.0` = neutral, `1.0` = solid white, `-1.0` = solid black. Defaults to `0.0`. ### `contrast=<0.0..100.0>` Contrast gain. Multiplies the source luminance signal by a constant factor. `1.0` = neutral, `0.0` = solid black. Defaults to `1.0`. ### `saturation=<0.0..100.0>` Saturation gain. Multiplies the source chromaticity signal by a constant factor. `1.0` = neutral, `0.0` = grayscale. Defaults to `1.0`. ### `hue=` Hue shift. Corresponds to a rotation of the UV subvector around the neutral axis. Specified in radians. Defaults to `0.0` (neutral). ### `gamma=<0.0..100.0>` Gamma lift. Subjectively brightnes or darkens the scene while preserving overall contrast. `1.0` = neutral, `0.0` = solid black. Defaults to `1.0`. ### `temperature=<-1.143..5.286>` Color temperature shift. Relative to 6500 K, a value of `0.0` gives you 6500 K (no change), a value of `-1.0` gives you 3000 K, and a value of `1.0` gives you 10000 K. Defaults to `0.0`. ## HDR peak detection These options affect the HDR peak detection step. This can be used to greatly improve the HDR tone-mapping process in the absence of dynamic video metadata, but may be prohibitively slow on some devices (e.g. weaker integrated GPUs). ### `peak_detect=` Enables HDR peak detection. Defaults to `yes`. ### `peak_detection_preset=` Overrides the value of all options in this section by their default values from the given preset. `high_quality` also enables frame histogram measurement. ### `peak_smoothing_period=<0.0..1000.0>` Smoothing coefficient for the detected values. This controls the time parameter (tau) of an IIR low pass filter. In other words, it represent the cutoff period (= 1 / cutoff frequency) in frames. Frequencies below this length will be suppressed. This helps block out annoying "sparkling" or "flickering" due to small variations in frame-to-frame brightness. If left as `0.0`, this smoothing is completely disabled. Defaults to `20.0`. ### `scene_threshold_low=<0.0..100.0>`, `scene_threshold_high=<0.0..100.0>` In order to avoid reacting sluggishly on scene changes as a result of the low-pass filter, we disable it when the difference between the current frame brightness and the average frame brightness exceeds a given threshold difference. But rather than a single hard cutoff, which would lead to weird discontinuities on fades, we gradually disable it over a small window of brightness ranges. These parameters control the lower and upper bounds of this window, in units of 1% PQ. Setting either one of these to 0.0 disables this logic. 
Defaults to `1.0` and `3.0`, respectively. ### `peak_percentile=<0.0..100.0>` Which percentile of the input image brightness histogram to consider as the true peak of the scene. If this is set to `100` (or `0`), the brightest pixel is measured. Otherwise, the top of the frequency distribution is progressively cut off. Setting this too low will cause clipping of very bright details, but can improve the dynamic brightness range of scenes with very bright isolated highlights. Defaults to `100.0`. The `high_quality` preset instead sets this to `99.995`, which is very conservative and should cause no major issues in typical content. ### `black_cutoff=<0.0..100.0>` Black cutoff strength. To prevent unnatural pixel shimmer and excessive darkness in mostly black scenes, as well as avoid black bars from affecting the content, (smoothly) cut off any value below this (PQ%) threshold. Defaults to `1.0`, or 1% PQ. Setting this to `0.0` (or a negative value) disables this functionality. ### `allow_delayed_peak=` Allows the peak detection result to be delayed by up to a single frame, which can sometimes improve thoughput, at the cost of introducing the possibility of 1-frame flickers on transitions. Defaults to `no`. ## Color mapping These options affect the way colors are transformed between color spaces, including tone- and gamut-mapping where needed. ### `color_map=` Enables the use of these color mapping settings. Defaults to `yes`. !!! note Disabling this option does *not* disable color mapping, it just means "use the default options for everything". ### `color_map_preset=` Overrides the value of all options in this section by their default values from the given preset. `high_quality` also enables HDR contrast recovery. ### `gamut_mapping=` Gamut mapping function to use to handle out-of-gamut colors, including colors which are out-of-gamut as a consequence of tone mapping. Defaults to `perceptual`. The following options are available: - `clip`: Performs no gamut-mapping, just hard clips out-of-range colors per-channel. - `perceptual`: Performs a perceptually balanced (saturation) gamut mapping, using a soft knee function to preserve in-gamut colors, followed by a final softclip operation. This works bidirectionally, meaning it can both compress and expand the gamut. Behaves similar to a blend of `saturation` and `softclip`. - `softclip`: Performs a perceptually balanced gamut mapping using a soft knee function to roll-off clipped regions, and a hue shifting function to preserve saturation. - `relative`: Performs relative colorimetric clipping, while maintaining an exponential relationship between brightness and chromaticity. - `saturation`: Performs simple RGB->RGB saturation mapping. The input R/G/B channels are mapped directly onto the output R/G/B channels. Will never clip, but will distort all hues and/or result in a faded look. - `absolute`: Performs absolute colorimetric clipping. Like `relative`, but does not adapt the white point. - `desaturate`: Performs constant-luminance colorimetric clipping, desaturing colors towards white until they're in-range. - `darken`: Uniformly darkens the input slightly to prevent clipping on blown-out highlights, then clamps colorimetrically to the input gamut boundary, biased slightly to preserve chromaticity over luminance. - `highlight`: Performs no gamut mapping, but simply highlights out-of-gamut pixels. - `linear`: Linearly/uniformly desaturates the image in order to bring the entire image into the target gamut. 
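For example, to keep the default color mapping behaviour but force a specific gamut mapping function from the list above, the corresponding option can be set through `pl_options` (a sketch, using the same `opts` handle as in the earlier examples):

``` c
// Sketch: explicitly select soft-clipping gamut mapping.
// pl_options_set_str() returns false if the value is not recognized.
pl_options_set_str(opts, "color_map",     "yes");
pl_options_set_str(opts, "gamut_mapping", "softclip");
```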
### Gamut mapping constants These settings can be used to fine-tune the constants used for the various gamut mapping algorithms. #### `perceptual_deadzone=<0.0..1.0>` (Relative) chromaticity protection zone for `perceptual` mapping. Defaults to `0.30`. #### `perceptual_strength=<0.0..1.0>` Strength of the `perceptual` saturation mapping component. Defaults to `0.80`. #### `colorimetric_gamma=<0.0..10.0>` I vs C curve gamma to use for colorimetric clipping (`relative`, `absolute` and `darken`). Defaults to `1.80`. #### `softclip_knee=<0.0..1.0>` Knee point to use for soft-clipping methods (`perceptual`, `softclip`). Defaults to `0.70`. #### `softclip_desat=<0.0..1.0>` Desaturation strength for `softclip`. Defaults to `0.35`. ### `lut3d_size_I=<0..1024>`, `lut3d_size_C=<0..1024>`, `lut3d_size_h=<0..1024>` Gamut mapping 3DLUT size. Setting a dimension to `0` picks the default value. Defaults to `48`, `32` and `256`, respectively, for channels `I`, `C` and `h`. ### `lut3d_tricubic=` Use higher quality, but slower, tricubic interpolation for gamut mapping 3DLUTs. May substantially improve the 3DLUT gamut mapping accuracy, in particular at smaller 3DLUT sizes. Shouldn't have much effect at the default size. Defaults to `no`. ### `gamut_expansion=` If enabled, allows the gamut mapping function to expand the gamut, in cases where the target gamut exceeds that of the source. If disabled, the source gamut will never be enlarged, even when using a gamut mapping function capable of bidirectional mapping. Defaults to `no`. ### `tone_mapping=` Tone mapping function to use for adapting between difference luminance ranges, including black point adaptation. Defaults to `spline`. The following functions are available: - `clip`: Performs no tone-mapping, just clips out-of-range colors. Retains perfect color accuracy for in-range colors but completely destroys out-of-range information. Does not perform any black point adaptation. - `spline`: Simple spline consisting of two polynomials, joined by a single pivot point, which is tuned based on the source scene average brightness (taking into account dynamic metadata if available). This function can be used for both forward and inverse tone mapping. - `st2094-40`: EETF from SMPTE ST 2094-40 Annex B, which uses the provided OOTF based on Bezier curves to perform tone-mapping. The OOTF used is adjusted based on the ratio between the targeted and actual display peak luminances. In the absence of HDR10+ metadata, falls back to a simple constant bezier curve. - `st2094-10`: EETF from SMPTE ST 2094-10 Annex B.2, which takes into account the input signal average luminance in addition to the maximum/minimum. !!! warning This does *not* currently include the subjective gain/offset/gamma controls defined in Annex B.3. (Open an issue with a valid sample file if you want such parameters to be respected.) - `bt2390`: EETF from the ITU-R Report BT.2390, a hermite spline roll-off with linear segment. - `bt2446a`: EETF from ITU-R Report BT.2446, method A. Can be used for both forward and inverse tone mapping. - `reinhard:` Very simple non-linear curve. Named after Erik Reinhard. - `mobius`: Generalization of the `reinhard` tone mapping algorithm to support an additional linear slope near black. The name is derived from its function shape `(ax+b)/(cx+d)`, which is known as a Möbius transformation. This function is considered legacy/low-quality, and should not be used. 
- `hable`: Piece-wise, filmic tone-mapping algorithm developed by John Hable for use in Uncharted 2, inspired by a similar tone-mapping algorithm used by Kodak. Popularized by its use in video games with HDR rendering. Preserves both dark and bright details very well, but comes with the drawback of changing the average brightness quite significantly. This is sort of similar to `reinhard` with `reinhard_contrast=0.24`. This function is considered legacy/low-quality, and should not be used. - `gamma`: Fits a gamma (power) function to transfer between the source and target color spaces, effectively resulting in a perceptual hard-knee joining two roughly linear sections. This preserves details at all scales, but can result in an image with a muted or dull appearance. This function is considered legacy/low-quality and should not be used. - `linear`: Linearly stretches the input range to the output range, in PQ space. This will preserve all details accurately, but results in a significantly different average brightness. Can be used for inverse tone-mapping in addition to regular tone-mapping. - `linearlight`: Like `linear`, but in linear light (instead of PQ). Works well for small range adjustments but may cause severe darkening when downconverting from e.g. 10k nits to SDR. ### Tone-mapping constants These settings can be used to fine-tune the constants used for the various tone mapping algorithms. #### `knee_adaptation=<0.0..1.0>` Configures the knee point, as a ratio between the source average and target average (in PQ space). An adaptation of `1.0` always adapts the source scene average brightness to the (scaled) target average, while a value of `0.0` never modifies scene brightness. Affects all methods that use the ST2094 knee point determination (currently `spline`, `st2094-40` and `st2094-10`). Defaults to `0.4`. #### `knee_minimum=<0.0..0.5>`, `knee_maximum=<0.5..1.0>` Configures the knee point minimum and maximum, respectively, as a percentage of the PQ luminance range. Provides a hard limit on the knee point chosen by `knee_adaptation`. Defaults to `0.1` and `0.8`, respectively. #### `knee_default=<0.0..1.0>` Default knee point to use in the absence of source scene average metadata. Normally, this is ignored in favor of picking the knee point as the (relative) source scene average brightness level. Defaults to `0.4`. #### `knee_offset=<0.5..2.0>` Knee point offset (for `bt2390` only). Note that a value of `0.5` is the spec-defined default behavior, which differs from the libplacebo default of `1.0`. #### `slope_tuning=<0.0..10.0>`, `slope_offset=<0.0..1.0>` For the single-pivot polynomial (spline) function, this controls the coefficients used to tune the slope of the curve. This tuning is designed to make the slope closer to `1.0` when the difference in peaks is low, and closer to linear when the difference between peaks is high. Defaults to `1.5`, with offset `0.2`. #### `spline_contrast=<0.0..1.5>` Contrast setting for the `spline` function. Higher values make the curve steeper (closer to `clip`), preserving midtones at the cost of losing shadow/highlight details, while lower values make the curve shallowed (closer to `linear`), preserving highlights at the cost of losing midtone contrast. Values above `1.0` are possible, resulting in an output with more contrast than the input. Defaults to `0.5`. #### `reinhard_contrast=<0.0..1.0>` For the `reinhard` function, this specifies the local contrast coefficient at the display peak. 
Essentially, a value of `0.5` implies that the reference white will be about half as bright as when clipping. Defaults to `0.5`. #### `linear_knee=<0.0..1.0>` For legacy functions (`mobius`, `gamma`) which operate on linear light, this directly sets the corresponding knee point. Defaults to `0.3`. #### `exposure=<0.0..10.0>` For linear methods (`linear`, `linearlight`), this controls the linear exposure/gain applied to the image. Defaults to `1.0`. ### `inverse_tone_mapping=` If enabled, and supported by the given tone mapping function, will perform inverse tone mapping to expand the dynamic range of a signal. libplacebo is not liable for any HDR-induced eye damage. Defaults to `no`. ### `tone_map_metadata=` Data source to use when tone-mapping. Setting this to a specific value allows overriding the default metadata preference logic. Defaults to `any`. ### `tone_lut_size=<0..4096>` Tone mapping LUT size. Setting `0` picks the default size. Defaults to `256`. ### `contrast_recovery=<0.0..2.0>` HDR contrast recovery strength. If set to a value above `0.0`, the source image will be divided into high-frequency and low-frequency components, and a portion of the high-frequency image is added back onto the tone-mapped output. May cause excessive ringing artifacts for some HDR sources, but can improve the subjective sharpness and detail left over in the image after tone-mapping. Defaults to `0.0`. The `high_quality` preset sets this to `0.3`, which is a fairly conservative value and should subtly enhance the image quality without creating too many obvious artefacts. ### `contrast_smoothness=<1.0..32.0>` HDR contrast recovery lowpass kernel size. Increasing or decreasing this will affect the visual appearance substantially. Defaults to `3.5`. ### Debug options Miscellaneous debugging and display options related to tone/gamut mapping. #### `force_tone_mapping_lut=` Force the use of a full tone-mapping LUT even for functions that have faster pure GLSL replacements (e.g. `clip`, `linear`, `saturation`). This is a debug option. Defaults to `no`. #### `visualize_lut=` Visualize the color mapping LUTs. Displays a (PQ-PQ) graph of the active tone-mapping LUT. The X axis shows PQ input values, the Y axis shows PQ output values. The tone-mapping curve is shown in green/yellow. Yellow means the brightness has been boosted from the source, dark blue regions show where the brightness has been reduced. The extra colored regions and lines indicate various monitor limits, as well as a reference diagonal (neutral tone-mapping) and source scene average brightness information (if available). The background behind this shows a visualization of the gamut mapping 3DLUT, in IPT space. Iso-luminance, iso-chromaticity and iso-hue lines are highlighted (depending on the exact value of `visualize_theta`). Defaults to `no`. #### `visualize_lut_x0`, `visualize_lut_y0`, `visualize_lut_x1`, `visualize_lut_y1` Controls where to draw the LUT visualization, relative to the rendered video. Defaults to `0.0` for `x0`/`y0`, and `1.0` for `x1`/`y1`. #### `visualize_hue=`, `visualize_theta=` Controls the rotation of the gamut 3DLUT visualization. The `hue` parameter rotates the gamut through hue space (around the `I` axis), while the `theta` parameter vertically rotates the cross section (around the `C` axis), in radians. Defaults to `0.0` for both. #### `show_clipping=` Graphically highlight hard-clipped pixels during tone-mapping (i.e. pixels that exceed the claimed source luminance range). Defaults to `no`.
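As a rough illustration of how these options are typically consumed, here is a minimal sketch using the string-based option API (this assumes the `pl_options` helpers from `<libplacebo/options.h>`; the `apply_tone_mapping_opts()` wrapper and the specific values chosen are made up for this example):

``` c
#include <libplacebo/options.h>
#include <libplacebo/renderer.h>

// Hypothetical helper: parse a few of the tone-mapping options documented
// above and use the resulting parameters for a single render call.
static bool apply_tone_mapping_opts(pl_log log, pl_renderer renderer,
                                    const struct pl_frame *image,
                                    const struct pl_frame *target)
{
    pl_options opts = pl_options_alloc(log);
    if (!opts)
        return false;

    // Start from the recommended defaults, then override individual keys
    pl_options_reset(opts, &pl_render_default_params);
    bool ok = pl_options_set_str(opts, "tone_mapping", "bt2390") &&
              pl_options_set_str(opts, "contrast_recovery", "0.3") &&
              pl_options_set_str(opts, "inverse_tone_mapping", "no");

    // `opts->params` reflects the currently parsed option values
    ok = ok && pl_render_image(renderer, image, target, &opts->params);
    pl_options_free(&opts);
    return ok;
}
```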
## Dithering These options affect the way colors are dithered before output. Dithering is always required to avoid introducing banding artefacts as a result of quantization to a lower bit depth output texture. ### `dither=` Enables dithering. Defaults to `yes`. ### `dither_preset=` Overrides the value of all options in this section by their default values from the given preset. ### `dither_method=` Chooses the dithering method to use. Defaults to `blue`. The following methods are available: - `blue`: Dither with blue noise. Very high quality, but requires the use of a LUT. !!! warning Computing a blue noise texture with a large size can be very slow, however this only needs to be performed once. Even so, using this with a `dither_lut_size` greater than `6` is generally ill-advised. - `ordered_lut`: Dither with an ordered (bayer) dither matrix, using a LUT. Low quality, and since this also uses a LUT, there's generally no advantage to picking this instead of `blue`. It's mainly there for testing. - `ordered`: The same as `ordered_lut`, but uses fixed function math instead of a LUT. This is faster, but only supports a fixed dither matrix size of 16x16 (equivalent to `dither_lut_size=4`). - `white`: Dither with white noise. This does not require a LUT and is fairly cheap to compute. Unlike the other modes it doesn't show any repeating patterns either spatially or temporally, but the downside is that this is visually fairly jarring due to the presence of low frequencies in the noise spectrum. ### `dither_lut_size=<1..8>` For the dither methods which require the use of a LUT (`blue`, `ordered_lut`), this controls the size of the LUT (base 2). Defaults to `6`. ### `dither_temporal=` Enables temporal dithering. This reduces the persistence of dithering artifacts by perturbing the dithering matrix per frame. Defaults to `no`. !!! warning This can cause nasty aliasing artifacts on some LCD screens. ## Cone distortion These options can be optionally used to modulate the signal in LMS space, in particular, to simulate color blindness. ### `cone=` Enables cone distortion. Defaults to `no`. ### `cone_preset=` Overrides the value of all options in this section by their default values from the given preset. The following presets are available: - `normal`: No distortion (92% of population) - `protanomaly`: Red cone deficiency (0.66% of population) - `protanopia`: Red cone absence (0.59% of population) - `deuteranomaly`: Green cone deficiency (2.7% of population) - `deuteranopia`: Green cone absence (0.56% of population) - `tritanomaly`: Blue cone deficiency (0.01% of population) - `tritanopia`: Blue cone absence (0.016% of population) - `monochromacy`: Blue cones only (<0.001% of population) - `achromatopsia`: Rods only (<0.0001% of population) ### `cones=` Choose the set of cones to modulate. Defaults to `none`. ### `cone_strength=` Defect/gain coefficient to apply to these cones. `1.0` = unaffected, `0.0` = full blindness. Defaults to `1.0`. Values above `1.0` can be used to instead boost the signal going to this cone. For example, to partially counteract deuteranomaly, you could set `cones=m`, `cone_strength=2.0`. ## Output blending These options affect the way the image is blended onto the output framebuffer. ### `blend=` Enables output blending. Defaults to `no`. ### `blend_preset=` Overrides the value of all options in this section by their default values from the given preset. Currently, the only preset is `alpha_overlay`, which corresponds to normal alpha blending.
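For reference, a minimal sketch of what enabling this preset could look like from the C API side (the `render_blended()` wrapper is hypothetical; `pl_render_params.blend_params` together with the predefined `pl_alpha_overlay` blend parameters are assumed to be the equivalent of `blend=yes`, `blend_preset=alpha_overlay`):

``` c
#include <libplacebo/renderer.h>

// Hypothetical wrapper: render `image` onto `target` with standard alpha
// blending enabled, i.e. blending the image against the existing
// framebuffer contents according to its alpha channel.
static bool render_blended(pl_renderer renderer, const struct pl_frame *image,
                           const struct pl_frame *target)
{
    struct pl_render_params params = pl_render_default_params;
    params.blend_params = &pl_alpha_overlay; // predefined alpha blending preset

    return pl_render_image(renderer, image, target, &params);
}
```

The individual per-component modes that such a preset expands to are the `blend_src_*`/`blend_dst_*` options described next.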
### `blend_src_rgb`, `blend_src_alpha`, `blend_dst_rgb`, `blend_dst_alpha` Choose the blending mode for each component. Defaults to `zero` for all. The following modes are available: - `zero`: Component will be unused. - `one`: Component will be added at full strength. - `alpha`: Component will be multiplied by the source alpha value. - `one_minus_alpha`: Component will be multiplied by 1 minus the source alpha. ## Deinterlacing Configures the settings used to deinterlace frames, if required. !!! note The use of these options requires the caller to pass extra metadata to incoming frames to link them together / mark them as fields. ### `deinterlace=` Enables deinterlacing. Defaults to `no`. ### `deinterlace_preset=` Overrides the value of all options in this section by their default values from the given preset. ### `deinterlace_algo=` Chooses the algorithm to use for deinterlacing. Defaults to `yadif`. The following algorithms are available: - `weave`: No-op deinterlacing, just sample the weaved frame un-touched. - `bob`: Naive bob deinterlacing. Doubles the field lines vertically. - `yadif`: "Yet another deinterlacing filter". Deinterlacer with temporal and spatial information. Based on FFmpeg's Yadif filter algorithm, but adapted slightly for the GPU. ### `deinterlace_skip_spatial=` Skip the spatial interlacing check for `yadif`. Defaults to `no`. ## Distortion The settings in this section can be used to distort/transform the output image. ### `distort=` Enables distortion. Defaults to `no`. ### `distort_preset=` Overrides the value of all options in this section by their default values from the given preset. ### `distort_scale_x`, `distort_scale_y` Scale the image in the X/Y dimension by an arbitrary factor. Corresponds to the main diagonal of the transformation matrix. Defaults to `1.0` for both. ### `distort_shear_x`, `distort_shear_y` Adds the X/Y dimension onto the Y/X dimension (respectively), scaled by an arbitrary amount. Corresponds to the anti-diagonal of the 2x2 transformation matrix. Defaults to `0.0` for both. ### `distort_offset_x`, `distort_offset_y` Offsets the X/Y dimensions by an arbitrary offset, relative to the image size. Corresponds to the bottom row of a 3x3 affine transformation matrix. Defaults to `0.0` for both. ### `distort_unscaled=` If enabled, the texture is placed inside the center of the canvas without scaling. Otherwise, it is effectively stretched to the canvas size. Defaults to `no`. !!! note This option has no effect when using `pl_renderer`. ### `distort_constrain=` If enabled, the transformation is automatically scaled down and shifted to ensure that the resulting image fits inside the output canvas. Defaults to `no`. ### `distort_bicubic=` If enabled, use bicubic interpolation rather than faster bilinear interpolation. Higher quality but slower. Defaults to `no`. ### `distort_addreess_mode=` Specifies the texture address mode to use when sampling out of bounds. Defaults to `clamp`. ### `distort_alpha_mode=` If set to something other than `none`, all out-of-bounds accesses will instead be treated as transparent, according to the given alpha mode. ## Miscellaneous renderer settings ### `error_diffusion=` Enables error diffusion dithering. Error diffusion is a very slow and memory intensive method of dithering without the use of a fixed dither pattern. If set, this will be used instead of `dither_method` whenever possible. It's highly recommended to use this only for still images, not moving video. Defaults to `none`. 
The following options are available: - `simple`: Simple error diffusion (fast) - `false-fs`: False Floyd-Steinberg kernel (fast) - `sierra-lite`: Sierra Lite kernel (slow) - `floyd-steinberg`: Floyd-Steinberg kernel (slow) - `atkinson`: Atkinson kernel (slow) - `jarvis-judice-ninke`: Jarvis, Judice & Ninke kernel (very slow) - `stucki`: Stucki kernel (very slow) - `burkes`: Burkes kernel (very slow) - `sierra-2`: Two-row Sierra (very slow) - `sierra-3`: Three-row Sierra (very slow) ### `lut_type=` Overrides the color mapping LUT type. Defaults to `unknown`. The following options are available: - `unknown`: Unknown LUT type, try and guess from metadata - `native`: LUT is applied to raw image contents - `normalized`: LUT is applied to normalized (HDR) RGB values - `conversion`: LUT fully replaces color conversion step !!! note There is no way to load LUTs via the options mechanism, so this option only has an effect if the LUT is loaded via external means. ### `background_r=<0.0..1.0>`, `background_g=<0.0..1.0>`, `background_b=<0.0..1.0>` If the image being rendered does not span the entire size of the target, it will be cleared explicitly using this background color (RGB). Defaults to `0.0` for all. ### `background_transparency=<0.0..1.0>` The (inverted) alpha value of the background clear color. Defaults to `0.0`. ### `skip_target_clearing=` If set, skips clearing the background backbuffer entirely. Defaults to `no`. !!! note This is automatically skipped if the image to be rendered would completely cover the backbuffer. ### `corner_rounding=<0.0..1.0>` If set to a value above `0.0`, the output will be rendered with rounded corners, as if an alpha transparency mask had been applied. The value indicates the relative fraction of the side length to round - a value of `1.0` rounds the corners as much as possible. Defaults to `0.0`. ### `blend_against_tiles=` If true, then transparent images will be made opaque by painting them against a checkerboard pattern consisting of alternating colors. Defaults to `no`. ### `tile_color_hi_r`, `tile_color_hi_g`, `tile_color_hi_b`, `tile_color_lo_r`, `tile_color_lo_g`, `tile_color_lo_b` The colors of the light/dark tiles used for `blend_against_tiles`. Defaults to `0.93` for light R/G/B and `0.87` for dark R/G/B, respectively. ### `tile_size=<2..256>` The size, in output pixels, of the tiles used for `blend_against_tiles`. Defaults to `32`. ## Performance / quality trade-offs These should generally be left off where quality is desired, as they can degrade the result quite noticeably; but may be useful for older or slower hardware. Note that libplacebo will automatically disable advanced features on hardware where they are unsupported, regardless of these settings. So only enable them if you need a performance bump. ### `skip_anti_aliasing=` Disables anti-aliasing on downscaling. This will result in moiré artifacts and nasty, jagged pixels when downscaling, except for some very limited special cases (e.g. bilinear downsampling to exactly 0.5x). Significantly speeds up downscaling with high downscaling ratios. Defaults to `no`. ### `preserve_mixing_cache=` Normally, when the size of the target framebuffer changes, or the render parameters are updated, the internal cache of mixed frames must be discarded in order to re-render all required frames. Setting this option to `yes` will skip the cache invalidation and instead re-use the existing frames (with bilinear scaling to the new size if necessary).
This comes at a hefty quality loss shortly after a resize, but should make it much more smooth. Defaults to `no`. ## Debugging, tuning and testing These may affect performance or may make debugging problems easier, but shouldn't have any effect on the quality (except where otherwise noted). ### `skip_caching_single_frame=` Normally, single frames will also get pushed through the mixer cache, in order to speed up re-draws. Enabling this option disables that logic, causing single frames to bypass being written to the cache. Defaults to `no`. !!! note If a frame is *already* cached, it will be re-used, regardless. ### `disable_linear_scaling=` Disables linearization / sigmoidization before scaling. This might be useful when tracking down unexpected image artifacts or excessing ringing, but it shouldn't normally be necessary. Defaults to `no`. ### `disable_builtin_scalers=` Forces the use of the slower, "general" scaling algorithms even when faster built-in replacements exist. Defaults to `no`. ### `correct_subpixel_offsets=` Forces correction of subpixel offsets (using the configured `upscaler`). Defaults to `no`. !!! warning Enabling this may cause such images to get noticeably blurrier, especially when using a polar scaler. It's not generally recommended to enable this. ### `force_dither=` Forces the use of dithering, even when rendering to 16-bit FBOs. This is generally pretty pointless because most 16-bit FBOs have high enough depth that rounding errors are below the human perception threshold, but this can be used to test the dither code. Defaults to `no`. ### `disable_dither_gamma_correction=` Disables the gamma-correct dithering logic which normally applies when dithering to low bit depths. No real use, outside of testing. Defaults to `no`. ### `disable_fbos=` Completely overrides the use of FBOs, as if there were no renderable texture format available. This disables most features. Defaults to `no`. ### `force_low_bit_depth_fbos=` Use only low-bit-depth FBOs (8 bits). Note that this also implies disabling linear scaling and sigmoidization. Defaults to `no`. ### `dynamic_constants=` If this is enabled, all shaders will be generated as "dynamic" shaders, with any compile-time constants being replaced by runtime-adjustable values. This is generally a performance loss, but has the advantage of being able to freely change parameters without triggering shader recompilations. It's a good idea to enable this if you will change these options very frequently, but it should be disabled once those values are "dialed in". Defaults to `no`. libplacebo-v7.349.0/docs/renderer.md000066400000000000000000000252021463457750100172550ustar00rootroot00000000000000# Rendering content: pl_frame, pl_renderer, and pl_queue This example roughly builds off the [previous entry](./basic-rendering.md), and as such will not cover the basics of how to create a window, initialize a `pl_gpu` and get pixels onto the screen. ## Renderer The `pl_renderer` set of APIs represents the highest-level interface into libplacebo, and is what most users who simply want to display e.g. a video feed on-screen will want to be using. The basic initialization is straightforward, requiring no extra parameters: ``` c linenums="1" pl_renderer renderer; init() { renderer = pl_renderer_create(pllog, gpu); if (!renderer) goto error; // ... } uninit() { pl_renderer_destroy(&renderer); } ``` What makes the renderer powerful is the large number of `pl_render_params` it exposes. 
By default, libplacebo provides several presets to use: * **pl_render_fast_params**: Disables everything except for defaults. This is the fastest possible configuration. * **pl_render_default_params**: Contains the recommended default parameters, including some slightly higher quality scaling, as well as dithering. * **pl_render_high_quality_params**: A preset of reasonable defaults for a higher-end machine (i.e. anything with a discrete GPU). This enables most of the basic functionality, including upscaling, downscaling, debanding and better HDR tone mapping. Covering all of the possible options exposed by `pl_render_params` is out-of-scope of this example and would be better served by looking at [the API documentation](https://code.videolan.org/videolan/libplacebo/-/blob/master/src/include/libplacebo/renderer.h#L94). ### Frames [`pl_frame`](https://code.videolan.org/videolan/libplacebo/-/blob/master/src/include/libplacebo/renderer.h#L503) is the struct libplacebo uses to group textures and their metadata together into a coherent unit that can be rendered using the renderer. This is not currently a dynamically allocated or refcounted heap object, it is merely a struct that can live on the stack (or anywhere else). The actual data lives in corresponding `pl_tex` objects referenced in each of the frame's planes. ``` c linenums="1" bool render_frame(const struct pl_frame *image, const struct pl_swapchain_frame *swframe) { struct pl_frame target; pl_frame_from_swapchain(&target, swframe); return pl_render_image(renderer, image, &target, &pl_render_default_params); } ``` !!! note "Renderer state" The `pl_renderer` is conceptually (almost) stateless. The only thing that is needed to get a different result is to change the render params, which can be varied freely on every call, if the user desires. The one case where this is not entirely true is when using frame mixing (see below), or when using HDR peak detection. In this case, the renderer can be explicitly reset using `pl_renderer_flush_cache`. To upload frames, the easiest methods are made available as dedicated helpers in [`<libplacebo/utils/upload.h>`](https://code.videolan.org/videolan/libplacebo/-/blob/master/src/include/libplacebo/utils/upload.h), and [`<libplacebo/utils/libav.h>`](https://code.videolan.org/videolan/libplacebo/-/blob/master/src/include/libplacebo/utils/libav.h) (for AVFrames). In general, I recommend checking out the [demo programs](https://code.videolan.org/videolan/libplacebo/-/tree/master/demos) for a clearer illustration of how to use them in practice. ### Shader cache The renderer internally generates, compiles and caches a potentially large number of shader programs, some of which can be complex. On some platforms (notably D3D11), these can be quite costly to recompile on every program launch. As such, the renderer offers a way to save/restore its internal shader cache from some external location (managed by the API user). The use of this API is highly recommended: ``` c linenums="1" hl_lines="1-2 10-14 21-27" static uint8_t *load_saved_cache(); static void store_saved_cache(uint8_t *cache, size_t bytes); void init() { renderer = pl_renderer_create(pllog, gpu); if (!renderer) goto error; uint8_t *cache = load_saved_cache(); if (cache) { pl_renderer_load(renderer, cache); free(cache); } // ... } void uninit() { size_t cache_bytes = pl_renderer_save(renderer, NULL); uint8_t *cache = malloc(cache_bytes); if (cache) { pl_renderer_save(renderer, cache); store_saved_cache(cache, cache_bytes); free(cache); } pl_renderer_destroy(&renderer); } ```
!!! warning "Cache safety" libplacebo performs only minimal validity checking on the shader cache, and in general, cannot possibly guard against malicious alteration of such files. Loading a cache from an untrusted source represents a remote code execution vector. ## Frame mixing One of the renderer's most powerful features is its ability to compensate for differences in framerates between the source and display by using [frame mixing](https://github.com/mpv-player/mpv/wiki/Interpolation) to blend adjacent frames together. Using this API requires presenting the renderer, at each vsync, with a `pl_frame_mix` struct, describing the current state of the vsync. In principle, such structs can be constructed by hand. To do this, all of the relevant frames (nearby the vsync timestamp) must be collected, and their relative distances to the vsync determined, by normalizing all PTS values such that the vsync represents time `0.0` (and a distance of `1.0` represents the nominal duration between adjacent frames). Note that timing vsyncs, and determining the correct vsync duration, are both left as problems for the user to solve.[^timing] Here is an example of a valid struct: [^timing]: However, this may change in the future, as the recent introduction of the Vulkan display timing extension may result in display timing feedback being added to the `pl_swapchain` API. That said, as of writing, this has not yet happened. ``` c (struct pl_frame_mix) { .num_frames = 6, .frames = (const struct pl_frame *[]) { /* frame 0 */ /* frame 1 */ /* ... */ /* frame 5 */ }, .signatures = (uint64_t[]) { 0x0, 0x1, 0x2, 0x3, 0x4, 0x5 // (1) }, .timestamps = (float[]) { -2.4, -1.4, -0.4, 0.6, 1.6, 2.6, // (2) }, .vsync_duration = 0.4, // 24 fps video on 60 fps display } ``` 1. These must be unique per frame, but always refer to the same frame. For example, this could be based on the frame's PTS, the frame's numerical ID (in order of decoding), or some sort of hash. The details don't matter, only that this uniquely identifies specific frames. 2. Typically, for CFR sources, frame timestamps will always be separated in this list by a distance of 1.0. In this example, the vsync falls roughly halfway (but not quite) in between two adjacent frames (with IDs 0x2 and 0x3). !!! note "Frame mixing radius" In this example, the frame mixing radius (as determined by `pl_frame_mix_radius`) is `3.0`, so we include all frames that fall within the timestamp interval of `[-3, 3)`. In general, you should consult this function to determine what frames need to be included in the `pl_frame_mix` - though including more frames than needed is not an error. ### Frame queue Because this API is rather unwieldy and clumsy to use directly, libplacebo provides a helper abstraction known as `pl_queue` to assist in transforming some arbitrary source of frames (such as a video decoder) into nicely packed `pl_frame_mix` structs ready for consumption by the `pl_renderer`: ``` c linenums="1" #include <libplacebo/utils/frame_queue.h> pl_queue queue; void init() { queue = pl_queue_create(gpu); } void uninit() { pl_queue_destroy(&queue); // ... } ``` This queue can be interacted with through a number of mechanisms: either pushing frames (blocking or non-blocking), or by having the queue poll frames (via blocking or non-blocking callback) as-needed. For a full overview of the various methods of pushing and polling frames, check the [API documentation](https://code.videolan.org/videolan/libplacebo/-/blob/master/src/include/libplacebo/utils/frame_queue.h#L115).
In this example, I will assume that we have a separate decoder thread pushing frames into the `pl_queue` in a blocking manner: ``` c linenums="1" static void decoder_thread(void) { void *frame; while ((frame = /* decode new frame */)) { pl_queue_push_block(queue, UINT64_MAX, &(struct pl_source_frame) { .pts = /* frame pts */, .duration = /* frame duration */, .map = /* map callback */, .unmap = /* unmap callback */, .frame_data = frame, }); } pl_queue_push(queue, NULL); // signal EOF } ``` Now, in our render loop, we want to call `pl_queue_update` with appropriate values to retrieve the correct frame mix for each vsync: ``` c linenums="1" hl_lines="3-10 12-21 27" bool render_frame(const struct pl_swapchain_frame *swframe) { struct pl_frame_mix mix; enum pl_queue_status res; res = pl_queue_update(queue, &mix, pl_queue_params( .pts = /* time of next vsync */, .radius = pl_frame_mix_radius(&render_params), .vsync_duration = /* if known */, .timeout = UINT64_MAX, // (2) )); switch (res) { case PL_QUEUE_OK: break; case PL_QUEUE_EOF: /* no more frames */ return false; case PL_QUEUE_ERR: goto error; // (1) } struct pl_frame target; pl_frame_from_swapchain(&target, swframe); return pl_render_image_mix(renderer, &mix, &target, &pl_render_default_params); } ``` 1. There is a fourth status, `PL_QUEUE_MORE`, which is returned only if the resulting frame mix is incomplete (and the timeout was reached) - basically this can only happen if the queue runs dry due to frames not being supplied fast enough. In this example, since we are setting `timeout` to `UINT64_MAX`, we will never get this return value. 2. Setting this makes `pl_queue_update` block indefinitely until sufficiently many frames have been pushed into the `pl_queue` from our separate decoding thread. ### Deinterlacing The frame queue also vastly simplifies the process of performing motion-adaptive temporal deinterlacing, by automatically linking together adjacent fields/frames. To take advantage of this, all you need to do is set the appropriate field (`pl_source_frame.first_field`), as well as enabling [deinterlacing parameters](https://code.videolan.org/videolan/libplacebo/-/blob/master/src/include/libplacebo/renderer.h#L186).
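For illustration, a rough sketch of both halves of that, building on the decoder thread example above (the `top_field_first` flag stands in for whatever your decoder reports; `PL_FIELD_TOP`/`PL_FIELD_BOTTOM` and `pl_deinterlace_default_params` are the assumed spellings of the relevant enum values and default parameters):

``` c
// In the decoder thread: tag each pushed frame with its field parity
pl_queue_push_block(queue, UINT64_MAX, &(struct pl_source_frame) {
    .pts         = /* frame pts */,
    .duration    = /* frame duration */,
    .first_field = top_field_first ? PL_FIELD_TOP : PL_FIELD_BOTTOM,
    .map         = /* map callback */,
    .unmap       = /* unmap callback */,
    .frame_data  = frame,
});

// At render time: enable the deinterlacing pass
struct pl_render_params params = pl_render_default_params;
params.deinterlace_params = &pl_deinterlace_default_params;
```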
libplacebo-v7.349.0/docs/style.css000066400000000000000000000000471463457750100167770ustar00rootroot00000000000000.md-typeset p { margin: 1em 1em; } libplacebo-v7.349.0/gcovr.cfg000066400000000000000000000001261463457750100157740ustar00rootroot00000000000000exclude = .*/tests/.* exclude = .*/demos/.* exclude = .*_gen\.c$ sort-uncovered = yes libplacebo-v7.349.0/meson.build000066400000000000000000000556271463457750100163550ustar00rootroot00000000000000project('libplacebo', ['c', 'cpp'], license: 'LGPL2.1+', default_options: [ 'buildtype=debugoptimized', 'warning_level=2', 'c_std=c11', 'cpp_std=c++20', ], meson_version: '>=0.63', version: '@0@.@1@.@2@'.format( # Major version 7, # API version { '349': 'add pl_color_{primaries,system,transfer}_name(s)', '348': 'add pl_color_linearize and pl_color_delinearize', '347': 'add pl_gpu_limits.max_mapped_vram', '346': 'add pl_render_params.background/border, deprecate skip_target_clearing and blend_against_tiles', '345': 'add pl_frame_clear_tiles', '344': 'add PL_ALPHA_NONE', '343': 'add pl_map_avdovi_metadata and deprecate pl_frame_map_avdovi_metadata', '342': 'add pl_cache_signature', '341': 're-add pl_filter_function_{bicubic,bcspline,catmull_rom,mitchell,robidoux,robidouxsharp} as deprecated', '340': 'add pl_queue_params.drift_compensation, PL_QUEUE_DEFAULTS and pl_queue_pts_offset', '339': 'add pl_peak_detect_params.black_cutoff', '338': 'split pl_filter_nearest into pl_filter_nearest and pl_filter_box', '337': 'fix PL_FILTER_DOWNSCALING constant', '336': 'deprecate pl_filter.radius_cutoff in favor of pl_filter.radius', '335': 'remove {pl_render_params,pl_sample_filter_params}.{lut_entries,polar_cutoff}', '334': 'add pl_tex_transfer_params.no_import', '333': 'add pl_shader_sample_{hermite,gaussian}', '332': 'add pl_filter_function_hermite and pl_filter_hermite', '331': 'add pl_filter_function_cubic and remove bcspline family of filter functions', '330': 'add pl_frames_infer(_mix)', '329': 'add pl_frame_mix_current and pl_frame_mix_nearest', '328': 'remove pl_render_params.ignore_icc_profiles', '327': 'remove pl_render_params.icc_params', '326': 'add pl_frame.icc', '325': 'add pl_icc_update', '324': 'add pl_render_params.correct_subpixel_offsets', '323': 'deprecate pl_{dispatch,renderer}_{save,load}', '322': 'remove pl_pass_params.cached_program(_len)', '321': 'deprecate pl_icc_params.cache_{save,load,priv}', '320': 'add pl_gpu_set_cache', '319': 'add ', '318': 'add pl_filter_ewa_lanczossharp and pl_filter_ewa_lanczos4sharpest', '317': 'add pl_filter_config.antiring', '316': 'remove pl_filter_config.filter_scale', '315': 'remove pl_tone_map_auto', '314': 'add pl_renderer_get_hdr_metadata', '313': 'remove pl_peak_detect_params.minimum_peak', '312': 'add pl_gamut_map_constants.perceptual_strength', '311': 'add pl_tone_map_constants, pl_tone_map_params.constants and pl_color_map_params.tone_constants', '310': 'add pl_gamut_map_constants, pl_gamut_map_params.constants and pl_color_map_params.gamut_constants', '309': 'add ', '308': 'add pl_hook_par.names', '307': 'add pl_filter.radius_zero', '306': 'add pl_filter_functions and pl_filter_configs', '305': 'add pl_filter_function.opaque and move pl_filter_oversample to filters.h', '304': 'add pl_filter_config.allowed/recommended', '303': 'refactor pl_filter_config and pl_filter_function', '302': 'change type of pl_icc_params.size_r/g/b to int', '301': 'add PL_COLOR_ADJUSTMENT_NEUTRAL and pl_color_adjustment()', '300': 'add pl_color_map_params.gamut_expansion', '299': 'add pl_primaries_compatible and 
pl_primaries_clip', '298': 'add pl_gamut_map_softclip', '297': 'add pl_tone_map_linear_light', '296': 'add pl_queue_estimate_fps/vps, pl_queue_num_frames and pl_queue_peek', '295': 'change pl_source_frame.pts and pl_queue_params.pts to double', '294': 'add pl_vulkan_swapchain_params.disable_10bit_sdr', '293': 'add pl_test_pixfmt_caps', '292': 'add pl_peak_detect_high_quality_params and pl_color_map_high_quality_params', '291': 'add PL_COLOR_HDR_BLACK, PL_COLOR_SDR_CONTRAST and PL_COLOR_HLG_PEAK', '290': 'remove pl_color_map_params.hybrid_mix', '289': 'remove pl_gamut_map_params.chroma_margin', '288': 'add pl_color_map_params.lut3d_tricubic', '287': 'add pl_transform2x2_bounds', '286': 'add PL_RENDER_ERR_CONTRAST_RECOVERY', '285': 'add pl_color_map_params.contrast_recovery/smoothness, ' + 'pl_color_map_args.feature_map and pl_shader_extract_features', '284': 'add pl_color_map_args and pl_shader_color_map_ex', '283': 'add pl_render_params.distort_params', '282': 'add PL_HOOK_PRE_OUTPUT', '281': 'add pl_matrix2x2_rotation', '280': 'add pl_distortion_params and pl_shader_distort', '279': 'add pl_matrix2x2_scale/invert and pl_transform2x2_scale/invert', '278': 'switch pl_vulkan.(un)lock_queue to uint32_t', '277': 'add pl_render_params.corner_rounding', '276': 'add pl_get_mapped_avframe', '275': 'add pl_vulkan_params.extra_queues', '274': 'drop minimum vulkan version to 1.2', '273': 'add pl_vulkan_required_features and refactor pl_vulkan_recommended_features', '272': 'require vulkan version 1.3 minimum', '271': 'deprecate pl_vulkan.queues', '270': 'add pl_color_map_params.visualize_hue/theta', '269': 'refactor pl_color_map_params gamut mapping settings', '268': 'add ', '267': 'add pl_ipt_lms2rgb/rgb2lms and pl_ipt_lms2ipt/ipt2lms', '266': 'add pl_shader_info and change type of pl_dispatch_info.shader', '265': 'remove fields deprecated for libplacebo v4', '264': 'add pl_color_map_params.show_clipping', '263': 'add pl_peak_detect_params.percentile', '262': 'add pl_color_map_params.visualize_rect', '261': 'add pl_color_map_params.metadata', '260': 'add pl_tone_map_params.input_avg', '259': 'add pl_color_space_nominal_luma_ex', '258': 'add pl_hdr_metadata_type and pl_hdr_metadata_contains', '257': 'add pl_hdr_metadata.max_pq_y and avg_pq_y', '256': 'deprecate pl_peak_detect_params.overshoot_margin', '255': 'deprecate pl_get_detected_peak and add pl_get_detected_hdr_metadata', '254': 'deprecate pl_renderer_params.allow_delayed_peak_detect and add pl_peak_detect_params.allow_delayed', '253': 'remove pl_color_space.nominal_min/max and add pl_color_space_nominal_peak', '252': 'remove pl_swapchain.impl', '251': 'add `utils/dolbyvision.h` and `pl_hdr_metadata_from_dovi_rpu`', '250': 'add pl_frame_map_avdovi_metadata', '249': 'add `pl_render_error`, `pl_render_errors` and `pl_renderer_get_errors`', '248': 'add pl_hook.signature', '247': 'add pl_color_map_params.visualize_lut', '246': 'add `pl_tone_map_st2094_10` and `pl_tone_map_st2094_40`', '245': 'add `pl_tone_map_params.hdr`', '244': 'add `pl_map_hdr_metadata`', '243': 'add `pl_color_space.nominal_min/max`', '242': 'add `pl_hdr_metadata.scene_max/avg` and `pl_hdr_metadata.ootf`', '241': 'add `pl_plane_data.swapped`', '240': 'add `PL_COLOR_TRC_ST428`', '239': 'add `pl_fmt.planes` and `pl_tex.planes`', '238': 'add `pl_vulkan_wrap_params.aspect`', '237': 'add `pl_vulkan_hold_ex` and `pl_vulkan_release_ex`', '236': 'add `pl_vulkan_sem_create` and `pl_vulkan_sem_destroy`', '235': 'add `pl_vulkan.get_proc_addr`', '234': 'add `pl_gpu_limits.host_cached`', 
'233': 'add `pl_hook.parameters`, `struct pl_hook_par`', '232': 'add `pl_plane_data_from_comps`', '231': 'add `pl_tone_map_params_infer`', '230': 'add PL_COLOR_PRIM_ACES_AP0 and PL_COLOR_PRIM_ACES_AP1', '229': 'add pl_shader_sample_ortho2, deprecate pl_shader_sample_ortho', '228': 'add pl_icc_params.force_bpc', '227': 'refactor `pl_render_info.index` and add `pl_render_info.count`', '226': 'add `pl_dither_params.transfer` and `pl_render_params.disable_dither_gamma_correction`', '225': 'add `pl_render_params.error_diffusion`', '224': 'add `pl_shader_error_diffusion` and related functions', '223': 'add ', '222': 'add `pl_icc_params.cache_save/load`', '221': 'add `pl_source_frame.first_field`', '220': 'add deinterlacing-related fields to `pl_frame` and `pl_render_params`', '219': 'add pl_source_frame.duration, deprecating pl_queue_params.frame_duration', '218': 'add and pl_shader_deinterlace', '217': 'add pl_color_space_infer_map', '216': 'add pl_deband_params.grain_neutral', '215': 'add pl_opengl_params.get_proc_addr_ex', '214': 'drop deprecated legacy C struct names', '213': 'add pl_opengl_params.get_proc_addr', '212': 'add pl_opengl.major/minor version numbers', '211': 'add pl_opengl.extensions and pl_opengl_has_ext', '210': 'add PL_HANDLE_MTL_TEX, PL_HANDLE_IOSURFACE, and pl_shared_mem.plane', '209': 'add pl_gpu_limits.array_size_constants', '208': 'add pl_filter_function.name and pl_filter_config.name', '207': 'add pl_render_params.plane_upscaler and plane_downscaler', '206': 'add new ICC profile API (pl_icc_open, ...)', '205': 'add pl_cie_from_XYZ and pl_raw_primaries_similar, fix pl_cie_xy_equal', '204': 'add pl_d3d11_swapchain_params.disable_10bit_sdr', '203': 'add pl_film_grain_from_av', '202': 'add pl_frame.acquire/release', '201': 'add pl_vulkan.(un)lock_queue', '200': 'always set pl_vulkan.queue_*', '199': 'add pl_plane.flipped', '198': 'remove PL_HOOK_PRE_OVERLAY', '197': 'add pl_overlay.coords, change type of pl_overlay_part.dst', '196': 'add pl_render_params.force_low_bit_depth_fbos', '195': 'change pl_log_create prototype to pl_log_create_${api_ver} to do linking time api check', '194': 'add pl_primaries_valid', '193': 'add pl_hook_params.orig_repr/color', '192': 'add pl_map_avframe_ex', '191': 'add pl_map_dovi_metadata', '190': 'add pl_color_map_params.gamut_mode, replacing gamut_clipping/warning', '189': 'refactor pl_color_space, merging it with pl_hdr_metadata', '188': 'refactor pl_color_map_params tone mapping settings', '187': 'add ', '186': 'add pl_d3d11_swapchain_params.flags', '185': 'add PL_COLOR_SYSTEM_DOLBYVISION and reshaping', '184': 'add pl_map_avframe/pl_unmap_avframe, deprecate pl_upload_avframe', '183': 'relax pl_shared_mem.size > 0 requirement', '182': 'add pl_vulkan_get, pl_opengl_get, pl_d3d11_get', '181': 'add pl_shader_set_alpha, change alpha handling of pl_shader_decode_color', '180': 'add pl_gpu_limits.max_variable_comps', '179': 'add pl_render_params.skip_caching_single_frame', '178': 'add pl_gpu_limits.align_vertex_stride', '177': 'add debug_tag to pl_tex/buf_params', '176': 'revert vulkan 1.2 requirement', '175': 'require timeline semaphores for all vulkan devices', '174': 'deprecate pl_vulkan_params.disable_events', '173': 'remove VkAccessFlags from pl_vulkan_hold/release', '172': 'replace VkSemaphore by pl_vulkan_sem in pl_vulkan_hold/release', '171': 'make vulkan 1.2 the minimum version', '170': 'allow pl_queue_update on NULL', '169': 'refactor pl_pass_params.target_dummy into target_format', '168': 'refactor pl_tex_transfer.stride_w/h into 
row/depth_pitch', '167': 'expose pl_dispatch_reset_frame', '166': 'add pl_index_format', '165': 'add pl_fmt.signature', '164': 'support blending against tiles', '163': 'add pl_frame_copy_stream_props', '162': 'support rotation in pl_renderer', '161': 'make H.274 film grain values indirect', '160': 'add preprocessor macros for default params', '159': 'remove fields deprecated for libplacebo v3', '158': 'add support for H.274 film grain', '157': 'add pl_peak_detect_params.minimum_peak', '156': 'add pl_swapchain_colors_from_avframe/dav1dpicture', '155': 'refactor pl_swapchain_hdr_metadata into pl_swapchain_colorspace_hint', '154': 'add ', '153': 'add pl_render_info callbacks', '152': 'add pl_dispatch_info callbacks', '151': 'pl_shader_res.description/steps', '150': 'add PL_FMT_CAP_READWRITE', '149': 'add pl_gpu_limits.buf_transfer', '148': 'refactor pl_gpu_caps', '147': 'add pl_color_space.sig_floor and black point adaptation', '146': 'add PL_COLOR_TRC_GAMMA20, GAMMA24 and GAMMA26', '145': 'add pl_render_params/pl_shader_params.dynamic_constants', '144': 'add support for pl_constant specialization constants', '143': 'add pl_color_space_infer_ref', '142': 'add pl_render_params.background_transparency and pl_frame_clear_rgba', '141': 'add pl_filter_oversample', '140': 'add pl_shader_sample_oversample', '139': 'make vulkan 1.1 the minimum vulkan version', '138': 're-add and properly deprecate pl_filter_haasnsoft', '137': 'change behavior of pl_image_mix.num_frames == 1', '136': 'add pl_fmt.gatherable', '135': 'add pl_queue_params.interpolation_threshold', '134': 'add pl_render_params.ignore_icc_profiles', '133': 'remove pl_shader_signature', '132': 'add pl_tex_clear_ex', '131': 'remove PL_PRIM_TRIANGLE_FAN', '130': 'provide typedefs for object types, e.g. 
const struct pl_tex * -> pl_tex', '129': 'rename pl_context to pl_log, move to ', '128': 'add pl_opengl_params.make/release_current, for thread safety', '127': 'add pl_get_buffer2', '126': 'add pl_render_params.background_color', '125': 'allow calling pl_render_image on NULL', '124': 'make pl_queue_update return valid data even on PL_QUEUE_MORE', '123': 'refactor pl_overlay from pl_plane into pl_overlay_part', '122': 'make pl_gpu thread safe', '121': 'add pl_queue_push_block and refactor frame queue threading', '120': 'refactor pl_named_filter_config into pl_filter_preset', '119': 'add pl_color_adjustment.temperature', '118': 'add ', '117': 'rename pl_filter_triangle/box to pl_filter_bilinear/nearest', '116': 'add pl_frame_recreate_from_avframe and pl_download_avframe', '115': 'add pl_dispatch_vertex', '114': 'add pl_pass_run_params.index_data', '113': 'add ', '112': 'add , replacing existing 3dlut API', '111': 'add pl_fmt.modifiers for proper DRM format modifier support', '110': 'refactor pl_upload_dav1dpicture', '109': 'add support for host pointer imports on OpenGL', '108': 'add ', '107': 'add pl_render_image_mix', '106': 'add pl_shared_mem.stride_w/h', '105': 'add asynchronous texture transfers', '104': 'add pl_render_params.blend_params', '103': 'move pl_tex_sample_mode from pl_tex_params to pl_desc_binding', '102': 'add pl_tex_poll', '101': 'merge pl_image and pl_render_target into pl_frame', '100': 'add pl_render_target.planes', '99': 'add pl_sample_src.component_mask', '98': 'add pl_vulkan_params.disable_overmapping', '97': 'add pl_av1_grain_params.luma_comp', '96': 'add ', '95': 'add PL_COLOR_PRIM_EBU3213 and FILM_C', '94': 'add support for //!BUFFER to user shaders', '93': 'add pl_plane_data_align', '92': 'add more pl_var helper functions', '91': 'implement PL_HANDLE_DMA_BUF for EGL', '90': 'add pl_opengl_params.allow_software', '89': 'use uniform arrays instead of shader literals for LUTs', '88': 'add pl_shared_mem.drm_format_mod', '87': 'refactor pl_opengl_wrap', '86': 'add pl_pass_run_params.vertex_buf', '85': 'add PL_HANDLE_HOST_PTR', '84': 'add pl_buf_params.import/export_handle', '83': 'add pl_shader_custom', '82': 'add pl_gpu_is_failed', '81': 'add PL_GPU_CAP_SUBGROUPS', '80': 'add pl_color_map_params.gamut_clipping', '79': 'add pl_get_detected_peak', '78': 'add pl_buf_copy', '77': 'make all pl_buf_* commands implicitly synchronized', '76': 'add pl_vulkan_swapchain_params.prefer_hdr', '75': 'add pl_dispatch_save/load', '74': 'remove pl_image.signature', '73': 'add pl_memory_qualifiers', '72': 'generalize PL_SHADER_SIG_SAMPLER2D into PL_SHADER_SIG_SAMPLER', '71': 'add pl_opengl_wrap/unwrap', '70': 'add pl_tex_sampler_type', '69': 'add pl_peak_detect_params.overshoot_margin', '68': 'add PL_TONE_MAPPING_BT_2390', '67': 'add pl_image_set_chroma_location', '66': 'change pl_render_target.dst_rect from pl_rect2d to pl_rect2df', '65': 'add PL_SHADER_SIG_SAMPLER2D', '64': 'add pl_rect2df_aspect_* family of functions', '63': 'refactor pl_shader_av1_grain', '62': 'refactor PL_COLOR_REF_WHITE into PL_COLOR_SDR_WHITE and PL_COLOR_SDR_WHITE_HLG', '61': 'refactor pl_dispatch_finish etc. to support timers', '60': 'add pl_timer', '59': 'add pl_render_high_quality_params', '58': 'add and pl_hook', '57': 'add width/height fields to pl_dispatch_compute', '56': 'make pl_vulkan.features etc. 
extensible', '55': 'add pl_vulkan_params.features', '54': 'add pl_vulkan_import', '53': 'refactor pl_vulkan_wrap', '52': 'revert addition of pl_filter_nearest', '51': 'add pl_vulkan_hold_raw', '50': 'add pl_vulkan_params.device_uuid', '49': 'add pl_filter_nearest', '48': 'deprecate pl_image.width/height', '47': 'add more matrix math helpers to common.h', '46': 'add pl_vk_inst_params.debug_extra', '45': 'add pl_vulkan.api_version', '44': 'add pl_swapchain_hdr_metadata', '43': 'add pl_vulkan/opengl_params.max_glsl_version', '42': 'add pl_vk_inst_params.layers/opt_layers', '41': 'add PL_FMT_CAP_HOST_READABLE', '40': 'add PL_GPU_CAP_BLITTABLE_1D_3D', '39': 'add pl_render_params.disable_fbos', '38': 'add pl_render_params.force_dither', '37': 'add pl_color_levels_guess', '36': 'remove pl_opengl.priv leftover', '35': 'fix pl_vulkan_swapchain_suboptimal signature', '34': 'add ', '33': 'add pl_image.av1_grain', '32': 'refactor pl_grain_params', '31': 'add pl_vulkan_params.get_proc_addr', '30': 'add pl_gpu.pci', '29': 'add pl_vulkan_swapchain_params.allow_suboptimal', '28': 'eliminate void *priv fields from all object types', '27': 'add pl_vulkan_choose_device', '26': 'add PL_GPU_CAP_MAPPED_BUFFERS', '25': 'add pl_fmt.internal_size', '24': 'add pl_vulkan_params.disable_events', '23': 'add error checking to functions in ', '22': 'add pl_vulkan_params.blacklist_caps', '21': 'add pl_shader_params.glsl', '20': 'refactor pl_shader_alloc', '19': 'default to GLSL 130 instead of 110 if unspecified', '18': 'add pl_swapchain_resize', '17': 'add pl_context_update', '16': 'add pl_tex/buf_params.user_data', '15': 'add ', '14': 'remove ident from pl_shader_reset', '13': 'add pl_render_params.peak_detect_params', '12': 'add pl_shader_detect_peak', '11': 'add pl_var_int', '10': 'refactor pl_color_map_params desaturation fields', '9': 'add pl_tex_params.import/export_handle', '8': 'add pl_color_space.sig_scale', '7': 'initial major release', '6': '', '5': '', '4': '', '3': '', '2': '', '1': '', }.keys().length(), # Fix version 0) ) ### Version number and configuration version = meson.project_version() version_pretty = 'v' + version version_split = version.split('.') majorver = version_split[0] apiver = version_split[1] fixver = version_split[2] # Configuration data conf_public = configuration_data() conf_internal = configuration_data() conf_public.set('majorver', majorver) conf_public.set('apiver', apiver) conf_internal.set('BUILD_API_VER', apiver) conf_internal.set('BUILD_FIX_VER', fixver) conf_internal.set('PL_DEBUG_ABORT', get_option('debug-abort')) ### Global build options build_opts = [ # Warnings '-Wundef', '-Wshadow', '-Wparentheses', '-Wpointer-arith', '-fno-math-errno', ] link_args = [] cc = meson.get_compiler('c') cxx = meson.get_compiler('cpp') c_opts = [ '-D_ISOC99_SOURCE', '-D_ISOC11_SOURCE', '-D_GNU_SOURCE', '-U__STRICT_ANSI__', '-Wmissing-prototypes', # Warnings to ignore '-Wno-sign-compare', '-Wno-unused-parameter', '-Wno-missing-field-initializers', '-Wno-type-limits', # Warnings to treat as errors '-Werror=implicit-function-declaration', ] if cc.has_argument('-Wincompatible-pointer-types') c_opts += ['-Werror=incompatible-pointer-types'] endif # clang's version of -Wmissing-braces rejects the common {0} initializers if cc.get_id() == 'clang' c_opts += ['-Wno-missing-braces'] endif # For sanitizers to work/link properly some public symbols have to be available. 
if get_option('b_sanitize') == 'none' # don't leak library symbols if possible vflag = '-Wl,--exclude-libs=ALL' # link and lld-link don't support this arg, but it only shows warning about # unsupported argument. Meson doesn't detect it, so manually exclude them. if cc.has_link_argument(vflag) and not ['lld-link', 'link'].contains(cc.get_linker_id()) link_args += [vflag] endif endif # OS specific build options if host_machine.system() == 'windows' build_opts += ['-D_WIN32_WINNT=0x0601', '-D_USE_MATH_DEFINES', '-DWIN32_LEAN_AND_MEAN', '-DNOMINMAX', '-D_CRT_SECURE_NO_WARNINGS'] subdir('win32') endif add_project_arguments(build_opts + c_opts, language: ['c']) add_project_arguments(build_opts, language: ['c', 'cpp']) add_project_link_arguments(link_args, language: ['c', 'cpp']) # Global dependencies fs = import('fs') libm = cc.find_library('m', required: false) thirdparty = meson.project_source_root()/'3rdparty' python = import('python').find_installation() python_env = environment() python_env.append('PYTHONPATH', thirdparty/'jinja/src') python_env.append('PYTHONPATH', thirdparty/'markupsafe/src') python_env.append('PYTHONPATH', thirdparty/'glad') if host_machine.system() == 'windows' threads = declare_dependency() else pthreads = dependency('threads') has_setclock = cc.has_header_symbol( 'pthread.h', 'pthread_condattr_setclock', dependencies: pthreads, args: c_opts, ) threads = declare_dependency( dependencies: pthreads, compile_args: [pthreads.found() ? '-DPL_HAVE_PTHREAD' : '', has_setclock ? '-DPTHREAD_HAS_SETCLOCK' : '',] ) endif build_deps = [ libm, threads ] subdir('tools') subdir('src') if get_option('demos') subdir('demos') endif # Allows projects to build libplacebo by cloning into ./subprojects/libplacebo meson.override_dependency('libplacebo', libplacebo) libplacebo-v7.349.0/meson_options.txt000066400000000000000000000040621463457750100176330ustar00rootroot00000000000000# Optional components option('vulkan', type: 'feature', value: 'auto', description: 'Vulkan-based renderer') option('vk-proc-addr', type: 'feature', value: 'auto', description: 'Link directly against vkGetInstanceProcAddr from libvulkan.so') option('vulkan-registry', type: 'string', value: '', description: 'Path to vulkan XML registry (for code generation)') option('opengl', type: 'feature', value: 'auto', description: 'OpenGL-based renderer') option('gl-proc-addr', type: 'feature', value: 'auto', description: 'Enable built-in OpenGL loader (uses dlopen, dlsym...)') option('d3d11', type: 'feature', value: 'auto', description: 'Direct3D 11 based renderer') option('glslang', type: 'feature', value: 'auto', description: 'glslang SPIR-V compiler') option('shaderc', type: 'feature', value: 'auto', description: 'libshaderc SPIR-V compiler') option('lcms', type: 'feature', value: 'auto', description: 'LittleCMS 2 support') option('dovi', type: 'feature', value: 'auto', description: 'Dolby Vision reshaping support') option('libdovi', type: 'feature', value: 'auto', description: 'libdovi support') # Miscellaneous option('demos', type: 'boolean', value: true, description: 'Enable building (and installing) the demo programs') option('tests', type: 'boolean', value: false, description: 'Enable building the test cases') option('bench', type: 'boolean', value: false, description: 'Enable building benchmarks (`meson test benchmark`)') option('fuzz', type: 'boolean', value: false, description: 'Enable building fuzzer binaries (`CC=afl-cc`)') option('unwind', type: 'feature', value: 'auto', description: 'Enable linking against 
libunwind for printing stack traces caused by runtime errors') option('xxhash', type: 'feature', value: 'auto', description: 'Use libxxhash as a faster replacement for internal siphash') option('debug-abort', type: 'boolean', value: false, description: 'abort() on most runtime errors (only for debugging purposes)') libplacebo-v7.349.0/mkdocs.yml000066400000000000000000000017431463457750100162040ustar00rootroot00000000000000site_name: libplacebo site_url: https://libplacebo.org/ repo_url: https://code.videolan.org/videolan/libplacebo repo_name: videolan/libplacebo copyright: Copyright © 2017-2022 Niklas Haas theme: name: material palette: - scheme: slate primary: deep purple accent: deep purple toggle: icon: material/brightness-4 name: Switch to light mode - scheme: default primary: purple accent: purple toggle: icon: material/brightness-7 name: Switch to dark mode icon: repo: fontawesome/brands/gitlab features: - content.code.annotate extra_css: - style.css markdown_extensions: - admonition - footnotes - pymdownx.highlight: anchor_linenums: true - pymdownx.details - pymdownx.snippets - pymdownx.superfences - toc: toc_depth: 3 nav: - 'Using': - index.md - basic-rendering.md - renderer.md - custom-shaders.md - options.md - 'Developing': - glsl.md libplacebo-v7.349.0/src/000077500000000000000000000000001463457750100147635ustar00rootroot00000000000000libplacebo-v7.349.0/src/cache.c000066400000000000000000000307021463457750100161740ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . 
*/ #include #include #include #include "common.h" #include "cache.h" #include "log.h" #include "pl_thread.h" const struct pl_cache_params pl_cache_default_params = {0}; struct priv { pl_log log; pl_mutex lock; PL_ARRAY(pl_cache_obj) objects; size_t total_size; }; int pl_cache_objects(pl_cache cache) { if (!cache) return 0; struct priv *p = PL_PRIV(cache); pl_mutex_lock(&p->lock); int num = p->objects.num; pl_mutex_unlock(&p->lock); return num; } size_t pl_cache_size(pl_cache cache) { if (!cache) return 0; struct priv *p = PL_PRIV(cache); pl_mutex_lock(&p->lock); size_t size = p->total_size; pl_mutex_unlock(&p->lock); return size; } pl_cache pl_cache_create(const struct pl_cache_params *params) { struct pl_cache_t *cache = pl_zalloc_obj(NULL, cache, struct priv); struct priv *p = PL_PRIV(cache); pl_mutex_init(&p->lock); if (params) { cache->params = *params; p->log = params->log; } // Sanitize size limits size_t total_size = PL_DEF(cache->params.max_total_size, SIZE_MAX); size_t object_size = PL_DEF(cache->params.max_object_size, SIZE_MAX); object_size = PL_MIN(total_size, object_size); cache->params.max_total_size = total_size; cache->params.max_object_size = object_size; return cache; } static void remove_obj(pl_cache cache, pl_cache_obj obj) { struct priv *p = PL_PRIV(cache); p->total_size -= obj.size; if (obj.free) obj.free(obj.data); } void pl_cache_destroy(pl_cache *pcache) { pl_cache cache = *pcache; if (!cache) return; struct priv *p = PL_PRIV(cache); for (int i = 0; i < p->objects.num; i++) remove_obj(cache, p->objects.elem[i]); pl_assert(p->total_size == 0); pl_mutex_destroy(&p->lock); pl_free((void *) cache); *pcache = NULL; } void pl_cache_reset(pl_cache cache) { if (!cache) return; struct priv *p = PL_PRIV(cache); pl_mutex_lock(&p->lock); for (int i = 0; i < p->objects.num; i++) remove_obj(cache, p->objects.elem[i]); p->objects.num = 0; pl_assert(p->total_size == 0); pl_mutex_unlock(&p->lock); } static bool try_set(pl_cache cache, pl_cache_obj obj) { struct priv *p = PL_PRIV(cache); // Remove any existing entry with this key for (int i = p->objects.num - 1; i >= 0; i--) { pl_cache_obj prev = p->objects.elem[i]; if (prev.key == obj.key) { PL_TRACE(p, "Removing out-of-date object 0x%"PRIx64, prev.key); remove_obj(cache, prev); PL_ARRAY_REMOVE_AT(p->objects, i); break; } } if (!obj.size) { PL_TRACE(p, "Deleted object 0x%"PRIx64, obj.key); return true; } if (obj.size > cache->params.max_object_size) { PL_DEBUG(p, "Object 0x%"PRIx64" (size %zu) exceeds max size %zu, discarding", obj.key, obj.size, cache->params.max_object_size); return false; } // Make space by deleting old objects while (p->total_size + obj.size > cache->params.max_total_size || p->objects.num == INT_MAX) { pl_assert(p->objects.num); pl_cache_obj old = p->objects.elem[0]; PL_TRACE(p, "Removing object 0x%"PRIx64" (size %zu) to make room", old.key, old.size); remove_obj(cache, old); PL_ARRAY_REMOVE_AT(p->objects, 0); } if (!obj.free) { obj.data = pl_memdup(NULL, obj.data, obj.size); obj.free = pl_free; } PL_TRACE(p, "Inserting new object 0x%"PRIx64" (size %zu)", obj.key, obj.size); PL_ARRAY_APPEND((void *) cache, p->objects, obj); p->total_size += obj.size; return true; } static pl_cache_obj strip_obj(pl_cache_obj obj) { return (pl_cache_obj) { .key = obj.key }; } bool pl_cache_try_set(pl_cache cache, pl_cache_obj *pobj) { if (!cache) return false; pl_cache_obj obj = *pobj; struct priv *p = PL_PRIV(cache); pl_mutex_lock(&p->lock); bool ok = try_set(cache, obj); pl_mutex_unlock(&p->lock); if (ok) { *pobj = 
strip_obj(obj); // ownership transfers, clear ptr } else { obj = strip_obj(obj); // ownership remains with caller, clear copy } if (cache->params.set) cache->params.set(cache->params.priv, obj); return ok; } void pl_cache_set(pl_cache cache, pl_cache_obj *obj) { if (!pl_cache_try_set(cache, obj)) { if (obj->free) obj->free(obj->data); *obj = (pl_cache_obj) { .key = obj->key }; } } static void noop(void *ignored) { (void) ignored; } bool pl_cache_get(pl_cache cache, pl_cache_obj *out_obj) { const uint64_t key = out_obj->key; if (!cache) goto fail; struct priv *p = PL_PRIV(cache); pl_mutex_lock(&p->lock); // Search backwards to prioritize recently added entries for (int i = p->objects.num - 1; i >= 0; i--) { pl_cache_obj obj = p->objects.elem[i]; if (obj.key == key) { PL_ARRAY_REMOVE_AT(p->objects, i); p->total_size -= obj.size; pl_mutex_unlock(&p->lock); pl_assert(obj.free); *out_obj = obj; return true; } } pl_mutex_unlock(&p->lock); if (!cache->params.get) goto fail; pl_cache_obj obj = cache->params.get(cache->params.priv, key); if (!obj.size) goto fail; // Sanitize object obj.key = key; obj.free = PL_DEF(obj.free, noop); *out_obj = obj; return true; fail: *out_obj = (pl_cache_obj) { .key = key }; return false; } void pl_cache_iterate(pl_cache cache, void (*cb)(void *priv, pl_cache_obj obj), void *priv) { if (!cache) return; struct priv *p = PL_PRIV(cache); pl_mutex_lock(&p->lock); for (int i = 0; i < p->objects.num; i++) cb(priv, p->objects.elem[i]); pl_mutex_unlock(&p->lock); } uint64_t pl_cache_signature(pl_cache cache) { uint64_t hash = 0; if (!cache) return hash; // Simple XOR of all keys. This satisfies our order-invariant requirement, // and does not pose issues because duplicate keys are not allowed, nor // are keys with hash 0. struct priv *p = PL_PRIV(cache); pl_mutex_lock(&p->lock); for (int i = 0; i < p->objects.num; i++) { assert(p->objects.elem[i].key); hash ^= p->objects.elem[i].key; } pl_mutex_unlock(&p->lock); return hash; } // --- Saving/loading #define CACHE_MAGIC "pl_cache" #define CACHE_VERSION 1 #define PAD_ALIGN(x) PL_ALIGN2(x, sizeof(uint32_t)) struct __attribute__((__packed__)) cache_header { char magic[8]; uint32_t version; uint32_t num_entries; }; struct __attribute__((__packed__)) cache_entry { uint64_t key; uint64_t size; uint64_t hash; }; pl_static_assert(sizeof(struct cache_header) % alignof(struct cache_entry) == 0); int pl_cache_save_ex(pl_cache cache, void (*write)(void *priv, size_t size, const void *ptr), void *priv) { if (!cache) return 0; struct priv *p = PL_PRIV(cache); pl_mutex_lock(&p->lock); pl_clock_t start = pl_clock_now(); const int num_objects = p->objects.num; const size_t saved_bytes = p->total_size; write(priv, sizeof(struct cache_header), &(struct cache_header) { .magic = CACHE_MAGIC, .version = CACHE_VERSION, .num_entries = num_objects, }); for (int i = 0; i < num_objects; i++) { pl_cache_obj obj = p->objects.elem[i]; PL_TRACE(p, "Saving object 0x%"PRIx64" (size %zu)", obj.key, obj.size); write(priv, sizeof(struct cache_entry), &(struct cache_entry) { .key = obj.key, .size = obj.size, .hash = pl_mem_hash(obj.data, obj.size), }); static const uint8_t padding[PAD_ALIGN(1)] = {0}; write(priv, obj.size, obj.data); write(priv, PAD_ALIGN(obj.size) - obj.size, padding); } pl_mutex_unlock(&p->lock); pl_log_cpu_time(p->log, start, pl_clock_now(), "saving cache"); if (num_objects) PL_DEBUG(p, "Saved %d objects, totalling %zu bytes", num_objects, saved_bytes); return num_objects; } int pl_cache_load_ex(pl_cache cache, bool (*read)(void *priv, size_t 
size, void *ptr), void *priv) { if (!cache) return 0; struct priv *p = PL_PRIV(cache); struct cache_header header; if (!read(priv, sizeof(header), &header)) { PL_ERR(p, "Failed loading cache: file seems empty or truncated"); return -1; } if (memcmp(header.magic, CACHE_MAGIC, sizeof(header.magic)) != 0) { PL_ERR(p, "Failed loading cache: invalid magic bytes"); return -1; } if (header.version != CACHE_VERSION) { PL_INFO(p, "Failed loading cache: wrong version... skipping"); return 0; } if (header.num_entries > INT_MAX) { PL_ERR(p, "Failed loading cache: %"PRIu32" entries overflows int", header.num_entries); return 0; } int num_loaded = 0; size_t loaded_bytes = 0; pl_mutex_lock(&p->lock); pl_clock_t start = pl_clock_now(); for (int i = 0; i < header.num_entries; i++) { struct cache_entry entry; if (!read(priv, sizeof(entry), &entry)) { PL_WARN(p, "Cache seems truncated, missing objects.. ignoring rest"); goto error; } if (entry.size > SIZE_MAX) { PL_WARN(p, "Cache object size %"PRIu64" overflows SIZE_MAX.. " "suspect broken file, ignoring rest", entry.size); goto error; } void *buf = pl_alloc(NULL, PAD_ALIGN(entry.size)); if (!read(priv, PAD_ALIGN(entry.size), buf)) { PL_WARN(p, "Cache seems truncated, missing objects.. ignoring rest"); pl_free(buf); goto error; } uint64_t checksum = pl_mem_hash(buf, entry.size); if (checksum != entry.hash) { PL_WARN(p, "Cache entry seems corrupt, checksum mismatch.. ignoring rest"); pl_free(buf); goto error; } pl_cache_obj obj = { .key = entry.key, .size = entry.size, .data = buf, .free = pl_free, }; PL_TRACE(p, "Loading object 0x%"PRIx64" (size %zu)", obj.key, obj.size); if (try_set(cache, obj)) { num_loaded++; loaded_bytes += entry.size; } else { pl_free(buf); } } pl_log_cpu_time(p->log, start, pl_clock_now(), "loading cache"); if (num_loaded) PL_DEBUG(p, "Loaded %d objects, totalling %zu bytes", num_loaded, loaded_bytes); // fall through error: pl_mutex_unlock(&p->lock); return num_loaded; } // Save/load wrappers struct ptr_ctx { uint8_t *data; // base pointer size_t size; // total size size_t pos; // read/write index }; static void write_ptr(void *priv, size_t size, const void *ptr) { struct ptr_ctx *ctx = priv; size_t end = PL_MIN(ctx->pos + size, ctx->size); if (end > ctx->pos) memcpy(ctx->data + ctx->pos, ptr, end - ctx->pos); ctx->pos += size; } static bool read_ptr(void *priv, size_t size, void *ptr) { struct ptr_ctx *ctx = priv; if (ctx->pos + size > ctx->size) return false; memcpy(ptr, ctx->data + ctx->pos, size); ctx->pos += size; return true; } size_t pl_cache_save(pl_cache cache, uint8_t *data, size_t size) { struct ptr_ctx ctx = { data, size }; pl_cache_save_ex(cache, write_ptr, &ctx); return ctx.pos; } int pl_cache_load(pl_cache cache, const uint8_t *data, size_t size) { return pl_cache_load_ex(cache, read_ptr, &(struct ptr_ctx) { .data = (uint8_t *) data, .size = size, }); } libplacebo-v7.349.0/src/cache.h000066400000000000000000000050541463457750100162030ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. 
* * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #pragma once #include "common.h" #include "hash.h" #include // Convenience wrapper around pl_cache_set static inline void pl_cache_str(pl_cache cache, uint64_t key, pl_str *str) { pl_cache_set(cache, &(pl_cache_obj) { .key = key, .data = pl_steal(NULL, str->buf), .size = str->len, .free = pl_free, }); *str = (pl_str) {0}; } // Steal and insert a cache object static inline void pl_cache_steal(pl_cache cache, pl_cache_obj *obj) { if (obj->free == pl_free) obj->data = pl_steal(NULL, obj->data); pl_cache_set(cache, obj); } // Resize `obj->data` to a given size, re-using allocated buffers where possible static inline void pl_cache_obj_resize(void *alloc, pl_cache_obj *obj, size_t size) { if (obj->free != pl_free) { if (obj->free) obj->free(obj->data); obj->data = pl_alloc(alloc, size); obj->free = pl_free; } else if (pl_get_size(obj->data) < size) { obj->data = pl_steal(alloc, obj->data); obj->data = pl_realloc(alloc, obj->data, size); } obj->size = size; } // Internal list of base seeds for different object types, randomly generated enum { CACHE_KEY_SH_LUT = UINT64_C(0x2206183d320352c6), // sh_lut cache CACHE_KEY_ICC_3DLUT = UINT64_C(0xff703a6dd8a996f6), // ICC 3dlut CACHE_KEY_DITHER = UINT64_C(0x6fed75eb6dce86cb), // dither matrix CACHE_KEY_H274 = UINT64_C(0x2fb9adca04b42c4d), // H.274 film grain DB CACHE_KEY_GAMUT_LUT = UINT64_C(0x6109e47f15d478b1), // gamut mapping 3DLUT CACHE_KEY_SPIRV = UINT64_C(0x32352f6605ff60a7), // bare SPIR-V module CACHE_KEY_VK_PIPE = UINT64_C(0x4bdab2817ad02ad4), // VkPipelineCache CACHE_KEY_GL_PROG = UINT64_C(0x4274c309f4f0477b), // GL_ARB_get_program_binary CACHE_KEY_D3D_DXBC = UINT64_C(0x5c9e6f43ec73f787), // DXBC bytecode }; libplacebo-v7.349.0/src/colorspace.c000066400000000000000000001741171463457750100172740ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . 
*/ #include #include "common.h" #include "colorspace.h" #include "hash.h" #include #include bool pl_color_system_is_ycbcr_like(enum pl_color_system sys) { switch (sys) { case PL_COLOR_SYSTEM_UNKNOWN: case PL_COLOR_SYSTEM_RGB: case PL_COLOR_SYSTEM_XYZ: return false; case PL_COLOR_SYSTEM_BT_601: case PL_COLOR_SYSTEM_BT_709: case PL_COLOR_SYSTEM_SMPTE_240M: case PL_COLOR_SYSTEM_BT_2020_NC: case PL_COLOR_SYSTEM_BT_2020_C: case PL_COLOR_SYSTEM_BT_2100_PQ: case PL_COLOR_SYSTEM_BT_2100_HLG: case PL_COLOR_SYSTEM_DOLBYVISION: case PL_COLOR_SYSTEM_YCGCO: return true; case PL_COLOR_SYSTEM_COUNT: break; }; pl_unreachable(); } bool pl_color_system_is_linear(enum pl_color_system sys) { switch (sys) { case PL_COLOR_SYSTEM_UNKNOWN: case PL_COLOR_SYSTEM_RGB: case PL_COLOR_SYSTEM_BT_601: case PL_COLOR_SYSTEM_BT_709: case PL_COLOR_SYSTEM_SMPTE_240M: case PL_COLOR_SYSTEM_BT_2020_NC: case PL_COLOR_SYSTEM_YCGCO: return true; case PL_COLOR_SYSTEM_BT_2020_C: case PL_COLOR_SYSTEM_BT_2100_PQ: case PL_COLOR_SYSTEM_BT_2100_HLG: case PL_COLOR_SYSTEM_DOLBYVISION: case PL_COLOR_SYSTEM_XYZ: return false; case PL_COLOR_SYSTEM_COUNT: break; }; pl_unreachable(); } const char *const pl_color_system_names[PL_COLOR_SYSTEM_COUNT] = { [PL_COLOR_SYSTEM_UNKNOWN] = "Auto (unknown)", [PL_COLOR_SYSTEM_BT_601] = "ITU-R Rec. BT.601 (SD)", [PL_COLOR_SYSTEM_BT_709] = "ITU-R Rec. BT.709 (HD)", [PL_COLOR_SYSTEM_SMPTE_240M] = "SMPTE-240M", [PL_COLOR_SYSTEM_BT_2020_NC] = "ITU-R Rec. BT.2020 (non-constant luminance)", [PL_COLOR_SYSTEM_BT_2020_C] = "ITU-R Rec. BT.2020 (constant luminance)", [PL_COLOR_SYSTEM_BT_2100_PQ] = "ITU-R Rec. BT.2100 ICtCp PQ variant", [PL_COLOR_SYSTEM_BT_2100_HLG] = "ITU-R Rec. BT.2100 ICtCp HLG variant", [PL_COLOR_SYSTEM_DOLBYVISION] = "Dolby Vision (invalid for output)", [PL_COLOR_SYSTEM_YCGCO] = "YCgCo (derived from RGB)", [PL_COLOR_SYSTEM_RGB] = "Red, Green and Blue", [PL_COLOR_SYSTEM_XYZ] = "Digital Cinema Distribution Master (XYZ)", }; const char *pl_color_system_name(enum pl_color_system sys) { pl_assert(sys >= 0 && sys < PL_COLOR_SYSTEM_COUNT); return pl_color_system_names[sys]; } enum pl_color_system pl_color_system_guess_ycbcr(int width, int height) { if (width >= 1280 || height > 576) { // Typical HD content return PL_COLOR_SYSTEM_BT_709; } else { // Typical SD content return PL_COLOR_SYSTEM_BT_601; } } bool pl_bit_encoding_equal(const struct pl_bit_encoding *b1, const struct pl_bit_encoding *b2) { return b1->sample_depth == b2->sample_depth && b1->color_depth == b2->color_depth && b1->bit_shift == b2->bit_shift; } const struct pl_color_repr pl_color_repr_unknown = {0}; const struct pl_color_repr pl_color_repr_rgb = { .sys = PL_COLOR_SYSTEM_RGB, .levels = PL_COLOR_LEVELS_FULL, }; const struct pl_color_repr pl_color_repr_sdtv = { .sys = PL_COLOR_SYSTEM_BT_601, .levels = PL_COLOR_LEVELS_LIMITED, }; const struct pl_color_repr pl_color_repr_hdtv = { .sys = PL_COLOR_SYSTEM_BT_709, .levels = PL_COLOR_LEVELS_LIMITED, }; const struct pl_color_repr pl_color_repr_uhdtv = { .sys = PL_COLOR_SYSTEM_BT_2020_NC, .levels = PL_COLOR_LEVELS_LIMITED, }; const struct pl_color_repr pl_color_repr_jpeg = { .sys = PL_COLOR_SYSTEM_BT_601, .levels = PL_COLOR_LEVELS_FULL, }; bool pl_color_repr_equal(const struct pl_color_repr *c1, const struct pl_color_repr *c2) { return c1->sys == c2->sys && c1->levels == c2->levels && c1->alpha == c2->alpha && c1->dovi == c2->dovi && pl_bit_encoding_equal(&c1->bits, &c2->bits); } static struct pl_bit_encoding pl_bit_encoding_merge(const struct pl_bit_encoding *orig, const struct pl_bit_encoding 
*new) { return (struct pl_bit_encoding) { .sample_depth = PL_DEF(orig->sample_depth, new->sample_depth), .color_depth = PL_DEF(orig->color_depth, new->color_depth), .bit_shift = PL_DEF(orig->bit_shift, new->bit_shift), }; } void pl_color_repr_merge(struct pl_color_repr *orig, const struct pl_color_repr *new) { *orig = (struct pl_color_repr) { .sys = PL_DEF(orig->sys, new->sys), .levels = PL_DEF(orig->levels, new->levels), .alpha = PL_DEF(orig->alpha, new->alpha), .dovi = PL_DEF(orig->dovi, new->dovi), .bits = pl_bit_encoding_merge(&orig->bits, &new->bits), }; } enum pl_color_levels pl_color_levels_guess(const struct pl_color_repr *repr) { if (repr->sys == PL_COLOR_SYSTEM_DOLBYVISION) return PL_COLOR_LEVELS_FULL; if (repr->levels) return repr->levels; return pl_color_system_is_ycbcr_like(repr->sys) ? PL_COLOR_LEVELS_LIMITED : PL_COLOR_LEVELS_FULL; } float pl_color_repr_normalize(struct pl_color_repr *repr) { float scale = 1.0; struct pl_bit_encoding *bits = &repr->bits; if (bits->bit_shift) { scale /= (1LL << bits->bit_shift); bits->bit_shift = 0; } // If one of these is set but not the other, use the set one int tex_bits = PL_DEF(bits->sample_depth, 8); int col_bits = PL_DEF(bits->color_depth, tex_bits); tex_bits = PL_DEF(tex_bits, col_bits); if (pl_color_levels_guess(repr) == PL_COLOR_LEVELS_LIMITED) { // Limit range is always shifted directly scale *= (float) (1LL << tex_bits) / (1LL << col_bits); } else { // Full range always uses the full range available scale *= ((1LL << tex_bits) - 1.) / ((1LL << col_bits) - 1.); } bits->color_depth = bits->sample_depth; return scale; } bool pl_color_primaries_is_wide_gamut(enum pl_color_primaries prim) { switch (prim) { case PL_COLOR_PRIM_UNKNOWN: case PL_COLOR_PRIM_BT_601_525: case PL_COLOR_PRIM_BT_601_625: case PL_COLOR_PRIM_BT_709: case PL_COLOR_PRIM_BT_470M: case PL_COLOR_PRIM_EBU_3213: return false; case PL_COLOR_PRIM_BT_2020: case PL_COLOR_PRIM_APPLE: case PL_COLOR_PRIM_ADOBE: case PL_COLOR_PRIM_PRO_PHOTO: case PL_COLOR_PRIM_CIE_1931: case PL_COLOR_PRIM_DCI_P3: case PL_COLOR_PRIM_DISPLAY_P3: case PL_COLOR_PRIM_V_GAMUT: case PL_COLOR_PRIM_S_GAMUT: case PL_COLOR_PRIM_FILM_C: case PL_COLOR_PRIM_ACES_AP0: case PL_COLOR_PRIM_ACES_AP1: return true; case PL_COLOR_PRIM_COUNT: break; } pl_unreachable(); } const char *const pl_color_primaries_names[PL_COLOR_PRIM_COUNT] = { [PL_COLOR_PRIM_UNKNOWN] = "Auto (unknown)", [PL_COLOR_PRIM_BT_601_525] = "ITU-R Rec. BT.601 (525-line = NTSC, SMPTE-C)", [PL_COLOR_PRIM_BT_601_625] = "ITU-R Rec. BT.601 (625-line = PAL, SECAM)", [PL_COLOR_PRIM_BT_709] = "ITU-R Rec. BT.709 (HD), also sRGB", [PL_COLOR_PRIM_BT_470M] = "ITU-R Rec. BT.470 M", [PL_COLOR_PRIM_EBU_3213] = "EBU Tech. 3213-E / JEDEC P22 phosphors", [PL_COLOR_PRIM_BT_2020] = "ITU-R Rec. 
BT.2020 (Ultra HD)", [PL_COLOR_PRIM_APPLE] = "Apple RGB", [PL_COLOR_PRIM_ADOBE] = "Adobe RGB (1998)", [PL_COLOR_PRIM_PRO_PHOTO] = "ProPhoto RGB (ROMM)", [PL_COLOR_PRIM_CIE_1931] = "CIE 1931 RGB primaries", [PL_COLOR_PRIM_DCI_P3] = "DCI-P3 (Digital Cinema)", [PL_COLOR_PRIM_DISPLAY_P3] = "DCI-P3 (Digital Cinema) with D65 white point", [PL_COLOR_PRIM_V_GAMUT] = "Panasonic V-Gamut (VARICAM)", [PL_COLOR_PRIM_S_GAMUT] = "Sony S-Gamut", [PL_COLOR_PRIM_FILM_C] = "Traditional film primaries with Illuminant C", [PL_COLOR_PRIM_ACES_AP0] = "ACES Primaries #0", [PL_COLOR_PRIM_ACES_AP1] = "ACES Primaries #1", }; const char *pl_color_primaries_name(enum pl_color_primaries prim) { pl_assert(prim >= 0 && prim < PL_COLOR_PRIM_COUNT); return pl_color_primaries_names[prim]; } enum pl_color_primaries pl_color_primaries_guess(int width, int height) { // HD content if (width >= 1280 || height > 576) return PL_COLOR_PRIM_BT_709; switch (height) { case 576: // Typical PAL content, including anamorphic/squared return PL_COLOR_PRIM_BT_601_625; case 480: // Typical NTSC content, including squared case 486: // NTSC Pro or anamorphic NTSC return PL_COLOR_PRIM_BT_601_525; default: // No good metric, just pick BT.709 to minimize damage return PL_COLOR_PRIM_BT_709; } } const char *const pl_color_transfer_names[PL_COLOR_TRC_COUNT] = { [PL_COLOR_TRC_UNKNOWN] = "Auto (unknown SDR)", [PL_COLOR_TRC_BT_1886] = "ITU-R Rec. BT.1886 (CRT emulation + OOTF)", [PL_COLOR_TRC_SRGB] = "IEC 61966-2-4 sRGB (CRT emulation)", [PL_COLOR_TRC_LINEAR] = "Linear light content", [PL_COLOR_TRC_GAMMA18] = "Pure power gamma 1.8", [PL_COLOR_TRC_GAMMA20] = "Pure power gamma 2.0", [PL_COLOR_TRC_GAMMA22] = "Pure power gamma 2.2", [PL_COLOR_TRC_GAMMA24] = "Pure power gamma 2.4", [PL_COLOR_TRC_GAMMA26] = "Pure power gamma 2.6", [PL_COLOR_TRC_GAMMA28] = "Pure power gamma 2.8", [PL_COLOR_TRC_PRO_PHOTO] = "ProPhoto RGB (ROMM)", [PL_COLOR_TRC_ST428] = "Digital Cinema Distribution Master (XYZ)", [PL_COLOR_TRC_PQ] = "ITU-R BT.2100 PQ (perceptual quantizer), aka SMPTE ST2048", [PL_COLOR_TRC_HLG] = "ITU-R BT.2100 HLG (hybrid log-gamma), aka ARIB STD-B67", [PL_COLOR_TRC_V_LOG] = "Panasonic V-Log (VARICAM)", [PL_COLOR_TRC_S_LOG1] = "Sony S-Log1", [PL_COLOR_TRC_S_LOG2] = "Sony S-Log2", }; const char *pl_color_transfer_name(enum pl_color_transfer trc) { pl_assert(trc >= 0 && trc < PL_COLOR_TRC_COUNT); return pl_color_transfer_names[trc]; } // HLG 75% value (scene-referred) #define HLG_75 3.17955 float pl_color_transfer_nominal_peak(enum pl_color_transfer trc) { switch (trc) { case PL_COLOR_TRC_UNKNOWN: case PL_COLOR_TRC_BT_1886: case PL_COLOR_TRC_SRGB: case PL_COLOR_TRC_LINEAR: case PL_COLOR_TRC_GAMMA18: case PL_COLOR_TRC_GAMMA20: case PL_COLOR_TRC_GAMMA22: case PL_COLOR_TRC_GAMMA24: case PL_COLOR_TRC_GAMMA26: case PL_COLOR_TRC_GAMMA28: case PL_COLOR_TRC_PRO_PHOTO: case PL_COLOR_TRC_ST428: return 1.0; case PL_COLOR_TRC_PQ: return 10000.0 / PL_COLOR_SDR_WHITE; case PL_COLOR_TRC_HLG: return 12.0 / HLG_75; case PL_COLOR_TRC_V_LOG: return 46.0855; case PL_COLOR_TRC_S_LOG1: return 6.52; case PL_COLOR_TRC_S_LOG2: return 9.212; case PL_COLOR_TRC_COUNT: break; } pl_unreachable(); } const struct pl_hdr_metadata pl_hdr_metadata_empty = {0}; const struct pl_hdr_metadata pl_hdr_metadata_hdr10 ={ .prim = { .red = {0.708, 0.292}, .green = {0.170, 0.797}, .blue = {0.131, 0.046}, .white = {0.31271, 0.32902}, }, .min_luma = 0, .max_luma = 10000, .max_cll = 10000, .max_fall = 0, // unknown }; float pl_hdr_rescale(enum pl_hdr_scaling from, enum pl_hdr_scaling to, float x) { if (from == 
to) return x; if (!x) // micro-optimization for common value return x; x = fmaxf(x, 0.0f); // Convert input to PL_SCALE_RELATIVE switch (from) { case PL_HDR_PQ: x = powf(x, 1.0f / PQ_M2); x = fmaxf(x - PQ_C1, 0.0f) / (PQ_C2 - PQ_C3 * x); x = powf(x, 1.0f / PQ_M1); x *= 10000.0f; // fall through case PL_HDR_NITS: x /= PL_COLOR_SDR_WHITE; // fall through case PL_HDR_NORM: goto output; case PL_HDR_SQRT: x *= x; goto output; case PL_HDR_SCALING_COUNT: break; } pl_unreachable(); output: // Convert PL_SCALE_RELATIVE to output switch (to) { case PL_HDR_NORM: return x; case PL_HDR_SQRT: return sqrtf(x); case PL_HDR_NITS: return x * PL_COLOR_SDR_WHITE; case PL_HDR_PQ: x *= PL_COLOR_SDR_WHITE / 10000.0f; x = powf(x, PQ_M1); x = (PQ_C1 + PQ_C2 * x) / (1.0f + PQ_C3 * x); x = powf(x, PQ_M2); return x; case PL_HDR_SCALING_COUNT: break; } pl_unreachable(); } static inline bool pl_hdr_bezier_equal(const struct pl_hdr_bezier *a, const struct pl_hdr_bezier *b) { return a->target_luma == b->target_luma && a->knee_x == b->knee_x && a->knee_y == b->knee_y && a->num_anchors == b->num_anchors && !memcmp(a->anchors, b->anchors, sizeof(a->anchors[0]) * a->num_anchors); } bool pl_hdr_metadata_equal(const struct pl_hdr_metadata *a, const struct pl_hdr_metadata *b) { return pl_raw_primaries_equal(&a->prim, &b->prim) && a->min_luma == b->min_luma && a->max_luma == b->max_luma && a->max_cll == b->max_cll && a->max_fall == b->max_fall && a->scene_max[0] == b->scene_max[0] && a->scene_max[1] == b->scene_max[1] && a->scene_max[2] == b->scene_max[2] && a->scene_avg == b->scene_avg && pl_hdr_bezier_equal(&a->ootf, &b->ootf) && a->max_pq_y == b->max_pq_y && a->avg_pq_y == b->avg_pq_y; } void pl_hdr_metadata_merge(struct pl_hdr_metadata *orig, const struct pl_hdr_metadata *update) { pl_raw_primaries_merge(&orig->prim, &update->prim); if (!orig->min_luma) orig->min_luma = update->min_luma; if (!orig->max_luma) orig->max_luma = update->max_luma; if (!orig->max_cll) orig->max_cll = update->max_cll; if (!orig->max_fall) orig->max_fall = update->max_fall; if (!orig->scene_max[1]) memcpy(orig->scene_max, update->scene_max, sizeof(orig->scene_max)); if (!orig->scene_avg) orig->scene_avg = update->scene_avg; if (!orig->ootf.target_luma) orig->ootf = update->ootf; if (!orig->max_pq_y) orig->max_pq_y = update->max_pq_y; if (!orig->avg_pq_y) orig->avg_pq_y = update->avg_pq_y; } bool pl_hdr_metadata_contains(const struct pl_hdr_metadata *data, enum pl_hdr_metadata_type type) { bool has_hdr10 = data->max_luma; bool has_hdr10plus = data->scene_avg && (data->scene_max[0] || data->scene_max[1] || data->scene_max[2]); bool has_cie_y = data->max_pq_y && data->avg_pq_y; switch (type) { case PL_HDR_METADATA_NONE: return true; case PL_HDR_METADATA_ANY: return has_hdr10 || has_hdr10plus || has_cie_y; case PL_HDR_METADATA_HDR10: return has_hdr10; case PL_HDR_METADATA_HDR10PLUS: return has_hdr10plus; case PL_HDR_METADATA_CIE_Y: return has_cie_y; case PL_HDR_METADATA_TYPE_COUNT: break; } pl_unreachable(); } const struct pl_color_space pl_color_space_unknown = {0}; const struct pl_color_space pl_color_space_srgb = { .primaries = PL_COLOR_PRIM_BT_709, .transfer = PL_COLOR_TRC_SRGB, }; const struct pl_color_space pl_color_space_bt709 = { .primaries = PL_COLOR_PRIM_BT_709, .transfer = PL_COLOR_TRC_BT_1886, }; const struct pl_color_space pl_color_space_hdr10 = { .primaries = PL_COLOR_PRIM_BT_2020, .transfer = PL_COLOR_TRC_PQ, }; const struct pl_color_space pl_color_space_bt2020_hlg = { .primaries = PL_COLOR_PRIM_BT_2020, .transfer = PL_COLOR_TRC_HLG, }; 
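// Editorial sketch (not part of the upstream source): a minimal illustration of
// converting luminance values with pl_hdr_rescale() as defined above. The
// concrete numbers assume the library's default SDR reference white
// (PL_COLOR_SDR_WHITE, 203 nits in recent versions); the 1000 nit peak is an
// arbitrary example value.
static inline void pl_example_hdr_rescale_usage(void)
{
    const float peak_nits = 1000.0f;
    // Peak relative to SDR reference white (roughly 4.93 for a 203 nit white)
    const float peak_norm = pl_hdr_rescale(PL_HDR_NITS, PL_HDR_NORM, peak_nits);
    // The same peak as a PQ-encoded signal value in [0,1] (roughly 0.75)
    const float peak_pq = pl_hdr_rescale(PL_HDR_NITS, PL_HDR_PQ, peak_nits);
    (void) peak_norm;
    (void) peak_pq;
}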
const struct pl_color_space pl_color_space_monitor = { .primaries = PL_COLOR_PRIM_BT_709, // sRGB primaries .transfer = PL_COLOR_TRC_UNKNOWN, // unknown SDR response }; bool pl_color_space_is_hdr(const struct pl_color_space *csp) { return csp->hdr.max_luma > PL_COLOR_SDR_WHITE || pl_color_transfer_is_hdr(csp->transfer); } bool pl_color_space_is_black_scaled(const struct pl_color_space *csp) { switch (csp->transfer) { case PL_COLOR_TRC_UNKNOWN: case PL_COLOR_TRC_SRGB: case PL_COLOR_TRC_LINEAR: case PL_COLOR_TRC_GAMMA18: case PL_COLOR_TRC_GAMMA20: case PL_COLOR_TRC_GAMMA22: case PL_COLOR_TRC_GAMMA24: case PL_COLOR_TRC_GAMMA26: case PL_COLOR_TRC_GAMMA28: case PL_COLOR_TRC_PRO_PHOTO: case PL_COLOR_TRC_ST428: case PL_COLOR_TRC_HLG: return true; case PL_COLOR_TRC_BT_1886: case PL_COLOR_TRC_PQ: case PL_COLOR_TRC_V_LOG: case PL_COLOR_TRC_S_LOG1: case PL_COLOR_TRC_S_LOG2: return false; case PL_COLOR_TRC_COUNT: break; } pl_unreachable(); } #define MAP3(...) \ do { \ float X; \ for (int _i = 0; _i < 3; _i++) { \ X = color[_i]; \ color[_i] = __VA_ARGS__; \ } \ } while (0) void pl_color_linearize(const struct pl_color_space *csp, float color[3]) { if (csp->transfer == PL_COLOR_TRC_LINEAR) return; float csp_min, csp_max; pl_color_space_nominal_luma_ex(pl_nominal_luma_params( .color = csp, .metadata = PL_HDR_METADATA_HDR10, .scaling = PL_HDR_NORM, .out_min = &csp_min, .out_max = &csp_max, )); MAP3(fmaxf(X, 0)); switch (csp->transfer) { case PL_COLOR_TRC_SRGB: MAP3(X > 0.04045f ? powf((X + 0.055f) / 1.055f, 2.4f) : X / 12.92f); goto scale_out; case PL_COLOR_TRC_BT_1886: { const float lb = powf(csp_min, 1/2.4f); const float lw = powf(csp_max, 1/2.4f); const float a = powf(lw - lb, 2.4f); const float b = lb / (lw - lb); MAP3(a * powf(X + b, 2.4f)); return; } case PL_COLOR_TRC_GAMMA18: MAP3(powf(X, 1.8f)); goto scale_out; case PL_COLOR_TRC_GAMMA20: MAP3(powf(X, 2.0f)); goto scale_out; case PL_COLOR_TRC_UNKNOWN: case PL_COLOR_TRC_GAMMA22: MAP3(powf(X, 2.2f)); goto scale_out; case PL_COLOR_TRC_GAMMA24: MAP3(powf(X, 2.4f)); goto scale_out; case PL_COLOR_TRC_GAMMA26: MAP3(powf(X, 2.6f)); goto scale_out; case PL_COLOR_TRC_GAMMA28: MAP3(powf(X, 2.8f)); goto scale_out; case PL_COLOR_TRC_PRO_PHOTO: MAP3(X > 0.03125f ? powf(X, 1.8f) : X / 16); goto scale_out; case PL_COLOR_TRC_ST428: MAP3(52.37f/48 * powf(X, 2.6f)); goto scale_out; case PL_COLOR_TRC_PQ: MAP3(powf(X, 1 / PQ_M2)); MAP3(fmaxf(X - PQ_C1, 0) / (PQ_C2 - PQ_C3 * X)); MAP3(10000 / PL_COLOR_SDR_WHITE * powf(X, 1 / PQ_M1)); return; case PL_COLOR_TRC_HLG: { const float y = fmaxf(1.2f + 0.42f * log10f(csp_max / HLG_REF), 1); const float b = sqrtf(3 * powf(csp_min / csp_max, 1 / y)); const pl_matrix3x3 rgb2xyz = pl_get_rgb2xyz_matrix(pl_raw_primaries_get(csp->primaries)); const float *coef = rgb2xyz.m[1]; // OETF^-1 MAP3((1 - b) * X + b); MAP3(X > 0.5f ? expf((X - HLG_C) / HLG_A) + HLG_B : 4 * X * X); // OOTF float luma = coef[0] * color[0] + coef[1] * color[1] + coef[2] * color[2]; luma = powf(fmaxf(luma / 12, 0), y - 1); MAP3(luma * X / 12); return; } case PL_COLOR_TRC_V_LOG: MAP3(X >= 0.181f ? powf(10, (X - VLOG_D) / VLOG_C) - VLOG_B : (X - 0.125f) / 5.6f); return; case PL_COLOR_TRC_S_LOG1: MAP3(powf(10, (X - SLOG_C) / SLOG_A) - SLOG_B); return; case PL_COLOR_TRC_S_LOG2: MAP3(X >= SLOG_Q ? 
(powf(10, (X - SLOG_C) / SLOG_A) - SLOG_B) / SLOG_K2 : (X - SLOG_Q) / SLOG_P); return; case PL_COLOR_TRC_LINEAR: case PL_COLOR_TRC_COUNT: break; } pl_unreachable(); scale_out: MAP3((csp_max - csp_min) * X + csp_min); } void pl_color_delinearize(const struct pl_color_space *csp, float color[3]) { if (csp->transfer == PL_COLOR_TRC_LINEAR) return; float csp_min, csp_max; pl_color_space_nominal_luma_ex(pl_nominal_luma_params( .color = csp, .metadata = PL_HDR_METADATA_HDR10, .scaling = PL_HDR_NORM, .out_min = &csp_min, .out_max = &csp_max, )); if (pl_color_space_is_black_scaled(csp) && csp->transfer != PL_COLOR_TRC_HLG) MAP3((X - csp_min) / (csp_max - csp_min)); MAP3(fmaxf(X, 0)); switch (csp->transfer) { case PL_COLOR_TRC_SRGB: MAP3(X >= 0.0031308f ? 1.055f * powf(X, 1/2.4f) - 0.055f : 12.92f * X); return; case PL_COLOR_TRC_BT_1886: { const float lb = powf(csp_min, 1/2.4f); const float lw = powf(csp_max, 1/2.4f); const float a = powf(lw - lb, 2.4f); const float b = lb / (lw - lb); MAP3(powf(X / a, 1/2.4f) - b); return; } case PL_COLOR_TRC_GAMMA18: MAP3(powf(X, 1/1.8f)); return; case PL_COLOR_TRC_GAMMA20: MAP3(powf(X, 1/2.0f)); return; case PL_COLOR_TRC_UNKNOWN: case PL_COLOR_TRC_GAMMA22: MAP3(powf(X, 1/2.2f)); return; case PL_COLOR_TRC_GAMMA24: MAP3(powf(X, 1/2.4f)); return; case PL_COLOR_TRC_GAMMA26: MAP3(powf(X, 1/2.6f)); return; case PL_COLOR_TRC_GAMMA28: MAP3(powf(X, 1/2.8f)); return; case PL_COLOR_TRC_ST428: MAP3(powf(X * 48/52.37f, 1/2.6f)); return; case PL_COLOR_TRC_PRO_PHOTO: MAP3(X >= 0.001953f ? powf(X, 1/1.8f) : 16 * X); return; case PL_COLOR_TRC_PQ: MAP3(powf(X * PL_COLOR_SDR_WHITE / 10000, PQ_M1)); MAP3(powf((PQ_C1 + PQ_C2 * X) / (1 + PQ_C3 * X), PQ_M2)); return; case PL_COLOR_TRC_HLG: { const float y = fmaxf(1.2f + 0.42f * log10f(csp_max / HLG_REF), 1); const float b = sqrtf(3 * powf(csp_min / csp_max, 1 / y)); const pl_matrix3x3 rgb2xyz = pl_get_rgb2xyz_matrix(pl_raw_primaries_get(csp->primaries)); const float *coef = rgb2xyz.m[1]; // OOTF^-1 float luma = coef[0] * color[0] + coef[1] * color[1] + coef[2] * color[2]; luma = fmaxf(1e-6f, powf(luma / csp_max, (1 - y) / y)); MAP3(12 / csp_max * luma * X); // OETF MAP3(X > 1 ? HLG_A * logf(X - HLG_B) + HLG_C : 0.5f * sqrtf(X)); MAP3((X - b) / (1 - b)); return; } case PL_COLOR_TRC_V_LOG: MAP3(X >= 0.01f ? VLOG_C * log10f(X + VLOG_B) + VLOG_D : 5.6f * X + 0.125f); return; case PL_COLOR_TRC_S_LOG1: MAP3(SLOG_A * log10f(X + SLOG_B) + SLOG_C); return; case PL_COLOR_TRC_S_LOG2: MAP3(X >= 0 ? 
SLOG_A * log10f(SLOG_B * X + SLOG_C) : SLOG_P * X + SLOG_Q); return; case PL_COLOR_TRC_LINEAR: case PL_COLOR_TRC_COUNT: break; } pl_unreachable(); } void pl_color_space_merge(struct pl_color_space *orig, const struct pl_color_space *new) { if (!orig->primaries) orig->primaries = new->primaries; if (!orig->transfer) orig->transfer = new->transfer; pl_hdr_metadata_merge(&orig->hdr, &new->hdr); } bool pl_color_space_equal(const struct pl_color_space *c1, const struct pl_color_space *c2) { return c1->primaries == c2->primaries && c1->transfer == c2->transfer && pl_hdr_metadata_equal(&c1->hdr, &c2->hdr); } // Estimates luminance from maxRGB by looking at how monochromatic MaxSCL is static void luma_from_maxrgb(const struct pl_color_space *csp, enum pl_hdr_scaling scaling, float *out_max, float *out_avg) { const float maxscl = PL_MAX3(csp->hdr.scene_max[0], csp->hdr.scene_max[1], csp->hdr.scene_max[2]); if (!maxscl) return; struct pl_raw_primaries prim = csp->hdr.prim; pl_raw_primaries_merge(&prim, pl_raw_primaries_get(csp->primaries)); const pl_matrix3x3 rgb2xyz = pl_get_rgb2xyz_matrix(&prim); const float max_luma = rgb2xyz.m[1][0] * csp->hdr.scene_max[0] + rgb2xyz.m[1][1] * csp->hdr.scene_max[1] + rgb2xyz.m[1][2] * csp->hdr.scene_max[2]; const float coef = max_luma / maxscl; *out_max = pl_hdr_rescale(PL_HDR_NITS, scaling, max_luma); *out_avg = pl_hdr_rescale(PL_HDR_NITS, scaling, coef * csp->hdr.scene_avg); } static inline bool metadata_compat(enum pl_hdr_metadata_type metadata, enum pl_hdr_metadata_type compat) { return metadata == PL_HDR_METADATA_ANY || metadata == compat; } void pl_color_space_nominal_luma_ex(const struct pl_nominal_luma_params *params) { if (!params || (!params->out_min && !params->out_max && !params->out_avg)) return; const struct pl_color_space *csp = params->color; const enum pl_hdr_scaling scaling = params->scaling; float min_luma = 0, max_luma = 0, avg_luma = 0; if (params->metadata != PL_HDR_METADATA_NONE) { // Initialize from static HDR10 metadata, in all cases min_luma = pl_hdr_rescale(PL_HDR_NITS, scaling, csp->hdr.min_luma); max_luma = pl_hdr_rescale(PL_HDR_NITS, scaling, csp->hdr.max_luma); } if (metadata_compat(params->metadata, PL_HDR_METADATA_HDR10PLUS) && pl_hdr_metadata_contains(&csp->hdr, PL_HDR_METADATA_HDR10PLUS)) { luma_from_maxrgb(csp, scaling, &max_luma, &avg_luma); } if (metadata_compat(params->metadata, PL_HDR_METADATA_CIE_Y) && pl_hdr_metadata_contains(&csp->hdr, PL_HDR_METADATA_CIE_Y)) { max_luma = pl_hdr_rescale(PL_HDR_PQ, scaling, csp->hdr.max_pq_y); avg_luma = pl_hdr_rescale(PL_HDR_PQ, scaling, csp->hdr.avg_pq_y); } // Clamp to sane value range const float hdr_min = pl_hdr_rescale(PL_HDR_NITS, scaling, PL_COLOR_HDR_BLACK); const float hdr_max = pl_hdr_rescale(PL_HDR_PQ, scaling, 1.0f); max_luma = max_luma ? PL_CLAMP(max_luma, hdr_min, hdr_max) : 0; min_luma = min_luma ? 
PL_CLAMP(min_luma, hdr_min, hdr_max) : 0; if ((max_luma && min_luma >= max_luma) || min_luma >= hdr_max) min_luma = max_luma = 0; // sanity // PQ is always scaled down to absolute black, ignoring HDR metadata if (csp->transfer == PL_COLOR_TRC_PQ) min_luma = hdr_min; // Baseline/fallback metadata, inferred entirely from the colorspace // description and built-in default assumptions if (!max_luma) { if (csp->transfer == PL_COLOR_TRC_HLG) { max_luma = pl_hdr_rescale(PL_HDR_NITS, scaling, PL_COLOR_HLG_PEAK); } else { const float peak = pl_color_transfer_nominal_peak(csp->transfer); max_luma = pl_hdr_rescale(PL_HDR_NORM, scaling, peak); } } if (!min_luma) { if (pl_color_transfer_is_hdr(csp->transfer)) { min_luma = hdr_min; } else { const float peak = pl_hdr_rescale(scaling, PL_HDR_NITS, max_luma); min_luma = pl_hdr_rescale(PL_HDR_NITS, scaling, peak / PL_COLOR_SDR_CONTRAST); } } if (avg_luma) avg_luma = PL_CLAMP(avg_luma, min_luma, max_luma); // sanity if (params->out_min) *params->out_min = min_luma; if (params->out_max) *params->out_max = max_luma; if (params->out_avg) *params->out_avg = avg_luma; } void pl_color_space_infer(struct pl_color_space *space) { if (!space->primaries) space->primaries = PL_COLOR_PRIM_BT_709; if (!space->transfer) space->transfer = PL_COLOR_TRC_BT_1886; // Sanitize the static HDR metadata pl_color_space_nominal_luma_ex(pl_nominal_luma_params( .color = space, .metadata = PL_HDR_METADATA_HDR10, .scaling = PL_HDR_NITS, .out_max = &space->hdr.max_luma, // Preserve tagged minimum .out_min = space->hdr.min_luma ? NULL : &space->hdr.min_luma, )); // Default the signal color space based on the nominal raw primaries if (!pl_primaries_valid(&space->hdr.prim)) space->hdr.prim = *pl_raw_primaries_get(space->primaries); } static void infer_both_ref(struct pl_color_space *space, struct pl_color_space *ref) { pl_color_space_infer(ref); if (!space->primaries) { if (pl_color_primaries_is_wide_gamut(ref->primaries)) { space->primaries = PL_COLOR_PRIM_BT_709; } else { space->primaries = ref->primaries; } } if (!space->transfer) { switch (ref->transfer) { case PL_COLOR_TRC_UNKNOWN: case PL_COLOR_TRC_COUNT: pl_unreachable(); case PL_COLOR_TRC_BT_1886: case PL_COLOR_TRC_SRGB: case PL_COLOR_TRC_GAMMA22: // Re-use input transfer curve to avoid small adaptations space->transfer = ref->transfer; break; case PL_COLOR_TRC_PQ: case PL_COLOR_TRC_HLG: case PL_COLOR_TRC_V_LOG: case PL_COLOR_TRC_S_LOG1: case PL_COLOR_TRC_S_LOG2: // Pick BT.1886 model because it models SDR contrast accurately, // and we need contrast information for tone mapping space->transfer = PL_COLOR_TRC_BT_1886; break; case PL_COLOR_TRC_PRO_PHOTO: // ProPhotoRGB and sRGB are both piecewise with linear slope space->transfer = PL_COLOR_TRC_SRGB; break; case PL_COLOR_TRC_LINEAR: case PL_COLOR_TRC_GAMMA18: case PL_COLOR_TRC_GAMMA20: case PL_COLOR_TRC_GAMMA24: case PL_COLOR_TRC_GAMMA26: case PL_COLOR_TRC_GAMMA28: case PL_COLOR_TRC_ST428: // Pick pure power output curve to avoid introducing black crush space->transfer = PL_COLOR_TRC_GAMMA22; break; } } // Infer the remaining fields after making the above choices pl_color_space_infer(space); } void pl_color_space_infer_ref(struct pl_color_space *space, const struct pl_color_space *refp) { // Make a copy of `refp` to infer missing values first struct pl_color_space ref = *refp; infer_both_ref(space, &ref); } void pl_color_space_infer_map(struct pl_color_space *src, struct pl_color_space *dst) { bool unknown_src_contrast = !src->hdr.min_luma; bool unknown_dst_contrast = 
!dst->hdr.min_luma; infer_both_ref(dst, src); // If the src has an unspecified gamma curve with dynamic black scaling, // default it to match the dst colorspace contrast. This does not matter in // most cases, but ensures that BT.1886 is tuned to the appropriate black // point by default. bool dynamic_src_contrast = pl_color_space_is_black_scaled(src) || src->transfer == PL_COLOR_TRC_BT_1886; if (unknown_src_contrast && dynamic_src_contrast) src->hdr.min_luma = dst->hdr.min_luma; // Do the same in reverse if both src and dst are SDR curves bool src_is_sdr = !pl_color_space_is_hdr(src); bool dst_is_sdr = !pl_color_space_is_hdr(dst); if (unknown_dst_contrast && src_is_sdr && dst_is_sdr) dst->hdr.min_luma = src->hdr.min_luma; // If the src is HLG and the output is HDR, tune the HLG peak to the output if (src->transfer == PL_COLOR_TRC_HLG && pl_color_space_is_hdr(dst)) src->hdr.max_luma = dst->hdr.max_luma; } const struct pl_color_adjustment pl_color_adjustment_neutral = { PL_COLOR_ADJUSTMENT_NEUTRAL }; void pl_chroma_location_offset(enum pl_chroma_location loc, float *x, float *y) { *x = *y = 0; // This is the majority of subsampled chroma content out there loc = PL_DEF(loc, PL_CHROMA_LEFT); switch (loc) { case PL_CHROMA_LEFT: case PL_CHROMA_TOP_LEFT: case PL_CHROMA_BOTTOM_LEFT: *x = -0.5; break; default: break; } switch (loc) { case PL_CHROMA_TOP_LEFT: case PL_CHROMA_TOP_CENTER: *y = -0.5; break; default: break; } switch (loc) { case PL_CHROMA_BOTTOM_LEFT: case PL_CHROMA_BOTTOM_CENTER: *y = 0.5; break; default: break; } } struct pl_cie_xy pl_white_from_temp(float temp) { temp = PL_CLAMP(temp, 2500, 25000); double ti = 1000.0 / temp, ti2 = ti * ti, ti3 = ti2 * ti, x; if (temp <= 7000) { x = -4.6070 * ti3 + 2.9678 * ti2 + 0.09911 * ti + 0.244063; } else { x = -2.0064 * ti3 + 1.9018 * ti2 + 0.24748 * ti + 0.237040; } return (struct pl_cie_xy) { .x = x, .y = -3 * (x*x) + 2.87 * x - 0.275, }; } bool pl_raw_primaries_equal(const struct pl_raw_primaries *a, const struct pl_raw_primaries *b) { return pl_cie_xy_equal(&a->red, &b->red) && pl_cie_xy_equal(&a->green, &b->green) && pl_cie_xy_equal(&a->blue, &b->blue) && pl_cie_xy_equal(&a->white, &b->white); } bool pl_raw_primaries_similar(const struct pl_raw_primaries *a, const struct pl_raw_primaries *b) { float delta = fabsf(a->red.x - b->red.x) + fabsf(a->red.y - b->red.y) + fabsf(a->green.x - b->green.x) + fabsf(a->green.y - b->green.y) + fabsf(a->blue.x - b->blue.x) + fabsf(a->blue.y - b->blue.y) + fabsf(a->white.x - b->white.x) + fabsf(a->white.y - b->white.y); return delta < 0.001; } void pl_raw_primaries_merge(struct pl_raw_primaries *orig, const struct pl_raw_primaries *update) { union { struct pl_raw_primaries prim; float raw[8]; } *pa = (void *) orig, *pb = (void *) update; pl_static_assert(sizeof(*pa) == sizeof(*orig)); for (int i = 0; i < PL_ARRAY_SIZE(pa->raw); i++) pa->raw[i] = PL_DEF(pa->raw[i], pb->raw[i]); } const struct pl_raw_primaries *pl_raw_primaries_get(enum pl_color_primaries prim) { /* Values from: ITU-R Recommendations BT.470-6, BT.601-7, BT.709-5, BT.2020-0 https://www.itu.int/dms_pubrec/itu-r/rec/bt/R-REC-BT.470-6-199811-S!!PDF-E.pdf https://www.itu.int/dms_pubrec/itu-r/rec/bt/R-REC-BT.601-7-201103-I!!PDF-E.pdf https://www.itu.int/dms_pubrec/itu-r/rec/bt/R-REC-BT.709-5-200204-I!!PDF-E.pdf https://www.itu.int/dms_pubrec/itu-r/rec/bt/R-REC-BT.2020-0-201208-I!!PDF-E.pdf Other colorspaces from https://en.wikipedia.org/wiki/RGB_color_space#Specifications */ // CIE standard illuminant series #define CIE_D50 {0.3457, 0.3585} 
#define CIE_D65 {0.3127, 0.3290} #define CIE_C {0.3100, 0.3160} #define CIE_E {1.0/3.0, 1.0/3.0} #define DCI {0.3140, 0.3510} static const struct pl_raw_primaries primaries[] = { [PL_COLOR_PRIM_BT_470M] = { .red = {0.670, 0.330}, .green = {0.210, 0.710}, .blue = {0.140, 0.080}, .white = CIE_C, }, [PL_COLOR_PRIM_BT_601_525] = { .red = {0.630, 0.340}, .green = {0.310, 0.595}, .blue = {0.155, 0.070}, .white = CIE_D65, }, [PL_COLOR_PRIM_BT_601_625] = { .red = {0.640, 0.330}, .green = {0.290, 0.600}, .blue = {0.150, 0.060}, .white = CIE_D65, }, [PL_COLOR_PRIM_BT_709] = { .red = {0.640, 0.330}, .green = {0.300, 0.600}, .blue = {0.150, 0.060}, .white = CIE_D65, }, [PL_COLOR_PRIM_BT_2020] = { .red = {0.708, 0.292}, .green = {0.170, 0.797}, .blue = {0.131, 0.046}, .white = CIE_D65, }, [PL_COLOR_PRIM_APPLE] = { .red = {0.625, 0.340}, .green = {0.280, 0.595}, .blue = {0.115, 0.070}, .white = CIE_D65, }, [PL_COLOR_PRIM_ADOBE] = { .red = {0.640, 0.330}, .green = {0.210, 0.710}, .blue = {0.150, 0.060}, .white = CIE_D65, }, [PL_COLOR_PRIM_PRO_PHOTO] = { .red = {0.7347, 0.2653}, .green = {0.1596, 0.8404}, .blue = {0.0366, 0.0001}, .white = CIE_D50, }, [PL_COLOR_PRIM_CIE_1931] = { .red = {0.7347, 0.2653}, .green = {0.2738, 0.7174}, .blue = {0.1666, 0.0089}, .white = CIE_E, }, // From SMPTE RP 431-2 [PL_COLOR_PRIM_DCI_P3] = { .red = {0.680, 0.320}, .green = {0.265, 0.690}, .blue = {0.150, 0.060}, .white = DCI, }, [PL_COLOR_PRIM_DISPLAY_P3] = { .red = {0.680, 0.320}, .green = {0.265, 0.690}, .blue = {0.150, 0.060}, .white = CIE_D65, }, // From Panasonic VARICAM reference manual [PL_COLOR_PRIM_V_GAMUT] = { .red = {0.730, 0.280}, .green = {0.165, 0.840}, .blue = {0.100, -0.03}, .white = CIE_D65, }, // From Sony S-Log reference manual [PL_COLOR_PRIM_S_GAMUT] = { .red = {0.730, 0.280}, .green = {0.140, 0.855}, .blue = {0.100, -0.05}, .white = CIE_D65, }, // From FFmpeg source code [PL_COLOR_PRIM_FILM_C] = { .red = {0.681, 0.319}, .green = {0.243, 0.692}, .blue = {0.145, 0.049}, .white = CIE_C, }, [PL_COLOR_PRIM_EBU_3213] = { .red = {0.630, 0.340}, .green = {0.295, 0.605}, .blue = {0.155, 0.077}, .white = CIE_D65, }, // From Wikipedia [PL_COLOR_PRIM_ACES_AP0] = { .red = {0.7347, 0.2653}, .green = {0.0000, 1.0000}, .blue = {0.0001, -0.0770}, .white = {0.32168, 0.33767}, }, [PL_COLOR_PRIM_ACES_AP1] = { .red = {0.713, 0.293}, .green = {0.165, 0.830}, .blue = {0.128, 0.044}, .white = {0.32168, 0.33767}, }, }; // This is the default assumption if no colorspace information could // be determined, eg. for files which have no video channel. 
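    // (BT.709 shares its primaries with sRGB, which makes it the least
    // surprising fallback for untagged content.)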
if (!prim) prim = PL_COLOR_PRIM_BT_709; pl_assert(prim < PL_ARRAY_SIZE(primaries)); return &primaries[prim]; } // Compute the RGB/XYZ matrix as described here: // http://www.brucelindbloom.com/index.html?Eqn_RGB_XYZ_Matrix.html pl_matrix3x3 pl_get_rgb2xyz_matrix(const struct pl_raw_primaries *prim) { pl_matrix3x3 out = {{{0}}}; float S[3], X[4], Z[4]; X[0] = pl_cie_X(prim->red); X[1] = pl_cie_X(prim->green); X[2] = pl_cie_X(prim->blue); X[3] = pl_cie_X(prim->white); Z[0] = pl_cie_Z(prim->red); Z[1] = pl_cie_Z(prim->green); Z[2] = pl_cie_Z(prim->blue); Z[3] = pl_cie_Z(prim->white); // S = XYZ^-1 * W for (int i = 0; i < 3; i++) { out.m[0][i] = X[i]; out.m[1][i] = 1; out.m[2][i] = Z[i]; } pl_matrix3x3_invert(&out); for (int i = 0; i < 3; i++) S[i] = out.m[i][0] * X[3] + out.m[i][1] * 1 + out.m[i][2] * Z[3]; // M = [Sc * XYZc] for (int i = 0; i < 3; i++) { out.m[0][i] = S[i] * X[i]; out.m[1][i] = S[i] * 1; out.m[2][i] = S[i] * Z[i]; } return out; } pl_matrix3x3 pl_get_xyz2rgb_matrix(const struct pl_raw_primaries *prim) { // For simplicity, just invert the rgb2xyz matrix pl_matrix3x3 out = pl_get_rgb2xyz_matrix(prim); pl_matrix3x3_invert(&out); return out; } // Matrix used in CAT16, a revised one-step linear transform method static const pl_matrix3x3 m_cat16 = {{ { 0.401288, 0.650173, -0.051461 }, { -0.250268, 1.204414, 0.045854 }, { -0.002079, 0.048952, 0.953127 }, }}; // M := M * XYZd<-XYZs static void apply_chromatic_adaptation(struct pl_cie_xy src, struct pl_cie_xy dest, pl_matrix3x3 *mat) { // If the white points are nearly identical, this is a wasteful identity // operation. if (fabs(src.x - dest.x) < 1e-6 && fabs(src.y - dest.y) < 1e-6) return; // Linear "von Kries" method, adapted from CIECAM16 // http://www.brucelindbloom.com/index.html?Eqn_ChromAdapt.html float C[3][2]; for (int i = 0; i < 3; i++) { // source cone C[i][0] = m_cat16.m[i][0] * pl_cie_X(src) + m_cat16.m[i][1] * 1 + m_cat16.m[i][2] * pl_cie_Z(src); // dest cone C[i][1] = m_cat16.m[i][0] * pl_cie_X(dest) + m_cat16.m[i][1] * 1 + m_cat16.m[i][2] * pl_cie_Z(dest); } // tmp := I * [Cd/Cs] * Ma pl_matrix3x3 tmp = {0}; for (int i = 0; i < 3; i++) tmp.m[i][i] = C[i][1] / C[i][0]; pl_matrix3x3_mul(&tmp, &m_cat16); // M := M * Ma^-1 * tmp pl_matrix3x3 ma_inv = m_cat16; pl_matrix3x3_invert(&ma_inv); pl_matrix3x3_mul(mat, &ma_inv); pl_matrix3x3_mul(mat, &tmp); } pl_matrix3x3 pl_get_adaptation_matrix(struct pl_cie_xy src, struct pl_cie_xy dst) { // Use BT.709 primaries (with chosen white point) as an XYZ reference struct pl_raw_primaries csp = *pl_raw_primaries_get(PL_COLOR_PRIM_BT_709); csp.white = src; pl_matrix3x3 rgb2xyz = pl_get_rgb2xyz_matrix(&csp); pl_matrix3x3 xyz2rgb = rgb2xyz; pl_matrix3x3_invert(&xyz2rgb); apply_chromatic_adaptation(src, dst, &xyz2rgb); pl_matrix3x3_mul(&xyz2rgb, &rgb2xyz); return xyz2rgb; } pl_matrix3x3 pl_ipt_rgb2lms(const struct pl_raw_primaries *prim) { static const pl_matrix3x3 hpe = {{ // HPE XYZ->LMS (D65) method { 0.40024f, 0.70760f, -0.08081f }, { -0.22630f, 1.16532f, 0.04570f }, { 0.00000f, 0.00000f, 0.91822f }, }}; const float c = 0.04; // 4% crosstalk pl_matrix3x3 m = {{ { 1 - 2*c, c, c }, { c, 1 - 2*c, c }, { c, c, 1 - 2*c }, }}; pl_matrix3x3_mul(&m, &hpe); // Apply chromatic adaptation to D65 if the input white point differs static const struct pl_cie_xy d65 = CIE_D65; apply_chromatic_adaptation(prim->white, d65, &m); const pl_matrix3x3 rgb2xyz = pl_get_rgb2xyz_matrix(prim); pl_matrix3x3_mul(&m, &rgb2xyz); return m; } pl_matrix3x3 pl_ipt_lms2rgb(const struct pl_raw_primaries *prim) { 
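    // Derived by numerically inverting the forward RGB->LMS matrix from
    // pl_ipt_rgb2lms(), so both directions stay consistent for any primaries.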
pl_matrix3x3 m = pl_ipt_rgb2lms(prim); pl_matrix3x3_invert(&m); return m; } // As standardized in Ebner & Fairchild IPT (1998) const pl_matrix3x3 pl_ipt_lms2ipt = {{ { 0.4000, 0.4000, 0.2000 }, { 4.4550, -4.8510, 0.3960 }, { 0.8056, 0.3572, -1.1628 }, }}; // Numerically inverted from the matrix above const pl_matrix3x3 pl_ipt_ipt2lms = {{ { 1.0, 0.0975689, 0.205226 }, { 1.0, -0.1138760, 0.133217 }, { 1.0, 0.0326151, -0.676887 }, }}; const struct pl_cone_params pl_vision_normal = {PL_CONE_NONE, 1.0}; const struct pl_cone_params pl_vision_protanomaly = {PL_CONE_L, 0.5}; const struct pl_cone_params pl_vision_protanopia = {PL_CONE_L, 0.0}; const struct pl_cone_params pl_vision_deuteranomaly = {PL_CONE_M, 0.5}; const struct pl_cone_params pl_vision_deuteranopia = {PL_CONE_M, 0.0}; const struct pl_cone_params pl_vision_tritanomaly = {PL_CONE_S, 0.5}; const struct pl_cone_params pl_vision_tritanopia = {PL_CONE_S, 0.0}; const struct pl_cone_params pl_vision_monochromacy = {PL_CONE_LM, 0.0}; const struct pl_cone_params pl_vision_achromatopsia = {PL_CONE_LMS, 0.0}; pl_matrix3x3 pl_get_cone_matrix(const struct pl_cone_params *params, const struct pl_raw_primaries *prim) { // LMS<-RGB := LMS<-XYZ * XYZ<-RGB pl_matrix3x3 rgb2lms = m_cat16; pl_matrix3x3 rgb2xyz = pl_get_rgb2xyz_matrix(prim); pl_matrix3x3_mul(&rgb2lms, &rgb2xyz); // LMS versions of the two opposing primaries, plus neutral float lms_r[3] = {1.0, 0.0, 0.0}, lms_b[3] = {0.0, 0.0, 1.0}, lms_w[3] = {1.0, 1.0, 1.0}; pl_matrix3x3_apply(&rgb2lms, lms_r); pl_matrix3x3_apply(&rgb2lms, lms_b); pl_matrix3x3_apply(&rgb2lms, lms_w); float a, b, c = params->strength; pl_matrix3x3 distort; switch (params->cones) { case PL_CONE_NONE: return pl_matrix3x3_identity; case PL_CONE_L: // Solve to preserve neutral and blue a = (lms_b[0] - lms_b[2] * lms_w[0] / lms_w[2]) / (lms_b[1] - lms_b[2] * lms_w[1] / lms_w[2]); b = (lms_b[0] - lms_b[1] * lms_w[0] / lms_w[1]) / (lms_b[2] - lms_b[1] * lms_w[2] / lms_w[1]); assert(fabs(a * lms_w[1] + b * lms_w[2] - lms_w[0]) < 1e-6); distort = (pl_matrix3x3) {{ { c, (1.0 - c) * a, (1.0 - c) * b}, { 0.0, 1.0, 0.0}, { 0.0, 0.0, 1.0}, }}; break; case PL_CONE_M: // Solve to preserve neutral and blue a = (lms_b[1] - lms_b[2] * lms_w[1] / lms_w[2]) / (lms_b[0] - lms_b[2] * lms_w[0] / lms_w[2]); b = (lms_b[1] - lms_b[0] * lms_w[1] / lms_w[0]) / (lms_b[2] - lms_b[0] * lms_w[2] / lms_w[0]); assert(fabs(a * lms_w[0] + b * lms_w[2] - lms_w[1]) < 1e-6); distort = (pl_matrix3x3) {{ { 1.0, 0.0, 0.0}, {(1.0 - c) * a, c, (1.0 - c) * b}, { 0.0, 0.0, 1.0}, }}; break; case PL_CONE_S: // Solve to preserve neutral and red a = (lms_r[2] - lms_r[1] * lms_w[2] / lms_w[1]) / (lms_r[0] - lms_r[1] * lms_w[0] / lms_w[1]); b = (lms_r[2] - lms_r[0] * lms_w[2] / lms_w[0]) / (lms_r[1] - lms_r[0] * lms_w[1] / lms_w[0]); assert(fabs(a * lms_w[0] + b * lms_w[1] - lms_w[2]) < 1e-6); distort = (pl_matrix3x3) {{ { 1.0, 0.0, 0.0}, { 0.0, 1.0, 0.0}, {(1.0 - c) * a, (1.0 - c) * b, c}, }}; break; case PL_CONE_LM: // Solve to preserve neutral a = lms_w[0] / lms_w[2]; b = lms_w[1] / lms_w[2]; distort = (pl_matrix3x3) {{ { c, 0.0, (1.0 - c) * a}, { 0.0, c, (1.0 - c) * b}, { 0.0, 0.0, 1.0}, }}; break; case PL_CONE_MS: // Solve to preserve neutral a = lms_w[1] / lms_w[0]; b = lms_w[2] / lms_w[0]; distort = (pl_matrix3x3) {{ { 1.0, 0.0, 0.0}, {(1.0 - c) * a, c, 0.0}, {(1.0 - c) * b, 0.0, c}, }}; break; case PL_CONE_LS: // Solve to preserve neutral a = lms_w[0] / lms_w[1]; b = lms_w[2] / lms_w[1]; distort = (pl_matrix3x3) {{ { c, (1.0 - c) * a, 0.0}, { 0.0, 1.0, 0.0}, { 
0.0, (1.0 - c) * b, c}, }}; break; case PL_CONE_LMS: { // Rod cells only, which can be modelled somewhat as a combination of // L and M cones. Either way, this is pushing the limits of the our // color model, so this is only a rough approximation. const float w[3] = {0.3605, 0.6415, -0.002}; assert(fabs(w[0] + w[1] + w[2] - 1.0) < 1e-6); for (int i = 0; i < 3; i++) { for (int j = 0; j < 3; j++) { distort.m[i][j] = (1.0 - c) * w[j] * lms_w[i] / lms_w[j]; if (i == j) distort.m[i][j] += c; } } break; } default: pl_unreachable(); } // out := RGB<-LMS * distort * LMS<-RGB pl_matrix3x3 out = rgb2lms; pl_matrix3x3_invert(&out); pl_matrix3x3_mul(&out, &distort); pl_matrix3x3_mul(&out, &rgb2lms); return out; } pl_matrix3x3 pl_get_color_mapping_matrix(const struct pl_raw_primaries *src, const struct pl_raw_primaries *dst, enum pl_rendering_intent intent) { // In saturation mapping, we don't care about accuracy and just want // primaries to map to primaries, making this an identity transformation. if (intent == PL_INTENT_SATURATION) return pl_matrix3x3_identity; // RGBd<-RGBs = RGBd<-XYZd * XYZd<-XYZs * XYZs<-RGBs // Equations from: http://www.brucelindbloom.com/index.html?Math.html // Note: Perceptual is treated like relative colorimetric. There's no // definition for perceptual other than "make it look good". // RGBd<-XYZd matrix pl_matrix3x3 xyz2rgb_d = pl_get_xyz2rgb_matrix(dst); // Chromatic adaptation, except in absolute colorimetric intent if (intent != PL_INTENT_ABSOLUTE_COLORIMETRIC) apply_chromatic_adaptation(src->white, dst->white, &xyz2rgb_d); // XYZs<-RGBs pl_matrix3x3 rgb2xyz_s = pl_get_rgb2xyz_matrix(src); pl_matrix3x3_mul(&xyz2rgb_d, &rgb2xyz_s); return xyz2rgb_d; } // Test the sign of 'p' relative to the line 'ab' (barycentric coordinates) static float test_point_line(const struct pl_cie_xy p, const struct pl_cie_xy a, const struct pl_cie_xy b) { return (p.x - b.x) * (a.y - b.y) - (a.x - b.x) * (p.y - b.y); } // Test if a point is entirely inside a gamut static float test_point_gamut(struct pl_cie_xy point, const struct pl_raw_primaries *prim) { float d1 = test_point_line(point, prim->red, prim->green), d2 = test_point_line(point, prim->green, prim->blue), d3 = test_point_line(point, prim->blue, prim->red); bool has_neg = d1 < -1e-6f || d2 < -1e-6f || d3 < -1e-6f, has_pos = d1 > 1e-6f || d2 > 1e-6f || d3 > 1e-6f; return !(has_neg && has_pos); } bool pl_primaries_superset(const struct pl_raw_primaries *a, const struct pl_raw_primaries *b) { return test_point_gamut(b->red, a) && test_point_gamut(b->green, a) && test_point_gamut(b->blue, a); } bool pl_primaries_valid(const struct pl_raw_primaries *prim) { // Test to see if the primaries form a valid triangle (nonzero area) float area = (prim->blue.x - prim->green.x) * (prim->red.y - prim->green.y) - (prim->red.x - prim->green.x) * (prim->blue.y - prim->green.y); return fabs(area) > 1e-6 && test_point_gamut(prim->white, prim); } static inline float xy_dist2(struct pl_cie_xy a, struct pl_cie_xy b) { const float dx = a.x - b.x, dy = a.y - b.y; return dx * dx + dy * dy; } bool pl_primaries_compatible(const struct pl_raw_primaries *a, const struct pl_raw_primaries *b) { float RR = xy_dist2(a->red, b->red), RG = xy_dist2(a->red, b->green), RB = xy_dist2(a->red, b->blue), GG = xy_dist2(a->green, b->green), GB = xy_dist2(a->green, b->blue), BB = xy_dist2(a->blue, b->blue); return RR < RG && RR < RB && GG < RG && GG < GB && BB < RB && BB < GB; } // returns the intersection of the two lines defined by ab and cd static struct pl_cie_xy 
intersection(struct pl_cie_xy a, struct pl_cie_xy b, struct pl_cie_xy c, struct pl_cie_xy d) { float det = (a.x - b.x) * (c.y - d.y) - (a.y - b.y) * (c.x - d.x); float t = ((a.x - c.x) * (c.y - d.y) - (a.y - c.y) * (c.x - d.x)) / det; return (struct pl_cie_xy) { .x = t ? a.x + t * (b.x - a.x) : 0.0f, .y = t ? a.y + t * (b.y - a.y) : 0.0f, }; } // x, y, z specified in clockwise order, with a, b, c being the enclosing gamut static struct pl_cie_xy clip_point(struct pl_cie_xy x, struct pl_cie_xy y, struct pl_cie_xy z, struct pl_cie_xy a, struct pl_cie_xy b, struct pl_cie_xy c) { const float d1 = test_point_line(y, a, b); const float d2 = test_point_line(y, b, c); if (d1 <= 0.0f && d2 <= 0.0f) { return y; // already inside triangle } else if (d1 > 0.0f && d2 > 0.0f) { return b; // target vertex fully enclosed } else if (d1 > 0.0f) { return intersection(a, b, y, z); } else { return intersection(x, y, b, c); } } struct pl_raw_primaries pl_primaries_clip(const struct pl_raw_primaries *src, const struct pl_raw_primaries *dst) { return (struct pl_raw_primaries) { .red = clip_point(src->green, src->red, src->blue, dst->green, dst->red, dst->blue), .green = clip_point(src->blue, src->green, src->red, dst->blue, dst->green, dst->red), .blue = clip_point(src->red, src->blue, src->green, dst->red, dst->blue, dst->green), .white = src->white, }; } /* Fill in the Y, U, V vectors of a yuv-to-rgb conversion matrix * based on the given luma weights of the R, G and B components (lr, lg, lb). * lr+lg+lb is assumed to equal 1. * This function is meant for colorspaces satisfying the following * conditions (which are true for common YUV colorspaces): * - The mapping from input [Y, U, V] to output [R, G, B] is linear. * - Y is the vector [1, 1, 1]. (meaning input Y component maps to 1R+1G+1B) * - U maps to a value with zero R and positive B ([0, x, y], y > 0; * i.e. blue and green only). * - V maps to a value with zero B and positive R ([x, y, 0], x > 0; * i.e. red and green only). * - U and V are orthogonal to the luma vector [lr, lg, lb]. * - The magnitudes of the vectors U and V are the minimal ones for which * the image of the set Y=[0...1],U=[-0.5...0.5],V=[-0.5...0.5] under the * conversion function will cover the set R=[0...1],G=[0...1],B=[0...1] * (the resulting matrix can be converted for other input/output ranges * outside this function). * Under these conditions the given parameters lr, lg, lb uniquely * determine the mapping of Y, U, V to R, G, B. */ static pl_matrix3x3 luma_coeffs(float lr, float lg, float lb) { pl_assert(fabs(lr+lg+lb - 1) < 1e-6); return (pl_matrix3x3) {{ {1, 0, 2 * (1-lr) }, {1, -2 * (1-lb) * lb/lg, -2 * (1-lr) * lr/lg }, {1, 2 * (1-lb), 0 }, }}; } // Applies hue and saturation controls to a YCbCr->RGB matrix static inline void apply_hue_sat(pl_matrix3x3 *m, const struct pl_color_adjustment *params) { // Hue is equivalent to rotating input [U, V] subvector around the origin. // Saturation scales [U, V]. 
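    // The rotation/scaling is folded into the chroma columns of the matrix
    // (m[i][1] and m[i][2]) rather than applied to the input vector at runtime.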
float huecos = params->saturation * cos(params->hue); float huesin = params->saturation * sin(params->hue); for (int i = 0; i < 3; i++) { float u = m->m[i][1], v = m->m[i][2]; m->m[i][1] = huecos * u - huesin * v; m->m[i][2] = huesin * u + huecos * v; } } pl_transform3x3 pl_color_repr_decode(struct pl_color_repr *repr, const struct pl_color_adjustment *params) { params = PL_DEF(params, &pl_color_adjustment_neutral); pl_matrix3x3 m; switch (repr->sys) { case PL_COLOR_SYSTEM_BT_709: m = luma_coeffs(0.2126, 0.7152, 0.0722); break; case PL_COLOR_SYSTEM_BT_601: m = luma_coeffs(0.2990, 0.5870, 0.1140); break; case PL_COLOR_SYSTEM_SMPTE_240M: m = luma_coeffs(0.2122, 0.7013, 0.0865); break; case PL_COLOR_SYSTEM_BT_2020_NC: m = luma_coeffs(0.2627, 0.6780, 0.0593); break; case PL_COLOR_SYSTEM_BT_2020_C: // Note: This outputs into the [-0.5,0.5] range for chroma information. m = (pl_matrix3x3) {{ {0, 0, 1}, {1, 0, 0}, {0, 1, 0}, }}; break; case PL_COLOR_SYSTEM_BT_2100_PQ: { // Reversed from the matrix in the spec, hard-coded for efficiency // and precision reasons. Exact values truncated from ITU-T H-series // Supplement 18. static const float lm_t = 0.008609, lm_p = 0.111029625; m = (pl_matrix3x3) {{ {1.0, lm_t, lm_p}, {1.0, -lm_t, -lm_p}, {1.0, 0.560031, -0.320627}, }}; break; } case PL_COLOR_SYSTEM_BT_2100_HLG: { // Similar to BT.2100 PQ, exact values truncated from WolframAlpha static const float lm_t = 0.01571858011, lm_p = 0.2095810681; m = (pl_matrix3x3) {{ {1.0, lm_t, lm_p}, {1.0, -lm_t, -lm_p}, {1.0, 1.02127108, -0.605274491}, }}; break; } case PL_COLOR_SYSTEM_DOLBYVISION: m = repr->dovi->nonlinear; break; case PL_COLOR_SYSTEM_YCGCO: m = (pl_matrix3x3) {{ {1, -1, 1}, {1, 1, 0}, {1, -1, -1}, }}; break; case PL_COLOR_SYSTEM_UNKNOWN: // fall through case PL_COLOR_SYSTEM_RGB: m = pl_matrix3x3_identity; break; case PL_COLOR_SYSTEM_XYZ: { // For lack of anything saner to do, just assume the caller wants // DCI-P3 primaries, which is a reasonable assumption. const struct pl_raw_primaries *dst = pl_raw_primaries_get(PL_COLOR_PRIM_DCI_P3); m = pl_get_xyz2rgb_matrix(dst); // DCDM X'Y'Z' is expected to have equal energy white point (EG 432-1 Annex H) apply_chromatic_adaptation((struct pl_cie_xy)CIE_E, dst->white, &m); break; } case PL_COLOR_SYSTEM_COUNT: pl_unreachable(); } // Apply hue and saturation in the correct way depending on the colorspace. if (pl_color_system_is_ycbcr_like(repr->sys)) { apply_hue_sat(&m, params); } else if (params->saturation != 1.0 || params->hue != 0.0) { // Arbitrarily simulate hue shifts using the BT.709 YCbCr model pl_matrix3x3 yuv2rgb = luma_coeffs(0.2126, 0.7152, 0.0722); pl_matrix3x3 rgb2yuv = yuv2rgb; pl_matrix3x3_invert(&rgb2yuv); apply_hue_sat(&yuv2rgb, params); // M := RGB<-YUV * YUV<-RGB * M pl_matrix3x3_rmul(&rgb2yuv, &m); pl_matrix3x3_rmul(&yuv2rgb, &m); } // Apply color temperature adaptation, relative to BT.709 primaries if (params->temperature) { struct pl_cie_xy src = pl_white_from_temp(6500); struct pl_cie_xy dst = pl_white_from_temp(6500 + 3500 * params->temperature); pl_matrix3x3 adapt = pl_get_adaptation_matrix(src, dst); pl_matrix3x3_rmul(&adapt, &m); } pl_transform3x3 out = { .mat = m }; int bit_depth = PL_DEF(repr->bits.sample_depth, PL_DEF(repr->bits.color_depth, 8)); double ymax, ymin, cmax, cmid; double scale = (1LL << bit_depth) / ((1LL << bit_depth) - 1.0); switch (pl_color_levels_guess(repr)) { case PL_COLOR_LEVELS_LIMITED: { ymax = 235 / 256. * scale; ymin = 16 / 256. * scale; cmax = 240 / 256. * scale; cmid = 128 / 256. 
* scale; break; } case PL_COLOR_LEVELS_FULL: // Note: For full-range YUV, there are multiple, subtly inconsistent // standards. So just pick the sanest implementation, which is to // assume MAX_INT == 1.0. ymax = 1.0; ymin = 0.0; cmax = 1.0; cmid = 128 / 256. * scale; // *not* exactly 0.5 break; default: pl_unreachable(); } double ymul = 1.0 / (ymax - ymin); double cmul = 0.5 / (cmax - cmid); double mul[3] = { ymul, ymul, ymul }; double black[3] = { ymin, ymin, ymin }; #ifdef PL_HAVE_DOVI if (repr->sys == PL_COLOR_SYSTEM_DOLBYVISION) { // The RPU matrix already includes levels normalization, but in this // case we also have to respect the signalled color offsets for (int i = 0; i < 3; i++) { mul[i] = 1.0; black[i] = repr->dovi->nonlinear_offset[i] * scale; } } else #endif if (pl_color_system_is_ycbcr_like(repr->sys)) { mul[1] = mul[2] = cmul; black[1] = black[2] = cmid; } // Contrast scales the output value range (gain) // Brightness scales the constant output bias (black lift/boost) for (int i = 0; i < 3; i++) { mul[i] *= params->contrast; out.c[i] += params->brightness; } // Multiply in the texture multiplier and adjust `c` so that black[j] keeps // on mapping to RGB=0 (black to black) for (int i = 0; i < 3; i++) { for (int j = 0; j < 3; j++) { out.mat.m[i][j] *= mul[j]; out.c[i] -= out.mat.m[i][j] * black[j]; } } // Finally, multiply in the scaling factor required to get the color up to // the correct representation. pl_matrix3x3_scale(&out.mat, pl_color_repr_normalize(repr)); // Update the metadata to reflect the change. repr->sys = PL_COLOR_SYSTEM_RGB; repr->levels = PL_COLOR_LEVELS_FULL; return out; } bool pl_icc_profile_equal(const struct pl_icc_profile *p1, const struct pl_icc_profile *p2) { if (p1->len != p2->len) return false; // Ignore signatures on length-0 profiles, as a special case return !p1->len || p1->signature == p2->signature; } void pl_icc_profile_compute_signature(struct pl_icc_profile *profile) { if (!profile->len) profile->signature = 0; // In theory, we could get this value from the profile header itself if // lcms is available, but I'm not sure if it's even worth the trouble. Just // hard-code this to a pl_mem_hash(), which is decently fast anyway. profile->signature = pl_mem_hash(profile->data, profile->len); } libplacebo-v7.349.0/src/colorspace.h000066400000000000000000000032071463457750100172700ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . 
*/ #pragma once #include // Common constants for SMPTE ST.2084 (PQ) static const float PQ_M1 = 2610./4096 * 1./4, PQ_M2 = 2523./4096 * 128, PQ_C1 = 3424./4096, PQ_C2 = 2413./4096 * 32, PQ_C3 = 2392./4096 * 32; // Common constants for ARIB STD-B67 (HLG) static const float HLG_A = 0.17883277, HLG_B = 0.28466892, HLG_C = 0.55991073, HLG_REF = 1000.0 / PL_COLOR_SDR_WHITE; // Common constants for Panasonic V-Log static const float VLOG_B = 0.00873, VLOG_C = 0.241514, VLOG_D = 0.598206; // Common constants for Sony S-Log static const float SLOG_A = 0.432699, SLOG_B = 0.037584, SLOG_C = 0.616596 + 0.03, SLOG_P = 3.538813, SLOG_Q = 0.030001, SLOG_K2 = 155.0 / 219.0; libplacebo-v7.349.0/src/common.c000066400000000000000000000324431463457750100164250ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #include #include "common.h" #include "version.h" #include int pl_fix_ver(void) { return BUILD_FIX_VER; } const char *pl_version(void) { return BUILD_VERSION; } void pl_rect2d_normalize(pl_rect2d *rc) { *rc = (pl_rect2d) { .x0 = PL_MIN(rc->x0, rc->x1), .x1 = PL_MAX(rc->x0, rc->x1), .y0 = PL_MIN(rc->y0, rc->y1), .y1 = PL_MAX(rc->y0, rc->y1), }; } void pl_rect3d_normalize(pl_rect3d *rc) { *rc = (pl_rect3d) { .x0 = PL_MIN(rc->x0, rc->x1), .x1 = PL_MAX(rc->x0, rc->x1), .y0 = PL_MIN(rc->y0, rc->y1), .y1 = PL_MAX(rc->y0, rc->y1), .z0 = PL_MIN(rc->z0, rc->z1), .z1 = PL_MAX(rc->z0, rc->z1), }; } void pl_rect2df_normalize(pl_rect2df *rc) { *rc = (pl_rect2df) { .x0 = PL_MIN(rc->x0, rc->x1), .x1 = PL_MAX(rc->x0, rc->x1), .y0 = PL_MIN(rc->y0, rc->y1), .y1 = PL_MAX(rc->y0, rc->y1), }; } void pl_rect3df_normalize(pl_rect3df *rc) { *rc = (pl_rect3df) { .x0 = PL_MIN(rc->x0, rc->x1), .x1 = PL_MAX(rc->x0, rc->x1), .y0 = PL_MIN(rc->y0, rc->y1), .y1 = PL_MAX(rc->y0, rc->y1), .z0 = PL_MIN(rc->z0, rc->z1), .z1 = PL_MAX(rc->z0, rc->z1), }; } pl_rect2d pl_rect2df_round(const pl_rect2df *rc) { return (pl_rect2d) { .x0 = roundf(rc->x0), .x1 = roundf(rc->x1), .y0 = roundf(rc->y0), .y1 = roundf(rc->y1), }; } pl_rect3d pl_rect3df_round(const pl_rect3df *rc) { return (pl_rect3d) { .x0 = roundf(rc->x0), .x1 = roundf(rc->x1), .y0 = roundf(rc->y0), .y1 = roundf(rc->y1), .z0 = roundf(rc->z0), .z1 = roundf(rc->z1), }; } const pl_matrix3x3 pl_matrix3x3_identity = {{ { 1, 0, 0 }, { 0, 1, 0 }, { 0, 0, 1 }, }}; void pl_matrix3x3_apply(const pl_matrix3x3 *mat, float vec[3]) { float x = vec[0], y = vec[1], z = vec[2]; for (int i = 0; i < 3; i++) vec[i] = mat->m[i][0] * x + mat->m[i][1] * y + mat->m[i][2] * z; } void pl_matrix3x3_apply_rc(const pl_matrix3x3 *mat, pl_rect3df *rc) { float x0 = rc->x0, x1 = rc->x1, y0 = rc->y0, y1 = rc->y1, z0 = rc->z0, z1 = rc->z1; rc->x0 = mat->m[0][0] * x0 + mat->m[0][1] * y0 + mat->m[0][2] * z0; rc->y0 = mat->m[1][0] * x0 + mat->m[1][1] * y0 + mat->m[1][2] * z0; rc->z0 = mat->m[2][0] * x0 + mat->m[2][1] * y0 + mat->m[2][2] * z0; rc->x1 = mat->m[0][0] * x1 + mat->m[0][1] * y1 + 
mat->m[0][2] * z1; rc->y1 = mat->m[1][0] * x1 + mat->m[1][1] * y1 + mat->m[1][2] * z1; rc->z1 = mat->m[2][0] * x1 + mat->m[2][1] * y1 + mat->m[2][2] * z1; } void pl_matrix3x3_scale(pl_matrix3x3 *mat, float scale) { for (int i = 0; i < 3; i++) { for (int j = 0; j < 3; j++) mat->m[i][j] *= scale; } } void pl_matrix3x3_invert(pl_matrix3x3 *mat) { double m00 = mat->m[0][0], m01 = mat->m[0][1], m02 = mat->m[0][2], m10 = mat->m[1][0], m11 = mat->m[1][1], m12 = mat->m[1][2], m20 = mat->m[2][0], m21 = mat->m[2][1], m22 = mat->m[2][2]; // calculate the adjoint double a00 = (m11 * m22 - m21 * m12); double a01 = -(m01 * m22 - m21 * m02); double a02 = (m01 * m12 - m11 * m02); double a10 = -(m10 * m22 - m20 * m12); double a11 = (m00 * m22 - m20 * m02); double a12 = -(m00 * m12 - m10 * m02); double a20 = (m10 * m21 - m20 * m11); double a21 = -(m00 * m21 - m20 * m01); double a22 = (m00 * m11 - m10 * m01); // calculate the determinant (as inverse == 1/det * adjoint, // adjoint * m == identity * det, so this calculates the det) double det = m00 * a00 + m10 * a01 + m20 * a02; det = 1.0 / det; mat->m[0][0] = det * a00; mat->m[0][1] = det * a01; mat->m[0][2] = det * a02; mat->m[1][0] = det * a10; mat->m[1][1] = det * a11; mat->m[1][2] = det * a12; mat->m[2][0] = det * a20; mat->m[2][1] = det * a21; mat->m[2][2] = det * a22; } void pl_matrix3x3_mul(pl_matrix3x3 *a, const pl_matrix3x3 *b) { float a00 = a->m[0][0], a01 = a->m[0][1], a02 = a->m[0][2], a10 = a->m[1][0], a11 = a->m[1][1], a12 = a->m[1][2], a20 = a->m[2][0], a21 = a->m[2][1], a22 = a->m[2][2]; for (int i = 0; i < 3; i++) { a->m[0][i] = a00 * b->m[0][i] + a01 * b->m[1][i] + a02 * b->m[2][i]; a->m[1][i] = a10 * b->m[0][i] + a11 * b->m[1][i] + a12 * b->m[2][i]; a->m[2][i] = a20 * b->m[0][i] + a21 * b->m[1][i] + a22 * b->m[2][i]; } } void pl_matrix3x3_rmul(const pl_matrix3x3 *a, pl_matrix3x3 *b) { pl_matrix3x3 m = *a; pl_matrix3x3_mul(&m, b); *b = m; } const pl_transform3x3 pl_transform3x3_identity = { .mat = {{ { 1, 0, 0 }, { 0, 1, 0 }, { 0, 0, 1 }, }}, }; void pl_transform3x3_apply(const pl_transform3x3 *t, float vec[3]) { pl_matrix3x3_apply(&t->mat, vec); for (int i = 0; i < 3; i++) vec[i] += t->c[i]; } void pl_transform3x3_apply_rc(const pl_transform3x3 *t, pl_rect3df *rc) { pl_matrix3x3_apply_rc(&t->mat, rc); rc->x0 += t->c[0]; rc->x1 += t->c[0]; rc->y0 += t->c[1]; rc->y1 += t->c[1]; rc->z0 += t->c[2]; rc->z1 += t->c[2]; } void pl_transform3x3_scale(pl_transform3x3 *t, float scale) { pl_matrix3x3_scale(&t->mat, scale); for (int i = 0; i < 3; i++) t->c[i] *= scale; } // based on DarkPlaces engine (relicensed from GPL to LGPL) void pl_transform3x3_invert(pl_transform3x3 *t) { pl_matrix3x3_invert(&t->mat); float m00 = t->mat.m[0][0], m01 = t->mat.m[0][1], m02 = t->mat.m[0][2], m10 = t->mat.m[1][0], m11 = t->mat.m[1][1], m12 = t->mat.m[1][2], m20 = t->mat.m[2][0], m21 = t->mat.m[2][1], m22 = t->mat.m[2][2]; // fix the constant coefficient // rgb = M * yuv + C // M^-1 * rgb = yuv + M^-1 * C // yuv = M^-1 * rgb - M^-1 * C // ^^^^^^^^^^ float c0 = t->c[0], c1 = t->c[1], c2 = t->c[2]; t->c[0] = -(m00 * c0 + m01 * c1 + m02 * c2); t->c[1] = -(m10 * c0 + m11 * c1 + m12 * c2); t->c[2] = -(m20 * c0 + m21 * c1 + m22 * c2); } const pl_matrix2x2 pl_matrix2x2_identity = {{ { 1, 0 }, { 0, 1 }, }}; pl_matrix2x2 pl_matrix2x2_rotation(float a) { return (pl_matrix2x2) {{ { cosf(a), -sinf(a) }, { sinf(a), cosf(a) }, }}; } void pl_matrix2x2_apply(const pl_matrix2x2 *mat, float vec[2]) { float x = vec[0], y = vec[1]; for (int i = 0; i < 2; i++) vec[i] = mat->m[i][0] * x + 
mat->m[i][1] * y; } void pl_matrix2x2_apply_rc(const pl_matrix2x2 *mat, pl_rect2df *rc) { float x0 = rc->x0, x1 = rc->x1, y0 = rc->y0, y1 = rc->y1; rc->x0 = mat->m[0][0] * x0 + mat->m[0][1] * y0; rc->y0 = mat->m[1][0] * x0 + mat->m[1][1] * y0; rc->x1 = mat->m[0][0] * x1 + mat->m[0][1] * y1; rc->y1 = mat->m[1][0] * x1 + mat->m[1][1] * y1; } void pl_matrix2x2_mul(pl_matrix2x2 *a, const pl_matrix2x2 *b) { float a00 = a->m[0][0], a01 = a->m[0][1], a10 = a->m[1][0], a11 = a->m[1][1]; for (int i = 0; i < 2; i++) { a->m[0][i] = a00 * b->m[0][i] + a01 * b->m[1][i]; a->m[1][i] = a10 * b->m[0][i] + a11 * b->m[1][i]; } } void pl_matrix2x2_rmul(const pl_matrix2x2 *a, pl_matrix2x2 *b) { pl_matrix2x2 m = *a; pl_matrix2x2_mul(&m, b); *b = m; } void pl_matrix2x2_scale(pl_matrix2x2 *mat, float scale) { for (int i = 0; i < 2; i++) { for (int j = 0; j < 2; j++) mat->m[i][j] *= scale; } } void pl_matrix2x2_invert(pl_matrix2x2 *mat) { float m00 = mat->m[0][0], m01 = mat->m[0][1], m10 = mat->m[1][0], m11 = mat->m[1][1]; float invdet = 1.0f / (m11 * m00 - m10 * m01); mat->m[0][0] = m11 * invdet; mat->m[0][1] = -m01 * invdet; mat->m[1][0] = -m10 * invdet; mat->m[1][1] = m00 * invdet; } const pl_transform2x2 pl_transform2x2_identity = { .mat = {{ { 1, 0 }, { 0, 1 }, }}, }; void pl_transform2x2_apply(const pl_transform2x2 *t, float vec[2]) { pl_matrix2x2_apply(&t->mat, vec); for (int i = 0; i < 2; i++) vec[i] += t->c[i]; } void pl_transform2x2_apply_rc(const pl_transform2x2 *t, pl_rect2df *rc) { pl_matrix2x2_apply_rc(&t->mat, rc); rc->x0 += t->c[0]; rc->x1 += t->c[0]; rc->y0 += t->c[1]; rc->y1 += t->c[1]; } void pl_transform2x2_mul(pl_transform2x2 *a, const pl_transform2x2 *b) { float c[2] = { b->c[0], b->c[1] }; pl_transform2x2_apply(a, c); memcpy(a->c, c, sizeof(c)); pl_matrix2x2_mul(&a->mat, &b->mat); } void pl_transform2x2_rmul(const pl_transform2x2 *a, pl_transform2x2 *b) { pl_transform2x2_apply(a, b->c); pl_matrix2x2_rmul(&a->mat, &b->mat); } void pl_transform2x2_scale(pl_transform2x2 *t, float scale) { pl_matrix2x2_scale(&t->mat, scale); for (int i = 0; i < 2; i++) t->c[i] *= scale; } void pl_transform2x2_invert(pl_transform2x2 *t) { pl_matrix2x2_invert(&t->mat); float m00 = t->mat.m[0][0], m01 = t->mat.m[0][1], m10 = t->mat.m[1][0], m11 = t->mat.m[1][1]; float c0 = t->c[0], c1 = t->c[1]; t->c[0] = -(m00 * c0 + m01 * c1); t->c[1] = -(m10 * c0 + m11 * c1); } pl_rect2df pl_transform2x2_bounds(const pl_transform2x2 *t, const pl_rect2df *rc) { float p[4][2] = { { rc->x0, rc->y0 }, { rc->x0, rc->y1 }, { rc->x1, rc->y0 }, { rc->x1, rc->y1 }, }; for (int i = 0; i < PL_ARRAY_SIZE(p); i++) pl_transform2x2_apply(t, p[i]); return (pl_rect2df) { .x0 = fminf(fminf(p[0][0], p[1][0]), fminf(p[2][0], p[3][0])), .x1 = fmaxf(fmaxf(p[0][0], p[1][0]), fmaxf(p[2][0], p[3][0])), .y0 = fminf(fminf(p[0][1], p[1][1]), fminf(p[2][1], p[3][1])), .y1 = fmaxf(fmaxf(p[0][1], p[1][1]), fmaxf(p[2][1], p[3][1])), }; } float pl_rect2df_aspect(const pl_rect2df *rc) { float w = fabsf(pl_rect_w(*rc)), h = fabsf(pl_rect_h(*rc)); return h ? 
(w / h) : 0.0; } void pl_rect2df_aspect_set(pl_rect2df *rc, float aspect, float panscan) { pl_assert(aspect >= 0); float orig_aspect = pl_rect2df_aspect(rc); if (!aspect || !orig_aspect) return; float scale_x, scale_y; if (aspect > orig_aspect) { // New aspect is wider than the original, so we need to either grow in // scale_x (panscan=1) or shrink in scale_y (panscan=0) scale_x = powf(aspect / orig_aspect, panscan); scale_y = powf(aspect / orig_aspect, panscan - 1.0); } else if (aspect < orig_aspect) { // New aspect is taller, so either grow in scale_y (panscan=1) or // shrink in scale_x (panscan=0) scale_x = powf(orig_aspect / aspect, panscan - 1.0); scale_y = powf(orig_aspect / aspect, panscan); } else { return; // No change in aspect } pl_rect2df_stretch(rc, scale_x, scale_y); } void pl_rect2df_aspect_fit(pl_rect2df *rc, const pl_rect2df *src, float panscan) { float orig_w = fabs(pl_rect_w(*rc)), orig_h = fabs(pl_rect_h(*rc)); if (!orig_w || !orig_h) return; // If either one of these is larger than 1, then we need to shrink to fit, // otherwise we can just directly stretch the rect. float scale_x = fabs(pl_rect_w(*src)) / orig_w, scale_y = fabs(pl_rect_h(*src)) / orig_h; if (scale_x > 1.0 || scale_y > 1.0) { pl_rect2df_aspect_copy(rc, src, panscan); } else { pl_rect2df_stretch(rc, scale_x, scale_y); } } void pl_rect2df_stretch(pl_rect2df *rc, float stretch_x, float stretch_y) { float midx = (rc->x0 + rc->x1) / 2.0, midy = (rc->y0 + rc->y1) / 2.0; rc->x0 = rc->x0 * stretch_x + midx * (1.0 - stretch_x); rc->x1 = rc->x1 * stretch_x + midx * (1.0 - stretch_x); rc->y0 = rc->y0 * stretch_y + midy * (1.0 - stretch_y); rc->y1 = rc->y1 * stretch_y + midy * (1.0 - stretch_y); } void pl_rect2df_offset(pl_rect2df *rc, float offset_x, float offset_y) { if (rc->x1 < rc->x0) offset_x = -offset_x; if (rc->y1 < rc->y0) offset_y = -offset_y; rc->x0 += offset_x; rc->x1 += offset_x; rc->y0 += offset_y; rc->y1 += offset_y; } void pl_rect2df_rotate(pl_rect2df *rc, pl_rotation rot) { if (!(rot = pl_rotation_normalize(rot))) return; float x0 = rc->x0, y0 = rc->y0, x1 = rc->x1, y1 = rc->y1; if (rot >= PL_ROTATION_180) { rot -= PL_ROTATION_180; PL_SWAP(x0, x1); PL_SWAP(y0, y1); } switch (rot) { case PL_ROTATION_0: *rc = (pl_rect2df) { .x0 = x0, .y0 = y0, .x1 = x1, .y1 = y1, }; return; case PL_ROTATION_90: *rc = (pl_rect2df) { .x0 = y1, .y0 = x0, .x1 = y0, .y1 = x1, }; return; default: pl_unreachable(); } } libplacebo-v7.349.0/src/common.h000066400000000000000000000135631463457750100164340ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . 
*/ #pragma once #define __STDC_FORMAT_MACROS #ifdef __cplusplus #include #endif #if !defined(__cplusplus) || defined(__cpp_lib_stdatomic_h) #define PL_HAVE_STDATOMIC #endif #ifdef PL_HAVE_STDATOMIC #include #endif #include #include #include #include #include #if defined(__MINGW32__) && !defined(__clang__) #define PL_PRINTF(fmt, va) __attribute__ ((format(gnu_printf, fmt, va))) \ __attribute__ ((nonnull(fmt))) #elif defined(__GNUC__) #define PL_PRINTF(fmt, va) __attribute__ ((format(printf, fmt, va))) \ __attribute__ ((nonnull(fmt))) #else #define PL_PRINTF(fmt, va) #endif #define PL_NOINLINE __attribute__((noinline)) #include "os.h" #include "config_internal.h" #define PL_DEPRECATED_IN(VER) #include #include "pl_assert.h" #include "pl_alloc.h" #include "pl_clock.h" #include "pl_string.h" #if PL_API_VER != BUILD_API_VER #error Header mismatch? pulled from elsewhere! #endif // Divide a number while rounding up (careful: double-eval) #define PL_DIV_UP(x, y) (((x) + (y) - 1) / (y)) // Align up to the nearest multiple of an arbitrary alignment, which may also // be 0 to signal no alignment requirements. #define PL_ALIGN(x, align) ((align) ? PL_DIV_UP(x, align) * (align) : (x)) // This is faster but must only be called on positive powers of two. #define PL_ALIGN2(x, align) (((x) + (align) - 1) & ~((align) - 1)) // Returns the log base 2 of an unsigned long long #define PL_LOG2(x) ((unsigned) (8*sizeof (unsigned long long) - __builtin_clzll((x)) - 1)) // Rounds a number up to the nearest power of two #define PL_ALIGN_POT(x) (0x1LLU << (PL_LOG2((x) - 1) + 1)) // Right shift a number while rounding up #define PL_RSHIFT_UP(x, s) -((-(x)) >> (s)) // Returns whether or not a number is a power of two (or zero) #define PL_ISPOT(x) (((x) & ((x) - 1)) == 0) // Returns the size of a static array with known size. #define PL_ARRAY_SIZE(s) (sizeof(s) / sizeof((s)[0])) // Swaps two variables #define PL_SWAP(a, b) \ do { \ __typeof__ (a) _tmp = (a); \ (a) = (b); \ (b) = _tmp; \ } while (0) // Helper functions for transposing a matrix in-place. #define PL_TRANSPOSE_DIM(d, m) \ pl_transpose((d), (float[(d)*(d)]){0}, (const float *)(m)) #define PL_TRANSPOSE_2X2(m) PL_TRANSPOSE_DIM(2, m) #define PL_TRANSPOSE_3X3(m) PL_TRANSPOSE_DIM(3, m) #define PL_TRANSPOSE_4X4(m) PL_TRANSPOSE_DIM(4, m) static inline float *pl_transpose(int dim, float *out, const float *in) { for (int i = 0; i < dim; i++) { for (int j = 0; j < dim; j++) out[i * dim + j] = in[j * dim + i]; } return out; } // Helper functions for some common numeric operations (careful: double-eval) #define PL_MAX(x, y) ((x) > (y) ? (x) : (y)) #define PL_MAX3(x, y, z) PL_MAX(PL_MAX(x, y), z) #define PL_MIN(x, y) ((x) < (y) ? (x) : (y)) #define PL_CLAMP(x, l, h) ((x) < (l) ? (l) : (x) > (h) ? (h) : (x)) #define PL_CMP(a, b) (((a) > (b)) - ((a) < (b))) #define PL_DEF(x, d) ((x) ? 
(x) : (d)) #define PL_SQUARE(x) ((x) * (x)) #define PL_CUBE(x) ((x) * (x) * (x)) #define PL_MIX(a, b, x) ((x) * (b) + (1 - (x)) * (a)) static inline float pl_smoothstep(float edge0, float edge1, float x) { if (edge0 == edge1) return x >= edge0; x = (x - edge0) / (edge1 - edge0); x = PL_CLAMP(x, 0.0f, 1.0f); return x * x * (3.0f - 2.0f * x); } // Helpers for doing alignment calculations static inline size_t pl_gcd(size_t x, size_t y) { assert(x && y); while (y) { size_t tmp = y; y = x % y; x = tmp; } return x; } static inline size_t pl_lcm(size_t x, size_t y) { assert(x && y); return x * (y / pl_gcd(x, y)); } // Conditional abort() macro that depends on the configuration option #ifdef PL_DEBUG_ABORT # define pl_debug_abort() do { \ fprintf(stderr, "pl_debug_abort() triggered!\n"); \ abort(); \ } while (0) #else # define pl_debug_abort() do {} while (0) #endif #ifdef PL_HAVE_STDATOMIC // Refcounting helpers typedef atomic_uint_fast32_t pl_rc_t; #define pl_rc_init(rc) atomic_init(rc, 1) #define pl_rc_ref(rc) ((void) atomic_fetch_add_explicit(rc, 1, memory_order_acquire)) #define pl_rc_deref(rc) (atomic_fetch_sub_explicit(rc, 1, memory_order_release) == 1) #define pl_rc_count(rc) atomic_load(rc) #endif #define pl_unreachable() (assert(!"unreachable"), __builtin_unreachable()) // Helper for parameter validation #define pl_require(ctx, expr) \ do { \ if (!(expr)) { \ PL_ERR(ctx, "Validation failed: %s (%s:%d)", \ #expr, __FILE__, __LINE__); \ pl_log_stack_trace(ctx->log, PL_LOG_ERR); \ pl_debug_abort(); \ goto error; \ } \ } while (0) libplacebo-v7.349.0/src/convert.cc000066400000000000000000000172201463457750100167540ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #include #include #include #if __has_include() # include #endif #include "pl_string.h" [[maybe_unused]] static int ccStrPrintDouble( char *str, int bufsize, int decimals, double value ); namespace { template struct has_std_to_chars_impl { template static auto _(CT s) -> decltype(std::to_chars(s, s, std::declval()), std::true_type{}); static auto _(...) -> std::false_type; static constexpr bool value = decltype(_((char *){}))::value; }; template constexpr bool has_std_to_chars = has_std_to_chars_impl::value; template static inline int to_chars(char *buf, size_t len, T n, Args ...args) { if constexpr (has_std_to_chars) { auto [ptr, ec] = std::to_chars(buf, buf + len, n, args...); return ec == std::errc() ? ptr - buf : 0; } else { static_assert(std::is_same_v || std::is_same_v, "Not implemented!"); // FIXME: Fallback for GCC <= 10 currently required for MinGW-w64 on // Ubuntu 22.04. Remove this when Ubuntu 24.04 is released, as it will // provide newer MinGW-w64 GCC and it will be safe to require it. 
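        // [Editorial sketch, not part of the upstream source] The wrappers
        // generated by CHAR_CONVERT() further down expose this helper as e.g.
        // pl_str_print_double() / pl_str_parse_double(). A minimal usage
        // sketch, assuming pl_str0() from pl_string.h to wrap a C string:
        //
        //     char buf[64];
        //     int len = pl_str_print_double(buf, sizeof(buf), 3.25);
        //     // len > 0 on success, 0 if the buffer was too small
        //
        //     double val;
        //     bool ok = pl_str_parse_double(pl_str0("3.25"), &val);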
return ccStrPrintDouble(buf, len, std::numeric_limits::max_digits10, n); } } template struct has_std_from_chars_impl { template static auto _(CT s) -> decltype(std::from_chars(s, s, std::declval()), std::true_type{}); static auto _(...) -> std::false_type; static constexpr bool value = decltype(_((const char *){}))::value; }; template constexpr bool has_std_from_chars = has_std_from_chars_impl::value; template static inline bool from_chars(pl_str str, T &n, Args ...args) { if constexpr (has_std_from_chars) { auto [ptr, ec] = std::from_chars((const char *) str.buf, (const char *) str.buf + str.len, n, args...); return ec == std::errc(); } else { constexpr bool is_fp = std::is_same_v || std::is_same_v; static_assert(is_fp, "Not implemented!"); #if !__has_include() static_assert(!is_fp, " is required, but not " \ "found. Please run `git submodule update --init`" \ " or provide "); #else // FIXME: Fallback for libc++, as it does not implement floating-point // variant of std::from_chars. Remove this when appropriate. auto [ptr, ec] = fast_float::from_chars((const char *) str.buf, (const char *) str.buf + str.len, n, args...); return ec == std::errc(); #endif } } } #define CHAR_CONVERT(name, type, ...) \ int pl_str_print_##name(char *buf, size_t len, type n) \ { \ return to_chars(buf, len, n __VA_OPT__(,) __VA_ARGS__); \ } \ bool pl_str_parse_##name(pl_str str, type *n) \ { \ return from_chars(str, *n __VA_OPT__(,) __VA_ARGS__); \ } CHAR_CONVERT(hex, unsigned short, 16) CHAR_CONVERT(int, int) CHAR_CONVERT(uint, unsigned int) CHAR_CONVERT(int64, int64_t) CHAR_CONVERT(uint64, uint64_t) CHAR_CONVERT(float, float) CHAR_CONVERT(double, double) /* ***************************************************************************** * * Copyright (c) 2007-2016 Alexis Naveros. * Modified for use with libplacebo by Niklas Haas * Changes include: * - Removed a CC_MIN macro dependency by equivalent logic * - Removed CC_ALWAYSINLINE * - Fixed (!seq) check to (!seqlength) * - Added support for scientific notation (e.g. 1.0e10) in ccSeqParseDouble * * Permission is granted to anyone to use this software for any purpose, * including commercial applications, and to alter it and redistribute it * freely, subject to the following restrictions: * * 1. The origin of this software must not be misrepresented; you must not * claim that you wrote the original software. If you use this software * in a product, an acknowledgment in the product documentation would be * appreciated but is not required. * 2. Altered source versions must be plainly marked as such, and must not be * misrepresented as being the original software. * 3. This notice may not be removed or altered from any source distribution. 
* * ----------------------------------------------------------------------------- */ static int ccStrPrintDouble( char *str, int bufsize, int decimals, double value ) { int size, offset, index; int32_t frac, accumsub; double muldec; uint32_t u32; uint64_t u64; size = 0; if( value < 0.0 ) { size = 1; *str++ = '-'; bufsize--; value = -value; } if( value < 4294967296.0 ) { u32 = (uint32_t)value; offset = pl_str_print_uint( str, bufsize, u32 ); if (!offset) goto error; size += offset; bufsize -= size; value -= (double)u32; } else if( value < 18446744073709551616.0 ) { u64 = (uint64_t)value; offset = pl_str_print_uint64( str, bufsize, u64 ); if (!offset) goto error; size += offset; bufsize -= size; value -= (double)u64; } else goto error; if (decimals > bufsize - 2) decimals = bufsize - 2; if( decimals <= 0 ) return size; muldec = 10.0; accumsub = 0; str += offset; for( index = 0 ; index < decimals ; index++ ) { // Skip printing insignificant decimal digits if (value * muldec - accumsub <= std::numeric_limits::epsilon()) break; if (index == 0) { size += 1; *str++ = '.'; } frac = (int32_t)( value * muldec ) - accumsub; frac = PL_CLAMP(frac, 0, 9); // FIXME: why is this needed? str[index] = '0' + (char)frac; accumsub += frac; accumsub = ( accumsub << 3 ) + ( accumsub << 1 ); if( muldec < 10000000 ) muldec *= 10.0; else { value *= 10000000.0; value -= (int32_t)value; muldec = 10.0; accumsub = 0; } } // Round up the last decimal digit if ( str[ index - 1 ] < '9' && (int32_t)( value * muldec ) - accumsub >= 5 ) str[ index - 1 ]++; str[ index ] = 0; size += index; return size; error: if( bufsize < 4 ) *str = 0; else { str[0] = 'E'; str[1] = 'R'; str[2] = 'R'; str[3] = 0; } return 0; } libplacebo-v7.349.0/src/d3d11/000077500000000000000000000000001463457750100155775ustar00rootroot00000000000000libplacebo-v7.349.0/src/d3d11/common.h000066400000000000000000000040461463457750100172440ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #pragma once #include "../common.h" #include "../log.h" #ifdef PL_HAVE_DXGI_DEBUG #include #endif #include // Shared struct used to hold the D3D11 device and associated interfaces struct d3d11_ctx { pl_log log; pl_d3d11 d3d11; // Copy of the device from pl_d3d11 for convenience. Does not hold an // additional reference. ID3D11Device *dev; // DXGI device. This does hold a reference. IDXGIDevice1 *dxgi_dev; #ifdef PL_HAVE_DXGI_DEBUG // Debug interfaces IDXGIDebug *debug; IDXGIInfoQueue *iqueue; uint64_t last_discarded; // Last count of discarded messages DXGI_INFO_QUEUE_MESSAGE *dxgi_msg; #endif // pl_gpu_is_failed (We saw a device removed error!) bool is_failed; }; // DDK value. Apparently some D3D functions can return this instead of the // proper user-mode error code. 
See: // https://docs.microsoft.com/en-us/windows/win32/api/dxgi/nf-dxgi-idxgiswapchain-present #define D3DDDIERR_DEVICEREMOVED (0x88760870) #ifndef D3D11_FORMAT_SUPPORT2_UAV_TYPED_STORE #define D3D11_FORMAT_SUPPORT2_UAV_TYPED_STORE (0x80) #endif #ifndef D3D11_FORMAT_SUPPORT2_UAV_TYPED_LOAD #define D3D11_FORMAT_SUPPORT2_UAV_TYPED_LOAD (0x40) #endif #ifndef PL_HAVE_DXGI_DEBUG_D3D11 DEFINE_GUID(DXGI_DEBUG_D3D11, 0x4b99317b, 0xac39, 0x4aa6, 0xbb, 0xb, 0xba, 0xa0, 0x47, 0x84, 0x79, 0x8f); #endif libplacebo-v7.349.0/src/d3d11/context.c000066400000000000000000000372501463457750100174360ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #include "gpu.h" // Windows 8 enum value, not present in mingw-w64 v7 #define DXGI_ADAPTER_FLAG_SOFTWARE (2) const struct pl_d3d11_params pl_d3d11_default_params = { PL_D3D11_DEFAULTS }; static INIT_ONCE d3d11_once = INIT_ONCE_STATIC_INIT; static PFN_D3D11_CREATE_DEVICE pD3D11CreateDevice = NULL; static __typeof__(&CreateDXGIFactory1) pCreateDXGIFactory1 = NULL; #ifdef PL_HAVE_DXGI_DEBUG static __typeof__(&DXGIGetDebugInterface) pDXGIGetDebugInterface = NULL; #endif static void d3d11_load(void) { BOOL bPending = FALSE; InitOnceBeginInitialize(&d3d11_once, 0, &bPending, NULL); if (bPending) { HMODULE d3d11 = LoadLibraryW(L"d3d11.dll"); if (d3d11) { pD3D11CreateDevice = (void *) GetProcAddress(d3d11, "D3D11CreateDevice"); } HMODULE dxgi = LoadLibraryW(L"dxgi.dll"); if (dxgi) { pCreateDXGIFactory1 = (void *) GetProcAddress(dxgi, "CreateDXGIFactory1"); } #ifdef PL_HAVE_DXGI_DEBUG HMODULE dxgi_debug = LoadLibraryW(L"dxgidebug.dll"); if (dxgi_debug) { pDXGIGetDebugInterface = (void *) GetProcAddress(dxgi_debug, "DXGIGetDebugInterface"); } #endif } InitOnceComplete(&d3d11_once, 0, NULL); } // Get a const array of D3D_FEATURE_LEVELs from max_fl to min_fl (inclusive) static int get_feature_levels(int max_fl, int min_fl, const D3D_FEATURE_LEVEL **out) { static const D3D_FEATURE_LEVEL levels[] = { D3D_FEATURE_LEVEL_12_1, D3D_FEATURE_LEVEL_12_0, D3D_FEATURE_LEVEL_11_1, D3D_FEATURE_LEVEL_11_0, D3D_FEATURE_LEVEL_10_1, D3D_FEATURE_LEVEL_10_0, D3D_FEATURE_LEVEL_9_3, D3D_FEATURE_LEVEL_9_2, D3D_FEATURE_LEVEL_9_1, }; static const int levels_len = PL_ARRAY_SIZE(levels); int start = 0; for (; start < levels_len; start++) { if (levels[start] <= max_fl) break; } int len = 0; for (; start + len < levels_len; len++) { if (levels[start + len] < min_fl) break; } *out = &levels[start]; return len; } static bool is_null_luid(LUID luid) { return luid.LowPart == 0 && luid.HighPart == 0; } static IDXGIAdapter *get_adapter(pl_d3d11 d3d11, LUID adapter_luid) { struct d3d11_ctx *ctx = PL_PRIV(d3d11); IDXGIFactory1 *factory = NULL; IDXGIAdapter1 *adapter1 = NULL; IDXGIAdapter *adapter = NULL; HRESULT hr; if (!pCreateDXGIFactory1) { PL_FATAL(ctx, "Failed to load dxgi.dll"); goto error; } pCreateDXGIFactory1(&IID_IDXGIFactory1, (void **) &factory); for (int i = 0;; i++) { hr 
= IDXGIFactory1_EnumAdapters1(factory, i, &adapter1); if (hr == DXGI_ERROR_NOT_FOUND) break; if (FAILED(hr)) { PL_FATAL(ctx, "Failed to enumerate adapters"); goto error; } DXGI_ADAPTER_DESC1 desc; D3D(IDXGIAdapter1_GetDesc1(adapter1, &desc)); if (desc.AdapterLuid.LowPart == adapter_luid.LowPart && desc.AdapterLuid.HighPart == adapter_luid.HighPart) { break; } SAFE_RELEASE(adapter1); } if (!adapter1) { PL_FATAL(ctx, "Adapter with LUID %08lx%08lx not found", adapter_luid.HighPart, adapter_luid.LowPart); goto error; } D3D(IDXGIAdapter1_QueryInterface(adapter1, &IID_IDXGIAdapter, (void **) &adapter)); error: SAFE_RELEASE(factory); SAFE_RELEASE(adapter1); return adapter; } static bool has_sdk_layers(void) { // This will fail if the SDK layers aren't installed return SUCCEEDED(pD3D11CreateDevice(NULL, D3D_DRIVER_TYPE_NULL, NULL, D3D11_CREATE_DEVICE_DEBUG, NULL, 0, D3D11_SDK_VERSION, NULL, NULL, NULL)); } static ID3D11Device *create_device(struct pl_d3d11_t *d3d11, const struct pl_d3d11_params *params) { struct d3d11_ctx *ctx = PL_PRIV(d3d11); bool debug = params->debug; bool warp = params->force_software; int max_fl = params->max_feature_level; int min_fl = params->min_feature_level; ID3D11Device *dev = NULL; IDXGIDevice1 *dxgi_dev = NULL; IDXGIAdapter *adapter = NULL; bool release_adapter = false; HRESULT hr; d3d11_load(); if (!pD3D11CreateDevice) { PL_FATAL(ctx, "Failed to load d3d11.dll"); goto error; } if (params->adapter) { adapter = params->adapter; } else if (!is_null_luid(params->adapter_luid)) { adapter = get_adapter(d3d11, params->adapter_luid); release_adapter = true; } if (debug && !has_sdk_layers()) { PL_INFO(ctx, "Debug layer not available, removing debug flag"); debug = false; } // Return here to retry creating the device do { // Use these default feature levels if they are not set max_fl = PL_DEF(max_fl, D3D_FEATURE_LEVEL_12_1); min_fl = PL_DEF(min_fl, D3D_FEATURE_LEVEL_9_1); // Get a list of feature levels from min_fl to max_fl const D3D_FEATURE_LEVEL *levels; int levels_len = get_feature_levels(max_fl, min_fl, &levels); if (!levels_len) { PL_FATAL(ctx, "No suitable Direct3D feature level found"); goto error; } D3D_DRIVER_TYPE type = D3D_DRIVER_TYPE_UNKNOWN; if (!adapter) { if (warp) { type = D3D_DRIVER_TYPE_WARP; } else { type = D3D_DRIVER_TYPE_HARDWARE; } } UINT flags = params->flags; if (debug) flags |= D3D11_CREATE_DEVICE_DEBUG; hr = pD3D11CreateDevice(adapter, type, NULL, flags, levels, levels_len, D3D11_SDK_VERSION, &dev, NULL, NULL); if (SUCCEEDED(hr)) break; pl_d3d11_after_error(ctx, hr); // Trying to create a D3D_FEATURE_LEVEL_12_0 device on Windows 8.1 or // below will not succeed. Try an 11_1 device. if (hr == E_INVALIDARG && max_fl >= D3D_FEATURE_LEVEL_12_0 && min_fl <= D3D_FEATURE_LEVEL_11_1) { PL_DEBUG(ctx, "Failed to create 12_0+ device, trying 11_1"); max_fl = D3D_FEATURE_LEVEL_11_1; continue; } // Trying to create a D3D_FEATURE_LEVEL_11_1 device on Windows 7 // without the platform update will not succeed. Try an 11_0 device. 
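        // [Editorial note, not part of the upstream source] For readers
        // following the control flow: together with the 12_0 -> 11_1 fallback
        // above and the 11_1 -> 11_0 fallback just below, this loop implements
        // the following retry ladder:
        //   1. Try the requested [min_fl, max_fl] range (defaults 9_1..12_1).
        //   2. On E_INVALIDARG, progressively cap max_fl at 11_1, then 11_0,
        //      for runtimes that reject feature levels they do not know about.
        //   3. If hardware device creation still fails and allow_software is
        //      set, retry once more with D3D_DRIVER_TYPE_WARP and the original
        //      feature level limits.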
if (hr == E_INVALIDARG && max_fl >= D3D_FEATURE_LEVEL_11_1 && min_fl <= D3D_FEATURE_LEVEL_11_0) { PL_DEBUG(ctx, "Failed to create 11_1+ device, trying 11_0"); max_fl = D3D_FEATURE_LEVEL_11_0; continue; } // Retry with WARP if allowed if (!adapter && !warp && params->allow_software) { PL_DEBUG(ctx, "Failed to create hardware device, trying WARP: %s", pl_hresult_to_str(hr)); warp = true; max_fl = params->max_feature_level; min_fl = params->min_feature_level; continue; } PL_FATAL(ctx, "Failed to create Direct3D 11 device: %s", pl_hresult_to_str(hr)); goto error; } while (true); if (params->max_frame_latency) { D3D(ID3D11Device_QueryInterface(dev, &IID_IDXGIDevice1, (void **) &dxgi_dev)); IDXGIDevice1_SetMaximumFrameLatency(dxgi_dev, params->max_frame_latency); } d3d11->software = warp; error: if (release_adapter) SAFE_RELEASE(adapter); SAFE_RELEASE(dxgi_dev); return dev; } static void init_debug_layer(struct d3d11_ctx *ctx, bool leak_check) { #ifdef PL_HAVE_DXGI_DEBUG if (!pDXGIGetDebugInterface) d3d11_load(); if (!pDXGIGetDebugInterface) goto error; D3D(pDXGIGetDebugInterface(&IID_IDXGIInfoQueue, (void **) &ctx->iqueue)); // Push empty filter to get everything IDXGIInfoQueue_PushStorageFilter(ctx->iqueue, DXGI_DEBUG_ALL, &(DXGI_INFO_QUEUE_FILTER){0}); // Filter some annoying D3D11 messages DXGI_INFO_QUEUE_MESSAGE_ID deny_ids[] = { // This false-positive error occurs every time we Draw() with a shader // that samples from a texture format that only supports point sampling. // Since we already use CheckFormatSupport to know which formats can be // linearly sampled from, we shouldn't ever bind a non-point sampler to // a format that doesn't support it. D3D11_MESSAGE_ID_DEVICE_DRAW_RESOURCE_FORMAT_SAMPLE_UNSUPPORTED, }; DXGI_INFO_QUEUE_FILTER filter = { .DenyList = { .NumIDs = PL_ARRAY_SIZE(deny_ids), .pIDList = deny_ids, }, }; IDXGIInfoQueue_PushStorageFilter(ctx->iqueue, DXGI_DEBUG_D3D11, &filter); IDXGIInfoQueue_SetMessageCountLimit(ctx->iqueue, DXGI_DEBUG_D3D11, -1); IDXGIInfoQueue_SetMessageCountLimit(ctx->iqueue, DXGI_DEBUG_DXGI, -1); if (leak_check) D3D(pDXGIGetDebugInterface(&IID_IDXGIDebug, (void **) &ctx->debug)); error: return; #endif } void pl_d3d11_destroy(pl_d3d11 *ptr) { pl_d3d11 d3d11 = *ptr; if (!d3d11) return; struct d3d11_ctx *ctx = PL_PRIV(d3d11); pl_gpu_destroy(d3d11->gpu); SAFE_RELEASE(ctx->dev); SAFE_RELEASE(ctx->dxgi_dev); #ifdef PL_HAVE_DXGI_DEBUG if (ctx->debug) { // Report any leaked objects pl_d3d11_flush_message_queue(ctx, "After destroy"); IDXGIDebug_ReportLiveObjects(ctx->debug, DXGI_DEBUG_ALL, DXGI_DEBUG_RLO_DETAIL); pl_d3d11_flush_message_queue(ctx, "After leak check"); IDXGIDebug_ReportLiveObjects(ctx->debug, DXGI_DEBUG_ALL, DXGI_DEBUG_RLO_SUMMARY); pl_d3d11_flush_message_queue(ctx, "After leak summary"); } SAFE_RELEASE(ctx->debug); SAFE_RELEASE(ctx->iqueue); #endif pl_free_ptr((void **) ptr); } pl_d3d11 pl_d3d11_create(pl_log log, const struct pl_d3d11_params *params) { params = PL_DEF(params, &pl_d3d11_default_params); IDXGIAdapter1 *adapter = NULL; IDXGIAdapter2 *adapter2 = NULL; bool success = false; HRESULT hr; struct pl_d3d11_t *d3d11 = pl_zalloc_obj(NULL, d3d11, struct d3d11_ctx); struct d3d11_ctx *ctx = PL_PRIV(d3d11); ctx->log = log; ctx->d3d11 = d3d11; if (params->device) { d3d11->device = params->device; ID3D11Device_AddRef(d3d11->device); } else { d3d11->device = create_device(d3d11, params); if (!d3d11->device) goto error; } ctx->dev = d3d11->device; if (params->debug || ID3D11Device_GetCreationFlags(d3d11->device) & D3D11_CREATE_DEVICE_DEBUG) { // 
Do not report live object on pl_d3d11_destroy if device was created // externally, it makes no sense as there will be a lot of things alive. init_debug_layer(ctx, !params->device); } D3D(ID3D11Device_QueryInterface(d3d11->device, &IID_IDXGIDevice1, (void **) &ctx->dxgi_dev)); D3D(IDXGIDevice1_GetParent(ctx->dxgi_dev, &IID_IDXGIAdapter1, (void **) &adapter)); hr = IDXGIAdapter1_QueryInterface(adapter, &IID_IDXGIAdapter2, (void **) &adapter2); if (FAILED(hr)) adapter2 = NULL; if (adapter2) { PL_INFO(ctx, "Using DXGI 1.2+"); } else { PL_INFO(ctx, "Using DXGI 1.1"); } D3D_FEATURE_LEVEL fl = ID3D11Device_GetFeatureLevel(d3d11->device); PL_INFO(ctx, "Using Direct3D 11 feature level %u_%u", ((unsigned) fl) >> 12, (((unsigned) fl) >> 8) & 0xf); char *dev_name = NULL; UINT vendor_id, device_id, revision, subsys_id; LUID adapter_luid; UINT flags; if (adapter2) { // DXGI 1.2 IDXGIAdapter2::GetDesc2 is preferred over the DXGI 1.1 // version because it reports the real adapter information when using // feature level 9 hardware DXGI_ADAPTER_DESC2 desc; D3D(IDXGIAdapter2_GetDesc2(adapter2, &desc)); dev_name = pl_to_utf8(NULL, desc.Description); vendor_id = desc.VendorId; device_id = desc.DeviceId; revision = desc.Revision; subsys_id = desc.SubSysId; adapter_luid = desc.AdapterLuid; flags = desc.Flags; } else { DXGI_ADAPTER_DESC1 desc; D3D(IDXGIAdapter1_GetDesc1(adapter, &desc)); dev_name = pl_to_utf8(NULL, desc.Description); vendor_id = desc.VendorId; device_id = desc.DeviceId; revision = desc.Revision; subsys_id = desc.SubSysId; adapter_luid = desc.AdapterLuid; flags = desc.Flags; } PL_INFO(ctx, "Direct3D 11 device properties:"); PL_INFO(ctx, " Device Name: %s", dev_name); PL_INFO(ctx, " Device ID: %04x:%04x (rev %02x)", vendor_id, device_id, revision); PL_INFO(ctx, " Subsystem ID: %04x:%04x", LOWORD(subsys_id), HIWORD(subsys_id)); PL_INFO(ctx, " LUID: %08lx%08lx", adapter_luid.HighPart, adapter_luid.LowPart); pl_free(dev_name); LARGE_INTEGER version; hr = IDXGIAdapter1_CheckInterfaceSupport(adapter, &IID_IDXGIDevice, &version); if (SUCCEEDED(hr)) { PL_INFO(ctx, " Driver version: %u.%u.%u.%u", HIWORD(version.HighPart), LOWORD(version.HighPart), HIWORD(version.LowPart), LOWORD(version.LowPart)); } // Note: DXGI_ADAPTER_FLAG_SOFTWARE doesn't exist before Windows 8, but we // also set d3d11->software in create_device if we pick WARP ourselves if (flags & DXGI_ADAPTER_FLAG_SOFTWARE) d3d11->software = true; // If the primary display adapter is a software adapter, the // DXGI_ADAPTER_FLAG_SOFTWARE flag won't be set, but the device IDs should // still match the Microsoft Basic Render Driver if (vendor_id == 0x1414 && device_id == 0x8c) d3d11->software = true; if (d3d11->software) { bool external_adapter = params->device || params->adapter || !is_null_luid(params->adapter_luid); // The allow_software flag only applies if the API user didn't manually // specify an adapter or a device if (!params->allow_software && !external_adapter) { // If we got this far with allow_software set, the primary adapter // must be a software adapter PL_ERR(ctx, "Primary adapter is a software adapter"); goto error; } // If a software adapter was manually specified, don't show a warning enum pl_log_level level = PL_LOG_WARN; if (external_adapter || params->force_software) level = PL_LOG_INFO; PL_MSG(ctx, level, "Using a software adapter"); } d3d11->gpu = pl_gpu_create_d3d11(ctx); if (!d3d11->gpu) goto error; success = true; error: if (!success) { PL_FATAL(ctx, "Failed initializing Direct3D 11 device"); pl_d3d11_destroy((pl_d3d11 *) 
&d3d11); } SAFE_RELEASE(adapter); SAFE_RELEASE(adapter2); return d3d11; } libplacebo-v7.349.0/src/d3d11/formats.c000066400000000000000000000277211463457750100174270ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #include "formats.h" #include "gpu.h" #define FMT(_minor, _name, _dxfmt, _type, num, size, bits, order) \ (struct d3d_format) { \ .dxfmt = DXGI_FORMAT_##_dxfmt##_##_type, \ .minor = _minor, \ .fmt = { \ .name = _name, \ .type = PL_FMT_##_type, \ .num_components = num, \ .component_depth = bits, \ .texel_size = size, \ .texel_align = 1, \ .internal_size = size, \ .host_bits = bits, \ .sample_order = order, \ }, \ } #define IDX(...) {__VA_ARGS__} #define BITS(...) {__VA_ARGS__} #define REGFMT(name, dxfmt, type, num, bits) \ FMT(0, name, dxfmt, type, num, (num) * (bits) / 8, \ BITS(bits, bits, bits, bits), \ IDX(0, 1, 2, 3)) #define EMUFMT(_name, _dxfmt, _type, in, en, ib, eb) \ (struct d3d_format) { \ .dxfmt = DXGI_FORMAT_##_dxfmt##_##_type, \ .minor = 0, \ .fmt = { \ .name = _name, \ .type = PL_FMT_##_type, \ .num_components = en, \ .component_depth = BITS(ib, ib, ib, ib), \ .internal_size = (in) * (ib) / 8, \ .opaque = false, \ .emulated = true, \ .texel_size = (en) * (eb) / 8, \ .texel_align = (eb) / 8, \ .host_bits = BITS(eb, eb, eb, eb), \ .sample_order = IDX(0, 1, 2, 3), \ }, \ } const struct d3d_format pl_d3d11_formats[] = { REGFMT("r8", R8, UNORM, 1, 8), REGFMT("rg8", R8G8, UNORM, 2, 8), EMUFMT("rgb8", R8G8B8A8, UNORM, 4, 3, 8, 8), REGFMT("rgba8", R8G8B8A8, UNORM, 4, 8), REGFMT("r16", R16, UNORM, 1, 16), REGFMT("rg16", R16G16, UNORM, 2, 16), EMUFMT("rgb16", R16G16B16A16, UNORM, 4, 3, 16, 16), REGFMT("rgba16", R16G16B16A16, UNORM, 4, 16), REGFMT("r8s", R8, SNORM, 1, 8), REGFMT("rg8s", R8G8, SNORM, 2, 8), REGFMT("rgba8s", R8G8B8A8, SNORM, 4, 8), REGFMT("r16s", R16, SNORM, 1, 16), REGFMT("rg16s", R16G16, SNORM, 2, 16), REGFMT("rgba16s", R16G16B16A16, SNORM, 4, 16), REGFMT("r16hf", R16, FLOAT, 1, 16), REGFMT("rg16hf", R16G16, FLOAT, 2, 16), EMUFMT("rgb16hf", R16G16B16A16, FLOAT, 4, 3, 16, 16), REGFMT("rgba16hf", R16G16B16A16, FLOAT, 4, 16), REGFMT("r32f", R32, FLOAT, 1, 32), REGFMT("rg32f", R32G32, FLOAT, 2, 32), REGFMT("rgb32f", R32G32B32, FLOAT, 3, 32), REGFMT("rgba32f", R32G32B32A32, FLOAT, 4, 32), EMUFMT("r16f", R16, FLOAT, 1, 1, 16, 32), EMUFMT("rg16f", R16G16, FLOAT, 2, 2, 16, 32), EMUFMT("rgb16f", R16G16B16A16, FLOAT, 4, 3, 16, 32), EMUFMT("rgba16f", R16G16B16A16, FLOAT, 4, 4, 16, 32), REGFMT("r8u", R8, UINT, 1, 8), REGFMT("rg8u", R8G8, UINT, 2, 8), REGFMT("rgba8u", R8G8B8A8, UINT, 4, 8), REGFMT("r16u", R16, UINT, 1, 16), REGFMT("rg16u", R16G16, UINT, 2, 16), REGFMT("rgba16u", R16G16B16A16, UINT, 4, 16), REGFMT("r32u", R32, UINT, 1, 32), REGFMT("rg32u", R32G32, UINT, 2, 32), REGFMT("rgb32u", R32G32B32, UINT, 3, 32), REGFMT("rgba32u", R32G32B32A32, UINT, 4, 32), REGFMT("r8i", R8, SINT, 1, 8), REGFMT("rg8i", R8G8, SINT, 2, 8), REGFMT("rgba8i", 
R8G8B8A8, SINT, 4, 8), REGFMT("r16i", R16, SINT, 1, 16), REGFMT("rg16i", R16G16, SINT, 2, 16), REGFMT("rgba16i", R16G16B16A16, SINT, 4, 16), REGFMT("r32i", R32, SINT, 1, 32), REGFMT("rg32i", R32G32, SINT, 2, 32), REGFMT("rgb32i", R32G32B32, SINT, 3, 32), REGFMT("rgba32i", R32G32B32A32, SINT, 4, 32), FMT(0, "rgb10a2", R10G10B10A2, UNORM, 4, 4, BITS(10, 10, 10, 2), IDX(0, 1, 2, 3)), FMT(0, "rgb10a2u", R10G10B10A2, UINT, 4, 4, BITS(10, 10, 10, 2), IDX(0, 1, 2, 3)), FMT(0, "bgra8", B8G8R8A8, UNORM, 4, 4, BITS( 8, 8, 8, 8), IDX(2, 1, 0, 3)), FMT(0, "bgrx8", B8G8R8X8, UNORM, 3, 4, BITS( 8, 8, 8), IDX(2, 1, 0)), FMT(0, "rg11b10f", R11G11B10, FLOAT, 3, 4, BITS(11, 11, 10), IDX(0, 1, 2)), // D3D11.1 16-bit formats (resurrected D3D9 formats) FMT(1, "bgr565", B5G6R5, UNORM, 3, 2, BITS( 5, 6, 5), IDX(2, 1, 0)), FMT(1, "bgr5a1", B5G5R5A1, UNORM, 4, 2, BITS( 5, 5, 5, 1), IDX(2, 1, 0, 3)), FMT(1, "bgra4", B4G4R4A4, UNORM, 4, 2, BITS( 4, 4, 4, 4), IDX(2, 1, 0, 3)), {0} }; #undef BITS #undef IDX #undef REGFMT #undef FMT void pl_d3d11_setup_formats(struct pl_gpu_t *gpu) { struct pl_gpu_d3d11 *p = PL_PRIV(gpu); PL_ARRAY(pl_fmt) formats = {0}; HRESULT hr; for (int i = 0; pl_d3d11_formats[i].dxfmt; i++) { const struct d3d_format *d3d_fmt = &pl_d3d11_formats[i]; // The Direct3D 11.0 debug layer will segfault if CheckFormatSupport is // called on a format it doesn't know about if (pl_d3d11_formats[i].minor > p->minor) continue; UINT sup = 0; hr = ID3D11Device_CheckFormatSupport(p->dev, d3d_fmt->dxfmt, &sup); if (FAILED(hr)) continue; D3D11_FEATURE_DATA_FORMAT_SUPPORT2 sup2 = { .InFormat = d3d_fmt->dxfmt }; ID3D11Device_CheckFeatureSupport(p->dev, D3D11_FEATURE_FORMAT_SUPPORT2, ², sizeof(sup2)); struct pl_fmt_t *fmt = pl_alloc_obj(gpu, fmt, struct d3d_fmt *); const struct d3d_format **fmtp = PL_PRIV(fmt); *fmt = d3d_fmt->fmt; *fmtp = d3d_fmt; // For sanity, clear the superfluous fields for (int j = fmt->num_components; j < 4; j++) { fmt->component_depth[j] = 0; fmt->sample_order[j] = 0; fmt->host_bits[j] = 0; } static const struct { enum pl_fmt_caps caps; UINT sup; UINT sup2; } support[] = { { .caps = PL_FMT_CAP_SAMPLEABLE, .sup = D3D11_FORMAT_SUPPORT_TEXTURE2D, }, { .caps = PL_FMT_CAP_STORABLE, // SHADER_LOAD is for readonly images, which can use a SRV .sup = D3D11_FORMAT_SUPPORT_TEXTURE2D | D3D11_FORMAT_SUPPORT_TYPED_UNORDERED_ACCESS_VIEW | D3D11_FORMAT_SUPPORT_SHADER_LOAD, .sup2 = D3D11_FORMAT_SUPPORT2_UAV_TYPED_STORE, }, { .caps = PL_FMT_CAP_READWRITE, .sup = D3D11_FORMAT_SUPPORT_TEXTURE2D | D3D11_FORMAT_SUPPORT_TYPED_UNORDERED_ACCESS_VIEW, .sup2 = D3D11_FORMAT_SUPPORT2_UAV_TYPED_LOAD, }, { .caps = PL_FMT_CAP_LINEAR, .sup = D3D11_FORMAT_SUPPORT_TEXTURE2D | D3D11_FORMAT_SUPPORT_SHADER_SAMPLE, }, { .caps = PL_FMT_CAP_RENDERABLE, .sup = D3D11_FORMAT_SUPPORT_RENDER_TARGET, }, { .caps = PL_FMT_CAP_BLENDABLE, .sup = D3D11_FORMAT_SUPPORT_RENDER_TARGET | D3D11_FORMAT_SUPPORT_BLENDABLE, }, { .caps = PL_FMT_CAP_VERTEX, .sup = D3D11_FORMAT_SUPPORT_IA_VERTEX_BUFFER, }, { .caps = PL_FMT_CAP_TEXEL_UNIFORM, .sup = D3D11_FORMAT_SUPPORT_BUFFER | D3D11_FORMAT_SUPPORT_SHADER_LOAD, }, { .caps = PL_FMT_CAP_TEXEL_STORAGE, // SHADER_LOAD is for readonly buffers, which can use a SRV .sup = D3D11_FORMAT_SUPPORT_BUFFER | D3D11_FORMAT_SUPPORT_TYPED_UNORDERED_ACCESS_VIEW | D3D11_FORMAT_SUPPORT_SHADER_LOAD, .sup2 = D3D11_FORMAT_SUPPORT2_UAV_TYPED_STORE, }, { .caps = PL_FMT_CAP_HOST_READABLE, .sup = D3D11_FORMAT_SUPPORT_CPU_LOCKABLE, }, }; for (int j = 0; j < PL_ARRAY_SIZE(support); j++) { if ((sup & support[j].sup) == support[j].sup && 
(sup2.OutFormatSupport2 & support[j].sup2) == support[j].sup2) { fmt->caps |= support[j].caps; } } // PL_FMT_CAP_STORABLE implies compute shaders, so don't set it if we // don't have them if (!gpu->glsl.compute) fmt->caps &= ~PL_FMT_CAP_STORABLE; // PL_FMT_CAP_READWRITE implies PL_FMT_CAP_STORABLE if (!(fmt->caps & PL_FMT_CAP_STORABLE)) fmt->caps &= ~PL_FMT_CAP_READWRITE; // `fmt->gatherable` must have PL_FMT_CAP_SAMPLEABLE if ((fmt->caps & PL_FMT_CAP_SAMPLEABLE) && (sup & D3D11_FORMAT_SUPPORT_SHADER_GATHER)) { fmt->gatherable = true; } // PL_FMT_CAP_BLITTABLE implies support for stretching, flipping and // loose format conversion, which require a shader pass in D3D11 if (p->fl >= D3D_FEATURE_LEVEL_11_0) { // On >=FL11_0, we use a compute pass, which supports 1D and 3D // textures if (fmt->caps & PL_FMT_CAP_STORABLE) fmt->caps |= PL_FMT_CAP_BLITTABLE; } else { // On caps & req) == req) fmt->caps |= PL_FMT_CAP_BLITTABLE; } if (fmt->caps & (PL_FMT_CAP_VERTEX | PL_FMT_CAP_TEXEL_UNIFORM | PL_FMT_CAP_TEXEL_STORAGE)) { fmt->glsl_type = pl_var_glsl_type_name(pl_var_from_fmt(fmt, "")); pl_assert(fmt->glsl_type); } if (fmt->caps & (PL_FMT_CAP_STORABLE | PL_FMT_CAP_TEXEL_STORAGE)) fmt->glsl_format = pl_fmt_glsl_format(fmt, fmt->num_components); fmt->fourcc = pl_fmt_fourcc(fmt); // If no caps, D3D11 only supports this for things we don't care about if (!fmt->caps) { pl_free(fmt); continue; } PL_ARRAY_APPEND(gpu, formats, fmt); } gpu->formats = formats.elem; gpu->num_formats = formats.num; } libplacebo-v7.349.0/src/d3d11/formats.h000066400000000000000000000021511463457750100174220ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #pragma once #include "common.h" struct d3d_format { DXGI_FORMAT dxfmt; int minor; // The D3D11 minor version number which supports this format struct pl_fmt_t fmt; }; extern const struct d3d_format pl_d3d11_formats[]; static inline DXGI_FORMAT fmt_to_dxgi(pl_fmt fmt) { const struct d3d_format **fmtp = PL_PRIV(fmt); return (*fmtp)->dxfmt; } void pl_d3d11_setup_formats(struct pl_gpu_t *gpu); libplacebo-v7.349.0/src/d3d11/gpu.c000066400000000000000000000602661463457750100165500ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . 
*/ #include #include #include #include "common.h" #include "gpu.h" #include "formats.h" #include "glsl/spirv.h" #define DXGI_ADAPTER_FLAG3_SUPPORT_MONITORED_FENCES (0x8) struct timer_query { ID3D11Query *ts_start; ID3D11Query *ts_end; ID3D11Query *disjoint; }; struct pl_timer_t { // Ring buffer of timer queries to use int current; int pending; struct timer_query queries[16]; }; void pl_d3d11_timer_start(pl_gpu gpu, pl_timer timer) { struct pl_gpu_d3d11 *p = PL_PRIV(gpu); struct d3d11_ctx *ctx = p->ctx; if (!timer) return; struct timer_query *query = &timer->queries[timer->current]; // Create the query objects lazilly if (!query->ts_start) { D3D(ID3D11Device_CreateQuery(p->dev, &(D3D11_QUERY_DESC) { D3D11_QUERY_TIMESTAMP }, &query->ts_start)); D3D(ID3D11Device_CreateQuery(p->dev, &(D3D11_QUERY_DESC) { D3D11_QUERY_TIMESTAMP }, &query->ts_end)); // Measuring duration in D3D11 requires three queries: start and end // timestamp queries, and a disjoint query containing a flag which says // whether the timestamps are usable or if a discontinuity occurred // between them, like a change in power state or clock speed. The // disjoint query also contains the timer frequency, so the timestamps // are useless without it. D3D(ID3D11Device_CreateQuery(p->dev, &(D3D11_QUERY_DESC) { D3D11_QUERY_TIMESTAMP_DISJOINT }, &query->disjoint)); } // Query the start timestamp ID3D11DeviceContext_Begin(p->imm, (ID3D11Asynchronous *) query->disjoint); ID3D11DeviceContext_End(p->imm, (ID3D11Asynchronous *) query->ts_start); return; error: SAFE_RELEASE(query->ts_start); SAFE_RELEASE(query->ts_end); SAFE_RELEASE(query->disjoint); } void pl_d3d11_timer_end(pl_gpu gpu, pl_timer timer) { struct pl_gpu_d3d11 *p = PL_PRIV(gpu); if (!timer) return; struct timer_query *query = &timer->queries[timer->current]; // Even if timer_start and timer_end are called in-order, timer_start might // have failed to create the timer objects if (!query->ts_start) return; // Query the end timestamp ID3D11DeviceContext_End(p->imm, (ID3D11Asynchronous *) query->ts_end); ID3D11DeviceContext_End(p->imm, (ID3D11Asynchronous *) query->disjoint); // Advance to the next set of queries, for the next call to timer_start timer->current++; if (timer->current >= PL_ARRAY_SIZE(timer->queries)) timer->current = 0; // Wrap around // Increment the number of pending queries, unless the ring buffer is full, // in which case, timer->current now points to the oldest one, which will be // dropped and reused if (timer->pending < PL_ARRAY_SIZE(timer->queries)) timer->pending++; } static uint64_t timestamp_to_ns(uint64_t timestamp, uint64_t freq) { static const uint64_t ns_per_s = 1000000000llu; return timestamp / freq * ns_per_s + timestamp % freq * ns_per_s / freq; } static uint64_t d3d11_timer_query(pl_gpu gpu, pl_timer timer) { struct pl_gpu_d3d11 *p = PL_PRIV(gpu); struct d3d11_ctx *ctx = p->ctx; HRESULT hr; for (; timer->pending > 0; timer->pending--) { int index = timer->current - timer->pending; if (index < 0) index += PL_ARRAY_SIZE(timer->queries); struct timer_query *query = &timer->queries[index]; UINT64 start, end; D3D11_QUERY_DATA_TIMESTAMP_DISJOINT dj; // Fetch the results of each query, or on S_FALSE, return 0 to indicate // the queries are still pending D3D(hr = ID3D11DeviceContext_GetData(p->imm, (ID3D11Asynchronous *) query->disjoint, &dj, sizeof(dj), D3D11_ASYNC_GETDATA_DONOTFLUSH)); if (hr == S_FALSE) return 0; D3D(hr = ID3D11DeviceContext_GetData(p->imm, (ID3D11Asynchronous *) query->ts_end, &end, sizeof(end), D3D11_ASYNC_GETDATA_DONOTFLUSH)); if 
(hr == S_FALSE) return 0; D3D(hr = ID3D11DeviceContext_GetData(p->imm, (ID3D11Asynchronous *) query->ts_start, &start, sizeof(start), D3D11_ASYNC_GETDATA_DONOTFLUSH)); if (hr == S_FALSE) return 0; // There was a discontinuity during the queries, so a timestamp can't be // produced. Skip it and try the next one. if (dj.Disjoint || !dj.Frequency) continue; // We got a result. Return it to the caller. timer->pending--; pl_d3d11_flush_message_queue(ctx, "After timer query"); uint64_t ns = timestamp_to_ns(end - start, dj.Frequency); return PL_MAX(ns, 1); error: // There was an error fetching the timer result, so skip it and try the // next one continue; } // No more unprocessed results return 0; } static void d3d11_timer_destroy(pl_gpu gpu, pl_timer timer) { struct pl_gpu_d3d11 *p = PL_PRIV(gpu); struct d3d11_ctx *ctx = p->ctx; for (int i = 0; i < PL_ARRAY_SIZE(timer->queries); i++) { SAFE_RELEASE(timer->queries[i].ts_start); SAFE_RELEASE(timer->queries[i].ts_end); SAFE_RELEASE(timer->queries[i].disjoint); } pl_d3d11_flush_message_queue(ctx, "After timer destroy"); pl_free(timer); } static pl_timer d3d11_timer_create(pl_gpu gpu) { struct pl_gpu_d3d11 *p = PL_PRIV(gpu); if (!p->has_timestamp_queries) return NULL; struct pl_timer_t *timer = pl_alloc_ptr(NULL, timer); *timer = (struct pl_timer_t) {0}; return timer; } static int d3d11_desc_namespace(pl_gpu gpu, enum pl_desc_type type) { // Vulkan-style binding, where all descriptors are in the same namespace, is // required to use SPIRV-Cross' HLSL resource mapping API, which targets // resources by binding number return 0; } static void d3d11_gpu_flush(pl_gpu gpu) { struct pl_gpu_d3d11 *p = PL_PRIV(gpu); struct d3d11_ctx *ctx = p->ctx; ID3D11DeviceContext_Flush(p->imm); pl_d3d11_flush_message_queue(ctx, "After gpu flush"); } static void d3d11_gpu_finish(pl_gpu gpu) { struct pl_gpu_d3d11 *p = PL_PRIV(gpu); struct d3d11_ctx *ctx = p->ctx; HRESULT hr; if (p->finish_fence) { p->finish_value++; D3D(ID3D11Fence_SetEventOnCompletion(p->finish_fence, p->finish_value, p->finish_event)); ID3D11DeviceContext4_Signal(p->imm4, p->finish_fence, p->finish_value); ID3D11DeviceContext_Flush(p->imm); WaitForSingleObject(p->finish_event, INFINITE); } else { ID3D11DeviceContext_End(p->imm, (ID3D11Asynchronous *) p->finish_query); // D3D11 doesn't have blocking queries, but it does have blocking // readback. As a performance hack to try to avoid polling, do a dummy // copy/readback between two buffers. Hopefully this will block until // all prior commands are finished. If it does, the first GetData call // will return a result and we won't have to poll. 
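        // [Editorial note, not part of the upstream source] The readback below
        // blocks because reading back a host-readable D3D11 buffer ultimately
        // goes through ID3D11DeviceContext::Map(), which (without
        // D3D11_MAP_FLAG_DO_NOT_WAIT) stalls the CPU until the GPU has
        // finished writing the staging resource - and therefore until all
        // previously submitted work is done. finish_buf_src/finish_buf_dst are
        // assumed to be tiny host-readable buffers created once at GPU
        // initialization.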
pl_buf_copy(gpu, p->finish_buf_dst, 0, p->finish_buf_src, 0, sizeof(uint32_t)); pl_buf_read(gpu, p->finish_buf_dst, 0, &(uint32_t) {0}, sizeof(uint32_t)); // Poll the event query until it completes for (;;) { BOOL idle; D3D(hr = ID3D11DeviceContext_GetData(p->imm, (ID3D11Asynchronous *) p->finish_query, &idle, sizeof(idle), 0)); if (hr == S_OK && idle) break; Sleep(1); } } pl_d3d11_flush_message_queue(ctx, "After gpu finish"); error: return; } static bool d3d11_gpu_is_failed(pl_gpu gpu) { struct pl_gpu_d3d11 *p = PL_PRIV(gpu); struct d3d11_ctx *ctx = p->ctx; if (ctx->is_failed) return true; // GetDeviceRemovedReason returns S_OK if the device isn't removed HRESULT hr = ID3D11Device_GetDeviceRemovedReason(p->dev); if (FAILED(hr)) { ctx->is_failed = true; pl_d3d11_after_error(ctx, hr); } return ctx->is_failed; } static void d3d11_gpu_destroy(pl_gpu gpu) { struct pl_gpu_d3d11 *p = PL_PRIV(gpu); pl_buf_destroy(gpu, &p->finish_buf_src); pl_buf_destroy(gpu, &p->finish_buf_dst); // Release everything except the immediate context SAFE_RELEASE(p->dev); SAFE_RELEASE(p->dev1); SAFE_RELEASE(p->dev5); SAFE_RELEASE(p->imm1); SAFE_RELEASE(p->imm4); SAFE_RELEASE(p->vbuf.buf); SAFE_RELEASE(p->ibuf.buf); SAFE_RELEASE(p->rstate); SAFE_RELEASE(p->dsstate); for (int i = 0; i < PL_TEX_SAMPLE_MODE_COUNT; i++) { for (int j = 0; j < PL_TEX_ADDRESS_MODE_COUNT; j++) { SAFE_RELEASE(p->samplers[i][j]); } } SAFE_RELEASE(p->finish_fence); if (p->finish_event) CloseHandle(p->finish_event); SAFE_RELEASE(p->finish_query); // Destroy the immediate context synchronously so referenced objects don't // show up in the leak check if (p->imm) { ID3D11DeviceContext_ClearState(p->imm); ID3D11DeviceContext_Flush(p->imm); SAFE_RELEASE(p->imm); } pl_spirv_destroy(&p->spirv); pl_free((void *) gpu); } pl_d3d11 pl_d3d11_get(pl_gpu gpu) { const struct pl_gpu_fns *impl = PL_PRIV(gpu); if (impl->destroy == d3d11_gpu_destroy) { struct pl_gpu_d3d11 *p = (struct pl_gpu_d3d11 *) impl; return p->ctx->d3d11; } return NULL; } static bool load_d3d_compiler(pl_gpu gpu) { struct pl_gpu_d3d11 *p = PL_PRIV(gpu); HMODULE d3dcompiler = NULL; static const struct { const wchar_t *name; bool inbox; } compiler_dlls[] = { // Try the inbox D3DCompiler first (Windows 8.1 and up) { .name = L"d3dcompiler_47.dll", .inbox = true }, // Check for a packaged version of d3dcompiler_47.dll { .name = L"d3dcompiler_47.dll" }, // Try d3dcompiler_46.dll from the Windows 8 SDK { .name = L"d3dcompiler_46.dll" }, // Try d3dcompiler_43.dll from the June 2010 DirectX SDK { .name = L"d3dcompiler_43.dll" }, }; for (int i = 0; i < PL_ARRAY_SIZE(compiler_dlls); i++) { if (compiler_dlls[i].inbox) { if (!IsWindows8Point1OrGreater()) continue; d3dcompiler = LoadLibraryExW(compiler_dlls[i].name, NULL, LOAD_LIBRARY_SEARCH_SYSTEM32); } else { d3dcompiler = LoadLibraryW(compiler_dlls[i].name); } if (!d3dcompiler) continue; p->D3DCompile = (void *) GetProcAddress(d3dcompiler, "D3DCompile"); if (!p->D3DCompile) return false; p->d3d_compiler_ver = pl_get_dll_version(compiler_dlls[i].name); return true; } return false; } static struct pl_gpu_fns pl_fns_d3d11 = { .tex_create = pl_d3d11_tex_create, .tex_destroy = pl_d3d11_tex_destroy, .tex_invalidate = pl_d3d11_tex_invalidate, .tex_clear_ex = pl_d3d11_tex_clear_ex, .tex_blit = pl_d3d11_tex_blit, .tex_upload = pl_d3d11_tex_upload, .tex_download = pl_d3d11_tex_download, .buf_create = pl_d3d11_buf_create, .buf_destroy = pl_d3d11_buf_destroy, .buf_write = pl_d3d11_buf_write, .buf_read = pl_d3d11_buf_read, .buf_copy = pl_d3d11_buf_copy, 
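    // [Editorial note, not part of the upstream source] pl_d3d11_get() above
    // identifies a D3D11-backed pl_gpu solely by checking that this vtable's
    // .destroy callback equals d3d11_gpu_destroy, so the initializer below
    // also doubles as the backend's identity marker.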
.desc_namespace = d3d11_desc_namespace, .pass_create = pl_d3d11_pass_create, .pass_destroy = pl_d3d11_pass_destroy, .pass_run = pl_d3d11_pass_run, .timer_create = d3d11_timer_create, .timer_destroy = d3d11_timer_destroy, .timer_query = d3d11_timer_query, .gpu_flush = d3d11_gpu_flush, .gpu_finish = d3d11_gpu_finish, .gpu_is_failed = d3d11_gpu_is_failed, .destroy = d3d11_gpu_destroy, }; pl_gpu pl_gpu_create_d3d11(struct d3d11_ctx *ctx) { pl_assert(ctx->dev); IDXGIDevice1 *dxgi_dev = NULL; IDXGIAdapter1 *adapter = NULL; IDXGIAdapter4 *adapter4 = NULL; bool success = false; HRESULT hr; struct pl_gpu_t *gpu = pl_zalloc_obj(NULL, gpu, struct pl_gpu_d3d11); gpu->log = ctx->log; struct pl_gpu_d3d11 *p = PL_PRIV(gpu); uint32_t spirv_ver = PL_MIN(SPV_VERSION, PL_MAX_SPIRV_VER); *p = (struct pl_gpu_d3d11) { .ctx = ctx, .impl = pl_fns_d3d11, .dev = ctx->dev, .spirv = pl_spirv_create(ctx->log, (struct pl_spirv_version) { .env_version = pl_spirv_version_to_vulkan(spirv_ver), .spv_version = spirv_ver, }), .vbuf.bind_flags = D3D11_BIND_VERTEX_BUFFER, .ibuf.bind_flags = D3D11_BIND_INDEX_BUFFER, }; if (!p->spirv) goto error; ID3D11Device_AddRef(p->dev); ID3D11Device_GetImmediateContext(p->dev, &p->imm); // Check D3D11.1 interfaces hr = ID3D11Device_QueryInterface(p->dev, &IID_ID3D11Device1, (void **) &p->dev1); if (SUCCEEDED(hr)) { p->minor = 1; ID3D11Device1_GetImmediateContext1(p->dev1, &p->imm1); } // Check D3D11.4 interfaces hr = ID3D11Device_QueryInterface(p->dev, &IID_ID3D11Device5, (void **) &p->dev5); if (SUCCEEDED(hr)) { // There is no GetImmediateContext4 method hr = ID3D11DeviceContext_QueryInterface(p->imm, &IID_ID3D11DeviceContext4, (void **) &p->imm4); if (SUCCEEDED(hr)) p->minor = 4; } PL_INFO(gpu, "Using Direct3D 11.%d runtime", p->minor); D3D(ID3D11Device_QueryInterface(p->dev, &IID_IDXGIDevice1, (void **) &dxgi_dev)); D3D(IDXGIDevice1_GetParent(dxgi_dev, &IID_IDXGIAdapter1, (void **) &adapter)); DXGI_ADAPTER_DESC1 adapter_desc = {0}; IDXGIAdapter1_GetDesc1(adapter, &adapter_desc); // No resource can be larger than max_res_size in bytes unsigned int max_res_size = PL_CLAMP( D3D11_REQ_RESOURCE_SIZE_IN_MEGABYTES_EXPRESSION_B_TERM * adapter_desc.DedicatedVideoMemory, D3D11_REQ_RESOURCE_SIZE_IN_MEGABYTES_EXPRESSION_A_TERM * 1024u * 1024u, D3D11_REQ_RESOURCE_SIZE_IN_MEGABYTES_EXPRESSION_C_TERM * 1024u * 1024u); gpu->glsl = (struct pl_glsl_version) { .version = 450, .vulkan = true, }; gpu->limits = (struct pl_gpu_limits) { .max_buf_size = max_res_size, .max_ssbo_size = max_res_size, .max_vbo_size = max_res_size, .align_vertex_stride = 1, // Make up some values .align_tex_xfer_offset = 32, .align_tex_xfer_pitch = 1, .fragment_queues = 1, }; p->fl = ID3D11Device_GetFeatureLevel(p->dev); // If we're not using FL9_x, we can use the same suballocated buffer as a // vertex buffer and index buffer if (p->fl >= D3D_FEATURE_LEVEL_10_0) p->vbuf.bind_flags |= D3D11_BIND_INDEX_BUFFER; if (p->fl >= D3D_FEATURE_LEVEL_10_0) { gpu->limits.max_ubo_size = D3D11_REQ_CONSTANT_BUFFER_ELEMENT_COUNT * CBUF_ELEM; } else { // 10level9 restriction: // https://docs.microsoft.com/en-us/windows/win32/direct3d11/d3d11-graphics-reference-10level9-context gpu->limits.max_ubo_size = 255 * CBUF_ELEM; } if (p->fl >= D3D_FEATURE_LEVEL_11_0) { gpu->limits.max_tex_1d_dim = D3D11_REQ_TEXTURE1D_U_DIMENSION; gpu->limits.max_tex_2d_dim = D3D11_REQ_TEXTURE2D_U_OR_V_DIMENSION; gpu->limits.max_tex_3d_dim = D3D11_REQ_TEXTURE3D_U_V_OR_W_DIMENSION; } else if (p->fl >= D3D_FEATURE_LEVEL_10_0) { gpu->limits.max_tex_1d_dim = 
D3D10_REQ_TEXTURE1D_U_DIMENSION; gpu->limits.max_tex_2d_dim = D3D10_REQ_TEXTURE2D_U_OR_V_DIMENSION; gpu->limits.max_tex_3d_dim = D3D10_REQ_TEXTURE3D_U_V_OR_W_DIMENSION; } else if (p->fl >= D3D_FEATURE_LEVEL_9_3) { gpu->limits.max_tex_2d_dim = D3D_FL9_3_REQ_TEXTURE2D_U_OR_V_DIMENSION; // Same limit as FL9_1 gpu->limits.max_tex_3d_dim = D3D_FL9_1_REQ_TEXTURE3D_U_V_OR_W_DIMENSION; } else { gpu->limits.max_tex_2d_dim = D3D_FL9_1_REQ_TEXTURE2D_U_OR_V_DIMENSION; gpu->limits.max_tex_3d_dim = D3D_FL9_1_REQ_TEXTURE3D_U_V_OR_W_DIMENSION; } if (p->fl >= D3D_FEATURE_LEVEL_10_0) { gpu->limits.max_buffer_texels = 1 << D3D11_REQ_BUFFER_RESOURCE_TEXEL_COUNT_2_TO_EXP; } if (p->fl >= D3D_FEATURE_LEVEL_11_0) { gpu->glsl.compute = true; gpu->limits.compute_queues = 1; // Set `gpu->limits.blittable_1d_3d`, since `pl_tex_blit_compute`, which // is used to emulate blits on 11_0 and up, supports 1D and 3D textures gpu->limits.blittable_1d_3d = true; gpu->glsl.max_shmem_size = D3D11_CS_TGSM_REGISTER_COUNT * sizeof(float); gpu->glsl.max_group_threads = D3D11_CS_THREAD_GROUP_MAX_THREADS_PER_GROUP; gpu->glsl.max_group_size[0] = D3D11_CS_THREAD_GROUP_MAX_X; gpu->glsl.max_group_size[1] = D3D11_CS_THREAD_GROUP_MAX_Y; gpu->glsl.max_group_size[2] = D3D11_CS_THREAD_GROUP_MAX_Z; gpu->limits.max_dispatch[0] = gpu->limits.max_dispatch[1] = gpu->limits.max_dispatch[2] = D3D11_CS_DISPATCH_MAX_THREAD_GROUPS_PER_DIMENSION; } if (p->fl >= D3D_FEATURE_LEVEL_11_0) { // The offset limits are defined by HLSL: // https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/gather4-po--sm5---asm- gpu->glsl.min_gather_offset = -32; gpu->glsl.max_gather_offset = 31; } else if (p->fl >= D3D_FEATURE_LEVEL_10_1) { // SM4.1 has no gather4_po, so the offset must be specified by an // immediate with a range of [-8, 7] // https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/gather4--sm4-1---asm- // https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/sample--sm4---asm-#address-offset gpu->glsl.min_gather_offset = -8; gpu->glsl.max_gather_offset = 7; } if (p->fl >= D3D_FEATURE_LEVEL_10_0) { p->max_srvs = D3D11_COMMONSHADER_INPUT_RESOURCE_SLOT_COUNT; } else { // 10level9 restriction: // https://docs.microsoft.com/en-us/windows/win32/direct3d11/d3d11-graphics-reference-10level9-context p->max_srvs = 8; } if (p->fl >= D3D_FEATURE_LEVEL_11_1) { p->max_uavs = D3D11_1_UAV_SLOT_COUNT; } else { p->max_uavs = D3D11_PS_CS_UAV_REGISTER_COUNT; } if (!load_d3d_compiler(gpu)) { PL_FATAL(gpu, "Could not find D3DCompiler DLL"); goto error; } PL_INFO(gpu, "D3DCompiler version: %u.%u.%u.%u", p->d3d_compiler_ver.major, p->d3d_compiler_ver.minor, p->d3d_compiler_ver.build, p->d3d_compiler_ver.revision); // Detect support for timestamp queries. Some FL9_x devices don't support them. 
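// Note: CreateQuery is called with a NULL output pointer, which turns it into
// a pure capability check -- D3D11 only validates the query desc and returns
// S_FALSE on success without creating a query object, and S_FALSE still
// satisfies SUCCEEDED().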
hr = ID3D11Device_CreateQuery(p->dev, &(D3D11_QUERY_DESC) { D3D11_QUERY_TIMESTAMP }, NULL); p->has_timestamp_queries = SUCCEEDED(hr); pl_d3d11_setup_formats(gpu); // The rasterizer state never changes, so create it here D3D11_RASTERIZER_DESC rdesc = { .FillMode = D3D11_FILL_SOLID, .CullMode = D3D11_CULL_NONE, .FrontCounterClockwise = FALSE, .DepthClipEnable = TRUE, // Required for 10level9 .ScissorEnable = TRUE, }; D3D(ID3D11Device_CreateRasterizerState(p->dev, &rdesc, &p->rstate)); // The depth stencil state never changes either, and we only set it to turn // depth testing off so the debug layer doesn't complain about an unbound // depth buffer D3D11_DEPTH_STENCIL_DESC dsdesc = { .DepthEnable = FALSE, .DepthWriteMask = D3D11_DEPTH_WRITE_MASK_ALL, .DepthFunc = D3D11_COMPARISON_LESS, .StencilReadMask = D3D11_DEFAULT_STENCIL_READ_MASK, .StencilWriteMask = D3D11_DEFAULT_STENCIL_WRITE_MASK, .FrontFace = { .StencilFailOp = D3D11_STENCIL_OP_KEEP, .StencilDepthFailOp = D3D11_STENCIL_OP_KEEP, .StencilPassOp = D3D11_STENCIL_OP_KEEP, .StencilFunc = D3D11_COMPARISON_ALWAYS, }, .BackFace = { .StencilFailOp = D3D11_STENCIL_OP_KEEP, .StencilDepthFailOp = D3D11_STENCIL_OP_KEEP, .StencilPassOp = D3D11_STENCIL_OP_KEEP, .StencilFunc = D3D11_COMPARISON_ALWAYS, }, }; D3D(ID3D11Device_CreateDepthStencilState(p->dev, &dsdesc, &p->dsstate)); // Initialize the samplers for (int sample_mode = 0; sample_mode < PL_TEX_SAMPLE_MODE_COUNT; sample_mode++) { for (int address_mode = 0; address_mode < PL_TEX_ADDRESS_MODE_COUNT; address_mode++) { static const D3D11_TEXTURE_ADDRESS_MODE d3d_address_mode[] = { [PL_TEX_ADDRESS_CLAMP] = D3D11_TEXTURE_ADDRESS_CLAMP, [PL_TEX_ADDRESS_REPEAT] = D3D11_TEXTURE_ADDRESS_WRAP, [PL_TEX_ADDRESS_MIRROR] = D3D11_TEXTURE_ADDRESS_MIRROR, }; static const D3D11_FILTER d3d_filter[] = { [PL_TEX_SAMPLE_NEAREST] = D3D11_FILTER_MIN_MAG_MIP_POINT, [PL_TEX_SAMPLE_LINEAR] = D3D11_FILTER_MIN_MAG_MIP_LINEAR, }; D3D11_SAMPLER_DESC sdesc = { .AddressU = d3d_address_mode[address_mode], .AddressV = d3d_address_mode[address_mode], .AddressW = d3d_address_mode[address_mode], .ComparisonFunc = D3D11_COMPARISON_NEVER, .MinLOD = 0, .MaxLOD = D3D11_FLOAT32_MAX, .MaxAnisotropy = 1, .Filter = d3d_filter[sample_mode], }; D3D(ID3D11Device_CreateSamplerState(p->dev, &sdesc, &p->samplers[sample_mode][address_mode])); } } hr = IDXGIAdapter1_QueryInterface(adapter, &IID_IDXGIAdapter4, (void **) &adapter4); if (SUCCEEDED(hr)) { DXGI_ADAPTER_DESC3 adapter_desc3 = {0}; IDXGIAdapter4_GetDesc3(adapter4, &adapter_desc3); p->has_monitored_fences = adapter_desc3.Flags & DXGI_ADAPTER_FLAG3_SUPPORT_MONITORED_FENCES; } // Try to create a D3D11.4 fence object to wait on in pl_gpu_finish() if (p->dev5 && p->has_monitored_fences) { hr = ID3D11Device5_CreateFence(p->dev5, 0, D3D11_FENCE_FLAG_NONE, &IID_ID3D11Fence, (void **) &p->finish_fence); if (SUCCEEDED(hr)) { p->finish_event = CreateEventW(NULL, FALSE, FALSE, NULL); if (!p->finish_event) { PL_ERR(gpu, "Failed to create finish() event"); goto error; } } } // If fences are not available, we will have to poll a event query instead if (!p->finish_fence) { // Buffers for dummy copy/readback (see d3d11_gpu_finish()) p->finish_buf_src = pl_buf_create(gpu, pl_buf_params( .size = sizeof(uint32_t), .drawable = true, // Make these vertex buffers for 10level9 .initial_data = &(uint32_t) {0x11223344}, )); p->finish_buf_dst = pl_buf_create(gpu, pl_buf_params( .size = sizeof(uint32_t), .host_readable = true, .drawable = true, )); D3D(ID3D11Device_CreateQuery(p->dev, &(D3D11_QUERY_DESC) { 
D3D11_QUERY_EVENT }, &p->finish_query)); } pl_d3d11_flush_message_queue(ctx, "After gpu create"); success = true; error: SAFE_RELEASE(dxgi_dev); SAFE_RELEASE(adapter); SAFE_RELEASE(adapter4); if (success) { return pl_gpu_finalize(gpu); } else { d3d11_gpu_destroy(gpu); return NULL; } } libplacebo-v7.349.0/src/d3d11/gpu.h000066400000000000000000000146421463457750100165520ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #pragma once #include #include #include #include #include #include "../gpu.h" #include "../glsl/spirv.h" #include "common.h" #include "utils.h" pl_gpu pl_gpu_create_d3d11(struct d3d11_ctx *ctx); // --- pl_gpu internal structs and helpers // Size of one constant in a constant buffer #define CBUF_ELEM (sizeof(float[4])) struct d3d_stream_buf { UINT bind_flags; ID3D11Buffer *buf; size_t size; size_t used; unsigned int align; }; struct pl_gpu_d3d11 { struct pl_gpu_fns impl; struct d3d11_ctx *ctx; ID3D11Device *dev; ID3D11Device1 *dev1; ID3D11Device5 *dev5; ID3D11DeviceContext *imm; ID3D11DeviceContext1 *imm1; ID3D11DeviceContext4 *imm4; // The Direct3D 11 minor version number int minor; pl_spirv spirv; pD3DCompile D3DCompile; struct dll_version d3d_compiler_ver; // Device capabilities D3D_FEATURE_LEVEL fl; bool has_timestamp_queries; bool has_monitored_fences; int max_srvs; int max_uavs; // Streaming vertex and index buffers struct d3d_stream_buf vbuf; struct d3d_stream_buf ibuf; // Shared rasterizer state ID3D11RasterizerState *rstate; // Shared depth-stencil state ID3D11DepthStencilState *dsstate; // Array of ID3D11SamplerStates for every combination of sample/address modes ID3D11SamplerState *samplers[PL_TEX_SAMPLE_MODE_COUNT][PL_TEX_ADDRESS_MODE_COUNT]; // Resources for finish() ID3D11Fence *finish_fence; uint64_t finish_value; HANDLE finish_event; ID3D11Query *finish_query; pl_buf finish_buf_src; pl_buf finish_buf_dst; }; void pl_d3d11_setup_formats(struct pl_gpu_t *gpu); void pl_d3d11_timer_start(pl_gpu gpu, pl_timer timer); void pl_d3d11_timer_end(pl_gpu gpu, pl_timer timer); struct pl_buf_d3d11 { ID3D11Buffer *buf; ID3D11Buffer *staging; ID3D11ShaderResourceView *raw_srv; ID3D11UnorderedAccessView *raw_uav; ID3D11ShaderResourceView *texel_srv; ID3D11UnorderedAccessView *texel_uav; char *data; bool dirty; }; void pl_d3d11_buf_destroy(pl_gpu gpu, pl_buf buf); pl_buf pl_d3d11_buf_create(pl_gpu gpu, const struct pl_buf_params *params); void pl_d3d11_buf_write(pl_gpu gpu, pl_buf buf, size_t offset, const void *data, size_t size); bool pl_d3d11_buf_read(pl_gpu gpu, pl_buf buf, size_t offset, void *dest, size_t size); void pl_d3d11_buf_copy(pl_gpu gpu, pl_buf dst, size_t dst_offset, pl_buf src, size_t src_offset, size_t size); // Ensure a buffer is up-to-date with its system memory mirror before it is used void pl_d3d11_buf_resolve(pl_gpu gpu, pl_buf buf); struct pl_tex_d3d11 { // res mirrors one of tex1d, tex2d or tex3d for convenience. 
It does not // hold an additional reference to the texture object. ID3D11Resource *res; ID3D11Texture1D *tex1d; ID3D11Texture2D *tex2d; ID3D11Texture3D *tex3d; int array_slice; // Mirrors one of staging1d, staging2d, or staging3d, and doesn't hold a ref ID3D11Resource *staging; // Staging textures for pl_tex_download ID3D11Texture1D *staging1d; ID3D11Texture2D *staging2d; ID3D11Texture3D *staging3d; ID3D11ShaderResourceView *srv; ID3D11RenderTargetView *rtv; ID3D11UnorderedAccessView *uav; // for tex_upload/download fallback code pl_fmt texel_fmt; }; void pl_d3d11_tex_destroy(pl_gpu gpu, pl_tex tex); pl_tex pl_d3d11_tex_create(pl_gpu gpu, const struct pl_tex_params *params); void pl_d3d11_tex_invalidate(pl_gpu gpu, pl_tex tex); void pl_d3d11_tex_clear_ex(pl_gpu gpu, pl_tex tex, const union pl_clear_color color); void pl_d3d11_tex_blit(pl_gpu gpu, const struct pl_tex_blit_params *params); bool pl_d3d11_tex_upload(pl_gpu gpu, const struct pl_tex_transfer_params *params); bool pl_d3d11_tex_download(pl_gpu gpu, const struct pl_tex_transfer_params *params); // Constant buffer layout used for gl_NumWorkGroups emulation struct d3d_num_workgroups_buf { alignas(CBUF_ELEM) uint32_t num_wgs[3]; }; enum { HLSL_BINDING_NOT_USED = -1, // Slot should always be bound as NULL HLSL_BINDING_NUM_WORKGROUPS = -2, // Slot used for gl_NumWorkGroups emulation }; // Represents a specific shader stage in a pl_pass (VS, PS, CS) struct d3d_pass_stage { // Lists for each resource type, to simplify binding in pl_pass_run. Indexes // match the index of the arrays passed to the ID3D11DeviceContext methods. // Entries are the index of pass->params.descriptors which should be bound // in that position, or a HLSL_BINDING_* special value. PL_ARRAY(int) cbvs; PL_ARRAY(int) srvs; PL_ARRAY(int) samplers; }; struct pl_pass_d3d11 { ID3D11PixelShader *ps; ID3D11VertexShader *vs; ID3D11ComputeShader *cs; ID3D11InputLayout *layout; ID3D11BlendState *bstate; // gl_NumWorkGroups emulation struct d3d_num_workgroups_buf last_num_wgs; ID3D11Buffer *num_workgroups_buf; bool num_workgroups_used; // Maximum binding number int max_binding; struct d3d_pass_stage main; // PS and CS struct d3d_pass_stage vertex; // List of resources, as in `struct pass_stage`, except UAVs are shared // between all shader stages PL_ARRAY(int) uavs; // Pre-allocated resource arrays to use in pl_pass_run ID3D11Buffer **cbv_arr; ID3D11ShaderResourceView **srv_arr; ID3D11SamplerState **sampler_arr; ID3D11UnorderedAccessView **uav_arr; }; void pl_d3d11_pass_destroy(pl_gpu gpu, pl_pass pass); const struct pl_pass_t *pl_d3d11_pass_create(pl_gpu gpu, const struct pl_pass_params *params); void pl_d3d11_pass_run(pl_gpu gpu, const struct pl_pass_run_params *params); libplacebo-v7.349.0/src/d3d11/gpu_buf.c000066400000000000000000000252531463457750100174010ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . 
*/ #include "gpu.h" #include "formats.h" void pl_d3d11_buf_destroy(pl_gpu gpu, pl_buf buf) { struct pl_gpu_d3d11 *p = PL_PRIV(gpu); struct d3d11_ctx *ctx = p->ctx; struct pl_buf_d3d11 *buf_p = PL_PRIV(buf); SAFE_RELEASE(buf_p->buf); SAFE_RELEASE(buf_p->staging); SAFE_RELEASE(buf_p->raw_srv); SAFE_RELEASE(buf_p->raw_uav); SAFE_RELEASE(buf_p->texel_srv); SAFE_RELEASE(buf_p->texel_uav); pl_d3d11_flush_message_queue(ctx, "After buffer destroy"); pl_free((void *) buf); } pl_buf pl_d3d11_buf_create(pl_gpu gpu, const struct pl_buf_params *params) { struct pl_gpu_d3d11 *p = PL_PRIV(gpu); struct d3d11_ctx *ctx = p->ctx; struct pl_buf_t *buf = pl_zalloc_obj(NULL, buf, struct pl_buf_d3d11); buf->params = *params; buf->params.initial_data = NULL; struct pl_buf_d3d11 *buf_p = PL_PRIV(buf); D3D11_BUFFER_DESC desc = { .ByteWidth = params->size }; if (params->uniform && !params->format && (params->storable || params->drawable)) { // TODO: Figure out what to do with these PL_ERR(gpu, "Uniform buffers cannot share any other buffer type"); goto error; } // TODO: Distinguish between uniform buffers and texel uniform buffers. // Currently we assume that if uniform and format are set, it's a texel // buffer and NOT a uniform buffer. if (params->uniform && !params->format) { desc.BindFlags |= D3D11_BIND_CONSTANT_BUFFER; desc.ByteWidth = PL_ALIGN2(desc.ByteWidth, CBUF_ELEM); } if (params->uniform && params->format) { desc.BindFlags |= D3D11_BIND_SHADER_RESOURCE; } if (params->storable) { desc.BindFlags |= D3D11_BIND_UNORDERED_ACCESS | D3D11_BIND_SHADER_RESOURCE; desc.ByteWidth = PL_ALIGN2(desc.ByteWidth, sizeof(float)); desc.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_ALLOW_RAW_VIEWS; } if (params->drawable) { desc.BindFlags |= D3D11_BIND_VERTEX_BUFFER; // In FL9_x, a vertex buffer can't also be an index buffer, so index // buffers are unsupported in FL9_x for now if (p->fl > D3D_FEATURE_LEVEL_9_3) desc.BindFlags |= D3D11_BIND_INDEX_BUFFER; } char *data = NULL; // D3D11 doesn't allow partial constant buffer updates without special // conditions. To support partial buffer updates, keep a mirror of the // buffer data in system memory and upload the whole thing before the buffer // is used. // // Note: We don't use a staging buffer for this because of Intel. // https://github.com/mpv-player/mpv/issues/5293 // https://crbug.com/593024 if (params->uniform && !params->format && params->host_writable) { data = pl_zalloc(buf, desc.ByteWidth); buf_p->data = data; } D3D11_SUBRESOURCE_DATA srdata = { 0 }; if (params->initial_data) { if (desc.ByteWidth != params->size) { // If the size had to be rounded-up, uploading from // params->initial_data is technically undefined behavior, so copy // the initial data to an allocation first if (!data) data = pl_zalloc(buf, desc.ByteWidth); srdata.pSysMem = data; } else { srdata.pSysMem = params->initial_data; } if (data) memcpy(data, params->initial_data, params->size); } D3D(ID3D11Device_CreateBuffer(p->dev, &desc, params->initial_data ? 
&srdata : NULL, &buf_p->buf)); if (!buf_p->data) pl_free(data); // Create raw views for PL_DESC_BUF_STORAGE if (params->storable) { // A SRV is used for PL_DESC_ACCESS_READONLY D3D11_SHADER_RESOURCE_VIEW_DESC sdesc = { .Format = DXGI_FORMAT_R32_TYPELESS, .ViewDimension = D3D11_SRV_DIMENSION_BUFFEREX, .BufferEx = { .NumElements = PL_ALIGN2(buf->params.size, sizeof(float)) / sizeof(float), .Flags = D3D11_BUFFEREX_SRV_FLAG_RAW, }, }; D3D(ID3D11Device_CreateShaderResourceView(p->dev, (ID3D11Resource *) buf_p->buf, &sdesc, &buf_p->raw_srv)); // A UAV is used for all other access modes D3D11_UNORDERED_ACCESS_VIEW_DESC udesc = { .Format = DXGI_FORMAT_R32_TYPELESS, .ViewDimension = D3D11_UAV_DIMENSION_BUFFER, .Buffer = { .NumElements = PL_ALIGN2(buf->params.size, sizeof(float)) / sizeof(float), .Flags = D3D11_BUFFER_UAV_FLAG_RAW, }, }; D3D(ID3D11Device_CreateUnorderedAccessView(p->dev, (ID3D11Resource *) buf_p->buf, &udesc, &buf_p->raw_uav)); } // Create a typed SRV for PL_BUF_TEXEL_UNIFORM and PL_BUF_TEXEL_STORAGE if (params->format) { if (params->uniform) { D3D11_SHADER_RESOURCE_VIEW_DESC sdesc = { .Format = fmt_to_dxgi(params->format), .ViewDimension = D3D11_SRV_DIMENSION_BUFFER, .Buffer = { .NumElements = PL_ALIGN(buf->params.size, buf->params.format->texel_size) / buf->params.format->texel_size, }, }; D3D(ID3D11Device_CreateShaderResourceView(p->dev, (ID3D11Resource *) buf_p->buf, &sdesc, &buf_p->texel_srv)); } // Create a typed UAV for PL_BUF_TEXEL_STORAGE if (params->storable) { D3D11_UNORDERED_ACCESS_VIEW_DESC udesc = { .Format = fmt_to_dxgi(buf->params.format), .ViewDimension = D3D11_UAV_DIMENSION_BUFFER, .Buffer = { .NumElements = PL_ALIGN(buf->params.size, buf->params.format->texel_size) / buf->params.format->texel_size, }, }; D3D(ID3D11Device_CreateUnorderedAccessView(p->dev, (ID3D11Resource *) buf_p->buf, &udesc, &buf_p->texel_uav)); } } if (!buf_p->data) { // Create the staging buffer regardless of whether params->host_readable // is set or not, so that buf_copy can copy to system-memory-backed // buffers // TODO: Consider sharing a big staging buffer for this, rather than // having one staging buffer per buffer desc.BindFlags = 0; desc.MiscFlags = 0; desc.CPUAccessFlags = D3D11_CPU_ACCESS_READ; desc.Usage = D3D11_USAGE_STAGING; D3D(ID3D11Device_CreateBuffer(p->dev, &desc, NULL, &buf_p->staging)); } pl_d3d11_flush_message_queue(ctx, "After buffer create"); return buf; error: pl_d3d11_buf_destroy(gpu, buf); return NULL; } void pl_d3d11_buf_write(pl_gpu gpu, pl_buf buf, size_t offset, const void *data, size_t size) { struct pl_gpu_d3d11 *p = PL_PRIV(gpu); struct pl_buf_d3d11 *buf_p = PL_PRIV(buf); if (buf_p->data) { memcpy(buf_p->data + offset, data, size); buf_p->dirty = true; } else { ID3D11DeviceContext_UpdateSubresource(p->imm, (ID3D11Resource *) buf_p->buf, 0, (&(D3D11_BOX) { .left = offset, .top = 0, .front = 0, .right = offset + size, .bottom = 1, .back = 1, }), data, 0, 0); } } void pl_d3d11_buf_resolve(pl_gpu gpu, pl_buf buf) { struct pl_gpu_d3d11 *p = PL_PRIV(gpu); struct pl_buf_d3d11 *buf_p = PL_PRIV(buf); if (!buf_p->data || !buf_p->dirty) return; ID3D11DeviceContext_UpdateSubresource(p->imm, (ID3D11Resource *) buf_p->buf, 0, NULL, buf_p->data, 0, 0); } bool pl_d3d11_buf_read(pl_gpu gpu, pl_buf buf, size_t offset, void *dest, size_t size) { struct pl_gpu_d3d11 *p = PL_PRIV(gpu); struct d3d11_ctx *ctx = p->ctx; struct pl_buf_d3d11 *buf_p = PL_PRIV(buf); // If there is a system-memory mirror of the buffer contents, use it if (buf_p->data) { memcpy(dest, buf_p->data + offset, 
size); return true; } ID3D11DeviceContext_CopyResource(p->imm, (ID3D11Resource *) buf_p->staging, (ID3D11Resource *) buf_p->buf); D3D11_MAPPED_SUBRESOURCE lock; D3D(ID3D11DeviceContext_Map(p->imm, (ID3D11Resource *) buf_p->staging, 0, D3D11_MAP_READ, 0, &lock)); char *csrc = lock.pData; memcpy(dest, csrc + offset, size); ID3D11DeviceContext_Unmap(p->imm, (ID3D11Resource *) buf_p->staging, 0); pl_d3d11_flush_message_queue(ctx, "After buffer read"); return true; error: return false; } void pl_d3d11_buf_copy(pl_gpu gpu, pl_buf dst, size_t dst_offset, pl_buf src, size_t src_offset, size_t size) { struct pl_gpu_d3d11 *p = PL_PRIV(gpu); struct d3d11_ctx *ctx = p->ctx; struct pl_buf_d3d11 *src_p = PL_PRIV(src); struct pl_buf_d3d11 *dst_p = PL_PRIV(dst); // Handle system memory copies in case one or both of the buffers has a // system memory mirror if (src_p->data && dst_p->data) { memcpy(dst_p->data + dst_offset, src_p->data + src_offset, size); dst_p->dirty = true; } else if (src_p->data) { pl_d3d11_buf_write(gpu, dst, dst_offset, src_p->data + src_offset, size); } else if (dst_p->data) { if (pl_d3d11_buf_read(gpu, src, src_offset, dst_p->data + dst_offset, size)) { dst_p->dirty = true; } else { PL_ERR(gpu, "Failed to read from GPU during buffer copy"); } } else { ID3D11DeviceContext_CopySubresourceRegion(p->imm, (ID3D11Resource *) dst_p->buf, 0, dst_offset, 0, 0, (ID3D11Resource *) src_p->buf, 0, (&(D3D11_BOX) { .left = src_offset, .top = 0, .front = 0, .right = src_offset + size, .bottom = 1, .back = 1, })); } pl_d3d11_flush_message_queue(ctx, "After buffer copy"); } libplacebo-v7.349.0/src/d3d11/gpu_pass.c000066400000000000000000001372521463457750100175760ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #include "gpu.h" #include "formats.h" #include "glsl/spirv.h" #include "../cache.h" struct stream_buf_slice { const void *data; unsigned int size; unsigned int offset; }; // Upload one or more slices of single-use data to a suballocated dynamic // buffer. Only call this once per-buffer per-pass, since it will discard or // reallocate the buffer when full. 
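// This is the classic D3D11 dynamic-buffer streaming pattern: slices are
// appended with D3D11_MAP_WRITE_NO_OVERWRITE, and once the write cursor would
// run past the end of the buffer it wraps around and maps with
// D3D11_MAP_WRITE_DISCARD, so the driver can rename the allocation instead of
// stalling on draws that still reference the old contents.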
static bool stream_buf_upload(pl_gpu gpu, struct d3d_stream_buf *stream, struct stream_buf_slice *slices, int num_slices) { struct pl_gpu_d3d11 *p = PL_PRIV(gpu); struct d3d11_ctx *ctx = p->ctx; unsigned int align = PL_DEF(stream->align, sizeof(float)); // Get total size, rounded up to the buffer's alignment size_t size = 0; for (int i = 0; i < num_slices; i++) size += PL_ALIGN2(slices[i].size, align); if (size > gpu->limits.max_buf_size) { PL_ERR(gpu, "Streaming buffer is too large"); return -1; } // If the data doesn't fit, realloc the buffer if (size > stream->size) { size_t new_size = stream->size; // Arbitrary base size if (!new_size) new_size = 16 * 1024; while (new_size < size) new_size *= 2; new_size = PL_MIN(new_size, gpu->limits.max_buf_size); ID3D11Buffer *new_buf; D3D11_BUFFER_DESC vbuf_desc = { .ByteWidth = new_size, .Usage = D3D11_USAGE_DYNAMIC, .BindFlags = stream->bind_flags, .CPUAccessFlags = D3D11_CPU_ACCESS_WRITE, }; D3D(ID3D11Device_CreateBuffer(p->dev, &vbuf_desc, NULL, &new_buf)); SAFE_RELEASE(stream->buf); stream->buf = new_buf; stream->size = new_size; stream->used = 0; } bool discard = false; size_t offset = stream->used; if (offset + size > stream->size) { // We reached the end of the buffer, so discard and wrap around discard = true; offset = 0; } D3D11_MAPPED_SUBRESOURCE map = {0}; UINT type = discard ? D3D11_MAP_WRITE_DISCARD : D3D11_MAP_WRITE_NO_OVERWRITE; D3D(ID3D11DeviceContext_Map(p->imm, (ID3D11Resource *) stream->buf, 0, type, 0, &map)); // Upload each slice char *cdata = map.pData; stream->used = offset; for (int i = 0; i < num_slices; i++) { slices[i].offset = stream->used; memcpy(cdata + slices[i].offset, slices[i].data, slices[i].size); stream->used += PL_ALIGN2(slices[i].size, align); } ID3D11DeviceContext_Unmap(p->imm, (ID3D11Resource *) stream->buf, 0); return true; error: return false; } static const char *get_shader_target(pl_gpu gpu, enum glsl_shader_stage stage) { struct pl_gpu_d3d11 *p = PL_PRIV(gpu); switch (p->fl) { default: switch (stage) { case GLSL_SHADER_VERTEX: return "vs_5_0"; case GLSL_SHADER_FRAGMENT: return "ps_5_0"; case GLSL_SHADER_COMPUTE: return "cs_5_0"; } break; case D3D_FEATURE_LEVEL_10_1: switch (stage) { case GLSL_SHADER_VERTEX: return "vs_4_1"; case GLSL_SHADER_FRAGMENT: return "ps_4_1"; case GLSL_SHADER_COMPUTE: return "cs_4_1"; } break; case D3D_FEATURE_LEVEL_10_0: switch (stage) { case GLSL_SHADER_VERTEX: return "vs_4_0"; case GLSL_SHADER_FRAGMENT: return "ps_4_0"; case GLSL_SHADER_COMPUTE: return "cs_4_0"; } break; case D3D_FEATURE_LEVEL_9_3: switch (stage) { case GLSL_SHADER_VERTEX: return "vs_4_0_level_9_3"; case GLSL_SHADER_FRAGMENT: return "ps_4_0_level_9_3"; case GLSL_SHADER_COMPUTE: return NULL; } break; case D3D_FEATURE_LEVEL_9_2: case D3D_FEATURE_LEVEL_9_1: switch (stage) { case GLSL_SHADER_VERTEX: return "vs_4_0_level_9_1"; case GLSL_SHADER_FRAGMENT: return "ps_4_0_level_9_1"; case GLSL_SHADER_COMPUTE: return NULL; } break; } return NULL; } static SpvExecutionModel stage_to_spv(enum glsl_shader_stage stage) { static const SpvExecutionModel spv_execution_model[] = { [GLSL_SHADER_VERTEX] = SpvExecutionModelVertex, [GLSL_SHADER_FRAGMENT] = SpvExecutionModelFragment, [GLSL_SHADER_COMPUTE] = SpvExecutionModelGLCompute, }; return spv_execution_model[stage]; } #define SC(cmd) \ do { \ spvc_result res = (cmd); \ if (res != SPVC_SUCCESS) { \ PL_ERR(gpu, "%s: %s (%d) (%s:%d)", \ #cmd, sc ? 
spvc_context_get_last_error_string(sc) : "", \ res, __FILE__, __LINE__); \ goto error; \ } \ } while (0) // Some decorations, like SpvDecorationNonWritable, are actually found on the // members of a buffer block, rather than the buffer block itself. If all // members have a certain decoration, SPIRV-Cross considers it to apply to the // buffer block too, which determines things like whether a SRV or UAV is used // for an SSBO. This function checks if SPIRV-Cross considers a decoration to // apply to a buffer block. static spvc_result buffer_block_has_decoration(spvc_compiler sc_comp, spvc_variable_id id, SpvDecoration decoration, bool *out) { const SpvDecoration *decorations; size_t num_decorations = 0; spvc_result res = spvc_compiler_get_buffer_block_decorations(sc_comp, id, &decorations, &num_decorations); if (res != SPVC_SUCCESS) return res; for (size_t j = 0; j < num_decorations; j++) { if (decorations[j] == decoration) { *out = true; return res; } } *out = false; return res; } static bool alloc_hlsl_reg_bindings(pl_gpu gpu, pl_pass pass, struct d3d_pass_stage *pass_s, spvc_context sc, spvc_compiler sc_comp, spvc_resources resources, spvc_resource_type res_type, enum glsl_shader_stage stage) { struct pl_gpu_d3d11 *p = PL_PRIV(gpu); struct pl_pass_d3d11 *pass_p = PL_PRIV(pass); const spvc_reflected_resource *res_list; size_t res_count; SC(spvc_resources_get_resource_list_for_type(resources, res_type, &res_list, &res_count)); // In a raster pass, one of the UAV slots is used by the runtime for the RTV int uav_offset = stage == GLSL_SHADER_COMPUTE ? 0 : 1; int max_uavs = p->max_uavs - uav_offset; for (int i = 0; i < res_count; i++) { unsigned int binding = spvc_compiler_get_decoration(sc_comp, res_list[i].id, SpvDecorationBinding); unsigned int descriptor_set = spvc_compiler_get_decoration(sc_comp, res_list[i].id, SpvDecorationDescriptorSet); if (descriptor_set != 0) continue; pass_p->max_binding = PL_MAX(pass_p->max_binding, binding); spvc_hlsl_resource_binding hlslbind; spvc_hlsl_resource_binding_init(&hlslbind); hlslbind.stage = stage_to_spv(stage); hlslbind.binding = binding; hlslbind.desc_set = descriptor_set; bool has_cbv = false, has_sampler = false, has_srv = false, has_uav = false; switch (res_type) { case SPVC_RESOURCE_TYPE_UNIFORM_BUFFER: has_cbv = true; break; case SPVC_RESOURCE_TYPE_STORAGE_BUFFER:; bool non_writable_bb = false; SC(buffer_block_has_decoration(sc_comp, res_list[i].id, SpvDecorationNonWritable, &non_writable_bb)); if (non_writable_bb) { has_srv = true; } else { has_uav = true; } break; case SPVC_RESOURCE_TYPE_STORAGE_IMAGE:; bool non_writable = spvc_compiler_has_decoration(sc_comp, res_list[i].id, SpvDecorationNonWritable); if (non_writable) { has_srv = true; } else { has_uav = true; } break; case SPVC_RESOURCE_TYPE_SEPARATE_IMAGE: has_srv = true; break; case SPVC_RESOURCE_TYPE_SAMPLED_IMAGE:; spvc_type type = spvc_compiler_get_type_handle(sc_comp, res_list[i].type_id); SpvDim dimension = spvc_type_get_image_dimension(type); // Uniform texel buffers are technically sampled images, but they // aren't sampled from, so don't allocate a sampler if (dimension != SpvDimBuffer) has_sampler = true; has_srv = true; break; default: break; } if (has_cbv) { hlslbind.cbv.register_binding = pass_s->cbvs.num; PL_ARRAY_APPEND(pass, pass_s->cbvs, binding); if (pass_s->cbvs.num > D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT) { PL_ERR(gpu, "Too many constant buffers in shader"); goto error; } } if (has_sampler) { hlslbind.sampler.register_binding = pass_s->samplers.num; 
PL_ARRAY_APPEND(pass, pass_s->samplers, binding); if (pass_s->samplers.num > D3D11_COMMONSHADER_SAMPLER_SLOT_COUNT) { PL_ERR(gpu, "Too many samplers in shader"); goto error; } } if (has_srv) { hlslbind.srv.register_binding = pass_s->srvs.num; PL_ARRAY_APPEND(pass, pass_s->srvs, binding); if (pass_s->srvs.num > p->max_srvs) { PL_ERR(gpu, "Too many SRVs in shader"); goto error; } } if (has_uav) { // UAV registers are shared between the vertex and fragment shaders // in a raster pass, so check if the UAV for this resource has // already been allocated bool uav_bound = false; for (int j = 0; j < pass_p->uavs.num; j++) { if (pass_p->uavs.elem[j] == binding) { uav_bound = true; break; } } if (!uav_bound) { hlslbind.uav.register_binding = pass_p->uavs.num + uav_offset; PL_ARRAY_APPEND(pass, pass_p->uavs, binding); if (pass_p->uavs.num > max_uavs) { PL_ERR(gpu, "Too many UAVs in shader"); goto error; } } } SC(spvc_compiler_hlsl_add_resource_binding(sc_comp, &hlslbind)); } return true; error: return false; } static const char *shader_names[] = { [GLSL_SHADER_VERTEX] = "vertex", [GLSL_SHADER_FRAGMENT] = "fragment", [GLSL_SHADER_COMPUTE] = "compute", }; static ID3DBlob *shader_compile_glsl(pl_gpu gpu, pl_pass pass, struct d3d_pass_stage *pass_s, enum glsl_shader_stage stage, const char *glsl) { struct pl_gpu_d3d11 *p = PL_PRIV(gpu); struct pl_pass_d3d11 *pass_p = PL_PRIV(pass); void *tmp = pl_tmp(NULL); spvc_context sc = NULL; spvc_compiler sc_comp = NULL; const char *hlsl = NULL; ID3DBlob *out = NULL; ID3DBlob *errors = NULL; HRESULT hr; pl_clock_t start = pl_clock_now(); pl_str spirv = pl_spirv_compile_glsl(p->spirv, tmp, gpu->glsl, stage, glsl); if (!spirv.len) goto error; pl_clock_t after_glsl = pl_clock_now(); pl_log_cpu_time(gpu->log, start, after_glsl, "translating GLSL to SPIR-V"); SC(spvc_context_create(&sc)); spvc_parsed_ir sc_ir; SC(spvc_context_parse_spirv(sc, (SpvId *) spirv.buf, spirv.len / sizeof(SpvId), &sc_ir)); SC(spvc_context_create_compiler(sc, SPVC_BACKEND_HLSL, sc_ir, SPVC_CAPTURE_MODE_TAKE_OWNERSHIP, &sc_comp)); spvc_compiler_options sc_opts; SC(spvc_compiler_create_compiler_options(sc_comp, &sc_opts)); int sc_shader_model; if (p->fl >= D3D_FEATURE_LEVEL_11_0) { sc_shader_model = 50; } else if (p->fl >= D3D_FEATURE_LEVEL_10_1) { sc_shader_model = 41; } else { sc_shader_model = 40; } SC(spvc_compiler_options_set_uint(sc_opts, SPVC_COMPILER_OPTION_HLSL_SHADER_MODEL, sc_shader_model)); // Unlike Vulkan and OpenGL, in D3D11, the clip-space is "flipped" with // respect to framebuffer-space. In other words, if you render to a pixel at // (0, -1), you have to sample from (0, 1) to get the value back. We unflip // it by setting the following option, which inserts the equivalent of // `gl_Position.y = -gl_Position.y` into the vertex shader if (stage == GLSL_SHADER_VERTEX) { SC(spvc_compiler_options_set_bool(sc_opts, SPVC_COMPILER_OPTION_FLIP_VERTEX_Y, SPVC_TRUE)); } // Bind readonly images and imageBuffers as SRVs. This is done because a lot // of hardware (especially FL11_x hardware) has very poor format support for // reading values from UAVs. It allows the common case of readonly and // writeonly images to support more formats, though the less common case of // readwrite images still requires format support for UAV loads (represented // by the PL_FMT_CAP_READWRITE cap in libplacebo.) // // Note that setting this option comes at the cost of GLSL support. 
Readonly // and readwrite images are the same type in GLSL, but SRV and UAV bound // textures are different types in HLSL, so for example, a GLSL function // with an image parameter may fail to compile as HLSL if it's called with a // readonly image and a readwrite image at different call sites. SC(spvc_compiler_options_set_bool(sc_opts, SPVC_COMPILER_OPTION_HLSL_NONWRITABLE_UAV_TEXTURE_AS_SRV, SPVC_TRUE)); SC(spvc_compiler_install_compiler_options(sc_comp, sc_opts)); spvc_set active = NULL; SC(spvc_compiler_get_active_interface_variables(sc_comp, &active)); spvc_resources resources = NULL; SC(spvc_compiler_create_shader_resources_for_active_variables( sc_comp, &resources, active)); // Allocate HLSL registers for each resource type alloc_hlsl_reg_bindings(gpu, pass, pass_s, sc, sc_comp, resources, SPVC_RESOURCE_TYPE_SAMPLED_IMAGE, stage); alloc_hlsl_reg_bindings(gpu, pass, pass_s, sc, sc_comp, resources, SPVC_RESOURCE_TYPE_SEPARATE_IMAGE, stage); alloc_hlsl_reg_bindings(gpu, pass, pass_s, sc, sc_comp, resources, SPVC_RESOURCE_TYPE_UNIFORM_BUFFER, stage); alloc_hlsl_reg_bindings(gpu, pass, pass_s, sc, sc_comp, resources, SPVC_RESOURCE_TYPE_STORAGE_BUFFER, stage); alloc_hlsl_reg_bindings(gpu, pass, pass_s, sc, sc_comp, resources, SPVC_RESOURCE_TYPE_STORAGE_IMAGE, stage); if (stage == GLSL_SHADER_COMPUTE) { // Check if the gl_NumWorkGroups builtin is used. If it is, we have to // emulate it with a constant buffer, so allocate it a CBV register. spvc_variable_id num_workgroups_id = spvc_compiler_hlsl_remap_num_workgroups_builtin(sc_comp); if (num_workgroups_id) { pass_p->num_workgroups_used = true; spvc_hlsl_resource_binding binding; spvc_hlsl_resource_binding_init(&binding); binding.stage = stage_to_spv(stage); binding.binding = pass_p->max_binding + 1; // Allocate a CBV register for the buffer binding.cbv.register_binding = pass_s->cbvs.num; PL_ARRAY_APPEND(pass, pass_s->cbvs, HLSL_BINDING_NUM_WORKGROUPS); if (pass_s->cbvs.num > D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT) { PL_ERR(gpu, "Not enough constant buffer slots for gl_NumWorkGroups"); goto error; } spvc_compiler_set_decoration(sc_comp, num_workgroups_id, SpvDecorationDescriptorSet, 0); spvc_compiler_set_decoration(sc_comp, num_workgroups_id, SpvDecorationBinding, binding.binding); SC(spvc_compiler_hlsl_add_resource_binding(sc_comp, &binding)); } } SC(spvc_compiler_compile(sc_comp, &hlsl)); pl_clock_t after_spvc = pl_clock_now(); pl_log_cpu_time(gpu->log, after_glsl, after_spvc, "translating SPIR-V to HLSL"); hr = p->D3DCompile(hlsl, strlen(hlsl), NULL, NULL, NULL, "main", get_shader_target(gpu, stage), D3DCOMPILE_SKIP_VALIDATION | D3DCOMPILE_OPTIMIZATION_LEVEL3, 0, &out, &errors); if (FAILED(hr)) { SAFE_RELEASE(out); PL_ERR(gpu, "D3DCompile failed: %s\n%.*s", pl_hresult_to_str(hr), (int) ID3D10Blob_GetBufferSize(errors), (char *) ID3D10Blob_GetBufferPointer(errors)); goto error; } pl_log_cpu_time(gpu->log, after_spvc, pl_clock_now(), "translating HLSL to DXBC"); error:; if (hlsl) { int level = out ? 
PL_LOG_DEBUG : PL_LOG_ERR; PL_MSG(gpu, level, "%s shader HLSL source:", shader_names[stage]); pl_msg_source(gpu->log, level, hlsl); } if (sc) spvc_context_destroy(sc); SAFE_RELEASE(errors); pl_free(tmp); return out; } struct __attribute__((packed)) d3d11_cache_header { int32_t num_main_cbvs; int32_t num_main_srvs; int32_t num_main_samplers; int32_t num_vertex_cbvs; int32_t num_vertex_srvs; int32_t num_vertex_samplers; int32_t num_uavs; uint32_t vert_bc_len; uint32_t frag_bc_len; uint32_t comp_bc_len; uint8_t num_workgroups_used; }; static inline uint64_t pass_cache_signature(pl_gpu gpu, const struct pl_pass_params *params) { struct pl_gpu_d3d11 *p = PL_PRIV(gpu); uint64_t hash = CACHE_KEY_D3D_DXBC; // seed to uniquely identify d3d11 shaders pl_hash_merge(&hash, pl_str0_hash(params->glsl_shader)); if (params->type == PL_PASS_RASTER) pl_hash_merge(&hash, pl_str0_hash(params->vertex_shader)); // and add the compiler version information into the verification signature pl_hash_merge(&hash, p->spirv->signature); unsigned spvc_major, spvc_minor, spvc_patch; spvc_get_version(&spvc_major, &spvc_minor, &spvc_patch); pl_hash_merge(&hash, spvc_major); pl_hash_merge(&hash, spvc_minor); pl_hash_merge(&hash, spvc_patch); pl_hash_merge(&hash, ((uint64_t)p->d3d_compiler_ver.major << 48) | ((uint64_t)p->d3d_compiler_ver.minor << 32) | ((uint64_t)p->d3d_compiler_ver.build << 16) | (uint64_t)p->d3d_compiler_ver.revision); pl_hash_merge(&hash, p->fl); return hash; } static inline size_t cache_payload_size(struct d3d11_cache_header *header) { size_t required = (header->num_main_cbvs + header->num_main_srvs + header->num_main_samplers + header->num_vertex_cbvs + header->num_vertex_srvs + header->num_vertex_samplers + header->num_uavs) * sizeof(int) + header->vert_bc_len + header->frag_bc_len + header->comp_bc_len; return required; } static bool d3d11_use_cached_program(pl_gpu gpu, struct pl_pass_t *pass, const struct pl_pass_params *params, pl_cache_obj *obj, pl_str *vert_bc, pl_str *frag_bc, pl_str *comp_bc) { struct pl_pass_d3d11 *pass_p = PL_PRIV(pass); const pl_cache gpu_cache = pl_gpu_cache(gpu); if (!gpu_cache) return false; obj->key = pass_cache_signature(gpu, params); if (!pl_cache_get(gpu_cache, obj)) return false; pl_str cache = (pl_str) { obj->data, obj->size }; if (cache.len < sizeof(struct d3d11_cache_header)) return false; struct d3d11_cache_header *header = (struct d3d11_cache_header *) cache.buf; cache = pl_str_drop(cache, sizeof(*header)); // determine required cache size before reading anything size_t required = cache_payload_size(header); if (cache.len < required) return false; pass_p->num_workgroups_used = header->num_workgroups_used; #define GET_ARRAY(object, name, num_elems) \ do { \ PL_ARRAY_MEMDUP(pass, (object)->name, cache.buf, num_elems); \ cache = pl_str_drop(cache, num_elems * sizeof(*(object)->name.elem)); \ } while (0) #define GET_STAGE_ARRAY(stage, name) \ GET_ARRAY(&pass_p->stage, name, header->num_##stage##_##name) GET_STAGE_ARRAY(main, cbvs); GET_STAGE_ARRAY(main, srvs); GET_STAGE_ARRAY(main, samplers); GET_STAGE_ARRAY(vertex, cbvs); GET_STAGE_ARRAY(vertex, srvs); GET_STAGE_ARRAY(vertex, samplers); GET_ARRAY(pass_p, uavs, header->num_uavs); #define GET_SHADER(ptr) \ do { \ if (ptr) \ *ptr = pl_str_take(cache, header->ptr##_len); \ cache = pl_str_drop(cache, header->ptr##_len); \ } while (0) GET_SHADER(vert_bc); GET_SHADER(frag_bc); GET_SHADER(comp_bc); return true; } static void d3d11_update_program_cache(pl_gpu gpu, struct pl_pass_t *pass, uint64_t key, const pl_str *vs_str, 
const pl_str *ps_str, const pl_str *cs_str) { struct pl_pass_d3d11 *pass_p = PL_PRIV(pass); const pl_cache gpu_cache = pl_gpu_cache(gpu); if (!gpu_cache) return; struct d3d11_cache_header header = { .num_workgroups_used = pass_p->num_workgroups_used, .num_main_cbvs = pass_p->main.cbvs.num, .num_main_srvs = pass_p->main.srvs.num, .num_main_samplers = pass_p->main.samplers.num, .num_vertex_cbvs = pass_p->vertex.cbvs.num, .num_vertex_srvs = pass_p->vertex.srvs.num, .num_vertex_samplers = pass_p->vertex.samplers.num, .num_uavs = pass_p->uavs.num, .vert_bc_len = vs_str ? vs_str->len : 0, .frag_bc_len = ps_str ? ps_str->len : 0, .comp_bc_len = cs_str ? cs_str->len : 0, }; size_t cache_size = sizeof(header) + cache_payload_size(&header); pl_str cache = {0}; pl_str_append(NULL, &cache, (pl_str){ (uint8_t *) &header, sizeof(header) }); #define WRITE_ARRAY(name) pl_str_append(NULL, &cache, \ (pl_str){ (uint8_t *) pass_p->name.elem, \ sizeof(*pass_p->name.elem) * pass_p->name.num }) WRITE_ARRAY(main.cbvs); WRITE_ARRAY(main.srvs); WRITE_ARRAY(main.samplers); WRITE_ARRAY(vertex.cbvs); WRITE_ARRAY(vertex.srvs); WRITE_ARRAY(vertex.samplers); WRITE_ARRAY(uavs); if (vs_str) pl_str_append(NULL, &cache, *vs_str); if (ps_str) pl_str_append(NULL, &cache, *ps_str); if (cs_str) pl_str_append(NULL, &cache, *cs_str); pl_assert(cache_size == cache.len); pl_cache_str(gpu_cache, key, &cache); } void pl_d3d11_pass_destroy(pl_gpu gpu, pl_pass pass) { struct pl_gpu_d3d11 *p = PL_PRIV(gpu); struct d3d11_ctx *ctx = p->ctx; struct pl_pass_d3d11 *pass_p = PL_PRIV(pass); SAFE_RELEASE(pass_p->vs); SAFE_RELEASE(pass_p->ps); SAFE_RELEASE(pass_p->cs); SAFE_RELEASE(pass_p->layout); SAFE_RELEASE(pass_p->bstate); SAFE_RELEASE(pass_p->num_workgroups_buf); pl_d3d11_flush_message_queue(ctx, "After pass destroy"); pl_free((void *) pass); } static bool pass_create_raster(pl_gpu gpu, struct pl_pass_t *pass, const struct pl_pass_params *params) { struct pl_gpu_d3d11 *p = PL_PRIV(gpu); struct d3d11_ctx *ctx = p->ctx; struct pl_pass_d3d11 *pass_p = PL_PRIV(pass); ID3DBlob *vs_blob = NULL; pl_str vs_str = {0}; ID3DBlob *ps_blob = NULL; pl_str ps_str = {0}; D3D11_INPUT_ELEMENT_DESC *in_descs = NULL; pl_cache_obj obj = {0}; bool success = false; if (d3d11_use_cached_program(gpu, pass, params, &obj, &vs_str, &ps_str, NULL)) PL_DEBUG(gpu, "Using cached DXBC shaders"); pl_assert((vs_str.len == 0) == (ps_str.len == 0)); if (vs_str.len == 0) { vs_blob = shader_compile_glsl(gpu, pass, &pass_p->vertex, GLSL_SHADER_VERTEX, params->vertex_shader); if (!vs_blob) goto error; vs_str = (pl_str) { .buf = ID3D10Blob_GetBufferPointer(vs_blob), .len = ID3D10Blob_GetBufferSize(vs_blob), }; ps_blob = shader_compile_glsl(gpu, pass, &pass_p->main, GLSL_SHADER_FRAGMENT, params->glsl_shader); if (!ps_blob) goto error; ps_str = (pl_str) { .buf = ID3D10Blob_GetBufferPointer(ps_blob), .len = ID3D10Blob_GetBufferSize(ps_blob), }; } D3D(ID3D11Device_CreateVertexShader(p->dev, vs_str.buf, vs_str.len, NULL, &pass_p->vs)); D3D(ID3D11Device_CreatePixelShader(p->dev, ps_str.buf, ps_str.len, NULL, &pass_p->ps)); in_descs = pl_calloc_ptr(pass, params->num_vertex_attribs, in_descs); for (int i = 0; i < params->num_vertex_attribs; i++) { struct pl_vertex_attrib *va = ¶ms->vertex_attribs[i]; in_descs[i] = (D3D11_INPUT_ELEMENT_DESC) { // The semantic name doesn't mean much and is just used to verify // the input description matches the shader. SPIRV-Cross always // uses TEXCOORD, so we should too. 
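// The remaining fields (InputSlot, InputSlotClass, InstanceDataStepRate) are
// left zero-initialized, i.e. per-vertex data coming from the single vertex
// buffer that pass_run_raster binds at slot 0.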
.SemanticName = "TEXCOORD", .SemanticIndex = va->location, .AlignedByteOffset = va->offset, .Format = fmt_to_dxgi(va->fmt), }; } D3D(ID3D11Device_CreateInputLayout(p->dev, in_descs, params->num_vertex_attribs, vs_str.buf, vs_str.len, &pass_p->layout)); static const D3D11_BLEND blend_options[] = { [PL_BLEND_ZERO] = D3D11_BLEND_ZERO, [PL_BLEND_ONE] = D3D11_BLEND_ONE, [PL_BLEND_SRC_ALPHA] = D3D11_BLEND_SRC_ALPHA, [PL_BLEND_ONE_MINUS_SRC_ALPHA] = D3D11_BLEND_INV_SRC_ALPHA, }; D3D11_BLEND_DESC bdesc = { .RenderTarget[0] = { .RenderTargetWriteMask = D3D11_COLOR_WRITE_ENABLE_ALL, }, }; if (params->blend_params) { bdesc.RenderTarget[0] = (D3D11_RENDER_TARGET_BLEND_DESC) { .BlendEnable = TRUE, .SrcBlend = blend_options[params->blend_params->src_rgb], .DestBlend = blend_options[params->blend_params->dst_rgb], .BlendOp = D3D11_BLEND_OP_ADD, .SrcBlendAlpha = blend_options[params->blend_params->src_alpha], .DestBlendAlpha = blend_options[params->blend_params->dst_alpha], .BlendOpAlpha = D3D11_BLEND_OP_ADD, .RenderTargetWriteMask = D3D11_COLOR_WRITE_ENABLE_ALL, }; } D3D(ID3D11Device_CreateBlendState(p->dev, &bdesc, &pass_p->bstate)); d3d11_update_program_cache(gpu, pass, obj.key, &vs_str, &ps_str, NULL); success = true; error: SAFE_RELEASE(vs_blob); SAFE_RELEASE(ps_blob); pl_cache_obj_free(&obj); pl_free(in_descs); return success; } static bool pass_create_compute(pl_gpu gpu, struct pl_pass_t *pass, const struct pl_pass_params *params) { struct pl_gpu_d3d11 *p = PL_PRIV(gpu); struct d3d11_ctx *ctx = p->ctx; struct pl_pass_d3d11 *pass_p = PL_PRIV(pass); ID3DBlob *cs_blob = NULL; pl_str cs_str = {0}; pl_cache_obj obj = {0}; bool success = false; if (d3d11_use_cached_program(gpu, pass, params, &obj, NULL, NULL, &cs_str)) PL_DEBUG(gpu, "Using cached DXBC shader"); if (cs_str.len == 0) { cs_blob = shader_compile_glsl(gpu, pass, &pass_p->main, GLSL_SHADER_COMPUTE, params->glsl_shader); if (!cs_blob) goto error; cs_str = (pl_str) { .buf = ID3D10Blob_GetBufferPointer(cs_blob), .len = ID3D10Blob_GetBufferSize(cs_blob), }; } D3D(ID3D11Device_CreateComputeShader(p->dev, cs_str.buf, cs_str.len, NULL, &pass_p->cs)); if (pass_p->num_workgroups_used) { D3D11_BUFFER_DESC bdesc = { .BindFlags = D3D11_BIND_CONSTANT_BUFFER, .ByteWidth = sizeof(pass_p->last_num_wgs), }; D3D(ID3D11Device_CreateBuffer(p->dev, &bdesc, NULL, &pass_p->num_workgroups_buf)); } d3d11_update_program_cache(gpu, pass, obj.key, NULL, NULL, &cs_str); success = true; error: pl_cache_obj_free(&obj); SAFE_RELEASE(cs_blob); return success; } const struct pl_pass_t *pl_d3d11_pass_create(pl_gpu gpu, const struct pl_pass_params *params) { struct pl_gpu_d3d11 *p = PL_PRIV(gpu); struct d3d11_ctx *ctx = p->ctx; struct pl_pass_t *pass = pl_zalloc_obj(NULL, pass, struct pl_pass_d3d11); pass->params = pl_pass_params_copy(pass, params); struct pl_pass_d3d11 *pass_p = PL_PRIV(pass); *pass_p = (struct pl_pass_d3d11) { .max_binding = -1, }; if (params->type == PL_PASS_COMPUTE) { if (!pass_create_compute(gpu, pass, params)) goto error; } else { if (!pass_create_raster(gpu, pass, params)) goto error; } // Pre-allocate resource arrays to use in pl_pass_run pass_p->cbv_arr = pl_calloc(pass, PL_MAX(pass_p->main.cbvs.num, pass_p->vertex.cbvs.num), sizeof(*pass_p->cbv_arr)); pass_p->srv_arr = pl_calloc(pass, PL_MAX(pass_p->main.srvs.num, pass_p->vertex.srvs.num), sizeof(*pass_p->srv_arr)); pass_p->sampler_arr = pl_calloc(pass, PL_MAX(pass_p->main.samplers.num, pass_p->vertex.samplers.num), sizeof(*pass_p->sampler_arr)); pass_p->uav_arr = pl_calloc(pass, pass_p->uavs.num, 
sizeof(*pass_p->uav_arr)); // Find the highest binding number used in `params->descriptors` if we // haven't found it already. (If the shader was compiled fresh rather than // loaded from cache, `pass_p->max_binding` should already be set.) if (pass_p->max_binding == -1) { for (int i = 0; i < params->num_descriptors; i++) { pass_p->max_binding = PL_MAX(pass_p->max_binding, params->descriptors[i].binding); } } // Build a mapping from binding numbers to descriptor array indexes int *binding_map = pl_calloc_ptr(pass, pass_p->max_binding + 1, binding_map); for (int i = 0; i <= pass_p->max_binding; i++) binding_map[i] = HLSL_BINDING_NOT_USED; for (int i = 0; i < params->num_descriptors; i++) binding_map[params->descriptors[i].binding] = i; #define MAP_RESOURCES(array) \ do { \ for (int i = 0; i < array.num; i++) { \ if (array.elem[i] > pass_p->max_binding) { \ array.elem[i] = HLSL_BINDING_NOT_USED; \ } else if (array.elem[i] >= 0) { \ array.elem[i] = binding_map[array.elem[i]]; \ } \ } \ } while (0) // During shader compilation (or after loading a compiled shader from cache) // the entries of the following resource lists are shader binding numbers, // however, it's more efficient for `pl_pass_run` if they refer to indexes // of the `params->descriptors` array instead, so remap them here MAP_RESOURCES(pass_p->main.cbvs); MAP_RESOURCES(pass_p->main.samplers); MAP_RESOURCES(pass_p->main.srvs); MAP_RESOURCES(pass_p->vertex.cbvs); MAP_RESOURCES(pass_p->vertex.samplers); MAP_RESOURCES(pass_p->vertex.srvs); MAP_RESOURCES(pass_p->uavs); pl_free(binding_map); pl_d3d11_flush_message_queue(ctx, "After pass create"); return pass; error: pl_d3d11_pass_destroy(gpu, pass); return NULL; } // Shared logic between VS, PS and CS for filling the resource arrays that are // passed to ID3D11DeviceContext methods static void fill_resources(pl_gpu gpu, pl_pass pass, struct d3d_pass_stage *pass_s, const struct pl_pass_run_params *params, ID3D11Buffer **cbvs, ID3D11ShaderResourceView **srvs, ID3D11SamplerState **samplers) { struct pl_gpu_d3d11 *p = PL_PRIV(gpu); struct pl_pass_d3d11 *pass_p = PL_PRIV(pass); for (int i = 0; i < pass_s->cbvs.num; i++) { int binding = pass_s->cbvs.elem[i]; if (binding == HLSL_BINDING_NUM_WORKGROUPS) { cbvs[i] = pass_p->num_workgroups_buf; continue; } else if (binding < 0) { cbvs[i] = NULL; continue; } pl_buf buf = params->desc_bindings[binding].object; pl_d3d11_buf_resolve(gpu, buf); struct pl_buf_d3d11 *buf_p = PL_PRIV(buf); cbvs[i] = buf_p->buf; } for (int i = 0; i < pass_s->srvs.num; i++) { int binding = pass_s->srvs.elem[i]; if (binding < 0) { srvs[i] = NULL; continue; } pl_tex tex; struct pl_tex_d3d11 *tex_p; pl_buf buf; struct pl_buf_d3d11 *buf_p; switch (pass->params.descriptors[binding].type) { case PL_DESC_SAMPLED_TEX: case PL_DESC_STORAGE_IMG: tex = params->desc_bindings[binding].object; tex_p = PL_PRIV(tex); srvs[i] = tex_p->srv; break; case PL_DESC_BUF_STORAGE: buf = params->desc_bindings[binding].object; buf_p = PL_PRIV(buf); srvs[i] = buf_p->raw_srv; break; case PL_DESC_BUF_TEXEL_UNIFORM: case PL_DESC_BUF_TEXEL_STORAGE: buf = params->desc_bindings[binding].object; buf_p = PL_PRIV(buf); srvs[i] = buf_p->texel_srv; break; default: break; } } for (int i = 0; i < pass_s->samplers.num; i++) { int binding = pass_s->samplers.elem[i]; if (binding < 0) { samplers[i] = NULL; continue; } struct pl_desc_binding *db = ¶ms->desc_bindings[binding]; samplers[i] = p->samplers[db->sample_mode][db->address_mode]; } } static void fill_uavs(pl_pass pass, const struct pl_pass_run_params *params, 
ID3D11UnorderedAccessView **uavs) { struct pl_pass_d3d11 *pass_p = PL_PRIV(pass); for (int i = 0; i < pass_p->uavs.num; i++) { int binding = pass_p->uavs.elem[i]; if (binding < 0) { uavs[i] = NULL; continue; } pl_tex tex; struct pl_tex_d3d11 *tex_p; pl_buf buf; struct pl_buf_d3d11 *buf_p; switch (pass->params.descriptors[binding].type) { case PL_DESC_BUF_STORAGE: buf = params->desc_bindings[binding].object; buf_p = PL_PRIV(buf); uavs[i] = buf_p->raw_uav; break; case PL_DESC_STORAGE_IMG: tex = params->desc_bindings[binding].object; tex_p = PL_PRIV(tex); uavs[i] = tex_p->uav; break; case PL_DESC_BUF_TEXEL_STORAGE: buf = params->desc_bindings[binding].object; buf_p = PL_PRIV(buf); uavs[i] = buf_p->texel_uav; break; default: break; } } } static void pass_run_raster(pl_gpu gpu, const struct pl_pass_run_params *params) { struct pl_gpu_d3d11 *p = PL_PRIV(gpu); pl_pass pass = params->pass; struct pl_pass_d3d11 *pass_p = PL_PRIV(pass); if (p->fl <= D3D_FEATURE_LEVEL_9_3 && params->index_buf) { // Index buffers are unsupported because we can't tell if they are an // index buffer or a vertex buffer on creation, and FL9_x allows only // one binding type per-buffer PL_ERR(gpu, "Index buffers are unsupported in FL9_x"); return; } if (p->fl <= D3D_FEATURE_LEVEL_9_1 && params->index_data && params->index_fmt != PL_INDEX_UINT16) { PL_ERR(gpu, "32-bit index format is unsupported in FL9_1"); return; } // Figure out how much vertex/index data to upload, if any size_t vertex_alloc = params->vertex_data ? pl_vertex_buf_size(params) : 0; size_t index_alloc = params->index_data ? pl_index_buf_size(params) : 0; static const DXGI_FORMAT index_fmts[PL_INDEX_FORMAT_COUNT] = { [PL_INDEX_UINT16] = DXGI_FORMAT_R16_UINT, [PL_INDEX_UINT32] = DXGI_FORMAT_R32_UINT, }; // Upload vertex data. On >=FL10_0 we use the same buffer for index data, so // upload that too. bool share_vertex_index_buf = p->fl > D3D_FEATURE_LEVEL_9_3; if (vertex_alloc || (share_vertex_index_buf && index_alloc)) { struct stream_buf_slice slices[] = { { .data = params->vertex_data, .size = vertex_alloc }, { .data = params->index_data, .size = index_alloc }, }; if (!stream_buf_upload(gpu, &p->vbuf, slices, share_vertex_index_buf ? 
2 : 1)) { PL_ERR(gpu, "Failed to upload vertex data"); return; } if (vertex_alloc) { ID3D11DeviceContext_IASetVertexBuffers(p->imm, 0, 1, &p->vbuf.buf, &(UINT) { pass->params.vertex_stride }, &slices[0].offset); } if (share_vertex_index_buf && index_alloc) { ID3D11DeviceContext_IASetIndexBuffer(p->imm, p->vbuf.buf, index_fmts[params->index_fmt], slices[1].offset); } } // Upload index data for <=FL9_3, which must be in its own buffer if (!share_vertex_index_buf && index_alloc) { struct stream_buf_slice slices[] = { { .data = params->index_data, .size = index_alloc }, }; if (!stream_buf_upload(gpu, &p->ibuf, slices, PL_ARRAY_SIZE(slices))) { PL_ERR(gpu, "Failed to upload index data"); return; } ID3D11DeviceContext_IASetIndexBuffer(p->imm, p->ibuf.buf, index_fmts[params->index_fmt], slices[0].offset); } if (params->vertex_buf) { struct pl_buf_d3d11 *buf_p = PL_PRIV(params->vertex_buf); ID3D11DeviceContext_IASetVertexBuffers(p->imm, 0, 1, &buf_p->buf, &(UINT) { pass->params.vertex_stride }, &(UINT) { params->buf_offset }); } if (params->index_buf) { struct pl_buf_d3d11 *buf_p = PL_PRIV(params->index_buf); ID3D11DeviceContext_IASetIndexBuffer(p->imm, buf_p->buf, index_fmts[params->index_fmt], params->index_offset); } ID3D11DeviceContext_IASetInputLayout(p->imm, pass_p->layout); static const D3D_PRIMITIVE_TOPOLOGY prim_topology[] = { [PL_PRIM_TRIANGLE_LIST] = D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST, [PL_PRIM_TRIANGLE_STRIP] = D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP, }; ID3D11DeviceContext_IASetPrimitiveTopology(p->imm, prim_topology[pass->params.vertex_type]); ID3D11DeviceContext_VSSetShader(p->imm, pass_p->vs, NULL, 0); ID3D11Buffer **cbvs = pass_p->cbv_arr; ID3D11ShaderResourceView **srvs = pass_p->srv_arr; ID3D11SamplerState **samplers = pass_p->sampler_arr; ID3D11UnorderedAccessView **uavs = pass_p->uav_arr; // Set vertex shader resources. The device context is called conditionally // because the debug layer complains if these are called with 0 resources. 
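// The cbv/srv/sampler scratch arrays are shared between the vertex and pixel
// stages (they were allocated with PL_MAX of both counts in
// pl_d3d11_pass_create), so the VS bindings have to be issued before
// fill_resources() overwrites them for the main/pixel stage further down.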
fill_resources(gpu, pass, &pass_p->vertex, params, cbvs, srvs, samplers); if (pass_p->vertex.cbvs.num) ID3D11DeviceContext_VSSetConstantBuffers(p->imm, 0, pass_p->vertex.cbvs.num, cbvs); if (pass_p->vertex.srvs.num) ID3D11DeviceContext_VSSetShaderResources(p->imm, 0, pass_p->vertex.srvs.num, srvs); if (pass_p->vertex.samplers.num) ID3D11DeviceContext_VSSetSamplers(p->imm, 0, pass_p->vertex.samplers.num, samplers); ID3D11DeviceContext_RSSetState(p->imm, p->rstate); ID3D11DeviceContext_RSSetViewports(p->imm, 1, (&(D3D11_VIEWPORT) { .TopLeftX = params->viewport.x0, .TopLeftY = params->viewport.y0, .Width = pl_rect_w(params->viewport), .Height = pl_rect_h(params->viewport), .MinDepth = 0, .MaxDepth = 1, })); ID3D11DeviceContext_RSSetScissorRects(p->imm, 1, (&(D3D11_RECT) { .left = params->scissors.x0, .top = params->scissors.y0, .right = params->scissors.x1, .bottom = params->scissors.y1, })); ID3D11DeviceContext_PSSetShader(p->imm, pass_p->ps, NULL, 0); // Set pixel shader resources fill_resources(gpu, pass, &pass_p->main, params, cbvs, srvs, samplers); if (pass_p->main.cbvs.num) ID3D11DeviceContext_PSSetConstantBuffers(p->imm, 0, pass_p->main.cbvs.num, cbvs); if (pass_p->main.srvs.num) ID3D11DeviceContext_PSSetShaderResources(p->imm, 0, pass_p->main.srvs.num, srvs); if (pass_p->main.samplers.num) ID3D11DeviceContext_PSSetSamplers(p->imm, 0, pass_p->main.samplers.num, samplers); ID3D11DeviceContext_OMSetBlendState(p->imm, pass_p->bstate, NULL, D3D11_DEFAULT_SAMPLE_MASK); ID3D11DeviceContext_OMSetDepthStencilState(p->imm, p->dsstate, 0); fill_uavs(pass, params, uavs); struct pl_tex_d3d11 *target_p = PL_PRIV(params->target); ID3D11DeviceContext_OMSetRenderTargetsAndUnorderedAccessViews( p->imm, 1, &target_p->rtv, NULL, 1, pass_p->uavs.num, uavs, NULL); if (params->index_data || params->index_buf) { ID3D11DeviceContext_DrawIndexed(p->imm, params->vertex_count, 0, 0); } else { ID3D11DeviceContext_Draw(p->imm, params->vertex_count, 0); } // Unbind everything. It's easier to do this than to actually track state, // and if we leave the RTV bound, it could trip up D3D's conflict checker. // Also, apparently unbinding SRVs can prevent a 10level9 bug? 
// https://docs.microsoft.com/en-us/windows/win32/direct3d11/overviews-direct3d-11-devices-downlevel-prevent-null-srvs for (int i = 0; i < PL_MAX(pass_p->main.cbvs.num, pass_p->vertex.cbvs.num); i++) cbvs[i] = NULL; for (int i = 0; i < PL_MAX(pass_p->main.srvs.num, pass_p->vertex.srvs.num); i++) srvs[i] = NULL; for (int i = 0; i < PL_MAX(pass_p->main.samplers.num, pass_p->vertex.samplers.num); i++) samplers[i] = NULL; for (int i = 0; i < pass_p->uavs.num; i++) uavs[i] = NULL; if (pass_p->vertex.cbvs.num) ID3D11DeviceContext_VSSetConstantBuffers(p->imm, 0, pass_p->vertex.cbvs.num, cbvs); if (pass_p->vertex.srvs.num) ID3D11DeviceContext_VSSetShaderResources(p->imm, 0, pass_p->vertex.srvs.num, srvs); if (pass_p->vertex.samplers.num) ID3D11DeviceContext_VSSetSamplers(p->imm, 0, pass_p->vertex.samplers.num, samplers); if (pass_p->main.cbvs.num) ID3D11DeviceContext_PSSetConstantBuffers(p->imm, 0, pass_p->main.cbvs.num, cbvs); if (pass_p->main.srvs.num) ID3D11DeviceContext_PSSetShaderResources(p->imm, 0, pass_p->main.srvs.num, srvs); if (pass_p->main.samplers.num) ID3D11DeviceContext_PSSetSamplers(p->imm, 0, pass_p->main.samplers.num, samplers); ID3D11DeviceContext_OMSetRenderTargetsAndUnorderedAccessViews( p->imm, 0, NULL, NULL, 1, pass_p->uavs.num, uavs, NULL); } static void pass_run_compute(pl_gpu gpu, const struct pl_pass_run_params *params) { struct pl_gpu_d3d11 *p = PL_PRIV(gpu); pl_pass pass = params->pass; struct pl_pass_d3d11 *pass_p = PL_PRIV(pass); // Update gl_NumWorkGroups emulation buffer if necessary if (pass_p->num_workgroups_used) { bool needs_update = false; for (int i = 0; i < 3; i++) { if (pass_p->last_num_wgs.num_wgs[i] != params->compute_groups[i]) needs_update = true; pass_p->last_num_wgs.num_wgs[i] = params->compute_groups[i]; } if (needs_update) { ID3D11DeviceContext_UpdateSubresource(p->imm, (ID3D11Resource *) pass_p->num_workgroups_buf, 0, NULL, &pass_p->last_num_wgs, 0, 0); } } ID3D11DeviceContext_CSSetShader(p->imm, pass_p->cs, NULL, 0); ID3D11Buffer **cbvs = pass_p->cbv_arr; ID3D11ShaderResourceView **srvs = pass_p->srv_arr; ID3D11UnorderedAccessView **uavs = pass_p->uav_arr; ID3D11SamplerState **samplers = pass_p->sampler_arr; fill_resources(gpu, pass, &pass_p->main, params, cbvs, srvs, samplers); fill_uavs(pass, params, uavs); if (pass_p->main.cbvs.num) ID3D11DeviceContext_CSSetConstantBuffers(p->imm, 0, pass_p->main.cbvs.num, cbvs); if (pass_p->main.srvs.num) ID3D11DeviceContext_CSSetShaderResources(p->imm, 0, pass_p->main.srvs.num, srvs); if (pass_p->main.samplers.num) ID3D11DeviceContext_CSSetSamplers(p->imm, 0, pass_p->main.samplers.num, samplers); if (pass_p->uavs.num) ID3D11DeviceContext_CSSetUnorderedAccessViews(p->imm, 0, pass_p->uavs.num, uavs, NULL); ID3D11DeviceContext_Dispatch(p->imm, params->compute_groups[0], params->compute_groups[1], params->compute_groups[2]); // Unbind everything for (int i = 0; i < pass_p->main.cbvs.num; i++) cbvs[i] = NULL; for (int i = 0; i < pass_p->main.srvs.num; i++) srvs[i] = NULL; for (int i = 0; i < pass_p->main.samplers.num; i++) samplers[i] = NULL; for (int i = 0; i < pass_p->uavs.num; i++) uavs[i] = NULL; if (pass_p->main.cbvs.num) ID3D11DeviceContext_CSSetConstantBuffers(p->imm, 0, pass_p->main.cbvs.num, cbvs); if (pass_p->main.srvs.num) ID3D11DeviceContext_CSSetShaderResources(p->imm, 0, pass_p->main.srvs.num, srvs); if (pass_p->main.samplers.num) ID3D11DeviceContext_CSSetSamplers(p->imm, 0, pass_p->main.samplers.num, samplers); if (pass_p->uavs.num) ID3D11DeviceContext_CSSetUnorderedAccessViews(p->imm, 0, 
pass_p->uavs.num, uavs, NULL); } void pl_d3d11_pass_run(pl_gpu gpu, const struct pl_pass_run_params *params) { struct pl_gpu_d3d11 *p = PL_PRIV(gpu); struct d3d11_ctx *ctx = p->ctx; pl_pass pass = params->pass; pl_d3d11_timer_start(gpu, params->timer); if (pass->params.type == PL_PASS_COMPUTE) { pass_run_compute(gpu, params); } else { pass_run_raster(gpu, params); } pl_d3d11_timer_end(gpu, params->timer); pl_d3d11_flush_message_queue(ctx, "After pass run"); } libplacebo-v7.349.0/src/d3d11/gpu_tex.c000066400000000000000000000634401463457750100174250ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #include "gpu.h" #include "formats.h" static inline UINT tex_subresource(pl_tex tex) { struct pl_tex_d3d11 *tex_p = PL_PRIV(tex); return tex_p->array_slice >= 0 ? tex_p->array_slice : 0; } static bool tex_init(pl_gpu gpu, pl_tex tex) { struct pl_gpu_d3d11 *p = PL_PRIV(gpu); struct d3d11_ctx *ctx = p->ctx; struct pl_tex_d3d11 *tex_p = PL_PRIV(tex); // View formats may be omitted when they match the texture format, but for // simplicity's sake we always set it. It will match the texture format for // textures created with tex_create, but it can be different for video // textures wrapped with pl_d3d11_wrap. 
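// For example, a video surface wrapped via pl_d3d11_wrap is typically sampled through a per-plane view format (e.g. DXGI_FORMAT_R8_UNORM for the luma plane of an NV12 resource) rather than the resource's own format.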
DXGI_FORMAT fmt = fmt_to_dxgi(tex->params.format); if (tex->params.sampleable || tex->params.storable) { D3D11_SHADER_RESOURCE_VIEW_DESC srvdesc = { .Format = fmt, }; switch (pl_tex_params_dimension(tex->params)) { case 1: if (tex_p->array_slice >= 0) { srvdesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE1DARRAY; srvdesc.Texture1DArray.MipLevels = 1; srvdesc.Texture1DArray.FirstArraySlice = tex_p->array_slice; srvdesc.Texture1DArray.ArraySize = 1; } else { srvdesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE1D; srvdesc.Texture1D.MipLevels = 1; } break; case 2: if (tex_p->array_slice >= 0) { srvdesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2DARRAY; srvdesc.Texture2DArray.MipLevels = 1; srvdesc.Texture2DArray.FirstArraySlice = tex_p->array_slice; srvdesc.Texture2DArray.ArraySize = 1; } else { srvdesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D; srvdesc.Texture2D.MipLevels = 1; } break; case 3: // D3D11 does not have Texture3D arrays srvdesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE3D; srvdesc.Texture3D.MipLevels = 1; break; } D3D(ID3D11Device_CreateShaderResourceView(p->dev, tex_p->res, &srvdesc, &tex_p->srv)); } if (tex->params.renderable) { D3D11_RENDER_TARGET_VIEW_DESC rtvdesc = { .Format = fmt, }; switch (pl_tex_params_dimension(tex->params)) { case 1: if (tex_p->array_slice >= 0) { rtvdesc.ViewDimension = D3D11_RTV_DIMENSION_TEXTURE1DARRAY; rtvdesc.Texture1DArray.FirstArraySlice = tex_p->array_slice; rtvdesc.Texture1DArray.ArraySize = 1; } else { rtvdesc.ViewDimension = D3D11_RTV_DIMENSION_TEXTURE1D; } break; case 2: if (tex_p->array_slice >= 0) { rtvdesc.ViewDimension = D3D11_RTV_DIMENSION_TEXTURE2DARRAY; rtvdesc.Texture2DArray.FirstArraySlice = tex_p->array_slice; rtvdesc.Texture2DArray.ArraySize = 1; } else { rtvdesc.ViewDimension = D3D11_RTV_DIMENSION_TEXTURE2D; } break; case 3: // D3D11 does not have Texture3D arrays rtvdesc.ViewDimension = D3D11_RTV_DIMENSION_TEXTURE3D; rtvdesc.Texture3D.WSize = -1; break; } D3D(ID3D11Device_CreateRenderTargetView(p->dev, tex_p->res, &rtvdesc, &tex_p->rtv)); } if (p->fl >= D3D_FEATURE_LEVEL_11_0 && tex->params.storable) { D3D11_UNORDERED_ACCESS_VIEW_DESC uavdesc = { .Format = fmt, }; switch (pl_tex_params_dimension(tex->params)) { case 1: if (tex_p->array_slice >= 0) { uavdesc.ViewDimension = D3D11_UAV_DIMENSION_TEXTURE1DARRAY; uavdesc.Texture1DArray.FirstArraySlice = tex_p->array_slice; uavdesc.Texture1DArray.ArraySize = 1; } else { uavdesc.ViewDimension = D3D11_UAV_DIMENSION_TEXTURE1D; } break; case 2: if (tex_p->array_slice >= 0) { uavdesc.ViewDimension = D3D11_UAV_DIMENSION_TEXTURE2DARRAY; uavdesc.Texture2DArray.FirstArraySlice = tex_p->array_slice; uavdesc.Texture2DArray.ArraySize = 1; } else { uavdesc.ViewDimension = D3D11_UAV_DIMENSION_TEXTURE2D; } break; case 3: // D3D11 does not have Texture3D arrays uavdesc.ViewDimension = D3D11_UAV_DIMENSION_TEXTURE3D; uavdesc.Texture3D.WSize = -1; break; } D3D(ID3D11Device_CreateUnorderedAccessView(p->dev, tex_p->res, &uavdesc, &tex_p->uav)); } return true; error: return false; } void pl_d3d11_tex_destroy(pl_gpu gpu, pl_tex tex) { struct pl_gpu_d3d11 *p = PL_PRIV(gpu); struct d3d11_ctx *ctx = p->ctx; struct pl_tex_d3d11 *tex_p = PL_PRIV(tex); SAFE_RELEASE(tex_p->srv); SAFE_RELEASE(tex_p->rtv); SAFE_RELEASE(tex_p->uav); SAFE_RELEASE(tex_p->res); SAFE_RELEASE(tex_p->staging); pl_d3d11_flush_message_queue(ctx, "After texture destroy"); pl_free((void *) tex); } pl_tex pl_d3d11_tex_create(pl_gpu gpu, const struct pl_tex_params *params) { struct pl_gpu_d3d11 *p = PL_PRIV(gpu); struct d3d11_ctx *ctx = 
p->ctx; struct pl_tex_t *tex = pl_zalloc_obj(NULL, tex, struct pl_tex_d3d11); tex->params = *params; tex->params.initial_data = NULL; tex->sampler_type = PL_SAMPLER_NORMAL; struct pl_tex_d3d11 *tex_p = PL_PRIV(tex); DXGI_FORMAT dxfmt = fmt_to_dxgi(params->format); D3D11_USAGE usage = D3D11_USAGE_DEFAULT; D3D11_BIND_FLAG bind_flags = 0; if (params->format->emulated) { tex_p->texel_fmt = pl_find_fmt(gpu, params->format->type, 1, 0, params->format->host_bits[0], PL_FMT_CAP_TEXEL_UNIFORM); if (!tex_p->texel_fmt) { PL_ERR(gpu, "Failed picking texel format for emulated texture!"); goto error; } tex->params.storable = true; } if (p->fl >= D3D_FEATURE_LEVEL_11_0) { // On >=FL11_0, blit emulation needs image storage tex->params.storable |= params->blit_src || params->blit_dst; // Blit emulation can use a sampler for linear filtering during stretch if ((tex->params.format->caps & PL_FMT_CAP_LINEAR) && params->blit_src) tex->params.sampleable = true; } else { // On <FL11_0, blit emulation uses a render pass tex->params.sampleable |= params->blit_src; tex->params.renderable |= params->blit_dst; } if (tex->params.sampleable) bind_flags |= D3D11_BIND_SHADER_RESOURCE; if (tex->params.renderable) bind_flags |= D3D11_BIND_RENDER_TARGET; if (p->fl >= D3D_FEATURE_LEVEL_11_0 && tex->params.storable) bind_flags |= D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_UNORDERED_ACCESS; // Apparently IMMUTABLE textures are efficient, so try to infer whether we // can use one if (params->initial_data && !params->format->emulated && !tex->params.renderable && !tex->params.storable && !params->host_writable) { usage = D3D11_USAGE_IMMUTABLE; } // In FL9_x, resources with only D3D11_BIND_SHADER_RESOURCE can't be copied // from GPU-accessible memory to CPU-accessible memory. The only other bind // flag we set on this FL is D3D11_BIND_RENDER_TARGET, so set it.
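// (This affects the copy into the CPU-readable staging texture that gets created further down for host_readable textures.)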
if (p->fl <= D3D_FEATURE_LEVEL_9_3 && tex->params.host_readable) bind_flags |= D3D11_BIND_RENDER_TARGET; // In FL9_x, when using DEFAULT or IMMUTABLE, BindFlags cannot be zero if (p->fl <= D3D_FEATURE_LEVEL_9_3 && !bind_flags) bind_flags |= D3D11_BIND_SHADER_RESOURCE; D3D11_SUBRESOURCE_DATA data; D3D11_SUBRESOURCE_DATA *pdata = NULL; if (params->initial_data && !params->format->emulated) { data = (D3D11_SUBRESOURCE_DATA) { .pSysMem = params->initial_data, .SysMemPitch = params->w * params->format->texel_size, }; if (params->d) data.SysMemSlicePitch = data.SysMemPitch * params->h; pdata = &data; } switch (pl_tex_params_dimension(*params)) { case 1:; D3D11_TEXTURE1D_DESC desc1d = { .Width = params->w, .MipLevels = 1, .ArraySize = 1, .Format = dxfmt, .Usage = usage, .BindFlags = bind_flags, }; D3D(ID3D11Device_CreateTexture1D(p->dev, &desc1d, pdata, &tex_p->tex1d)); tex_p->res = (ID3D11Resource *)tex_p->tex1d; // Create a staging texture with CPU access for pl_tex_download() if (params->host_readable) { desc1d.BindFlags = 0; desc1d.CPUAccessFlags = D3D11_CPU_ACCESS_READ; desc1d.Usage = D3D11_USAGE_STAGING; D3D(ID3D11Device_CreateTexture1D(p->dev, &desc1d, NULL, &tex_p->staging1d)); tex_p->staging = (ID3D11Resource *) tex_p->staging1d; } break; case 2:; D3D11_TEXTURE2D_DESC desc2d = { .Width = params->w, .Height = params->h, .MipLevels = 1, .ArraySize = 1, .SampleDesc.Count = 1, .Format = dxfmt, .Usage = usage, .BindFlags = bind_flags, }; D3D(ID3D11Device_CreateTexture2D(p->dev, &desc2d, pdata, &tex_p->tex2d)); tex_p->res = (ID3D11Resource *)tex_p->tex2d; // Create a staging texture with CPU access for pl_tex_download() if (params->host_readable) { desc2d.BindFlags = 0; desc2d.CPUAccessFlags = D3D11_CPU_ACCESS_READ; desc2d.Usage = D3D11_USAGE_STAGING; D3D(ID3D11Device_CreateTexture2D(p->dev, &desc2d, NULL, &tex_p->staging2d)); tex_p->staging = (ID3D11Resource *) tex_p->staging2d; } break; case 3:; D3D11_TEXTURE3D_DESC desc3d = { .Width = params->w, .Height = params->h, .Depth = params->d, .MipLevels = 1, .Format = dxfmt, .Usage = usage, .BindFlags = bind_flags, }; D3D(ID3D11Device_CreateTexture3D(p->dev, &desc3d, pdata, &tex_p->tex3d)); tex_p->res = (ID3D11Resource *)tex_p->tex3d; // Create a staging texture with CPU access for pl_tex_download() if (params->host_readable) { desc3d.BindFlags = 0; desc3d.CPUAccessFlags = D3D11_CPU_ACCESS_READ; desc3d.Usage = D3D11_USAGE_STAGING; D3D(ID3D11Device_CreateTexture3D(p->dev, &desc3d, NULL, &tex_p->staging3d)); tex_p->staging = (ID3D11Resource *) tex_p->staging3d; } break; default: pl_unreachable(); } tex_p->array_slice = -1; if (!tex_init(gpu, tex)) goto error; if (params->initial_data && params->format->emulated) { struct pl_tex_transfer_params ul_params = { .tex = tex, .ptr = (void *) params->initial_data, .rc = { 0, 0, 0, params->w, params->h, params->d }, }; // Since we re-use GPU helpers which require writable images, just fake it bool writable = tex->params.host_writable; tex->params.host_writable = true; if (!pl_tex_upload(gpu, &ul_params)) goto error; tex->params.host_writable = writable; } pl_d3d11_flush_message_queue(ctx, "After texture create"); return tex; error: pl_d3d11_tex_destroy(gpu, tex); return NULL; } pl_tex pl_d3d11_wrap(pl_gpu gpu, const struct pl_d3d11_wrap_params *params) { struct pl_gpu_d3d11 *p = PL_PRIV(gpu); struct d3d11_ctx *ctx = p->ctx; struct pl_tex_t *tex = pl_zalloc_obj(NULL, tex, struct pl_tex_d3d11); tex->sampler_type = PL_SAMPLER_NORMAL; struct pl_tex_d3d11 *tex_p = PL_PRIV(tex); DXGI_FORMAT fmt = 
DXGI_FORMAT_UNKNOWN; D3D11_USAGE usage = D3D11_USAGE_DEFAULT; D3D11_BIND_FLAG bind_flags = 0; UINT mip_levels = 1; UINT array_size = 1; UINT sample_count = 1; D3D11_RESOURCE_DIMENSION type; ID3D11Resource_GetType(params->tex, &type); switch (type) { case D3D11_RESOURCE_DIMENSION_TEXTURE1D: D3D(ID3D11Resource_QueryInterface(params->tex, &IID_ID3D11Texture1D, (void **) &tex_p->tex1d)); tex_p->res = (ID3D11Resource *) tex_p->tex1d; D3D11_TEXTURE1D_DESC desc1d; ID3D11Texture1D_GetDesc(tex_p->tex1d, &desc1d); tex->params.w = desc1d.Width; mip_levels = desc1d.MipLevels; array_size = desc1d.ArraySize; fmt = desc1d.Format; usage = desc1d.Usage; bind_flags = desc1d.BindFlags; break; case D3D11_RESOURCE_DIMENSION_TEXTURE2D: D3D(ID3D11Resource_QueryInterface(params->tex, &IID_ID3D11Texture2D, (void **) &tex_p->tex2d)); tex_p->res = (ID3D11Resource *) tex_p->tex2d; D3D11_TEXTURE2D_DESC desc2d; ID3D11Texture2D_GetDesc(tex_p->tex2d, &desc2d); tex->params.w = desc2d.Width; tex->params.h = desc2d.Height; mip_levels = desc2d.MipLevels; array_size = desc2d.ArraySize; fmt = desc2d.Format; sample_count = desc2d.SampleDesc.Count; usage = desc2d.Usage; bind_flags = desc2d.BindFlags; // Allow the format and size of 2D textures to be overridden to support // shader views of video resources if (params->fmt) { fmt = params->fmt; tex->params.w = params->w; tex->params.h = params->h; } break; case D3D11_RESOURCE_DIMENSION_TEXTURE3D: D3D(ID3D11Resource_QueryInterface(params->tex, &IID_ID3D11Texture3D, (void **) &tex_p->tex3d)); tex_p->res = (ID3D11Resource *) tex_p->tex3d; D3D11_TEXTURE3D_DESC desc3d; ID3D11Texture3D_GetDesc(tex_p->tex3d, &desc3d); tex->params.w = desc3d.Width; tex->params.h = desc3d.Height; tex->params.d = desc3d.Depth; mip_levels = desc3d.MipLevels; fmt = desc3d.Format; usage = desc3d.Usage; bind_flags = desc3d.BindFlags; break; case D3D11_RESOURCE_DIMENSION_UNKNOWN: case D3D11_RESOURCE_DIMENSION_BUFFER: PL_ERR(gpu, "Resource is not suitable to wrap"); goto error; } if (mip_levels != 1) { PL_ERR(gpu, "Mipmapped textures not supported for wrapping"); goto error; } if (sample_count != 1) { PL_ERR(gpu, "Multisampled textures not supported for wrapping"); goto error; } if (usage != D3D11_USAGE_DEFAULT) { PL_ERR(gpu, "Resource is not D3D11_USAGE_DEFAULT"); goto error; } if (array_size > 1) { if (params->array_slice < 0 || params->array_slice >= array_size) { PL_ERR(gpu, "array_slice out of range"); goto error; } tex_p->array_slice = params->array_slice; } else { tex_p->array_slice = -1; } if (bind_flags & D3D11_BIND_SHADER_RESOURCE) { tex->params.sampleable = true; // Blit emulation uses a render pass on <FL11_0 if (p->fl < D3D_FEATURE_LEVEL_11_0) tex->params.blit_src = true; } if (bind_flags & D3D11_BIND_RENDER_TARGET) { tex->params.renderable = true; // Blit emulation uses a render pass on <FL11_0 if (p->fl < D3D_FEATURE_LEVEL_11_0) tex->params.blit_dst = true; } static const D3D11_BIND_FLAG storable_flags = D3D11_BIND_UNORDERED_ACCESS | D3D11_BIND_SHADER_RESOURCE; if ((bind_flags & storable_flags) == storable_flags) { tex->params.storable = true; // Blit emulation uses image storage on >=FL11_0.
A feature level check // isn't required because <FL11_0 doesn't support storable textures tex->params.blit_src = tex->params.blit_dst = true; } for (int i = 0; i < gpu->num_formats; i++) { DXGI_FORMAT target_fmt = fmt_to_dxgi(gpu->formats[i]); if (fmt == target_fmt) { tex->params.format = gpu->formats[i]; break; } } if (!tex->params.format) { PL_ERR(gpu, "Could not find a suitable pl_fmt for wrapped resource"); goto error; } if (!tex_init(gpu, tex)) goto error; pl_d3d11_flush_message_queue(ctx, "After texture wrap"); return tex; error: pl_d3d11_tex_destroy(gpu, tex); return NULL; } void pl_d3d11_tex_invalidate(pl_gpu gpu, pl_tex tex) { struct pl_gpu_d3d11 *p = PL_PRIV(gpu); struct d3d11_ctx *ctx = p->ctx; struct pl_tex_d3d11 *tex_p = PL_PRIV(tex); // Resource discarding requires D3D11.1 if (!p->imm1) return; // Prefer discarding a view to discarding the whole resource. The reason // for this is that a pl_tex can refer to a single member of a texture // array. Discarding the SRV, RTV or UAV should only discard that member. if (tex_p->rtv) { ID3D11DeviceContext1_DiscardView(p->imm1, (ID3D11View *) tex_p->rtv); } else if (tex_p->uav) { ID3D11DeviceContext1_DiscardView(p->imm1, (ID3D11View *) tex_p->uav); } else if (tex_p->srv) { ID3D11DeviceContext1_DiscardView(p->imm1, (ID3D11View *) tex_p->srv); } else if (tex_p->array_slice < 0) { // If there are no views, only discard if the ID3D11Resource is not a // texture array ID3D11DeviceContext1_DiscardResource(p->imm1, tex_p->res); } pl_d3d11_flush_message_queue(ctx, "After texture invalidate"); } void pl_d3d11_tex_clear_ex(pl_gpu gpu, pl_tex tex, const union pl_clear_color color) { struct pl_gpu_d3d11 *p = PL_PRIV(gpu); struct d3d11_ctx *ctx = p->ctx; struct pl_tex_d3d11 *tex_p = PL_PRIV(tex); if (tex->params.format->type == PL_FMT_UINT) { if (tex_p->uav) { ID3D11DeviceContext_ClearUnorderedAccessViewUint(p->imm, tex_p->uav, color.u); } else { float c[4] = { color.u[0], color.u[1], color.u[2], color.u[3] }; ID3D11DeviceContext_ClearRenderTargetView(p->imm, tex_p->rtv, c); } } else if (tex->params.format->type == PL_FMT_SINT) { if (tex_p->uav) { ID3D11DeviceContext_ClearUnorderedAccessViewUint(p->imm, tex_p->uav, (const uint32_t *)color.i); } else { float c[4] = { color.i[0], color.i[1], color.i[2], color.i[3] }; ID3D11DeviceContext_ClearRenderTargetView(p->imm, tex_p->rtv, c); } } else if (tex_p->rtv) { ID3D11DeviceContext_ClearRenderTargetView(p->imm, tex_p->rtv, color.f); } else { ID3D11DeviceContext_ClearUnorderedAccessViewFloat(p->imm, tex_p->uav, color.f); } pl_d3d11_flush_message_queue(ctx, "After texture clear"); } #define pl_rect3d_to_box(rc) \ ((D3D11_BOX) { \ .left = rc.x0, .top = rc.y0, .front = rc.z0, \ .right = rc.x1, .bottom = rc.y1, .back = rc.z1, \ }) void pl_d3d11_tex_blit(pl_gpu gpu, const struct pl_tex_blit_params *params) { struct pl_gpu_d3d11 *p = PL_PRIV(gpu); struct d3d11_ctx *ctx = p->ctx; struct pl_tex_d3d11 *src_p = PL_PRIV(params->src); DXGI_FORMAT src_fmt = fmt_to_dxgi(params->src->params.format); struct pl_tex_d3d11 *dst_p = PL_PRIV(params->dst); DXGI_FORMAT dst_fmt = fmt_to_dxgi(params->dst->params.format); // If the blit operation doesn't require flipping, scaling or format // conversion, we can use CopySubresourceRegion pl_rect3d src_rc = params->src_rc, dst_rc = params->dst_rc; if (pl_rect3d_eq(src_rc, dst_rc) && src_fmt == dst_fmt) { pl_rect3d rc = params->src_rc; pl_rect3d_normalize(&rc); ID3D11DeviceContext_CopySubresourceRegion(p->imm, dst_p->res, tex_subresource(params->dst), rc.x0, rc.y0, rc.z0, src_p->res, tex_subresource(params->src),
&pl_rect3d_to_box(rc)); } else if (p->fl >= D3D_FEATURE_LEVEL_11_0) { if (!pl_tex_blit_compute(gpu, params)) PL_ERR(gpu, "Failed compute shader fallback blit"); } else { pl_tex_blit_raster(gpu, params); } pl_d3d11_flush_message_queue(ctx, "After texture blit"); } bool pl_d3d11_tex_upload(pl_gpu gpu, const struct pl_tex_transfer_params *params) { struct pl_gpu_d3d11 *p = PL_PRIV(gpu); struct d3d11_ctx *ctx = p->ctx; pl_tex tex = params->tex; pl_fmt fmt = tex->params.format; struct pl_tex_d3d11 *tex_p = PL_PRIV(tex); struct pl_tex_transfer_params *slices = NULL; bool ret = false; pl_d3d11_timer_start(gpu, params->timer); if (fmt->emulated) { int num_slices = pl_tex_transfer_slices(gpu, tex_p->texel_fmt, params, &slices); for (int i = 0; i < num_slices; i++) { // Copy the source data buffer into an intermediate buffer pl_buf tbuf = pl_buf_create(gpu, pl_buf_params( .memory_type = PL_BUF_MEM_DEVICE, .format = tex_p->texel_fmt, .size = pl_tex_transfer_size(&slices[i]), .initial_data = slices[i].ptr, .storable = true, )); if (!tbuf) { PL_ERR(gpu, "Failed creating buffer for tex upload fallback!"); goto error; } slices[i].ptr = NULL; slices[i].buf = tbuf; slices[i].buf_offset = 0; bool ok = pl_tex_upload_texel(gpu, &slices[i]); pl_buf_destroy(gpu, &tbuf); if (!ok) goto error; } } else { ID3D11DeviceContext_UpdateSubresource(p->imm, tex_p->res, tex_subresource(tex), &pl_rect3d_to_box(params->rc), params->ptr, params->row_pitch, params->depth_pitch); } ret = true; error: pl_d3d11_timer_end(gpu, params->timer); pl_d3d11_flush_message_queue(ctx, "After texture upload"); pl_free(slices); return ret; } bool pl_d3d11_tex_download(pl_gpu gpu, const struct pl_tex_transfer_params *params) { struct pl_gpu_d3d11 *p = PL_PRIV(gpu); struct d3d11_ctx *ctx = p->ctx; const struct pl_tex_t *tex = params->tex; pl_fmt fmt = tex->params.format; struct pl_tex_d3d11 *tex_p = PL_PRIV(tex); struct pl_tex_transfer_params *slices = NULL; bool ret = false; if (!tex_p->staging) return false; pl_d3d11_timer_start(gpu, params->timer); if (fmt->emulated) { pl_buf tbuf = NULL; int num_slices = pl_tex_transfer_slices(gpu, tex_p->texel_fmt, params, &slices); for (int i = 0; i < num_slices; i++) { const size_t slice_size = pl_tex_transfer_size(&slices[i]); bool ok = pl_buf_recreate(gpu, &tbuf, pl_buf_params( .storable = true, .size = slice_size, .memory_type = PL_BUF_MEM_DEVICE, .format = tex_p->texel_fmt, .host_readable = true, )); if (!ok) { PL_ERR(gpu, "Failed creating buffer for tex download fallback!"); goto error; } void *ptr = slices[i].ptr; slices[i].ptr = NULL; slices[i].buf = tbuf; slices[i].buf_offset = 0; // Download into an intermediate buffer first ok = pl_tex_download_texel(gpu, &slices[i]); ok = ok && pl_buf_read(gpu, tbuf, 0, ptr, slice_size); if (!ok) { pl_buf_destroy(gpu, &tbuf); goto error; } } pl_buf_destroy(gpu, &tbuf); } else { ID3D11DeviceContext_CopySubresourceRegion(p->imm, (ID3D11Resource *) tex_p->staging, 0, params->rc.x0, params->rc.y0, params->rc.z0, tex_p->res, tex_subresource(tex), &pl_rect3d_to_box(params->rc)); D3D11_MAPPED_SUBRESOURCE lock; D3D(ID3D11DeviceContext_Map(p->imm, (ID3D11Resource *) tex_p->staging, 0, D3D11_MAP_READ, 0, &lock)); char *cdst = params->ptr; char *csrc = lock.pData; size_t line_size = pl_rect_w(params->rc) * tex->params.format->texel_size; for (int z = 0; z < pl_rect_d(params->rc); z++) { for (int y = 0; y < pl_rect_h(params->rc); y++) { memcpy(cdst + z * params->depth_pitch + y * params->row_pitch, csrc + (params->rc.z0 + z) * lock.DepthPitch + (params->rc.y0 + y) * 
lock.RowPitch + params->rc.x0, line_size); } } ID3D11DeviceContext_Unmap(p->imm, (ID3D11Resource*)tex_p->staging, 0); } ret = true; error: pl_d3d11_timer_end(gpu, params->timer); pl_d3d11_flush_message_queue(ctx, "After texture download"); pl_free(slices); return ret; } libplacebo-v7.349.0/src/d3d11/meson.build000066400000000000000000000023361463457750100177450ustar00rootroot00000000000000d3d11 = get_option('d3d11') d3d11_header = cc.check_header('d3d11.h', required: false) # needed publicly d3d11_headers_extra = [ # needed internally cc.check_header('d3d11_4.h', required: d3d11), cc.check_header('dxgi1_6.h', required: d3d11), ] d3d11_deps = [ dependency('spirv-cross-c-shared', version: '>=0.29.0', required: d3d11), cc.find_library('version', required: d3d11), ] d3d11 = d3d11.require(d3d11_header) foreach h : d3d11_headers_extra d3d11 = d3d11.require(h) endforeach foreach d : d3d11_deps d3d11 = d3d11.require(d.found()) endforeach components.set('d3d11', d3d11.allowed()) if d3d11.allowed() conf_internal.set('PL_HAVE_DXGI_DEBUG', cc.has_header_symbol('dxgidebug.h', 'IID_IDXGIInfoQueue')) conf_internal.set('PL_HAVE_DXGI_DEBUG_D3D11', cc.has_header_symbol('d3d11sdklayers.h', 'DXGI_DEBUG_D3D11')) add_project_arguments(['-DCOBJMACROS'], language: ['c', 'cpp']) build_deps += declare_dependency(dependencies: d3d11_deps) tests += 'd3d11.c' sources += [ 'd3d11/context.c', 'd3d11/formats.c', 'd3d11/gpu.c', 'd3d11/gpu_buf.c', 'd3d11/gpu_tex.c', 'd3d11/gpu_pass.c', 'd3d11/swapchain.c', 'd3d11/utils.c', ] elif d3d11_header sources += 'd3d11/stubs.c' endif libplacebo-v7.349.0/src/d3d11/stubs.c000066400000000000000000000027321463457750100171070ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #include "../common.h" #include "log.h" #include const struct pl_d3d11_params pl_d3d11_default_params = { PL_D3D11_DEFAULTS }; pl_d3d11 pl_d3d11_create(pl_log log, const struct pl_d3d11_params *params) { pl_fatal(log, "libplacebo compiled without D3D11 support!"); return NULL; } void pl_d3d11_destroy(pl_d3d11 *pd3d11) { pl_d3d11 d3d11 = *pd3d11; pl_assert(!d3d11); } pl_d3d11 pl_d3d11_get(pl_gpu gpu) { return NULL; } pl_swapchain pl_d3d11_create_swapchain(pl_d3d11 d3d11, const struct pl_d3d11_swapchain_params *params) { pl_unreachable(); } IDXGISwapChain *pl_d3d11_swapchain_unwrap(pl_swapchain sw) { pl_unreachable(); } pl_tex pl_d3d11_wrap(pl_gpu gpu, const struct pl_d3d11_wrap_params *params) { pl_unreachable(); } libplacebo-v7.349.0/src/d3d11/swapchain.c000066400000000000000000000540731463457750100177310ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. 
* * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #include #include #include #include "gpu.h" #include "swapchain.h" #include "utils.h" struct d3d11_csp_mapping { DXGI_COLOR_SPACE_TYPE d3d11_csp; DXGI_FORMAT d3d11_fmt; struct pl_color_space out_csp; }; static struct d3d11_csp_mapping map_pl_csp_to_d3d11(const struct pl_color_space *hint, bool use_8bit_sdr) { if (pl_color_space_is_hdr(hint) && hint->transfer != PL_COLOR_TRC_LINEAR) { struct pl_color_space pl_csp = pl_color_space_hdr10; pl_csp.hdr = (struct pl_hdr_metadata) { // Whitelist only values that we support signalling metadata for .prim = hint->hdr.prim, .min_luma = hint->hdr.min_luma, .max_luma = hint->hdr.max_luma, .max_cll = hint->hdr.max_cll, .max_fall = hint->hdr.max_fall, }; return (struct d3d11_csp_mapping){ .d3d11_csp = DXGI_COLOR_SPACE_RGB_FULL_G2084_NONE_P2020, .d3d11_fmt = DXGI_FORMAT_R10G10B10A2_UNORM, .out_csp = pl_csp, }; } else if (pl_color_primaries_is_wide_gamut(hint->primaries) || hint->transfer == PL_COLOR_TRC_LINEAR) { // scRGB a la VK_COLOR_SPACE_EXTENDED_SRGB_LINEAR_EXT, // so could be utilized for HDR/wide gamut content as well // with content that goes beyond 0.0-1.0. return (struct d3d11_csp_mapping){ .d3d11_csp = DXGI_COLOR_SPACE_RGB_FULL_G10_NONE_P709, .d3d11_fmt = DXGI_FORMAT_R16G16B16A16_FLOAT, .out_csp = { .primaries = PL_COLOR_PRIM_BT_709, .transfer = PL_COLOR_TRC_LINEAR, } }; } return (struct d3d11_csp_mapping){ .d3d11_csp = DXGI_COLOR_SPACE_RGB_FULL_G22_NONE_P709, .d3d11_fmt = use_8bit_sdr ? DXGI_FORMAT_R8G8B8A8_UNORM : DXGI_FORMAT_R10G10B10A2_UNORM, .out_csp = pl_color_space_monitor, }; } struct priv { struct pl_sw_fns impl; struct d3d11_ctx *ctx; IDXGISwapChain *swapchain; pl_tex backbuffer; // Currently requested or applied swap chain configuration. // Affected by received colorspace hints. struct d3d11_csp_mapping csp_map; // Whether a swapchain backbuffer format reconfiguration has been // requested by means of an additional resize action. bool update_swapchain_format; // Whether 10-bit backbuffer format is disabled for SDR content. 
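// Mirrors pl_d3d11_swapchain_params.disable_10bit_sdr as passed at swapchain creation time.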
bool disable_10bit_sdr; // Fallback to 8-bit RGB was triggered due to lack of compatiblity bool fallback_8bit_rgb; }; static void d3d11_sw_destroy(pl_swapchain sw) { struct priv *p = PL_PRIV(sw); pl_tex_destroy(sw->gpu, &p->backbuffer); SAFE_RELEASE(p->swapchain); pl_free((void *) sw); } static int d3d11_sw_latency(pl_swapchain sw) { struct priv *p = PL_PRIV(sw); struct d3d11_ctx *ctx = p->ctx; UINT max_latency; IDXGIDevice1_GetMaximumFrameLatency(ctx->dxgi_dev, &max_latency); return max_latency; } static pl_tex get_backbuffer(pl_swapchain sw) { struct priv *p = PL_PRIV(sw); struct d3d11_ctx *ctx = p->ctx; ID3D11Texture2D *backbuffer = NULL; pl_tex tex = NULL; D3D(IDXGISwapChain_GetBuffer(p->swapchain, 0, &IID_ID3D11Texture2D, (void **) &backbuffer)); tex = pl_d3d11_wrap(sw->gpu, pl_d3d11_wrap_params( .tex = (ID3D11Resource *) backbuffer, )); error: SAFE_RELEASE(backbuffer); return tex; } static bool d3d11_sw_resize(pl_swapchain sw, int *width, int *height) { struct priv *p = PL_PRIV(sw); struct d3d11_ctx *ctx = p->ctx; DXGI_SWAP_CHAIN_DESC desc = {0}; IDXGISwapChain_GetDesc(p->swapchain, &desc); int w = PL_DEF(*width, desc.BufferDesc.Width); int h = PL_DEF(*height, desc.BufferDesc.Height); bool format_changed = p->csp_map.d3d11_fmt != desc.BufferDesc.Format; if (format_changed) { PL_INFO(ctx, "Attempting to reconfigure swap chain format: %s -> %s", pl_get_dxgi_format_name(desc.BufferDesc.Format), pl_get_dxgi_format_name(p->csp_map.d3d11_fmt)); } if (w != desc.BufferDesc.Width || h != desc.BufferDesc.Height || format_changed) { if (p->backbuffer) { PL_ERR(sw, "Tried resizing the swapchain while a frame was in " "progress! Please submit the current frame first."); return false; } HRESULT hr = IDXGISwapChain_ResizeBuffers(p->swapchain, 0, w, h, p->csp_map.d3d11_fmt, desc.Flags); if (hr == E_INVALIDARG && p->csp_map.d3d11_fmt != DXGI_FORMAT_R8G8B8A8_UNORM) { PL_WARN(sw, "Reconfiguring the swapchain failed, re-trying with R8G8B8A8_UNORM fallback."); D3D(IDXGISwapChain_ResizeBuffers(p->swapchain, 0, w, h, DXGI_FORMAT_R8G8B8A8_UNORM, desc.Flags)); // re-configure the colorspace to 8-bit RGB SDR fallback p->csp_map = map_pl_csp_to_d3d11(&pl_color_space_unknown, true); p->fallback_8bit_rgb = true; } else if (FAILED(hr)) { PL_ERR(sw, "Reconfiguring the swapchain failed with error: %s", pl_hresult_to_str(hr)); return false; } } *width = w; *height = h; p->update_swapchain_format = false; return true; error: return false; } static bool d3d11_sw_start_frame(pl_swapchain sw, struct pl_swapchain_frame *out_frame) { struct priv *p = PL_PRIV(sw); struct d3d11_ctx *ctx = p->ctx; if (ctx->is_failed) return false; if (p->backbuffer) { PL_ERR(sw, "Attempted calling `pl_swapchain_start_frame` while a frame " "was already in progress! 
Call `pl_swapchain_submit_frame` first."); return false; } if (p->update_swapchain_format) { int w = 0, h = 0; if (!d3d11_sw_resize(sw, &w, &h)) return false; } p->backbuffer = get_backbuffer(sw); if (!p->backbuffer) return false; int bits = 0; pl_fmt fmt = p->backbuffer->params.format; for (int i = 0; i < fmt->num_components; i++) bits = PL_MAX(bits, fmt->component_depth[i]); *out_frame = (struct pl_swapchain_frame) { .fbo = p->backbuffer, .flipped = false, .color_repr = { .sys = PL_COLOR_SYSTEM_RGB, .levels = PL_COLOR_LEVELS_FULL, .alpha = PL_ALPHA_UNKNOWN, .bits = { .sample_depth = bits, .color_depth = bits, }, }, .color_space = p->csp_map.out_csp, }; return true; } static bool d3d11_sw_submit_frame(pl_swapchain sw) { struct priv *p = PL_PRIV(sw); struct d3d11_ctx *ctx = p->ctx; // Release the backbuffer. We shouldn't hold onto it unnecessarily, because // it prevents external code from resizing the swapchain, which we'd // otherwise support just fine. pl_tex_destroy(sw->gpu, &p->backbuffer); return !ctx->is_failed; } static void d3d11_sw_swap_buffers(pl_swapchain sw) { struct priv *p = PL_PRIV(sw); struct d3d11_ctx *ctx = p->ctx; // Present can fail with a device removed error D3D(IDXGISwapChain_Present(p->swapchain, 1, 0)); error: return; } static DXGI_HDR_METADATA_HDR10 set_hdr10_metadata(const struct pl_hdr_metadata *hdr) { return (DXGI_HDR_METADATA_HDR10) { .RedPrimary = { roundf(hdr->prim.red.x * 50000), roundf(hdr->prim.red.y * 50000) }, .GreenPrimary = { roundf(hdr->prim.green.x * 50000), roundf(hdr->prim.green.y * 50000) }, .BluePrimary = { roundf(hdr->prim.blue.x * 50000), roundf(hdr->prim.blue.y * 50000) }, .WhitePoint = { roundf(hdr->prim.white.x * 50000), roundf(hdr->prim.white.y * 50000) }, .MaxMasteringLuminance = roundf(hdr->max_luma), .MinMasteringLuminance = roundf(hdr->min_luma * 10000), .MaxContentLightLevel = roundf(hdr->max_cll), .MaxFrameAverageLightLevel = roundf(hdr->max_fall), }; } static bool set_swapchain_metadata(struct d3d11_ctx *ctx, IDXGISwapChain3 *swapchain3, struct d3d11_csp_mapping *csp_map) { IDXGISwapChain4 *swapchain4 = NULL; bool ret = false; bool is_hdr = pl_color_space_is_hdr(&csp_map->out_csp); DXGI_HDR_METADATA_HDR10 hdr10 = is_hdr ? set_hdr10_metadata(&csp_map->out_csp.hdr) : (DXGI_HDR_METADATA_HDR10){ 0 }; D3D(IDXGISwapChain3_SetColorSpace1(swapchain3, csp_map->d3d11_csp)); // if we succeeded to set the color space, it's good enough, // since older versions of Windows 10 will not have swapchain v4 available. ret = true; if (FAILED(IDXGISwapChain3_QueryInterface(swapchain3, &IID_IDXGISwapChain4, (void **)&swapchain4))) { PL_TRACE(ctx, "v4 swap chain interface is not available, skipping HDR10 " "metadata configuration."); goto error; } D3D(IDXGISwapChain4_SetHDRMetaData(swapchain4, is_hdr ? DXGI_HDR_METADATA_TYPE_HDR10 : DXGI_HDR_METADATA_TYPE_NONE, is_hdr ? sizeof(hdr10) : 0, is_hdr ? 
&hdr10 : NULL)); goto success; error: csp_map->out_csp.hdr = (struct pl_hdr_metadata) { 0 }; success: SAFE_RELEASE(swapchain4); return ret; } static bool d3d11_format_supported(struct d3d11_ctx *ctx, DXGI_FORMAT fmt) { UINT sup = 0; UINT wanted_sup = D3D11_FORMAT_SUPPORT_TEXTURE2D | D3D11_FORMAT_SUPPORT_DISPLAY | D3D11_FORMAT_SUPPORT_SHADER_SAMPLE | D3D11_FORMAT_SUPPORT_RENDER_TARGET | D3D11_FORMAT_SUPPORT_BLENDABLE; D3D(ID3D11Device_CheckFormatSupport(ctx->dev, fmt, &sup)); return (sup & wanted_sup) == wanted_sup; error: return false; } static bool d3d11_csp_supported(struct d3d11_ctx *ctx, IDXGISwapChain3 *swapchain3, DXGI_COLOR_SPACE_TYPE color_space) { UINT csp_support_flags = 0; D3D(IDXGISwapChain3_CheckColorSpaceSupport(swapchain3, color_space, &csp_support_flags)); return (csp_support_flags & DXGI_SWAP_CHAIN_COLOR_SPACE_SUPPORT_FLAG_PRESENT); error: return false; } static void update_swapchain_color_config(pl_swapchain sw, const struct pl_color_space *csp, bool is_internal) { struct priv *p = PL_PRIV(sw); struct d3d11_ctx *ctx = p->ctx; IDXGISwapChain3 *swapchain3 = NULL; struct d3d11_csp_mapping old_map = p->csp_map; // ignore config changes in fallback mode if (p->fallback_8bit_rgb) goto cleanup; HRESULT hr = IDXGISwapChain_QueryInterface(p->swapchain, &IID_IDXGISwapChain3, (void **)&swapchain3); if (FAILED(hr)) { PL_TRACE(ctx, "v3 swap chain interface is not available, skipping " "color space configuration."); swapchain3 = NULL; } // Lack of swap chain v3 means we cannot control swap chain color space; // Only effective formats are the 8 and 10 bit RGB ones. struct d3d11_csp_mapping csp_map = map_pl_csp_to_d3d11(swapchain3 ? csp : &pl_color_space_unknown, p->disable_10bit_sdr); if (p->csp_map.d3d11_fmt == csp_map.d3d11_fmt && p->csp_map.d3d11_csp == csp_map.d3d11_csp && pl_color_space_equal(&p->csp_map.out_csp, &csp_map.out_csp)) goto cleanup; PL_INFO(ctx, "%s swap chain configuration%s: format: %s, color space: %s.", is_internal ? "Initial" : "New", is_internal ? "" : " received from hint", pl_get_dxgi_format_name(csp_map.d3d11_fmt), pl_get_dxgi_csp_name(csp_map.d3d11_csp)); bool fmt_supported = d3d11_format_supported(ctx, csp_map.d3d11_fmt); bool csp_supported = swapchain3 ? d3d11_csp_supported(ctx, swapchain3, csp_map.d3d11_csp) : true; if (!fmt_supported || !csp_supported) { PL_ERR(ctx, "New swap chain configuration was deemed not supported: " "format: %s, color space: %s. Failling back to 8bit RGB.", fmt_supported ? "supported" : "unsupported", csp_supported ? 
"supported" : "unsupported"); // fall back to 8bit sRGB if requested configuration is not supported csp_map = map_pl_csp_to_d3d11(&pl_color_space_unknown, true); } p->csp_map = csp_map; p->update_swapchain_format = true; if (!swapchain3) goto cleanup; if (!set_swapchain_metadata(ctx, swapchain3, &p->csp_map)) { // format succeeded, but color space configuration failed p->csp_map = old_map; p->csp_map.d3d11_fmt = csp_map.d3d11_fmt; } pl_d3d11_flush_message_queue(ctx, "After colorspace hint"); cleanup: SAFE_RELEASE(swapchain3); } static void d3d11_sw_colorspace_hint(pl_swapchain sw, const struct pl_color_space *csp) { update_swapchain_color_config(sw, csp, false); } IDXGISwapChain *pl_d3d11_swapchain_unwrap(pl_swapchain sw) { struct priv *p = PL_PRIV(sw); IDXGISwapChain_AddRef(p->swapchain); return p->swapchain; } static const struct pl_sw_fns d3d11_swapchain = { .destroy = d3d11_sw_destroy, .latency = d3d11_sw_latency, .resize = d3d11_sw_resize, .colorspace_hint = d3d11_sw_colorspace_hint, .start_frame = d3d11_sw_start_frame, .submit_frame = d3d11_sw_submit_frame, .swap_buffers = d3d11_sw_swap_buffers, }; static HRESULT create_swapchain_1_2(struct d3d11_ctx *ctx, IDXGIFactory2 *factory, const struct pl_d3d11_swapchain_params *params, bool flip, UINT width, UINT height, DXGI_FORMAT format, IDXGISwapChain **swapchain_out) { IDXGISwapChain *swapchain = NULL; IDXGISwapChain1 *swapchain1 = NULL; HRESULT hr; DXGI_SWAP_CHAIN_DESC1 desc = { .Width = width, .Height = height, .Format = format, .SampleDesc.Count = 1, .BufferUsage = DXGI_USAGE_SHADER_INPUT | DXGI_USAGE_RENDER_TARGET_OUTPUT, .Flags = params->flags, }; if (ID3D11Device_GetFeatureLevel(ctx->dev) >= D3D_FEATURE_LEVEL_11_0) desc.BufferUsage |= DXGI_USAGE_UNORDERED_ACCESS; if (flip) { UINT max_latency; IDXGIDevice1_GetMaximumFrameLatency(ctx->dxgi_dev, &max_latency); // Make sure we have at least enough buffers to allow `max_latency` // frames in-flight at once, plus one frame for the frontbuffer desc.BufferCount = max_latency + 1; if (IsWindows10OrGreater()) { desc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD; } else { desc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_SEQUENTIAL; } desc.BufferCount = PL_MIN(desc.BufferCount, DXGI_MAX_SWAP_CHAIN_BUFFERS); } else { desc.SwapEffect = DXGI_SWAP_EFFECT_DISCARD; desc.BufferCount = 1; } if (params->window) { hr = IDXGIFactory2_CreateSwapChainForHwnd(factory, (IUnknown *) ctx->dev, params->window, &desc, NULL, NULL, &swapchain1); } else if (params->core_window) { hr = IDXGIFactory2_CreateSwapChainForCoreWindow(factory, (IUnknown *) ctx->dev, params->core_window, &desc, NULL, &swapchain1); } else { hr = IDXGIFactory2_CreateSwapChainForComposition(factory, (IUnknown *) ctx->dev, &desc, NULL, &swapchain1); } if (FAILED(hr)) goto done; hr = IDXGISwapChain1_QueryInterface(swapchain1, &IID_IDXGISwapChain, (void **) &swapchain); if (FAILED(hr)) goto done; *swapchain_out = swapchain; swapchain = NULL; done: SAFE_RELEASE(swapchain1); SAFE_RELEASE(swapchain); return hr; } static HRESULT create_swapchain_1_1(struct d3d11_ctx *ctx, IDXGIFactory1 *factory, const struct pl_d3d11_swapchain_params *params, UINT width, UINT height, DXGI_FORMAT format, IDXGISwapChain **swapchain_out) { DXGI_SWAP_CHAIN_DESC desc = { .BufferDesc = { .Width = width, .Height = height, .Format = format, }, .SampleDesc.Count = 1, .BufferUsage = DXGI_USAGE_SHADER_INPUT | DXGI_USAGE_RENDER_TARGET_OUTPUT, .BufferCount = 1, .OutputWindow = params->window, .Windowed = TRUE, .SwapEffect = DXGI_SWAP_EFFECT_DISCARD, .Flags = params->flags, }; return 
IDXGIFactory1_CreateSwapChain(factory, (IUnknown *) ctx->dev, &desc, swapchain_out); } static IDXGISwapChain *create_swapchain(struct d3d11_ctx *ctx, const struct pl_d3d11_swapchain_params *params, DXGI_FORMAT format) { IDXGIDevice1 *dxgi_dev = NULL; IDXGIAdapter1 *adapter = NULL; IDXGIFactory1 *factory = NULL; IDXGIFactory2 *factory2 = NULL; IDXGISwapChain *swapchain = NULL; bool success = false; HRESULT hr; D3D(ID3D11Device_QueryInterface(ctx->dev, &IID_IDXGIDevice1, (void **) &dxgi_dev)); D3D(IDXGIDevice1_GetParent(dxgi_dev, &IID_IDXGIAdapter1, (void **) &adapter)); D3D(IDXGIAdapter1_GetParent(adapter, &IID_IDXGIFactory1, (void **) &factory)); hr = IDXGIFactory1_QueryInterface(factory, &IID_IDXGIFactory2, (void **) &factory2); if (FAILED(hr)) factory2 = NULL; bool flip = factory2 && !params->blit; UINT width = PL_DEF(params->width, 1); UINT height = PL_DEF(params->height, 1); // If both width and height are unset, the default size is the window size if (params->window && params->width == 0 && params->height == 0) { RECT rc; if (GetClientRect(params->window, &rc)) { width = PL_DEF(rc.right - rc.left, 1); height = PL_DEF(rc.bottom - rc.top, 1); } } // Return here to retry creating the swapchain do { if (factory2) { // Create a DXGI 1.2+ (Windows 8+) swap chain if possible hr = create_swapchain_1_2(ctx, factory2, params, flip, width, height, format, &swapchain); } else { // Fall back to DXGI 1.1 (Windows 7) hr = create_swapchain_1_1(ctx, factory, params, width, height, format, &swapchain); } if (SUCCEEDED(hr)) break; pl_d3d11_after_error(ctx, hr); if (flip) { PL_DEBUG(ctx, "Failed to create flip-model swapchain, trying bitblt"); flip = false; continue; } PL_FATAL(ctx, "Failed to create swapchain: %s", pl_hresult_to_str(hr)); goto error; } while (true); // Prevent DXGI from making changes to the window, otherwise it will hook // the Alt+Enter keystroke and make it trigger an ugly transition to // legacy exclusive fullscreen mode. IDXGIFactory_MakeWindowAssociation(factory, params->window, DXGI_MWA_NO_WINDOW_CHANGES | DXGI_MWA_NO_ALT_ENTER | DXGI_MWA_NO_PRINT_SCREEN); success = true; error: if (!success) SAFE_RELEASE(swapchain); SAFE_RELEASE(factory2); SAFE_RELEASE(factory); SAFE_RELEASE(adapter); SAFE_RELEASE(dxgi_dev); return swapchain; } pl_swapchain pl_d3d11_create_swapchain(pl_d3d11 d3d11, const struct pl_d3d11_swapchain_params *params) { struct d3d11_ctx *ctx = PL_PRIV(d3d11); pl_gpu gpu = d3d11->gpu; bool success = false; struct pl_swapchain_t *sw = pl_zalloc_obj(NULL, sw, struct priv); struct priv *p = PL_PRIV(sw); *sw = (struct pl_swapchain_t) { .log = gpu->log, .gpu = gpu, }; *p = (struct priv) { .impl = d3d11_swapchain, .ctx = ctx, // default to standard 8 or 10 bit RGB, unset pl_color_space .csp_map = { .d3d11_fmt = params->disable_10bit_sdr ? DXGI_FORMAT_R8G8B8A8_UNORM : (d3d11_format_supported(ctx, DXGI_FORMAT_R10G10B10A2_UNORM) ? 
DXGI_FORMAT_R10G10B10A2_UNORM : DXGI_FORMAT_R8G8B8A8_UNORM), }, .disable_10bit_sdr = params->disable_10bit_sdr, }; if (params->swapchain) { p->swapchain = params->swapchain; IDXGISwapChain_AddRef(params->swapchain); } else { p->swapchain = create_swapchain(ctx, params, p->csp_map.d3d11_fmt); if (!p->swapchain) goto error; } DXGI_SWAP_CHAIN_DESC scd = {0}; IDXGISwapChain_GetDesc(p->swapchain, &scd); if (scd.SwapEffect == DXGI_SWAP_EFFECT_FLIP_SEQUENTIAL || scd.SwapEffect == DXGI_SWAP_EFFECT_FLIP_DISCARD) { PL_INFO(gpu, "Using flip-model presentation"); } else { PL_INFO(gpu, "Using bitblt-model presentation"); } p->csp_map.d3d11_fmt = scd.BufferDesc.Format; update_swapchain_color_config(sw, &pl_color_space_unknown, true); success = true; error: if (!success) { PL_FATAL(gpu, "Failed to create Direct3D 11 swapchain"); d3d11_sw_destroy(sw); sw = NULL; } return sw; } libplacebo-v7.349.0/src/d3d11/utils.c000066400000000000000000000456521463457750100171170ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #include #include "utils.h" // D3D11.3 message IDs, not present in mingw-w64 v9 #define D3D11_MESSAGE_ID_CREATE_FENCE (0x30020c) #define D3D11_MESSAGE_ID_DESTROY_FENCE (0x30020a) #ifdef PL_HAVE_DXGI_DEBUG static enum pl_log_level log_level_override(unsigned int id) { switch (id) { // These warnings can happen when a pl_timer is used too often before a // blocking pl_swapchain_swap_buffers() or pl_gpu_finish(), overflowing // its internal ring buffer and causing older query objects to be reused // before their results are read. This is expected behavior, so reduce // the log level to PL_LOG_TRACE to prevent log spam. case D3D11_MESSAGE_ID_QUERY_BEGIN_ABANDONING_PREVIOUS_RESULTS: case D3D11_MESSAGE_ID_QUERY_END_ABANDONING_PREVIOUS_RESULTS: return PL_LOG_TRACE; // D3D11 writes log messages every time an object is created or // destroyed. That results in a lot of log spam, so force PL_LOG_TRACE. 
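// The helper macro below expands to the matching CREATE_*/DESTROY_* message ID pair, so both lifetime events for a given object type share one case.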
#define OBJ_LIFETIME_MESSAGES(obj) \ case D3D11_MESSAGE_ID_CREATE_ ## obj: \ case D3D11_MESSAGE_ID_DESTROY_ ## obj OBJ_LIFETIME_MESSAGES(CONTEXT): OBJ_LIFETIME_MESSAGES(BUFFER): OBJ_LIFETIME_MESSAGES(TEXTURE1D): OBJ_LIFETIME_MESSAGES(TEXTURE2D): OBJ_LIFETIME_MESSAGES(TEXTURE3D): OBJ_LIFETIME_MESSAGES(SHADERRESOURCEVIEW): OBJ_LIFETIME_MESSAGES(RENDERTARGETVIEW): OBJ_LIFETIME_MESSAGES(DEPTHSTENCILVIEW): OBJ_LIFETIME_MESSAGES(VERTEXSHADER): OBJ_LIFETIME_MESSAGES(HULLSHADER): OBJ_LIFETIME_MESSAGES(DOMAINSHADER): OBJ_LIFETIME_MESSAGES(GEOMETRYSHADER): OBJ_LIFETIME_MESSAGES(PIXELSHADER): OBJ_LIFETIME_MESSAGES(INPUTLAYOUT): OBJ_LIFETIME_MESSAGES(SAMPLER): OBJ_LIFETIME_MESSAGES(BLENDSTATE): OBJ_LIFETIME_MESSAGES(DEPTHSTENCILSTATE): OBJ_LIFETIME_MESSAGES(RASTERIZERSTATE): OBJ_LIFETIME_MESSAGES(QUERY): OBJ_LIFETIME_MESSAGES(PREDICATE): OBJ_LIFETIME_MESSAGES(COUNTER): OBJ_LIFETIME_MESSAGES(COMMANDLIST): OBJ_LIFETIME_MESSAGES(CLASSINSTANCE): OBJ_LIFETIME_MESSAGES(CLASSLINKAGE): OBJ_LIFETIME_MESSAGES(COMPUTESHADER): OBJ_LIFETIME_MESSAGES(UNORDEREDACCESSVIEW): OBJ_LIFETIME_MESSAGES(VIDEODECODER): OBJ_LIFETIME_MESSAGES(VIDEOPROCESSORENUM): OBJ_LIFETIME_MESSAGES(VIDEOPROCESSOR): OBJ_LIFETIME_MESSAGES(DECODEROUTPUTVIEW): OBJ_LIFETIME_MESSAGES(PROCESSORINPUTVIEW): OBJ_LIFETIME_MESSAGES(PROCESSOROUTPUTVIEW): OBJ_LIFETIME_MESSAGES(DEVICECONTEXTSTATE): OBJ_LIFETIME_MESSAGES(FENCE): return PL_LOG_TRACE; #undef OBJ_LIFETIME_MESSAGES // Don't force the log level of any other messages. It will be mapped // from the D3D severity code instead. default: return PL_LOG_NONE; } } #endif void pl_d3d11_flush_message_queue(struct d3d11_ctx *ctx, const char *header) { #ifdef PL_HAVE_DXGI_DEBUG if (!ctx->iqueue) return; static const enum pl_log_level severity_map[] = { [DXGI_INFO_QUEUE_MESSAGE_SEVERITY_CORRUPTION] = PL_LOG_FATAL, [DXGI_INFO_QUEUE_MESSAGE_SEVERITY_ERROR] = PL_LOG_ERR, [DXGI_INFO_QUEUE_MESSAGE_SEVERITY_WARNING] = PL_LOG_WARN, [DXGI_INFO_QUEUE_MESSAGE_SEVERITY_INFO] = PL_LOG_DEBUG, [DXGI_INFO_QUEUE_MESSAGE_SEVERITY_MESSAGE] = PL_LOG_DEBUG, }; enum pl_log_level header_printed = PL_LOG_NONE; // After the storage limit is reached and ID3D11InfoQueue::ClearStoredMessages // is called message counter seems to be initialized to -1 which is quite big // number if we read it as uint64_t. Any subsequent call to the // ID3D11InfoQueue::GetNumStoredMessages will be off by one. // Use ID3D11InfoQueue_GetNumStoredMessagesAllowedByRetrievalFilter without // any filter set, which seem to be unaffected by this bug and return correct // number of messages. // IDXGIInfoQueue seems to be unaffected, but keep the same way of retrival uint64_t messages = IDXGIInfoQueue_GetNumStoredMessagesAllowedByRetrievalFilters(ctx->iqueue, DXGI_DEBUG_ALL); // Just to be on the safe side, check also for the mentioned -1 value... 
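// (The stored messages are cleared via ClearStoredMessages at the end of this function, so retrieval starts from index 0 again on the next flush.)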
if (!messages || messages == UINT64_C(-1)) return; uint64_t discarded = IDXGIInfoQueue_GetNumMessagesDiscardedByMessageCountLimit(ctx->iqueue, DXGI_DEBUG_ALL); if (discarded > ctx->last_discarded) { PL_WARN(ctx, "%s:", header); header_printed = PL_LOG_WARN; // Notify number of messages skipped due to the message count limit PL_WARN(ctx, " (skipped %"PRIu64" debug layer messages)", discarded - ctx->last_discarded); ctx->last_discarded = discarded; } // Copy debug layer messages to libplacebo's log output for (uint64_t i = 0; i < messages; i++) { SIZE_T len; if (FAILED(IDXGIInfoQueue_GetMessage(ctx->iqueue, DXGI_DEBUG_ALL, i, NULL, &len))) goto error; pl_grow((void *) ctx->d3d11, &ctx->dxgi_msg, len); DXGI_INFO_QUEUE_MESSAGE *dxgi_msg = ctx->dxgi_msg; if (FAILED(IDXGIInfoQueue_GetMessage(ctx->iqueue, DXGI_DEBUG_ALL, i, dxgi_msg, &len))) goto error; enum pl_log_level level = PL_LOG_NONE; if (IsEqualGUID(&dxgi_msg->Producer, &DXGI_DEBUG_D3D11)) level = log_level_override(dxgi_msg->ID); if (level == PL_LOG_NONE) level = severity_map[dxgi_msg->Severity]; if (pl_msg_test(ctx->log, level)) { // If the header hasn't been printed, or it was printed for a lower // log level than the current message, print it (again) if (header_printed == PL_LOG_NONE || header_printed > level) { PL_MSG(ctx, level, "%s:", header); pl_log_stack_trace(ctx->log, level); header_printed = level; } PL_MSG(ctx, level, " %d: %.*s", (int) dxgi_msg->ID, (int) dxgi_msg->DescriptionByteLength, dxgi_msg->pDescription); } if (dxgi_msg->Severity <= DXGI_INFO_QUEUE_MESSAGE_SEVERITY_ERROR) pl_debug_abort(); } error: IDXGIInfoQueue_ClearStoredMessages(ctx->iqueue, DXGI_DEBUG_ALL); #endif } HRESULT pl_d3d11_check_device_removed(struct d3d11_ctx *ctx, HRESULT hr) { // This can be called before we have a device if (!ctx->dev) return hr; switch (hr) { case DXGI_ERROR_DEVICE_HUNG: case DXGI_ERROR_DEVICE_RESET: case DXGI_ERROR_DRIVER_INTERNAL_ERROR: ctx->is_failed = true; break; case D3DDDIERR_DEVICEREMOVED: case DXGI_ERROR_DEVICE_REMOVED: hr = ID3D11Device_GetDeviceRemovedReason(ctx->dev); ctx->is_failed = true; break; } if (ctx->is_failed) PL_ERR(ctx, "Device lost!"); return hr; } HRESULT pl_d3d11_after_error(struct d3d11_ctx *ctx, HRESULT hr) { hr = pl_d3d11_check_device_removed(ctx, hr); pl_d3d11_flush_message_queue(ctx, "After error"); return hr; } struct dll_version pl_get_dll_version(const wchar_t *name) { void *data = NULL; struct dll_version ret = {0}; DWORD size = GetFileVersionInfoSizeW(name, &(DWORD) {0}); if (!size) goto error; data = pl_alloc(NULL, size); if (!GetFileVersionInfoW(name, 0, size, data)) goto error; VS_FIXEDFILEINFO *ffi; UINT ffi_len; if (!VerQueryValueW(data, L"\\", (void**)&ffi, &ffi_len)) goto error; if (ffi_len < sizeof(*ffi)) goto error; ret = (struct dll_version) { .major = HIWORD(ffi->dwFileVersionMS), .minor = LOWORD(ffi->dwFileVersionMS), .build = HIWORD(ffi->dwFileVersionLS), .revision = LOWORD(ffi->dwFileVersionLS), }; error: pl_free(data); return ret; } wchar_t *pl_from_utf8(void *ctx, const char *str) { int count = MultiByteToWideChar(CP_UTF8, 0, str, -1, NULL, 0); pl_assert(count > 0); wchar_t *ret = pl_calloc_ptr(ctx, count, ret); MultiByteToWideChar(CP_UTF8, 0, str, -1, ret, count); return ret; } char *pl_to_utf8(void *ctx, const wchar_t *str) { int count = WideCharToMultiByte(CP_UTF8, 0, str, -1, NULL, 0, NULL, NULL); pl_assert(count > 0); char *ret = pl_calloc_ptr(ctx, count, ret); WideCharToMultiByte(CP_UTF8, 0, str, -1, ret, count, NULL, NULL); return ret; } static const char 
*hresult_str(HRESULT hr) { switch (hr) { #define CASE(name) case name: return #name CASE(S_OK); CASE(S_FALSE); CASE(E_ABORT); CASE(E_ACCESSDENIED); CASE(E_FAIL); CASE(E_HANDLE); CASE(E_INVALIDARG); CASE(E_NOINTERFACE); CASE(E_NOTIMPL); CASE(E_OUTOFMEMORY); CASE(E_POINTER); CASE(E_UNEXPECTED); CASE(DXGI_ERROR_ACCESS_DENIED); CASE(DXGI_ERROR_ACCESS_LOST); CASE(DXGI_ERROR_CANNOT_PROTECT_CONTENT); CASE(DXGI_ERROR_DEVICE_HUNG); CASE(DXGI_ERROR_DEVICE_REMOVED); CASE(DXGI_ERROR_DEVICE_RESET); CASE(DXGI_ERROR_DRIVER_INTERNAL_ERROR); CASE(DXGI_ERROR_FRAME_STATISTICS_DISJOINT); CASE(DXGI_ERROR_GRAPHICS_VIDPN_SOURCE_IN_USE); CASE(DXGI_ERROR_INVALID_CALL); CASE(DXGI_ERROR_MORE_DATA); CASE(DXGI_ERROR_NAME_ALREADY_EXISTS); CASE(DXGI_ERROR_NONEXCLUSIVE); CASE(DXGI_ERROR_NOT_CURRENTLY_AVAILABLE); CASE(DXGI_ERROR_NOT_FOUND); CASE(DXGI_ERROR_REMOTE_CLIENT_DISCONNECTED); CASE(DXGI_ERROR_REMOTE_OUTOFMEMORY); CASE(DXGI_ERROR_RESTRICT_TO_OUTPUT_STALE); CASE(DXGI_ERROR_SDK_COMPONENT_MISSING); CASE(DXGI_ERROR_SESSION_DISCONNECTED); CASE(DXGI_ERROR_UNSUPPORTED); CASE(DXGI_ERROR_WAIT_TIMEOUT); CASE(DXGI_ERROR_WAS_STILL_DRAWING); #undef CASE default: return "Unknown error"; } } static char *format_error(void *ctx, DWORD error) { wchar_t *wstr; if (!FormatMessageW(FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS, NULL, error, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), (LPWSTR)&wstr, 0, NULL)) { return NULL; } // Trim any trailing newline from the message for (int i = wcslen(wstr) - 1; i >= 0; i--) { if (wstr[i] != '\r' && wstr[i] != '\n') { wstr[i + 1] = '\0'; break; } } char *str = pl_to_utf8(ctx, wstr); LocalFree(wstr); return str; } char *pl_hresult_to_str_buf(char *buf, size_t buf_size, HRESULT hr) { char *fmsg = format_error(NULL, hr); const char *code = hresult_str(hr); if (fmsg) { snprintf(buf, buf_size, "%s (%s, 0x%08lx)", fmsg, code, hr); } else { snprintf(buf, buf_size, "%s, 0x%08lx", code, hr); } pl_free(fmsg); return buf; } #define D3D11_DXGI_ENUM(prefix, define) { case prefix ## define: return #define; } const char *pl_get_dxgi_format_name(DXGI_FORMAT fmt) { switch (fmt) { D3D11_DXGI_ENUM(DXGI_FORMAT_, UNKNOWN); D3D11_DXGI_ENUM(DXGI_FORMAT_, R32G32B32A32_TYPELESS); D3D11_DXGI_ENUM(DXGI_FORMAT_, R32G32B32A32_FLOAT); D3D11_DXGI_ENUM(DXGI_FORMAT_, R32G32B32A32_UINT); D3D11_DXGI_ENUM(DXGI_FORMAT_, R32G32B32A32_SINT); D3D11_DXGI_ENUM(DXGI_FORMAT_, R32G32B32_TYPELESS); D3D11_DXGI_ENUM(DXGI_FORMAT_, R32G32B32_FLOAT); D3D11_DXGI_ENUM(DXGI_FORMAT_, R32G32B32_UINT); D3D11_DXGI_ENUM(DXGI_FORMAT_, R32G32B32_SINT); D3D11_DXGI_ENUM(DXGI_FORMAT_, R16G16B16A16_TYPELESS); D3D11_DXGI_ENUM(DXGI_FORMAT_, R16G16B16A16_FLOAT); D3D11_DXGI_ENUM(DXGI_FORMAT_, R16G16B16A16_UNORM); D3D11_DXGI_ENUM(DXGI_FORMAT_, R16G16B16A16_UINT); D3D11_DXGI_ENUM(DXGI_FORMAT_, R16G16B16A16_SNORM); D3D11_DXGI_ENUM(DXGI_FORMAT_, R16G16B16A16_SINT); D3D11_DXGI_ENUM(DXGI_FORMAT_, R32G32_TYPELESS); D3D11_DXGI_ENUM(DXGI_FORMAT_, R32G32_FLOAT); D3D11_DXGI_ENUM(DXGI_FORMAT_, R32G32_UINT); D3D11_DXGI_ENUM(DXGI_FORMAT_, R32G32_SINT); D3D11_DXGI_ENUM(DXGI_FORMAT_, R32G8X24_TYPELESS); D3D11_DXGI_ENUM(DXGI_FORMAT_, D32_FLOAT_S8X24_UINT); D3D11_DXGI_ENUM(DXGI_FORMAT_, R32_FLOAT_X8X24_TYPELESS); D3D11_DXGI_ENUM(DXGI_FORMAT_, X32_TYPELESS_G8X24_UINT); D3D11_DXGI_ENUM(DXGI_FORMAT_, R10G10B10A2_TYPELESS); D3D11_DXGI_ENUM(DXGI_FORMAT_, R10G10B10A2_UNORM); D3D11_DXGI_ENUM(DXGI_FORMAT_, R10G10B10A2_UINT); D3D11_DXGI_ENUM(DXGI_FORMAT_, R11G11B10_FLOAT); D3D11_DXGI_ENUM(DXGI_FORMAT_, R8G8B8A8_TYPELESS); 
D3D11_DXGI_ENUM(DXGI_FORMAT_, R8G8B8A8_UNORM); D3D11_DXGI_ENUM(DXGI_FORMAT_, R8G8B8A8_UNORM_SRGB); D3D11_DXGI_ENUM(DXGI_FORMAT_, R8G8B8A8_UINT); D3D11_DXGI_ENUM(DXGI_FORMAT_, R8G8B8A8_SNORM); D3D11_DXGI_ENUM(DXGI_FORMAT_, R8G8B8A8_SINT); D3D11_DXGI_ENUM(DXGI_FORMAT_, R16G16_TYPELESS); D3D11_DXGI_ENUM(DXGI_FORMAT_, R16G16_FLOAT); D3D11_DXGI_ENUM(DXGI_FORMAT_, R16G16_UNORM); D3D11_DXGI_ENUM(DXGI_FORMAT_, R16G16_UINT); D3D11_DXGI_ENUM(DXGI_FORMAT_, R16G16_SNORM); D3D11_DXGI_ENUM(DXGI_FORMAT_, R16G16_SINT); D3D11_DXGI_ENUM(DXGI_FORMAT_, R32_TYPELESS); D3D11_DXGI_ENUM(DXGI_FORMAT_, D32_FLOAT); D3D11_DXGI_ENUM(DXGI_FORMAT_, R32_FLOAT); D3D11_DXGI_ENUM(DXGI_FORMAT_, R32_UINT); D3D11_DXGI_ENUM(DXGI_FORMAT_, R32_SINT); D3D11_DXGI_ENUM(DXGI_FORMAT_, R24G8_TYPELESS); D3D11_DXGI_ENUM(DXGI_FORMAT_, D24_UNORM_S8_UINT); D3D11_DXGI_ENUM(DXGI_FORMAT_, R24_UNORM_X8_TYPELESS); D3D11_DXGI_ENUM(DXGI_FORMAT_, X24_TYPELESS_G8_UINT); D3D11_DXGI_ENUM(DXGI_FORMAT_, R8G8_TYPELESS); D3D11_DXGI_ENUM(DXGI_FORMAT_, R8G8_UNORM); D3D11_DXGI_ENUM(DXGI_FORMAT_, R8G8_UINT); D3D11_DXGI_ENUM(DXGI_FORMAT_, R8G8_SNORM); D3D11_DXGI_ENUM(DXGI_FORMAT_, R8G8_SINT); D3D11_DXGI_ENUM(DXGI_FORMAT_, R16_TYPELESS); D3D11_DXGI_ENUM(DXGI_FORMAT_, R16_FLOAT); D3D11_DXGI_ENUM(DXGI_FORMAT_, D16_UNORM); D3D11_DXGI_ENUM(DXGI_FORMAT_, R16_UNORM); D3D11_DXGI_ENUM(DXGI_FORMAT_, R16_UINT); D3D11_DXGI_ENUM(DXGI_FORMAT_, R16_SNORM); D3D11_DXGI_ENUM(DXGI_FORMAT_, R16_SINT); D3D11_DXGI_ENUM(DXGI_FORMAT_, R8_TYPELESS); D3D11_DXGI_ENUM(DXGI_FORMAT_, R8_UNORM); D3D11_DXGI_ENUM(DXGI_FORMAT_, R8_UINT); D3D11_DXGI_ENUM(DXGI_FORMAT_, R8_SNORM); D3D11_DXGI_ENUM(DXGI_FORMAT_, R8_SINT); D3D11_DXGI_ENUM(DXGI_FORMAT_, A8_UNORM); D3D11_DXGI_ENUM(DXGI_FORMAT_, R1_UNORM); D3D11_DXGI_ENUM(DXGI_FORMAT_, R9G9B9E5_SHAREDEXP); D3D11_DXGI_ENUM(DXGI_FORMAT_, R8G8_B8G8_UNORM); D3D11_DXGI_ENUM(DXGI_FORMAT_, G8R8_G8B8_UNORM); D3D11_DXGI_ENUM(DXGI_FORMAT_, BC1_TYPELESS); D3D11_DXGI_ENUM(DXGI_FORMAT_, BC1_UNORM); D3D11_DXGI_ENUM(DXGI_FORMAT_, BC1_UNORM_SRGB); D3D11_DXGI_ENUM(DXGI_FORMAT_, BC2_TYPELESS); D3D11_DXGI_ENUM(DXGI_FORMAT_, BC2_UNORM); D3D11_DXGI_ENUM(DXGI_FORMAT_, BC2_UNORM_SRGB); D3D11_DXGI_ENUM(DXGI_FORMAT_, BC3_TYPELESS); D3D11_DXGI_ENUM(DXGI_FORMAT_, BC3_UNORM); D3D11_DXGI_ENUM(DXGI_FORMAT_, BC3_UNORM_SRGB); D3D11_DXGI_ENUM(DXGI_FORMAT_, BC4_TYPELESS); D3D11_DXGI_ENUM(DXGI_FORMAT_, BC4_UNORM); D3D11_DXGI_ENUM(DXGI_FORMAT_, BC4_SNORM); D3D11_DXGI_ENUM(DXGI_FORMAT_, BC5_TYPELESS); D3D11_DXGI_ENUM(DXGI_FORMAT_, BC5_UNORM); D3D11_DXGI_ENUM(DXGI_FORMAT_, BC5_SNORM); D3D11_DXGI_ENUM(DXGI_FORMAT_, B5G6R5_UNORM); D3D11_DXGI_ENUM(DXGI_FORMAT_, B5G5R5A1_UNORM); D3D11_DXGI_ENUM(DXGI_FORMAT_, B8G8R8A8_UNORM); D3D11_DXGI_ENUM(DXGI_FORMAT_, B8G8R8X8_UNORM); D3D11_DXGI_ENUM(DXGI_FORMAT_, R10G10B10_XR_BIAS_A2_UNORM); D3D11_DXGI_ENUM(DXGI_FORMAT_, B8G8R8A8_TYPELESS); D3D11_DXGI_ENUM(DXGI_FORMAT_, B8G8R8A8_UNORM_SRGB); D3D11_DXGI_ENUM(DXGI_FORMAT_, B8G8R8X8_TYPELESS); D3D11_DXGI_ENUM(DXGI_FORMAT_, B8G8R8X8_UNORM_SRGB); D3D11_DXGI_ENUM(DXGI_FORMAT_, BC6H_TYPELESS); D3D11_DXGI_ENUM(DXGI_FORMAT_, BC6H_UF16); D3D11_DXGI_ENUM(DXGI_FORMAT_, BC6H_SF16); D3D11_DXGI_ENUM(DXGI_FORMAT_, BC7_TYPELESS); D3D11_DXGI_ENUM(DXGI_FORMAT_, BC7_UNORM); D3D11_DXGI_ENUM(DXGI_FORMAT_, BC7_UNORM_SRGB); D3D11_DXGI_ENUM(DXGI_FORMAT_, AYUV); D3D11_DXGI_ENUM(DXGI_FORMAT_, Y410); D3D11_DXGI_ENUM(DXGI_FORMAT_, Y416); D3D11_DXGI_ENUM(DXGI_FORMAT_, NV12); D3D11_DXGI_ENUM(DXGI_FORMAT_, P010); D3D11_DXGI_ENUM(DXGI_FORMAT_, P016); D3D11_DXGI_ENUM(DXGI_FORMAT_, 420_OPAQUE); D3D11_DXGI_ENUM(DXGI_FORMAT_, YUY2); 
D3D11_DXGI_ENUM(DXGI_FORMAT_, Y210); D3D11_DXGI_ENUM(DXGI_FORMAT_, Y216); D3D11_DXGI_ENUM(DXGI_FORMAT_, NV11); D3D11_DXGI_ENUM(DXGI_FORMAT_, AI44); D3D11_DXGI_ENUM(DXGI_FORMAT_, IA44); D3D11_DXGI_ENUM(DXGI_FORMAT_, P8); D3D11_DXGI_ENUM(DXGI_FORMAT_, A8P8); D3D11_DXGI_ENUM(DXGI_FORMAT_, B4G4R4A4_UNORM); D3D11_DXGI_ENUM(DXGI_FORMAT_, P208); D3D11_DXGI_ENUM(DXGI_FORMAT_, V208); D3D11_DXGI_ENUM(DXGI_FORMAT_, V408); D3D11_DXGI_ENUM(DXGI_FORMAT_, FORCE_UINT); } return ""; } const char *pl_get_dxgi_csp_name(DXGI_COLOR_SPACE_TYPE csp) { switch ((int) csp) { D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, RGB_FULL_G22_NONE_P709); D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, RGB_FULL_G10_NONE_P709); D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, RGB_STUDIO_G22_NONE_P709); D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, RGB_STUDIO_G22_NONE_P2020); D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, RESERVED); D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, YCBCR_FULL_G22_NONE_P709_X601); D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, YCBCR_STUDIO_G22_LEFT_P601); D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, YCBCR_FULL_G22_LEFT_P601); D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, YCBCR_STUDIO_G22_LEFT_P709); D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, YCBCR_FULL_G22_LEFT_P709); D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, YCBCR_STUDIO_G22_LEFT_P2020); D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, YCBCR_FULL_G22_LEFT_P2020); D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, RGB_FULL_G2084_NONE_P2020); D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, YCBCR_STUDIO_G2084_LEFT_P2020); D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, RGB_STUDIO_G2084_NONE_P2020); D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, YCBCR_STUDIO_G22_TOPLEFT_P2020); D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, YCBCR_STUDIO_G2084_TOPLEFT_P2020); D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, RGB_FULL_G22_NONE_P2020); D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, YCBCR_STUDIO_GHLG_TOPLEFT_P2020); D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, YCBCR_FULL_GHLG_TOPLEFT_P2020); D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, RGB_STUDIO_G24_NONE_P709); D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, RGB_STUDIO_G24_NONE_P2020); D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, YCBCR_STUDIO_G24_LEFT_P709); D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, YCBCR_STUDIO_G24_LEFT_P2020); D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, YCBCR_STUDIO_G24_TOPLEFT_P2020); D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, CUSTOM); } return ""; } libplacebo-v7.349.0/src/d3d11/utils.h000066400000000000000000000101741463457750100171130ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #pragma once #include "common.h" #define DXGI_COLOR_SPACE_RGB_STUDIO_G24_NONE_P709 ((DXGI_COLOR_SPACE_TYPE)20) #define DXGI_COLOR_SPACE_RGB_STUDIO_G24_NONE_P2020 ((DXGI_COLOR_SPACE_TYPE)21) #define DXGI_COLOR_SPACE_YCBCR_STUDIO_G24_LEFT_P709 ((DXGI_COLOR_SPACE_TYPE)22) #define DXGI_COLOR_SPACE_YCBCR_STUDIO_G24_LEFT_P2020 ((DXGI_COLOR_SPACE_TYPE)23) #define DXGI_COLOR_SPACE_YCBCR_STUDIO_G24_TOPLEFT_P2020 ((DXGI_COLOR_SPACE_TYPE)24) // Flush debug messages from D3D11's info queue to libplacebo's log output. // Should be called regularly. 
void pl_d3d11_flush_message_queue(struct d3d11_ctx *ctx, const char *header); // Some D3D11 functions can fail with a set of HRESULT codes which indicate the // device has been removed. This is equivalent to libplacebo's gpu_is_failed // state and indicates that the pl_gpu needs to be recreated. This function // checks for one of those HRESULTs, sets the failed state, and returns a // specific HRESULT that indicates why the device was removed (eg. GPU hang, // driver crash, etc.) HRESULT pl_d3d11_check_device_removed(struct d3d11_ctx *ctx, HRESULT hr); // Helper function for the D3D() macro, though it can be called directly when // handling D3D11 errors if the D3D() macro isn't suitable for some reason. // Calls `pl_d3d11_check_device_removed` and `pl_d3d11_drain_debug_messages` and // returns the specific HRESULT from `pl_d3d11_check_device_removed` for logging // purposes. HRESULT pl_d3d11_after_error(struct d3d11_ctx *ctx, HRESULT hr); // Convenience macro for running DXGI/D3D11 functions and performing appropriate // actions on failure. Can also be used for any HRESULT-returning function. #define D3D(call) \ do { \ HRESULT hr_ = (call); \ if (FAILED(hr_)) { \ hr_ = pl_d3d11_after_error(ctx, hr_); \ PL_ERR(ctx, "%s: %s (%s:%d)", #call, pl_hresult_to_str(hr_), \ __FILE__, __LINE__); \ goto error; \ } \ } while (0); // Conditionally release a COM interface and set the pointer to NULL #define SAFE_RELEASE(iface) \ do { \ if (iface) \ (iface)->lpVtbl->Release(iface); \ (iface) = NULL; \ } while (0) struct dll_version { uint16_t major; uint16_t minor; uint16_t build; uint16_t revision; }; // Get the version number of a DLL. This calls GetFileVersionInfoW, which should // call LoadLibraryExW internally, so it should get the same copy of the DLL // that is loaded into memory if there is a copy in System32 and a copy in the // %PATH% or application directory. struct dll_version pl_get_dll_version(const wchar_t *name); wchar_t *pl_from_utf8(void *ctx, const char *str); char *pl_to_utf8(void *ctx, const wchar_t *str); #define pl_hresult_to_str(hr) pl_hresult_to_str_buf((char[256]){0}, 256, (hr)) char *pl_hresult_to_str_buf(char *buf, size_t buf_size, HRESULT hr); const char *pl_get_dxgi_csp_name(DXGI_COLOR_SPACE_TYPE csp); const char *pl_get_dxgi_format_name(DXGI_FORMAT fmt); libplacebo-v7.349.0/src/dispatch.c000066400000000000000000001575601463457750100167440ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #include "common.h" #include "log.h" #include "shaders.h" #include "dispatch.h" #include "gpu.h" #include "pl_thread.h" // Maximum number of passes to keep around at once. If full, passes older than // MIN_AGE are evicted to make room. (Failing that, the passes array doubles) #define MAX_PASSES 100 #define MIN_AGE 10 enum { TMP_PRELUDE, // GLSL version, global definitions, etc. 
TMP_MAIN, // main GLSL shader body TMP_VERT_HEAD, // vertex shader inputs/outputs TMP_VERT_BODY, // vertex shader body TMP_COUNT, }; struct pl_dispatch_t { pl_mutex lock; pl_log log; pl_gpu gpu; uint8_t current_ident; uint8_t current_index; bool dynamic_constants; int max_passes; void (*info_callback)(void *, const struct pl_dispatch_info *); void *info_priv; PL_ARRAY(pl_shader) shaders; // to avoid re-allocations PL_ARRAY(struct pass *) passes; // compiled passes // temporary buffers to help avoid re_allocations during pass creation PL_ARRAY(const struct pl_buffer_var *) buf_tmp; pl_str_builder tmp[TMP_COUNT]; uint8_t *ubo_tmp; }; enum pass_var_type { PASS_VAR_NONE = 0, PASS_VAR_GLOBAL, // regular/global uniforms PASS_VAR_UBO, // uniform buffers PASS_VAR_PUSHC // push constants }; // Cached metadata about a variable's effective placement / update method struct pass_var { int index; // for pl_var_update enum pass_var_type type; struct pl_var_layout layout; void *cached_data; }; struct pass { uint64_t signature; pl_pass pass; int last_index; // contains cached data and update metadata, same order as pl_shader struct pass_var *vars; int num_var_locs; // for uniform buffer updates struct pl_shader_desc ubo_desc; // temporary int ubo_index; pl_buf ubo; // Cached pl_pass_run_params. This will also contain mutable allocations // for the push constants, descriptor bindings (including the binding for // the UBO pre-filled), vertex array and variable updates struct pl_pass_run_params run_params; // for pl_dispatch_info pl_timer timer; uint64_t ts_last; uint64_t ts_peak; uint64_t ts_sum; uint64_t samples[PL_ARRAY_SIZE(((struct pl_dispatch_info *) NULL)->samples)]; int ts_idx; }; static void pass_destroy(pl_dispatch dp, struct pass *pass) { if (!pass) return; pl_buf_destroy(dp->gpu, &pass->ubo); pl_pass_destroy(dp->gpu, &pass->pass); pl_timer_destroy(dp->gpu, &pass->timer); pl_free(pass); } pl_dispatch pl_dispatch_create(pl_log log, pl_gpu gpu) { struct pl_dispatch_t *dp = pl_zalloc_ptr(NULL, dp); pl_mutex_init(&dp->lock); dp->log = log; dp->gpu = gpu; dp->max_passes = MAX_PASSES; for (int i = 0; i < PL_ARRAY_SIZE(dp->tmp); i++) dp->tmp[i] = pl_str_builder_alloc(dp); return dp; } void pl_dispatch_destroy(pl_dispatch *ptr) { pl_dispatch dp = *ptr; if (!dp) return; for (int i = 0; i < dp->passes.num; i++) pass_destroy(dp, dp->passes.elem[i]); for (int i = 0; i < dp->shaders.num; i++) pl_shader_free(&dp->shaders.elem[i]); pl_mutex_destroy(&dp->lock); pl_free(dp); *ptr = NULL; } pl_shader pl_dispatch_begin_ex(pl_dispatch dp, bool unique) { pl_mutex_lock(&dp->lock); struct pl_shader_params params = { .id = unique ? 
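/* For illustration: the typical lifecycle of the dispatch object defined
 * above is roughly the following (hand-written sketch; `log`, `gpu` and
 * `target` are assumed to exist already):
 *
 *     pl_dispatch dp = pl_dispatch_create(log, gpu);
 *     pl_shader sh = pl_dispatch_begin(dp);
 *     // ... populate `sh` using the pl_shader_* / sh_* helpers ...
 *     bool ok = pl_dispatch_finish(dp, &(struct pl_dispatch_params) {
 *         .shader = &sh,
 *         .target = target,
 *     });
 *     // or, to discard an unfinished shader: pl_dispatch_abort(dp, &sh);
 *     pl_dispatch_destroy(&dp);
 *
 * pl_dispatch_finish() consumes the shader and recycles it into dp->shaders,
 * which is why pl_dispatch_begin_ex() below pops from that array before
 * falling back to allocating a fresh pl_shader.
 */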
dp->current_ident++ : 0, .gpu = dp->gpu, .index = dp->current_index, .dynamic_constants = dp->dynamic_constants, }; pl_shader sh = NULL; PL_ARRAY_POP(dp->shaders, &sh); pl_mutex_unlock(&dp->lock); if (sh) { pl_shader_reset(sh, ¶ms); return sh; } return pl_shader_alloc(dp->log, ¶ms); } void pl_dispatch_mark_dynamic(pl_dispatch dp, bool dynamic) { dp->dynamic_constants = dynamic; } void pl_dispatch_callback(pl_dispatch dp, void *priv, void (*cb)(void *priv, const struct pl_dispatch_info *)) { dp->info_callback = cb; dp->info_priv = priv; } pl_shader pl_dispatch_begin(pl_dispatch dp) { return pl_dispatch_begin_ex(dp, false); } static bool add_pass_var(pl_dispatch dp, void *tmp, struct pass *pass, struct pl_pass_params *params, const struct pl_shader_var *sv, struct pass_var *pv, bool greedy) { pl_gpu gpu = dp->gpu; if (pv->type) return true; // Try not to use push constants for "large" values like matrices in the // first pass, since this is likely to exceed the VGPR/pushc size budgets bool try_pushc = greedy || (sv->var.dim_m == 1 && sv->var.dim_a == 1) || sv->dynamic; if (try_pushc && gpu->glsl.vulkan && gpu->limits.max_pushc_size) { pv->layout = pl_std430_layout(params->push_constants_size, &sv->var); size_t new_size = pv->layout.offset + pv->layout.size; if (new_size <= gpu->limits.max_pushc_size) { params->push_constants_size = new_size; pv->type = PASS_VAR_PUSHC; return true; } } // If we haven't placed all PCs yet, don't place anything else, since // we want to try and fit more stuff into PCs before "giving up" if (!greedy) return true; int num_locs = sv->var.dim_v * sv->var.dim_m * sv->var.dim_a; bool can_var = pass->num_var_locs + num_locs <= gpu->limits.max_variable_comps; // Attempt using uniform buffer next. The GLSL version 440 check is due // to explicit offsets on UBO entries. In theory we could leave away // the offsets and support UBOs for older GL as well, but this is a nice // safety net for driver bugs (and also rules out potentially buggy drivers) // Also avoid UBOs for highly dynamic stuff since that requires synchronizing // the UBO writes every frame bool try_ubo = !can_var || !sv->dynamic; if (try_ubo && gpu->glsl.version >= 440 && gpu->limits.max_ubo_size) { if (sh_buf_desc_append(tmp, gpu, &pass->ubo_desc, &pv->layout, sv->var)) { pv->type = PASS_VAR_UBO; return true; } } // Otherwise, use global uniforms if (can_var) { pv->type = PASS_VAR_GLOBAL; pv->index = params->num_variables; pv->layout = pl_var_host_layout(0, &sv->var); PL_ARRAY_APPEND_RAW(tmp, params->variables, params->num_variables, sv->var); pass->num_var_locs += num_locs; return true; } // Ran out of variable binding methods. The most likely scenario in which // this can happen is if we're using a GPU that does not support global // input vars and we've exhausted the UBO size limits. PL_ERR(dp, "Unable to add input variable: possibly exhausted " "variable count / UBO size limits?"); return false; } #define ADD(b, ...) 
pl_str_builder_addf(b, __VA_ARGS__) #define ADD_CAT(b, cat) pl_str_builder_concat(b, cat) #define ADD_CONST(b, s) pl_str_builder_const_str(b, s) static void add_var(pl_str_builder body, const struct pl_var *var) { const char *type = pl_var_glsl_type_name(*var); if (var->dim_a > 1) { ADD(body, "%s "$"[%d];\n", type, sh_ident_unpack(var->name), var->dim_a); } else { ADD(body, "%s "$";\n", type, sh_ident_unpack(var->name)); } } static int cmp_buffer_var(const void *pa, const void *pb) { const struct pl_buffer_var * const *a = pa, * const *b = pb; return PL_CMP((*a)->layout.offset, (*b)->layout.offset); } static void add_buffer_vars(pl_dispatch dp, void *tmp, pl_str_builder body, const struct pl_buffer_var *vars, int num) { // Sort buffer vars by offset PL_ARRAY_RESIZE(dp, dp->buf_tmp, num); for (int i = 0; i < num; i++) dp->buf_tmp.elem[i] = &vars[i]; qsort(dp->buf_tmp.elem, num, sizeof(&vars[0]), cmp_buffer_var); ADD(body, "{\n"); for (int i = 0; i < num; i++) { const struct pl_buffer_var *bv = dp->buf_tmp.elem[i]; // Add an explicit offset wherever possible if (dp->gpu->glsl.version >= 440) ADD(body, " layout(offset=%zu) ", bv->layout.offset); add_var(body, &bv->var); } ADD(body, "};\n"); } struct generate_params { void *tmp; pl_shader sh; struct pass *pass; struct pl_pass_params *pass_params; ident_t out_mat; ident_t out_off; int vert_idx; }; static void generate_shaders(pl_dispatch dp, const struct generate_params *params, pl_str_builder *out_vert_builder, pl_str_builder *out_glsl_builder) { pl_gpu gpu = dp->gpu; pl_shader sh = params->sh; void *tmp = params->tmp; struct pass *pass = params->pass; struct pl_pass_params *pass_params = params->pass_params; pl_str_builder shader_body = sh_finalize_internal(sh); pl_str_builder pre = dp->tmp[TMP_PRELUDE]; ADD(pre, "#version %d%s\n", gpu->glsl.version, (gpu->glsl.gles && gpu->glsl.version > 100) ? 
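/* For illustration: the prelude starts with a #version directive derived
 * from the GLSL capabilities reported by the GPU, e.g.
 *
 *     gpu->glsl.version = 450, gles = false  ->  "#version 450"
 *     gpu->glsl.version = 300, gles = true   ->  "#version 300 es"
 *     gpu->glsl.version = 100, gles = true   ->  "#version 100"
 *
 * GLSL ES 1.00 is the one GLES dialect that must not carry the " es" suffix,
 * which is what the `version > 100` check guards against.
 */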
" es" : ""); if (pass_params->type == PL_PASS_COMPUTE) ADD(pre, "#extension GL_ARB_compute_shader : enable\n"); // Enable this unconditionally if the GPU supports it, since we have no way // of knowing whether subgroups are being used or not if (gpu->glsl.subgroup_size) { ADD(pre, "#extension GL_KHR_shader_subgroup_basic : enable \n" "#extension GL_KHR_shader_subgroup_vote : enable \n" "#extension GL_KHR_shader_subgroup_arithmetic : enable \n" "#extension GL_KHR_shader_subgroup_ballot : enable \n" "#extension GL_KHR_shader_subgroup_shuffle : enable \n" "#extension GL_KHR_shader_subgroup_clustered : enable \n" "#extension GL_KHR_shader_subgroup_quad : enable \n"); } // Enable all extensions needed for different types of input bool has_ssbo = false, has_ubo = false, has_img = false, has_texel = false, has_ext = false, has_nofmt = false, has_gather = false; for (int i = 0; i < sh->descs.num; i++) { switch (sh->descs.elem[i].desc.type) { case PL_DESC_BUF_UNIFORM: has_ubo = true; break; case PL_DESC_BUF_STORAGE: has_ssbo = true; break; case PL_DESC_BUF_TEXEL_UNIFORM: has_texel = true; break; case PL_DESC_BUF_TEXEL_STORAGE: { pl_buf buf = sh->descs.elem[i].binding.object; has_nofmt |= !buf->params.format->glsl_format; has_texel = true; break; } case PL_DESC_STORAGE_IMG: { pl_tex tex = sh->descs.elem[i].binding.object; has_nofmt |= !tex->params.format->glsl_format; has_img = true; break; } case PL_DESC_SAMPLED_TEX: { pl_tex tex = sh->descs.elem[i].binding.object; has_gather |= tex->params.format->gatherable; switch (tex->sampler_type) { case PL_SAMPLER_NORMAL: break; case PL_SAMPLER_RECT: break; case PL_SAMPLER_EXTERNAL: has_ext = true; break; case PL_SAMPLER_TYPE_COUNT: pl_unreachable(); } break; } case PL_DESC_INVALID: case PL_DESC_TYPE_COUNT: pl_unreachable(); } } if (has_img && !gpu->glsl.gles) ADD(pre, "#extension GL_ARB_shader_image_load_store : enable\n"); if (has_ubo) ADD(pre, "#extension GL_ARB_uniform_buffer_object : enable\n"); if (has_ssbo) ADD(pre, "#extension GL_ARB_shader_storage_buffer_object : enable\n"); if (has_texel) ADD(pre, "#extension GL_ARB_texture_buffer_object : enable\n"); if (has_ext) { if (gpu->glsl.version >= 300) { ADD(pre, "#extension GL_OES_EGL_image_external_essl3 : enable\n"); } else { ADD(pre, "#extension GL_OES_EGL_image_external : enable\n"); } } if (has_nofmt) ADD(pre, "#extension GL_EXT_shader_image_load_formatted : enable\n"); if (has_gather && !gpu->glsl.gles) ADD(pre, "#extension GL_ARB_texture_gather : enable\n"); if (gpu->glsl.gles) { // Use 32-bit precision for floats if possible ADD(pre, "#ifdef GL_FRAGMENT_PRECISION_HIGH \n" "precision highp float; \n" "#else \n" "precision mediump float; \n" "#endif \n"); // Always use 16-bit precision for samplers ADD(pre, "precision mediump sampler2D; \n"); if (gpu->limits.max_tex_1d_dim) ADD(pre, "precision mediump sampler1D; \n"); if (gpu->limits.max_tex_3d_dim && gpu->glsl.version > 100) ADD(pre, "precision mediump sampler3D; \n"); // Integer math has a good chance of caring about precision ADD(pre, "precision highp int; \n"); } // textureLod() doesn't work on external/rect samplers, simply disable // LOD sampling in this case. We don't currently support mipmaps anyway. 
for (int i = 0; i < sh->descs.num; i++) { if (pass_params->descriptors[i].type != PL_DESC_SAMPLED_TEX) continue; pl_tex tex = sh->descs.elem[i].binding.object; if (tex->sampler_type != PL_SAMPLER_NORMAL) { ADD(pre, "#define textureLod(t, p, b) texture(t, p) \n" "#define textureLodOffset(t, p, b, o) \\\n" " textureOffset(t, p, o) \n"); break; } } // Add all of the push constants as their own element if (pass_params->push_constants_size) { // We re-use add_buffer_vars to make sure variables are sorted, this // is important because the push constants can be out-of-order in // `pass->vars` PL_ARRAY(struct pl_buffer_var) pc_bvars = {0}; for (int i = 0; i < sh->vars.num; i++) { if (pass->vars[i].type != PASS_VAR_PUSHC) continue; PL_ARRAY_APPEND(tmp, pc_bvars, (struct pl_buffer_var) { .var = sh->vars.elem[i].var, .layout = pass->vars[i].layout, }); } ADD(pre, "layout(std430, push_constant) uniform PushC "); add_buffer_vars(dp, tmp, pre, pc_bvars.elem, pc_bvars.num); } // Add all of the specialization constants for (int i = 0; i < sh->consts.num; i++) { static const char *types[PL_VAR_TYPE_COUNT] = { [PL_VAR_SINT] = "int", [PL_VAR_UINT] = "uint", [PL_VAR_FLOAT] = "float", }; const struct pl_shader_const *sc = &sh->consts.elem[i]; ADD(pre, "layout(constant_id=%"PRIu32") const %s "$" = 1; \n", pass_params->constants[i].id, types[sc->type], sh_ident_unpack(sc->name)); } static const char sampler_prefixes[PL_FMT_TYPE_COUNT] = { [PL_FMT_FLOAT] = ' ', [PL_FMT_UNORM] = ' ', [PL_FMT_SNORM] = ' ', [PL_FMT_UINT] = 'u', [PL_FMT_SINT] = 'i', }; // Add all of the required descriptors for (int i = 0; i < sh->descs.num; i++) { const struct pl_shader_desc *sd = &sh->descs.elem[i]; const struct pl_desc *desc = &pass_params->descriptors[i]; switch (desc->type) { case PL_DESC_SAMPLED_TEX: { static const char *types[][4] = { [PL_SAMPLER_NORMAL][1] = "sampler1D", [PL_SAMPLER_NORMAL][2] = "sampler2D", [PL_SAMPLER_NORMAL][3] = "sampler3D", [PL_SAMPLER_RECT][2] = "sampler2DRect", [PL_SAMPLER_EXTERNAL][2] = "samplerExternalOES", }; pl_tex tex = sd->binding.object; int dims = pl_tex_params_dimension(tex->params); const char *type = types[tex->sampler_type][dims]; char prefix = sampler_prefixes[tex->params.format->type]; ident_t id = sh_ident_unpack(desc->name); pl_assert(type && prefix); // Vulkan requires explicit bindings; GL always sets the // bindings manually to avoid relying on the user doing so if (gpu->glsl.vulkan) { ADD(pre, "layout(binding=%d) uniform %c%s "$";\n", desc->binding, prefix, type, id); } else if (gpu->glsl.gles && prefix != ' ') { ADD(pre, "uniform highp %c%s "$";\n", prefix, type, id); } else { ADD(pre, "uniform %c%s "$";\n", prefix, type, id); } break; } case PL_DESC_STORAGE_IMG: { static const char *types[] = { [1] = "image1D", [2] = "image2D", [3] = "image3D", }; // For better compatibility, we have to explicitly label the // type of data we will be reading/writing to this image. 
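/* For illustration: on a Vulkan-flavoured GLSL target, the descriptor
 * declarations emitted by this loop look roughly like
 *
 *     layout(binding=0) uniform sampler2D _v_12;                            // PL_DESC_SAMPLED_TEX
 *     layout(binding=1, rgba16f) writeonly restrict uniform image2D _v_13;  // PL_DESC_STORAGE_IMG
 *
 * where `_v_NN` stands in for whatever the mangled identifiers resolve to,
 * and `rgba16f` is the glsl_format of the bound texture. When the format has
 * no GLSL name, the format qualifier is omitted, which is exactly the case
 * the GL_EXT_shader_image_load_formatted extension above exists for.
 */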
pl_tex tex = sd->binding.object; const char *format = tex->params.format->glsl_format; int dims = pl_tex_params_dimension(tex->params); if (gpu->glsl.vulkan) { if (format) { ADD(pre, "layout(binding=%d, %s) ", desc->binding, format); } else { ADD(pre, "layout(binding=%d) ", desc->binding); } } else if (format) { ADD(pre, "layout(%s) ", format); } ADD_CONST(pre, pl_desc_access_glsl_name(desc->access)); if (sd->memory & PL_MEMORY_COHERENT) ADD(pre, " coherent"); if (sd->memory & PL_MEMORY_VOLATILE) ADD(pre, " volatile"); ADD(pre, " restrict uniform %s "$";\n", types[dims], sh_ident_unpack(desc->name)); break; } case PL_DESC_BUF_UNIFORM: if (gpu->glsl.vulkan) { ADD(pre, "layout(std140, binding=%d) ", desc->binding); } else { ADD(pre, "layout(std140) "); } ADD(pre, "uniform "$" ", sh_ident_unpack(desc->name)); add_buffer_vars(dp, tmp, pre, sd->buffer_vars, sd->num_buffer_vars); break; case PL_DESC_BUF_STORAGE: if (gpu->glsl.version >= 140) ADD(pre, "layout(std430, binding=%d) ", desc->binding); ADD_CONST(pre, pl_desc_access_glsl_name(desc->access)); if (sd->memory & PL_MEMORY_COHERENT) ADD(pre, " coherent"); if (sd->memory & PL_MEMORY_VOLATILE) ADD(pre, " volatile"); ADD(pre, " restrict buffer "$" ", sh_ident_unpack(desc->name)); add_buffer_vars(dp, tmp, pre, sd->buffer_vars, sd->num_buffer_vars); break; case PL_DESC_BUF_TEXEL_UNIFORM: { pl_buf buf = sd->binding.object; char prefix = sampler_prefixes[buf->params.format->type]; if (gpu->glsl.vulkan) ADD(pre, "layout(binding=%d) ", desc->binding); ADD(pre, "uniform %csamplerBuffer "$";\n", prefix, sh_ident_unpack(desc->name)); break; } case PL_DESC_BUF_TEXEL_STORAGE: { pl_buf buf = sd->binding.object; const char *format = buf->params.format->glsl_format; char prefix = sampler_prefixes[buf->params.format->type]; if (gpu->glsl.vulkan) { if (format) { ADD(pre, "layout(binding=%d, %s) ", desc->binding, format); } else { ADD(pre, "layout(binding=%d) ", desc->binding); } } else if (format) { ADD(pre, "layout(%s) ", format); } ADD_CONST(pre, pl_desc_access_glsl_name(desc->access)); if (sd->memory & PL_MEMORY_COHERENT) ADD(pre, " coherent"); if (sd->memory & PL_MEMORY_VOLATILE) ADD(pre, " volatile"); ADD(pre, " restrict uniform %cimageBuffer "$";\n", prefix, sh_ident_unpack(desc->name)); break; } case PL_DESC_INVALID: case PL_DESC_TYPE_COUNT: pl_unreachable(); } } // Add all of the remaining variables for (int i = 0; i < sh->vars.num; i++) { const struct pl_var *var = &sh->vars.elem[i].var; const struct pass_var *pv = &pass->vars[i]; if (pv->type != PASS_VAR_GLOBAL) continue; ADD(pre, "uniform "); add_var(pre, var); } pl_str_builder glsl = dp->tmp[TMP_MAIN]; ADD_CAT(glsl, pre); switch(pass_params->type) { case PL_PASS_RASTER: { pl_assert(params->vert_idx >= 0); pl_str_builder vert_head = dp->tmp[TMP_VERT_HEAD]; pl_str_builder vert_body = dp->tmp[TMP_VERT_BODY]; // Older GLSL doesn't support the use of explicit locations bool has_loc = gpu->glsl.version >= 430; // Set up a trivial vertex shader ADD_CAT(vert_head, pre); ADD(vert_body, "void main() {\n"); for (int i = 0; i < sh->vas.num; i++) { const struct pl_vertex_attrib *va = &pass_params->vertex_attribs[i]; const struct pl_shader_va *sva = &sh->vas.elem[i]; const char *type = va->fmt->glsl_type; // Use the pl_shader_va for the name in the fragment shader since // the pl_vertex_attrib is already mangled for the vertex shader ident_t id = sh_ident_unpack(sva->attr.name); if (has_loc) { ADD(vert_head, "layout(location=%d) in %s "$";\n", va->location, type, sh_ident_unpack(va->name)); } else { 
ADD(vert_head, "in %s "$";\n", type, sh_ident_unpack(va->name)); } if (i == params->vert_idx) { pl_assert(va->fmt->num_components == 2); ADD(vert_body, "vec2 va_pos = "$"; \n", sh_ident_unpack(va->name)); if (params->out_mat) ADD(vert_body, "va_pos = "$" * va_pos; \n", params->out_mat); if (params->out_off) ADD(vert_body, "va_pos += "$"; \n", params->out_off); ADD(vert_body, "gl_Position = vec4(va_pos, 0.0, 1.0); \n"); } else { // Everything else is just blindly passed through if (has_loc) { ADD(vert_head, "layout(location=%d) out %s "$";\n", va->location, type, id); ADD(glsl, "layout(location=%d) in %s "$";\n", va->location, type, id); } else { ADD(vert_head, "out %s "$";\n", type, id); ADD(glsl, "in %s "$";\n", type, id); } ADD(vert_body, $" = "$";\n", id, sh_ident_unpack(va->name)); } } ADD(vert_body, "}"); ADD_CAT(vert_head, vert_body); pl_hash_merge(&pass->signature, pl_str_builder_hash(vert_head)); *out_vert_builder = vert_head; if (has_loc) { ADD(glsl, "layout(location=0) out vec4 out_color;\n"); } else { ADD(glsl, "out vec4 out_color;\n"); } break; } case PL_PASS_COMPUTE: ADD(glsl, "layout (local_size_x = %d, local_size_y = %d) in;\n", sh->group_size[0], sh->group_size[1]); break; case PL_PASS_INVALID: case PL_PASS_TYPE_COUNT: pl_unreachable(); } // Set up the main shader body ADD_CAT(glsl, shader_body); ADD(glsl, "void main() {\n"); pl_assert(sh->input == PL_SHADER_SIG_NONE); switch (pass_params->type) { case PL_PASS_RASTER: pl_assert(sh->output == PL_SHADER_SIG_COLOR); ADD(glsl, "out_color = "$"();\n", sh->name); break; case PL_PASS_COMPUTE: ADD(glsl, $"();\n", sh->name); break; case PL_PASS_INVALID: case PL_PASS_TYPE_COUNT: pl_unreachable(); } ADD(glsl, "}"); pl_hash_merge(&pass->signature, pl_str_builder_hash(glsl)); *out_glsl_builder = glsl; } #undef ADD #undef ADD_CAT #define pass_age(pass) (dp->current_index - (pass)->last_index) static int cmp_pass_age(const void *ptra, const void *ptrb) { const struct pass *a = *(const struct pass **) ptra; const struct pass *b = *(const struct pass **) ptrb; return b->last_index - a->last_index; } static void garbage_collect_passes(pl_dispatch dp) { if (dp->passes.num <= dp->max_passes) return; // Garbage collect oldest passes, starting at the middle qsort(dp->passes.elem, dp->passes.num, sizeof(struct pass *), cmp_pass_age); int idx = dp->passes.num / 2; while (idx < dp->passes.num && pass_age(dp->passes.elem[idx]) < MIN_AGE) idx++; for (int i = idx; i < dp->passes.num; i++) pass_destroy(dp, dp->passes.elem[i]); int num_evicted = dp->passes.num - idx; dp->passes.num = idx; if (num_evicted) { PL_DEBUG(dp, "Evicted %d passes from dispatch cache, consider " "using more dynamic shaders", num_evicted); } else { dp->max_passes *= 2; } } static struct pass *finalize_pass(pl_dispatch dp, pl_shader sh, pl_tex target, int vert_idx, const struct pl_blend_params *blend, bool load, const struct pl_dispatch_vertex_params *vparams, const pl_transform2x2 *proj) { struct pass *pass = pl_alloc_ptr(dp, pass); *pass = (struct pass) { .signature = 0x0, // updated incrementally below .last_index = dp->current_index, .ubo_desc = { .desc = { .name = sh_ident_pack(sh_fresh(sh, "UBO")), .type = PL_DESC_BUF_UNIFORM, }, }, }; // For identifiers tied to the lifetime of this shader void *tmp = sh->tmp; struct pl_pass_params params = { .type = pl_shader_is_compute(sh) ? PL_PASS_COMPUTE : PL_PASS_RASTER, .num_descriptors = sh->descs.num, .vertex_type = vparams ? vparams->vertex_type : PL_PRIM_TRIANGLE_STRIP, .vertex_stride = vparams ? 
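/* For illustration: for the common single-quad raster path (pl_dispatch_finish
 * with no custom vertices), the vertex shader assembled by generate_shaders()
 * above boils down to something like
 *
 *     layout(location=0) in vec2 _va_0;
 *     void main() {
 *         vec2 va_pos = _va_0;
 *         gl_Position = vec4(va_pos, 0.0, 1.0);
 *     }
 *
 * with an optional `va_pos = proj * va_pos + offset;` step in between when a
 * non-identity 2x2 projection/offset was attached, plus one extra in/out pair
 * per additional vertex attribute that is simply passed through to the
 * fragment shader. (`_va_0` is a placeholder for the mangled attribute name.)
 */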
vparams->vertex_stride : 0, .blend_params = blend, }; struct generate_params gen_params = { .tmp = tmp, .pass = pass, .pass_params = ¶ms, .sh = sh, .vert_idx = vert_idx, }; if (params.type == PL_PASS_RASTER) { assert(target); params.target_format = target->params.format; params.load_target = load; // Fill in the vertex attributes array params.num_vertex_attribs = sh->vas.num; params.vertex_attribs = pl_calloc_ptr(tmp, sh->vas.num, params.vertex_attribs); int va_loc = 0; for (int i = 0; i < sh->vas.num; i++) { struct pl_vertex_attrib *va = ¶ms.vertex_attribs[i]; *va = sh->vas.elem[i].attr; // Mangle the name to make sure it doesn't conflict with the // fragment shader input, this will be converted back to a legal // string by the shader compilation code va->name = sh_ident_pack(sh_fresh(sh, "va")); // Place the vertex attribute va->location = va_loc; if (!vparams) { va->offset = params.vertex_stride; params.vertex_stride += va->fmt->texel_size; } // The number of vertex attribute locations consumed by a vertex // attribute is the number of vec4s it consumes, rounded up const size_t va_loc_size = sizeof(float[4]); va_loc += PL_DIV_UP(va->fmt->texel_size, va_loc_size); } // Hash in the raster state configuration pl_hash_merge(&pass->signature, (uint64_t) params.vertex_type); pl_hash_merge(&pass->signature, (uint64_t) params.vertex_stride); pl_hash_merge(&pass->signature, (uint64_t) params.load_target); pl_hash_merge(&pass->signature, target->params.format->signature); if (blend) { pl_static_assert(sizeof(*blend) == sizeof(enum pl_blend_mode) * 4); pl_hash_merge(&pass->signature, pl_var_hash(*blend)); } // Load projection matrix if required if (proj && memcmp(&proj->mat, &pl_matrix2x2_identity, sizeof(proj->mat)) != 0) { gen_params.out_mat = sh_var(sh, (struct pl_shader_var) { .var = pl_var_mat2("proj"), .data = PL_TRANSPOSE_2X2(proj->mat.m), }); } if (proj && (proj->c[0] || proj->c[1])) { gen_params.out_off = sh_var(sh, (struct pl_shader_var) { .var = pl_var_vec2("offset"), .data = proj->c, }); } } // Place all of the compile-time constants uint8_t *constant_data = NULL; if (sh->consts.num) { params.num_constants = sh->consts.num; params.constants = pl_alloc(tmp, sh->consts.num * sizeof(struct pl_constant)); // Compute offsets size_t total_size = 0; uint32_t const_id = 0; for (int i = 0; i < sh->consts.num; i++) { params.constants[i] = (struct pl_constant) { .type = sh->consts.elem[i].type, .id = const_id++, .offset = total_size, }; total_size += pl_var_type_size(sh->consts.elem[i].type); } // Write values into the constants buffer params.constant_data = constant_data = pl_alloc(pass, total_size); for (int i = 0; i < sh->consts.num; i++) { const struct pl_shader_const *sc = &sh->consts.elem[i]; void *data = constant_data + params.constants[i].offset; memcpy(data, sc->data, pl_var_type_size(sc->type)); } } // Place all the variables; these will dynamically end up in different // locations based on what the underlying GPU supports (UBOs, pushc, etc.) // // We go through the list twice, once to place stuff that we definitely // want inside PCs, and then a second time to opportunistically place the rest. 
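/* For illustration: combined with add_pass_var() above, the effective
 * placement priority for each shader variable is roughly
 *
 *     1. push constants  - scalars/vectors and dynamic variables first, as
 *                          long as they still fit in limits.max_pushc_size
 *     2. uniform buffer  - on GLSL >= 440 with UBO support, preferred for
 *                          bulkier static data such as matrices
 *     3. global uniforms - fallback, bounded by limits.max_variable_comps
 *
 * e.g. a small dynamic vec2 typically lands in the push constant block,
 * while a large static matrix is more likely to end up in the UBO.
 */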
pass->vars = pl_calloc_ptr(pass, sh->vars.num, pass->vars); for (int i = 0; i < sh->vars.num; i++) { if (!add_pass_var(dp, tmp, pass, ¶ms, &sh->vars.elem[i], &pass->vars[i], false)) goto error; } for (int i = 0; i < sh->vars.num; i++) { if (!add_pass_var(dp, tmp, pass, ¶ms, &sh->vars.elem[i], &pass->vars[i], true)) goto error; } // Now that we know the variable placement, finalize pushc/UBO sizes params.push_constants_size = PL_ALIGN2(params.push_constants_size, 4); size_t ubo_size = sh_buf_desc_size(&pass->ubo_desc); if (ubo_size) { pass->ubo_index = sh->descs.num; PL_ARRAY_APPEND(sh, sh->descs, pass->ubo_desc); // don't mangle names }; // Place and fill in the descriptors const int num_descs = sh->descs.num; int binding[PL_DESC_TYPE_COUNT] = {0}; params.num_descriptors = num_descs; params.descriptors = pl_calloc_ptr(tmp, num_descs, params.descriptors); for (int i = 0; i < num_descs; i++) { struct pl_desc *desc = ¶ms.descriptors[i]; *desc = sh->descs.elem[i].desc; desc->binding = binding[pl_desc_namespace(dp->gpu, desc->type)]++; } // Finalize the shader and look it up in the pass cache pl_str_builder vert_builder = NULL, glsl_builder = NULL; generate_shaders(dp, &gen_params, &vert_builder, &glsl_builder); for (int i = 0; i < dp->passes.num; i++) { struct pass *p = dp->passes.elem[i]; if (p->signature != pass->signature) continue; // Found existing shader, re-use directly if (p->ubo) sh->descs.elem[p->ubo_index].binding.object = p->ubo; pl_free(p->run_params.constant_data); p->run_params.constant_data = pl_steal(p, constant_data); p->last_index = dp->current_index; pl_free(pass); return p; } // Need to compile new shader, execute templates now if (vert_builder) { pl_str vert = pl_str_builder_exec(vert_builder); params.vertex_shader = (char *) vert.buf; } pl_str glsl = pl_str_builder_exec(glsl_builder); params.glsl_shader = (char *) glsl.buf; // Turn all shader identifiers into actual strings before passing it // to the `pl_gpu` #define FIX_IDENT(name) \ name = sh_ident_tostr(sh_ident_unpack(name)) for (int i = 0; i < params.num_variables; i++) FIX_IDENT(params.variables[i].name); for (int i = 0; i < params.num_descriptors; i++) FIX_IDENT(params.descriptors[i].name); for (int i = 0; i < params.num_vertex_attribs; i++) FIX_IDENT(params.vertex_attribs[i].name); #undef FIX_IDENT pass->pass = pl_pass_create(dp->gpu, ¶ms); if (!pass->pass) { PL_ERR(dp, "Failed creating render pass for dispatch"); // Add it anyway } struct pl_pass_run_params *rparams = &pass->run_params; rparams->pass = pass->pass; rparams->constant_data = constant_data; rparams->push_constants = pl_zalloc(pass, params.push_constants_size); rparams->desc_bindings = pl_calloc_ptr(pass, params.num_descriptors, rparams->desc_bindings); if (ubo_size && pass->pass) { // Create the UBO pass->ubo = pl_buf_create(dp->gpu, pl_buf_params( .size = ubo_size, .uniform = true, .host_writable = true, )); if (!pass->ubo) { PL_ERR(dp, "Failed creating uniform buffer for dispatch"); goto error; } sh->descs.elem[pass->ubo_index].binding.object = pass->ubo; } if (params.type == PL_PASS_RASTER && !vparams) { // Generate the vertex array placeholder rparams->vertex_count = 4; // single quad size_t vert_size = rparams->vertex_count * params.vertex_stride; rparams->vertex_data = pl_zalloc(pass, vert_size); } pass->timer = pl_timer_create(dp->gpu); PL_ARRAY_APPEND(dp, dp->passes, pass); return pass; error: pass_destroy(dp, pass); return NULL; } static void update_pass_var(pl_dispatch dp, struct pass *pass, const struct pl_shader_var *sv, struct pass_var 
*pv) { struct pl_var_layout host_layout = pl_var_host_layout(0, &sv->var); pl_assert(host_layout.size); // Use the cache to skip updates if possible if (pv->cached_data && !memcmp(sv->data, pv->cached_data, host_layout.size)) return; if (!pv->cached_data) pv->cached_data = pl_alloc(pass, host_layout.size); memcpy(pv->cached_data, sv->data, host_layout.size); struct pl_pass_run_params *rparams = &pass->run_params; switch (pv->type) { case PASS_VAR_NONE: pl_unreachable(); case PASS_VAR_GLOBAL: { struct pl_var_update vu = { .index = pv->index, .data = sv->data, }; PL_ARRAY_APPEND_RAW(pass, rparams->var_updates, rparams->num_var_updates, vu); break; } case PASS_VAR_UBO: { pl_assert(pass->ubo); const size_t offset = pv->layout.offset; if (host_layout.stride == pv->layout.stride) { pl_assert(host_layout.size == pv->layout.size); pl_buf_write(dp->gpu, pass->ubo, offset, sv->data, host_layout.size); } else { // Coalesce strided UBO write into a single pl_buf_write to avoid // unnecessary synchronization overhead by assembling the correctly // strided upload in RAM pl_grow(dp, &dp->ubo_tmp, pv->layout.size); uint8_t * const tmp = dp->ubo_tmp; const uint8_t *src = sv->data; const uint8_t *end = src + host_layout.size; uint8_t *dst = tmp; while (src < end) { memcpy(dst, src, host_layout.stride); src += host_layout.stride; dst += pv->layout.stride; } pl_buf_write(dp->gpu, pass->ubo, offset, tmp, pv->layout.size); } break; } case PASS_VAR_PUSHC: pl_assert(rparams->push_constants); memcpy_layout(rparams->push_constants, pv->layout, sv->data, host_layout); break; }; } static void compute_vertex_attribs(pl_dispatch dp, pl_shader sh, int width, int height, ident_t *out_scale) { // Simulate vertex attributes using global definitions *out_scale = sh_var(sh, (struct pl_shader_var) { .var = pl_var_vec2("out_scale"), .data = &(float[2]){ 1.0 / width, 1.0 / height }, .dynamic = true, }); GLSLP("#define frag_pos(id) (vec2(id) + vec2(0.5)) \n" "#define frag_map(id) ("$" * frag_pos(id)) \n" "#define gl_FragCoord vec4(frag_pos(gl_GlobalInvocationID), 0.0, 1.0) \n", *out_scale); for (int n = 0; n < sh->vas.num; n++) { const struct pl_shader_va *sva = &sh->vas.elem[n]; ident_t points[4]; for (int i = 0; i < PL_ARRAY_SIZE(points); i++) { points[i] = sh_var(sh, (struct pl_shader_var) { .var = pl_var_from_fmt(sva->attr.fmt, "pt"), .data = sva->data[i], }); } GLSLP("#define "$"_map(id) " "(mix(mix("$", "$", frag_map(id).x), " " mix("$", "$", frag_map(id).x), " "frag_map(id).y)) \n" "#define "$" ("$"_map(gl_GlobalInvocationID)) \n", sh_ident_unpack(sva->attr.name), points[0], points[1], points[2], points[3], sh_ident_unpack(sva->attr.name), sh_ident_unpack(sva->attr.name)); } } static void translate_compute_shader(pl_dispatch dp, pl_shader sh, const pl_rect2d *rc, const struct pl_dispatch_params *params) { int width = abs(pl_rect_w(*rc)), height = abs(pl_rect_h(*rc)); if (sh->transpose) PL_SWAP(width, height); ident_t out_scale; compute_vertex_attribs(dp, sh, width, height, &out_scale); // Simulate a framebuffer using storage images pl_assert(params->target->params.storable); pl_assert(sh->output == PL_SHADER_SIG_COLOR); ident_t fbo = sh_desc(sh, (struct pl_shader_desc) { .binding.object = params->target, .desc = { .name = "out_image", .type = PL_DESC_STORAGE_IMG, .access = params->blend_params ? 
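/* For illustration: the "simulated framebuffer" code emitted by this function
 * looks roughly as follows in the final compute shader (mangled identifiers
 * spelled out for readability):
 *
 *     ivec2 dir = ivec2(1, 1);     // render direction, hard-coded per pass
 *     ivec2 pos = base + dir * ivec2(gl_GlobalInvocationID).xy;
 *     vec2 fpos = out_scale * vec2(gl_GlobalInvocationID);
 *     if (fpos.x < 1.0 && fpos.y < 1.0) {
 *         // optional blend against imageLoad(out_image, pos) goes here
 *         imageStore(out_image, pos, color);
 *     }
 *
 * i.e. each invocation computes its target pixel explicitly, clamps against
 * the render area, and writes via image load/store instead of relying on
 * fixed-function raster output.
 */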
PL_DESC_ACCESS_READWRITE : PL_DESC_ACCESS_WRITEONLY, }, }); ident_t base = sh_var(sh, (struct pl_shader_var) { .data = &(int[2]){ rc->x0, rc->y0 }, .dynamic = true, .var = { .name = "base", .type = PL_VAR_SINT, .dim_v = 2, .dim_m = 1, .dim_a = 1, }, }); int dx = rc->x0 > rc->x1 ? -1 : 1, dy = rc->y0 > rc->y1 ? -1 : 1; GLSL("ivec2 dir = ivec2(%d, %d);\n", dx, dy); // hard-code, not worth var GLSL("ivec2 pos = "$" + dir * ivec2(gl_GlobalInvocationID).%c%c;\n", base, sh->transpose ? 'y' : 'x', sh->transpose ? 'x' : 'y'); GLSL("vec2 fpos = "$" * vec2(gl_GlobalInvocationID);\n", out_scale); GLSL("if (fpos.x < 1.0 && fpos.y < 1.0) {\n"); if (params->blend_params) { GLSL("vec4 orig = imageLoad("$", pos);\n", fbo); static const char *modes[] = { [PL_BLEND_ZERO] = "0.0", [PL_BLEND_ONE] = "1.0", [PL_BLEND_SRC_ALPHA] = "color.a", [PL_BLEND_ONE_MINUS_SRC_ALPHA] = "(1.0 - color.a)", }; GLSL("color = vec4(color.rgb * vec3(%s), color.a * %s) \n" " + vec4(orig.rgb * vec3(%s), orig.a * %s);\n", modes[params->blend_params->src_rgb], modes[params->blend_params->src_alpha], modes[params->blend_params->dst_rgb], modes[params->blend_params->dst_alpha]); } GLSL("imageStore("$", pos, color);\n", fbo); GLSL("}\n"); sh->output = PL_SHADER_SIG_NONE; } static void run_pass(pl_dispatch dp, pl_shader sh, struct pass *pass) { pl_shader_info shader = &sh->info->info; pl_pass_run(dp->gpu, &pass->run_params); for (uint64_t ts; (ts = pl_timer_query(dp->gpu, pass->timer));) { PL_TRACE(dp, "Spent %.3f ms on shader: %s", ts / 1e6, shader->description); uint64_t old = pass->samples[pass->ts_idx]; pass->samples[pass->ts_idx] = ts; pass->ts_last = ts; pass->ts_peak = PL_MAX(pass->ts_peak, ts); pass->ts_sum += ts; pass->ts_idx = (pass->ts_idx + 1) % PL_ARRAY_SIZE(pass->samples); if (old) { pass->ts_sum -= old; if (old == pass->ts_peak) { uint64_t new_peak = 0; for (int i = 0; i < PL_ARRAY_SIZE(pass->samples); i++) new_peak = PL_MAX(new_peak, pass->samples[i]); pass->ts_peak = new_peak; } } } if (!dp->info_callback) return; struct pl_dispatch_info info; info.signature = pass->signature; info.shader = shader; // Test to see if the ring buffer already wrapped around once if (pass->samples[pass->ts_idx]) { info.num_samples = PL_ARRAY_SIZE(pass->samples); int num_wrapped = info.num_samples - pass->ts_idx; memcpy(info.samples, &pass->samples[pass->ts_idx], num_wrapped * sizeof(info.samples[0])); memcpy(&info.samples[num_wrapped], pass->samples, pass->ts_idx * sizeof(info.samples[0])); } else { info.num_samples = pass->ts_idx; memcpy(info.samples, pass->samples, pass->ts_idx * sizeof(info.samples[0])); } info.last = pass->ts_last; info.peak = pass->ts_peak; info.average = pass->ts_sum / PL_MAX(info.num_samples, 1); dp->info_callback(dp->info_priv, &info); } bool pl_dispatch_finish(pl_dispatch dp, const struct pl_dispatch_params *params) { pl_shader sh = *params->shader; bool ret = false; pl_mutex_lock(&dp->lock); if (sh->failed) { PL_ERR(sh, "Trying to dispatch a failed shader."); goto error; } if (!sh->mutable) { PL_ERR(dp, "Trying to dispatch non-mutable shader?"); goto error; } if (sh->input != PL_SHADER_SIG_NONE || sh->output != PL_SHADER_SIG_COLOR) { PL_ERR(dp, "Trying to dispatch shader with incompatible signature!"); goto error; } const struct pl_tex_params *tpars = ¶ms->target->params; if (pl_tex_params_dimension(*tpars) != 2 || !tpars->renderable) { PL_ERR(dp, "Trying to dispatch a shader using an invalid target " "texture. 
The target must be a renderable 2D texture."); goto error; } const struct pl_gpu_limits *limits = &dp->gpu->limits; bool can_compute = tpars->storable; if (can_compute && params->blend_params) can_compute = tpars->format->caps & PL_FMT_CAP_READWRITE; if (pl_shader_is_compute(sh) && !can_compute) { PL_ERR(dp, "Trying to dispatch using a compute shader with a " "non-storable or incompatible target texture."); goto error; } else if (can_compute && limits->compute_queues > limits->fragment_queues) { if (sh_try_compute(sh, 16, 16, true, 0)) PL_TRACE(dp, "Upgrading fragment shader to compute shader."); } pl_rect2d rc = params->rect; if (!pl_rect_w(rc)) { rc.x0 = 0; rc.x1 = tpars->w; } if (!pl_rect_h(rc)) { rc.y0 = 0; rc.y1 = tpars->h; } int w, h, tw = abs(pl_rect_w(rc)), th = abs(pl_rect_h(rc)); if (pl_shader_output_size(sh, &w, &h) && (w != tw || h != th)) { PL_ERR(dp, "Trying to dispatch a shader with explicit output size " "requirements %dx%d%s using a target rect of size %dx%d.", w, h, sh->transpose ? " (transposed)" : "", tw, th); goto error; } int vert_idx = -1; const pl_transform2x2 *proj = NULL; if (pl_shader_is_compute(sh)) { // Translate the compute shader to simulate vertices etc. translate_compute_shader(dp, sh, &rc, params); } else { // Add the vertex information encoding the position pl_rect2df vert_rect = { .x0 = 2.0 * rc.x0 / tpars->w - 1.0, .y0 = 2.0 * rc.y0 / tpars->h - 1.0, .x1 = 2.0 * rc.x1 / tpars->w - 1.0, .y1 = 2.0 * rc.y1 / tpars->h - 1.0, }; if (sh->transpose) { static const pl_transform2x2 transpose_proj = {{{ { 0, 1 }, { 1, 0 }, }}}; proj = &transpose_proj; PL_SWAP(vert_rect.x0, vert_rect.y0); PL_SWAP(vert_rect.x1, vert_rect.y1); } sh_attr_vec2(sh, "position", &vert_rect); vert_idx = sh->vas.num - 1; } // We need to set pl_pass_params.load_target when either blending is // enabled or we're drawing to some scissored sub-rect of the texture pl_rect2d full = { 0, 0, tpars->w, tpars->h }; pl_rect2d rc_norm = rc; pl_rect2d_normalize(&rc_norm); rc_norm.x0 = PL_MAX(rc_norm.x0, 0); rc_norm.y0 = PL_MAX(rc_norm.y0, 0); rc_norm.x1 = PL_MIN(rc_norm.x1, tpars->w); rc_norm.y1 = PL_MIN(rc_norm.y1, tpars->h); bool load = params->blend_params || !pl_rect2d_eq(rc_norm, full); struct pass *pass = finalize_pass(dp, sh, params->target, vert_idx, params->blend_params, load, NULL, proj); // Silently return on failed passes if (!pass || !pass->pass) goto error; struct pl_pass_run_params *rparams = &pass->run_params; // Update the descriptor bindings for (int i = 0; i < sh->descs.num; i++) rparams->desc_bindings[i] = sh->descs.elem[i].binding; // Update all of the variables (if needed) rparams->num_var_updates = 0; for (int i = 0; i < sh->vars.num; i++) update_pass_var(dp, pass, &sh->vars.elem[i], &pass->vars[i]); // Update the vertex data if (rparams->vertex_data) { uintptr_t vert_base = (uintptr_t) rparams->vertex_data; size_t stride = rparams->pass->params.vertex_stride; for (int i = 0; i < sh->vas.num; i++) { const struct pl_shader_va *sva = &sh->vas.elem[i]; struct pl_vertex_attrib *va = &rparams->pass->params.vertex_attribs[i]; size_t size = sva->attr.fmt->texel_size; uintptr_t va_base = vert_base + va->offset; // use placed offset for (int n = 0; n < 4; n++) memcpy((void *) (va_base + n * stride), sva->data[n], size); } } // For compute shaders: also update the dispatch dimensions if (pl_shader_is_compute(sh)) { int width = abs(pl_rect_w(rc)), height = abs(pl_rect_h(rc)); if (sh->transpose) PL_SWAP(width, height); // Round up to make sure we don't leave off a part of the target int 
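/* For illustration: PL_DIV_UP rounds towards +infinity, so e.g. a 1000x600
 * render area with the 16x16 work group size used by the fragment-to-compute
 * upgrade above dispatches ceil(1000/16) x ceil(600/16) = 63 x 38 groups; the
 * `fpos` bounds check emitted in translate_compute_shader() masks off the
 * overhang of the last row/column of groups.
 */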
block_w = sh->group_size[0], block_h = sh->group_size[1], num_x = PL_DIV_UP(width, block_w), num_y = PL_DIV_UP(height, block_h); rparams->compute_groups[0] = num_x; rparams->compute_groups[1] = num_y; rparams->compute_groups[2] = 1; } else { // Update the scissors for performance rparams->scissors = rc_norm; } // Dispatch the actual shader rparams->target = params->target; rparams->timer = PL_DEF(params->timer, pass->timer); run_pass(dp, sh, pass); ret = true; // fall through error: // Reset the temporary buffers which we use to build the shader for (int i = 0; i < PL_ARRAY_SIZE(dp->tmp); i++) pl_str_builder_reset(dp->tmp[i]); pl_mutex_unlock(&dp->lock); pl_dispatch_abort(dp, params->shader); return ret; } bool pl_dispatch_compute(pl_dispatch dp, const struct pl_dispatch_compute_params *params) { pl_shader sh = *params->shader; bool ret = false; pl_mutex_lock(&dp->lock); if (sh->failed) { PL_ERR(sh, "Trying to dispatch a failed shader."); goto error; } if (!sh->mutable) { PL_ERR(dp, "Trying to dispatch non-mutable shader?"); goto error; } if (sh->input != PL_SHADER_SIG_NONE) { PL_ERR(dp, "Trying to dispatch shader with incompatible signature!"); goto error; } if (!pl_shader_is_compute(sh)) { PL_ERR(dp, "Trying to dispatch a non-compute shader using " "`pl_dispatch_compute`!"); goto error; } if (sh->vas.num) { if (!params->width || !params->height) { PL_ERR(dp, "Trying to dispatch a targetless compute shader that " "uses vertex attributes, this requires specifying the size " "of the effective rendering area!"); goto error; } compute_vertex_attribs(dp, sh, params->width, params->height, &(ident_t){0}); } struct pass *pass = finalize_pass(dp, sh, NULL, -1, NULL, false, NULL, NULL); // Silently return on failed passes if (!pass || !pass->pass) goto error; struct pl_pass_run_params *rparams = &pass->run_params; // Update the descriptor bindings for (int i = 0; i < sh->descs.num; i++) rparams->desc_bindings[i] = sh->descs.elem[i].binding; // Update all of the variables (if needed) rparams->num_var_updates = 0; for (int i = 0; i < sh->vars.num; i++) update_pass_var(dp, pass, &sh->vars.elem[i], &pass->vars[i]); // Update the dispatch size int groups = 1; for (int i = 0; i < 3; i++) { groups *= params->dispatch_size[i]; rparams->compute_groups[i] = params->dispatch_size[i]; } if (!groups) { pl_assert(params->width && params->height); int block_w = sh->group_size[0], block_h = sh->group_size[1], num_x = PL_DIV_UP(params->width, block_w), num_y = PL_DIV_UP(params->height, block_h); rparams->compute_groups[0] = num_x; rparams->compute_groups[1] = num_y; rparams->compute_groups[2] = 1; } // Dispatch the actual shader rparams->timer = PL_DEF(params->timer, pass->timer); run_pass(dp, sh, pass); ret = true; // fall through error: // Reset the temporary buffers which we use to build the shader for (int i = 0; i < PL_ARRAY_SIZE(dp->tmp); i++) pl_str_builder_reset(dp->tmp[i]); pl_mutex_unlock(&dp->lock); pl_dispatch_abort(dp, params->shader); return ret; } bool pl_dispatch_vertex(pl_dispatch dp, const struct pl_dispatch_vertex_params *params) { pl_shader sh = *params->shader; bool ret = false; pl_mutex_lock(&dp->lock); if (sh->failed) { PL_ERR(sh, "Trying to dispatch a failed shader."); goto error; } if (!sh->mutable) { PL_ERR(dp, "Trying to dispatch non-mutable shader?"); goto error; } if (sh->input != PL_SHADER_SIG_NONE || sh->output != PL_SHADER_SIG_COLOR) { PL_ERR(dp, "Trying to dispatch shader with incompatible signature!"); goto error; } const struct pl_tex_params *tpars = ¶ms->target->params; if 
(pl_tex_params_dimension(*tpars) != 2 || !tpars->renderable) { PL_ERR(dp, "Trying to dispatch a shader using an invalid target " "texture. The target must be a renderable 2D texture."); goto error; } if (pl_shader_is_compute(sh)) { PL_ERR(dp, "Trying to dispatch a compute shader using pl_dispatch_vertex."); goto error; } if (sh->vas.num) { PL_ERR(dp, "Trying to dispatch a custom vertex shader with already " "attached vertex attributes."); goto error; } if (sh->transpose) { PL_ERR(dp, "Trying to dispatch a transposed shader using " "pl_dispatch_vertex, unlikely to be correct. Erroring as a " "safety precaution!"); goto error; } int pos_idx = params->vertex_position_idx; if (pos_idx < 0 || pos_idx >= params->num_vertex_attribs) { PL_ERR(dp, "Vertex position index out of range?"); goto error; } // Attach all of the vertex attributes to the shader manually sh->vas.num = params->num_vertex_attribs; PL_ARRAY_RESIZE(sh, sh->vas, sh->vas.num); for (int i = 0; i < params->num_vertex_attribs; i++) { ident_t id = sh_fresh(sh, params->vertex_attribs[i].name); sh->vas.elem[i].attr = params->vertex_attribs[i]; sh->vas.elem[i].attr.name = sh_ident_pack(id); GLSLP("#define %s "$"\n", params->vertex_attribs[i].name, id); } // Compute the coordinate projection matrix pl_transform2x2 proj = pl_transform2x2_identity; switch (params->vertex_coords) { case PL_COORDS_ABSOLUTE: proj.mat.m[0][0] /= tpars->w; proj.mat.m[1][1] /= tpars->h; // fall through case PL_COORDS_RELATIVE: proj.mat.m[0][0] *= 2.0; proj.mat.m[1][1] *= 2.0; proj.c[0] -= 1.0; proj.c[1] -= 1.0; // fall through case PL_COORDS_NORMALIZED: if (params->vertex_flipped) { proj.mat.m[1][1] = -proj.mat.m[1][1]; proj.c[1] += 2.0; } break; } struct pass *pass = finalize_pass(dp, sh, params->target, pos_idx, params->blend_params, true, params, &proj); // Silently return on failed passes if (!pass || !pass->pass) goto error; struct pl_pass_run_params *rparams = &pass->run_params; // Update the descriptor bindings for (int i = 0; i < sh->descs.num; i++) rparams->desc_bindings[i] = sh->descs.elem[i].binding; // Update all of the variables (if needed) rparams->num_var_updates = 0; for (int i = 0; i < sh->vars.num; i++) update_pass_var(dp, pass, &sh->vars.elem[i], &pass->vars[i]); // Update the scissors rparams->scissors = params->scissors; if (params->vertex_flipped) { rparams->scissors.y0 = tpars->h - rparams->scissors.y0; rparams->scissors.y1 = tpars->h - rparams->scissors.y1; } pl_rect2d_normalize(&rparams->scissors); // Dispatch the actual shader rparams->target = params->target; rparams->vertex_count = params->vertex_count; rparams->vertex_data = params->vertex_data; rparams->vertex_buf = params->vertex_buf; rparams->buf_offset = params->buf_offset; rparams->index_data = params->index_data; rparams->index_fmt = params->index_fmt; rparams->index_buf = params->index_buf; rparams->index_offset = params->index_offset; rparams->timer = PL_DEF(params->timer, pass->timer); run_pass(dp, sh, pass); ret = true; // fall through error: // Reset the temporary buffers which we use to build the shader for (int i = 0; i < PL_ARRAY_SIZE(dp->tmp); i++) pl_str_builder_reset(dp->tmp[i]); pl_mutex_unlock(&dp->lock); pl_dispatch_abort(dp, params->shader); return ret; } void pl_dispatch_abort(pl_dispatch dp, pl_shader *psh) { pl_shader sh = *psh; if (!sh) return; // Free unused memory as early as possible sh_deref(sh); // Re-add the shader to the internal pool of shaders pl_mutex_lock(&dp->lock); PL_ARRAY_APPEND(dp, dp->shaders, sh); pl_mutex_unlock(&dp->lock); *psh = NULL; } 
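/* For illustration: the coordinate projection built in pl_dispatch_vertex()
 * above composes as follows. For PL_COORDS_ABSOLUTE, the fall-through cases
 * yield x_ndc = 2*x/w - 1 and y_ndc = 2*y/h - 1, so on a 1920x1080 target the
 * pixel (960, 540) maps to (0.0, 0.0) and (0, 0) maps to (-1.0, -1.0). With
 * vertex_flipped set, the y axis is additionally mirrored (y_ndc = 1 - 2*y/h),
 * which is also why the scissor rect has its y coordinates flipped and
 * re-normalized before the pass runs.
 */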
void pl_dispatch_reset_frame(pl_dispatch dp) { pl_mutex_lock(&dp->lock); dp->current_ident = 0; dp->current_index++; garbage_collect_passes(dp); pl_mutex_unlock(&dp->lock); } size_t pl_dispatch_save(pl_dispatch dp, uint8_t *out) { return pl_cache_save(pl_gpu_cache(dp->gpu), out, out ? SIZE_MAX : 0); } void pl_dispatch_load(pl_dispatch dp, const uint8_t *cache) { pl_cache_load(pl_gpu_cache(dp->gpu), cache, SIZE_MAX); } libplacebo-v7.349.0/src/dispatch.h000066400000000000000000000024201463457750100167310ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #pragma once #include "common.h" // Like `pl_dispatch_begin`, but has an extra `unique` parameter. If this is // true, the generated shader will be uniquely namespaced `unique` and may be // freely merged with other shaders (`sh_subpass`). Otherwise, all shaders have // the same namespace and merging them is an error. pl_shader pl_dispatch_begin_ex(pl_dispatch dp, bool unique); // Set the `dynamic_constants` field for newly created `pl_shader` objects. // // This is a private API because it's sort of clunky/stateful. void pl_dispatch_mark_dynamic(pl_dispatch dp, bool dynamic); libplacebo-v7.349.0/src/dither.c000066400000000000000000000217511463457750100164140ustar00rootroot00000000000000/* * Generate a noise texture for dithering images. * Copyright © 2013 Wessel Dankers * * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . * * The original code is taken from mpv, under the same license. 
*/ #include #include #include #include #include #include #include #include "common.h" #include void pl_generate_bayer_matrix(float *data, int size) { pl_assert(size >= 0); // Start with a single entry of 0 data[0] = 0; for (int sz = 1; sz < size; sz *= 2) { // Make three copies of the current, appropriately shifted and scaled for (int y = 0; y < sz; y ++) { for (int x = 0; x < sz; x++) { int offsets[] = {0, sz * size + sz, sz, sz * size}; int pos = y * size + x; for (int i = 1; i < 4; i++) data[pos + offsets[i]] = data[pos] + i / (4.0 * sz * sz); } } } } #define MAX_SIZEB 8 #define MAX_SIZE (1 << MAX_SIZEB) #define MAX_SIZE2 (MAX_SIZE * MAX_SIZE) typedef uint_fast32_t index_t; #define WRAP_SIZE2(k, x) ((index_t)((index_t)(x) & ((k)->size2 - 1))) #define XY(k, x, y) ((index_t)(((x) | ((y) << (k)->sizeb)))) struct ctx { unsigned int sizeb, size, size2; unsigned int gauss_radius; unsigned int gauss_middle; uint64_t gauss[MAX_SIZE2]; index_t randomat[MAX_SIZE2]; bool calcmat[MAX_SIZE2]; uint64_t gaussmat[MAX_SIZE2]; index_t unimat[MAX_SIZE2]; }; static void makegauss(struct ctx *k, unsigned int sizeb) { pl_assert(sizeb >= 1 && sizeb <= MAX_SIZEB); k->sizeb = sizeb; k->size = 1 << k->sizeb; k->size2 = k->size * k->size; k->gauss_radius = k->size / 2 - 1; k->gauss_middle = XY(k, k->gauss_radius, k->gauss_radius); unsigned int gauss_size = k->gauss_radius * 2 + 1; unsigned int gauss_size2 = gauss_size * gauss_size; for (index_t c = 0; c < k->size2; c++) k->gauss[c] = 0; double sigma = -log(1.5 / (double) UINT64_MAX * gauss_size2) / k->gauss_radius; for (index_t gy = 0; gy <= k->gauss_radius; gy++) { for (index_t gx = 0; gx <= gy; gx++) { int cx = (int)gx - k->gauss_radius; int cy = (int)gy - k->gauss_radius; int sq = cx * cx + cy * cy; double e = exp(-sqrt(sq) * sigma); uint64_t v = e / gauss_size2 * (double) UINT64_MAX; k->gauss[XY(k, gx, gy)] = k->gauss[XY(k, gy, gx)] = k->gauss[XY(k, gx, gauss_size - 1 - gy)] = k->gauss[XY(k, gy, gauss_size - 1 - gx)] = k->gauss[XY(k, gauss_size - 1 - gx, gy)] = k->gauss[XY(k, gauss_size - 1 - gy, gx)] = k->gauss[XY(k, gauss_size - 1 - gx, gauss_size - 1 - gy)] = k->gauss[XY(k, gauss_size - 1 - gy, gauss_size - 1 - gx)] = v; } } #ifndef NDEBUG uint64_t total = 0; for (index_t c = 0; c < k->size2; c++) { uint64_t oldtotal = total; total += k->gauss[c]; assert(total >= oldtotal); } #endif } static void setbit(struct ctx *k, index_t c) { if (k->calcmat[c]) return; k->calcmat[c] = true; uint64_t *m = k->gaussmat; uint64_t *me = k->gaussmat + k->size2; uint64_t *g = k->gauss + WRAP_SIZE2(k, k->gauss_middle + k->size2 - c); uint64_t *ge = k->gauss + k->size2; while (g < ge) *m++ += *g++; g = k->gauss; while (m < me) *m++ += *g++; } static index_t getmin(struct ctx *k) { uint64_t min = UINT64_MAX; index_t resnum = 0; unsigned int size2 = k->size2; for (index_t c = 0; c < size2; c++) { if (k->calcmat[c]) continue; uint64_t total = k->gaussmat[c]; if (total <= min) { if (total != min) { min = total; resnum = 0; } k->randomat[resnum++] = c; } } assert(resnum > 0); if (resnum == 1) return k->randomat[0]; if (resnum == size2) return size2 / 2; return k->randomat[rand() % resnum]; } static void makeuniform(struct ctx *k) { unsigned int size2 = k->size2; for (index_t c = 0; c < size2; c++) { index_t r = getmin(k); setbit(k, r); k->unimat[r] = c; } } void pl_generate_blue_noise(float *data, int size) { pl_assert(size > 0); int shift = PL_LOG2(size); pl_assert((1 << shift) == size); struct ctx *k = pl_zalloc_ptr(NULL, k); makegauss(k, shift); makeuniform(k); float invscale = 
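    // (each index in [0, size2) occurs exactly once in unimat, so dividing by
    // size2 below spreads the resulting noise uniformly over [0, 1))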
k->size2; for(index_t y = 0; y < k->size; y++) { for(index_t x = 0; x < k->size; x++) data[x + y * k->size] = k->unimat[XY(k, x, y)] / invscale; } pl_free(k); } const struct pl_error_diffusion_kernel pl_error_diffusion_simple = { .name = "simple", .description = "Simple error diffusion", .shift = 1, .pattern = {{0, 0, 0, 1, 0}, {0, 0, 1, 0, 0}, {0, 0, 0, 0, 0}}, .divisor = 2, }; const struct pl_error_diffusion_kernel pl_error_diffusion_false_fs = { .name = "false-fs", .description = "False Floyd-Steinberg kernel", .shift = 1, .pattern = {{0, 0, 0, 3, 0}, {0, 0, 3, 2, 0}, {0, 0, 0, 0, 0}}, .divisor = 8, }; const struct pl_error_diffusion_kernel pl_error_diffusion_sierra_lite = { .name = "sierra-lite", .description = "Sierra Lite kernel", .shift = 2, .pattern = {{0, 0, 0, 2, 0}, {0, 1, 1, 0, 0}, {0, 0, 0, 0, 0}}, .divisor = 4, }; const struct pl_error_diffusion_kernel pl_error_diffusion_floyd_steinberg = { .name = "floyd-steinberg", .description = "Floyd Steinberg kernel", .shift = 2, .pattern = {{0, 0, 0, 7, 0}, {0, 3, 5, 1, 0}, {0, 0, 0, 0, 0}}, .divisor = 16, }; const struct pl_error_diffusion_kernel pl_error_diffusion_atkinson = { .name = "atkinson", .description = "Atkinson kernel", .shift = 2, .pattern = {{0, 0, 0, 1, 1}, {0, 1, 1, 1, 0}, {0, 0, 1, 0, 0}}, .divisor = 8, }; const struct pl_error_diffusion_kernel pl_error_diffusion_jarvis_judice_ninke = { .name = "jarvis-judice-ninke", .description = "Jarvis, Judice & Ninke kernel", .shift = 3, .pattern = {{0, 0, 0, 7, 5}, {3, 5, 7, 5, 3}, {1, 3, 5, 3, 1}}, .divisor = 48, }; const struct pl_error_diffusion_kernel pl_error_diffusion_stucki = { .name = "stucki", .description = "Stucki kernel", .shift = 3, .pattern = {{0, 0, 0, 8, 4}, {2, 4, 8, 4, 2}, {1, 2, 4, 2, 1}}, .divisor = 42, }; const struct pl_error_diffusion_kernel pl_error_diffusion_burkes = { .name = "burkes", .description = "Burkes kernel", .shift = 3, .pattern = {{0, 0, 0, 8, 4}, {2, 4, 8, 4, 2}, {0, 0, 0, 0, 0}}, .divisor = 32, }; const struct pl_error_diffusion_kernel pl_error_diffusion_sierra2 = { .name = "sierra-2", .description = "Two-row Sierra", .shift = 3, .pattern = {{0, 0, 0, 4, 3}, {1, 2, 3, 2, 1}, {0, 0, 0, 0, 0}}, .divisor = 16, }; const struct pl_error_diffusion_kernel pl_error_diffusion_sierra3 = { .name = "sierra-3", .description = "Three-row Sierra", .shift = 3, .pattern = {{0, 0, 0, 5, 3}, {2, 4, 5, 4, 2}, {0, 2, 3, 2, 0}}, .divisor = 32, }; const struct pl_error_diffusion_kernel * const pl_error_diffusion_kernels[] = { &pl_error_diffusion_simple, &pl_error_diffusion_false_fs, &pl_error_diffusion_sierra_lite, &pl_error_diffusion_floyd_steinberg, &pl_error_diffusion_atkinson, &pl_error_diffusion_jarvis_judice_ninke, &pl_error_diffusion_stucki, &pl_error_diffusion_burkes, &pl_error_diffusion_sierra2, &pl_error_diffusion_sierra3, NULL }; const int pl_num_error_diffusion_kernels = PL_ARRAY_SIZE(pl_error_diffusion_kernels) - 1; // Find the error diffusion kernel with the given name, or NULL on failure. const struct pl_error_diffusion_kernel *pl_find_error_diffusion_kernel(const char *name) { for (int i = 0; i < pl_num_error_diffusion_kernels; i++) { if (strcmp(name, pl_error_diffusion_kernels[i]->name) == 0) return pl_error_diffusion_kernels[i]; } return NULL; } libplacebo-v7.349.0/src/dummy.c000066400000000000000000000252071463457750100162700ustar00rootroot00000000000000/* * This file is part of libplacebo. 
* * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #include #include #include "gpu.h" #include const struct pl_gpu_dummy_params pl_gpu_dummy_default_params = { PL_GPU_DUMMY_DEFAULTS }; static const struct pl_gpu_fns pl_fns_dummy; struct priv { struct pl_gpu_fns impl; struct pl_gpu_dummy_params params; }; pl_gpu pl_gpu_dummy_create(pl_log log, const struct pl_gpu_dummy_params *params) { params = PL_DEF(params, &pl_gpu_dummy_default_params); struct pl_gpu_t *gpu = pl_zalloc_obj(NULL, gpu, struct priv); gpu->log = log; gpu->glsl = params->glsl; gpu->limits = params->limits; struct priv *p = PL_PRIV(gpu); p->impl = pl_fns_dummy; p->params = *params; // Forcibly override these, because we know for sure what the values are gpu->limits.align_tex_xfer_pitch = 1; gpu->limits.align_tex_xfer_offset = 1; gpu->limits.align_vertex_stride = 1; // Set up the dummy formats, add one for each possible format type that we // can represent on the host PL_ARRAY(pl_fmt) formats = {0}; for (enum pl_fmt_type type = 1; type < PL_FMT_TYPE_COUNT; type++) { for (int comps = 1; comps <= 4; comps++) { for (int depth = 8; depth < 128; depth *= 2) { if (type == PL_FMT_FLOAT && depth < 16) continue; static const char *cnames[] = { [1] = "r", [2] = "rg", [3] = "rgb", [4] = "rgba", }; static const char *tnames[] = { [PL_FMT_UNORM] = "", [PL_FMT_SNORM] = "s", [PL_FMT_UINT] = "u", [PL_FMT_SINT] = "i", [PL_FMT_FLOAT] = "f", }; const char *tname = tnames[type]; if (type == PL_FMT_FLOAT && depth == 16) tname = "hf"; struct pl_fmt_t *fmt = pl_alloc_ptr(gpu, fmt); *fmt = (struct pl_fmt_t) { .name = pl_asprintf(fmt, "%s%d%s", cnames[comps], depth, tname), .type = type, .num_components = comps, .opaque = false, .gatherable = true, .internal_size = comps * depth / 8, .texel_size = comps * depth / 8, .texel_align = 1, .caps = PL_FMT_CAP_SAMPLEABLE | PL_FMT_CAP_LINEAR | PL_FMT_CAP_RENDERABLE | PL_FMT_CAP_BLENDABLE | PL_FMT_CAP_VERTEX | PL_FMT_CAP_HOST_READABLE, }; for (int i = 0; i < comps; i++) { fmt->component_depth[i] = depth; fmt->host_bits[i] = depth; fmt->sample_order[i] = i; } if (gpu->glsl.compute) fmt->caps |= PL_FMT_CAP_STORABLE; if (gpu->limits.max_buffer_texels && gpu->limits.max_ubo_size) fmt->caps |= PL_FMT_CAP_TEXEL_UNIFORM; if (gpu->limits.max_buffer_texels && gpu->limits.max_ssbo_size) fmt->caps |= PL_FMT_CAP_TEXEL_STORAGE; fmt->glsl_type = pl_var_glsl_type_name(pl_var_from_fmt(fmt, "")); fmt->glsl_format = pl_fmt_glsl_format(fmt, comps); fmt->fourcc = pl_fmt_fourcc(fmt); if (!fmt->glsl_format) fmt->caps &= ~(PL_FMT_CAP_STORABLE | PL_FMT_CAP_TEXEL_STORAGE); PL_ARRAY_APPEND(gpu, formats, fmt); } } } gpu->formats = formats.elem; gpu->num_formats = formats.num; return pl_gpu_finalize(gpu); } static void dumb_destroy(pl_gpu gpu) { pl_free((void *) gpu); } void pl_gpu_dummy_destroy(pl_gpu *gpu) { pl_gpu_destroy(*gpu); *gpu = NULL; } struct buf_priv { uint8_t *data; }; static pl_buf dumb_buf_create(pl_gpu gpu, const struct pl_buf_params *params) { struct pl_buf_t 
*buf = pl_zalloc_obj(NULL, buf, struct buf_priv); buf->params = *params; buf->params.initial_data = NULL; struct buf_priv *p = PL_PRIV(buf); p->data = malloc(params->size); if (!p->data) { PL_ERR(gpu, "Failed allocating memory for dummy buffer!"); pl_free(buf); return NULL; } if (params->initial_data) memcpy(p->data, params->initial_data, params->size); if (params->host_mapped) buf->data = p->data; return buf; } static void dumb_buf_destroy(pl_gpu gpu, pl_buf buf) { struct buf_priv *p = PL_PRIV(buf); free(p->data); pl_free((void *) buf); } uint8_t *pl_buf_dummy_data(pl_buf buf) { struct buf_priv *p = PL_PRIV(buf); return p->data; } static void dumb_buf_write(pl_gpu gpu, pl_buf buf, size_t buf_offset, const void *data, size_t size) { struct buf_priv *p = PL_PRIV(buf); memcpy(p->data + buf_offset, data, size); } static bool dumb_buf_read(pl_gpu gpu, pl_buf buf, size_t buf_offset, void *dest, size_t size) { struct buf_priv *p = PL_PRIV(buf); memcpy(dest, p->data + buf_offset, size); return true; } static void dumb_buf_copy(pl_gpu gpu, pl_buf dst, size_t dst_offset, pl_buf src, size_t src_offset, size_t size) { struct buf_priv *dstp = PL_PRIV(dst); struct buf_priv *srcp = PL_PRIV(src); memcpy(dstp->data + dst_offset, srcp->data + src_offset, size); } struct tex_priv { void *data; }; static size_t tex_size(pl_gpu gpu, pl_tex tex) { size_t size = tex->params.format->texel_size * tex->params.w; size *= PL_DEF(tex->params.h, 1); size *= PL_DEF(tex->params.d, 1); return size; } static pl_tex dumb_tex_create(pl_gpu gpu, const struct pl_tex_params *params) { struct pl_tex_t *tex = pl_zalloc_obj(NULL, tex, void *); tex->params = *params; tex->params.initial_data = NULL; struct tex_priv *p = PL_PRIV(tex); p->data = malloc(tex_size(gpu, tex)); if (!p->data) { PL_ERR(gpu, "Failed allocating memory for dummy texture!"); pl_free(tex); return NULL; } if (params->initial_data) memcpy(p->data, params->initial_data, tex_size(gpu, tex)); return tex; } pl_tex pl_tex_dummy_create(pl_gpu gpu, const struct pl_tex_dummy_params *params) { // Only do minimal sanity checking, since this is just a dummy texture pl_assert(params->format && params->w >= 0 && params->h >= 0 && params->d >= 0); struct pl_tex_t *tex = pl_zalloc_obj(NULL, tex, struct tex_priv); tex->sampler_type = params->sampler_type; tex->params = (struct pl_tex_params) { .w = params->w, .h = params->h, .d = params->d, .format = params->format, .sampleable = true, .user_data = params->user_data, }; return tex; } static void dumb_tex_destroy(pl_gpu gpu, pl_tex tex) { struct tex_priv *p = PL_PRIV(tex); if (p->data) free(p->data); pl_free((void *) tex); } uint8_t *pl_tex_dummy_data(pl_tex tex) { struct tex_priv *p = PL_PRIV(tex); return p->data; } static bool dumb_tex_upload(pl_gpu gpu, const struct pl_tex_transfer_params *params) { pl_tex tex = params->tex; struct tex_priv *p = PL_PRIV(tex); pl_assert(p->data); const uint8_t *src = params->ptr; uint8_t *dst = p->data; if (params->buf) { struct buf_priv *bufp = PL_PRIV(params->buf); src = (uint8_t *) bufp->data + params->buf_offset; } size_t texel_size = tex->params.format->texel_size; size_t row_size = pl_rect_w(params->rc) * texel_size; for (int z = params->rc.z0; z < params->rc.z1; z++) { size_t src_plane = z * params->depth_pitch; size_t dst_plane = z * tex->params.h * tex->params.w * texel_size; for (int y = params->rc.y0; y < params->rc.y1; y++) { size_t src_row = src_plane + y * params->row_pitch; size_t dst_row = dst_plane + y * tex->params.w * texel_size; size_t pos = params->rc.x0 * texel_size; 
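            // Copy one row of the transfer rectangle: the destination (the dummy
            // texture's backing store) is tightly packed at w * texel_size bytes
            // per row, while the source honors the caller-provided row/depth pitches.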
memcpy(&dst[dst_row + pos], &src[src_row + pos], row_size); } } return true; } static bool dumb_tex_download(pl_gpu gpu, const struct pl_tex_transfer_params *params) { pl_tex tex = params->tex; struct tex_priv *p = PL_PRIV(tex); pl_assert(p->data); const uint8_t *src = p->data; uint8_t *dst = params->ptr; if (params->buf) { struct buf_priv *bufp = PL_PRIV(params->buf); dst = (uint8_t *) bufp->data + params->buf_offset; } size_t texel_size = tex->params.format->texel_size; size_t row_size = pl_rect_w(params->rc) * texel_size; for (int z = params->rc.z0; z < params->rc.z1; z++) { size_t src_plane = z * tex->params.h * tex->params.w * texel_size; size_t dst_plane = z * params->depth_pitch; for (int y = params->rc.y0; y < params->rc.y1; y++) { size_t src_row = src_plane + y * tex->params.w * texel_size; size_t dst_row = dst_plane + y * params->row_pitch; size_t pos = params->rc.x0 * texel_size; memcpy(&dst[dst_row + pos], &src[src_row + pos], row_size); } } return true; } static int dumb_desc_namespace(pl_gpu gpu, enum pl_desc_type type) { return 0; // safest behavior: never alias bindings } static pl_pass dumb_pass_create(pl_gpu gpu, const struct pl_pass_params *params) { PL_ERR(gpu, "Creating render passes is not supported for dummy GPUs"); return NULL; } static void dumb_gpu_finish(pl_gpu gpu) { // no-op } static const struct pl_gpu_fns pl_fns_dummy = { .destroy = dumb_destroy, .buf_create = dumb_buf_create, .buf_destroy = dumb_buf_destroy, .buf_write = dumb_buf_write, .buf_read = dumb_buf_read, .buf_copy = dumb_buf_copy, .tex_create = dumb_tex_create, .tex_destroy = dumb_tex_destroy, .tex_upload = dumb_tex_upload, .tex_download = dumb_tex_download, .desc_namespace = dumb_desc_namespace, .pass_create = dumb_pass_create, .gpu_finish = dumb_gpu_finish, }; libplacebo-v7.349.0/src/filters.c000066400000000000000000000762671463457750100166210ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ /* * Some of the filter code originally derives (via mpv) from Glumpy: * # Copyright (c) 2009-2016 Nicolas P. Rougier. All rights reserved. * # Distributed under the (new) BSD License. * (https://github.com/glumpy/glumpy/blob/master/glumpy/library/build-spatial-filters.py) * * The math underlying each filter function was written from scratch, with * some algorithms coming from a number of different sources, including: * - https://en.wikipedia.org/wiki/Window_function * - https://en.wikipedia.org/wiki/Jinc * - http://vector-agg.cvs.sourceforge.net/viewvc/vector-agg/agg-2.5/include/agg_image_filters.h * - Vapoursynth plugin fmtconv (WTFPL Licensed), which is based on * dither plugin for avisynth from the same author: * https://github.com/vapoursynth/fmtconv/tree/master/src/fmtc * - Paul Heckbert's "zoom" * - XBMC: ConvolutionKernels.cpp etc. 
* - https://github.com/AviSynth/jinc-resize (only used to verify the math) */ #include #include "common.h" #include "filters.h" #include "log.h" #ifdef PL_HAVE_WIN32 #define j1 _j1 #endif bool pl_filter_function_eq(const struct pl_filter_function *a, const struct pl_filter_function *b) { return (a ? a->weight : NULL) == (b ? b->weight : NULL); } bool pl_filter_config_eq(const struct pl_filter_config *a, const struct pl_filter_config *b) { if (!a || !b) return a == b; bool eq = pl_filter_function_eq(a->kernel, b->kernel) && pl_filter_function_eq(a->window, b->window) && a->radius == b->radius && a->clamp == b->clamp && a->blur == b->blur && a->taper == b->taper && a->polar == b->polar && a->antiring == b->antiring; for (int i = 0; i < PL_FILTER_MAX_PARAMS; i++) { if (a->kernel->tunable[i]) eq &= a->params[i] == b->params[i]; if (a->window && a->window->tunable[i]) eq &= a->wparams[i] == b->wparams[i]; } return eq; } double pl_filter_sample(const struct pl_filter_config *c, double x) { const float radius = pl_filter_radius_bound(c); // All filters are symmetric, and in particular only need to be defined // for [0, radius]. x = fabs(x); // Return early for values outside of the kernel radius, since the functions // are not necessarily valid outside of this interval. No such check is // needed for the window, because it's always stretched to fit. if (x > radius) return 0.0; // Apply the blur and taper coefficients as needed double kx = x <= c->taper ? 0.0 : (x - c->taper) / (1.0 - c->taper / radius); if (c->blur > 0.0) kx /= c->blur; pl_assert(!c->kernel->opaque); double k = c->kernel->weight(&(const struct pl_filter_ctx) { .radius = radius, .params = { c->kernel->tunable[0] ? c->params[0] : c->kernel->params[0], c->kernel->tunable[1] ? c->params[1] : c->kernel->params[1], }, }, kx); // Apply the optional windowing function if (c->window) { pl_assert(!c->window->opaque); double wx = x / radius * c->window->radius; k *= c->window->weight(&(struct pl_filter_ctx) { .radius = c->window->radius, .params = { c->window->tunable[0] ? c->wparams[0] : c->window->params[0], c->window->tunable[1] ? c->wparams[1] : c->window->params[1], }, }, wx); } return k < 0 ? (1 - c->clamp) * k : k; } static void filter_cutoffs(const struct pl_filter_config *c, float cutoff, float *out_radius, float *out_radius_zero) { const float bound = pl_filter_radius_bound(c); float prev = 0.0, fprev = pl_filter_sample(c, prev); bool found_root = false; const float step = 1e-2f; for (float x = 0.0; x < bound + step; x += step) { float fx = pl_filter_sample(c, x); if ((fprev > cutoff && fx <= cutoff) || (fprev < -cutoff && fx >= -cutoff)) { // Found zero crossing float root = x - fx * (x - prev) / (fx - fprev); // secant method root = fminf(root, bound); *out_radius = root; if (!found_root) // first root *out_radius_zero = root; found_root = true; } prev = x; fprev = fx; } if (!found_root) *out_radius_zero = *out_radius = bound; } // Compute a single row of weights for a given filter in one dimension, indexed // by the indicated subpixel offset. Writes `f->row_size` values to `out`. static void compute_row(struct pl_filter_t *f, double offset, float *out) { double wsum = 0.0; for (int i = 0; i < f->row_size; i++) { // For the example of a filter with row size 4 and offset 0.3, we have: // // 0 1 * 2 3 // // * indicates the sampled position. What we want to compute is the // distance from each index to that sampled position. 
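        // Concretely, with row size 4 the center index is base = 1, so for offset
        // 0.3 the sampled position sits at 1.3 and the distances handed to
        // pl_filter_sample() below are -1.3, -0.3, 0.7 and 1.7.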
pl_assert(f->row_size % 2 == 0); const int base = f->row_size / 2 - 1; // index to the left of the center const double center = base + offset; // offset of center relative to idx 0 double w = pl_filter_sample(&f->params.config, i - center); out[i] = w; wsum += w; } // Readjust weights to preserve energy pl_assert(wsum > 0); for (int i = 0; i < f->row_size; i++) out[i] /= wsum; } // Needed for backwards compatibility with v1 configuration API static struct pl_filter_function *dupfilter(void *alloc, const struct pl_filter_function *f) { return f ? pl_memdup(alloc, (void *)f, sizeof(*f)) : NULL; } pl_filter pl_filter_generate(pl_log log, const struct pl_filter_params *params) { pl_assert(params); if (params->lut_entries <= 0 || !params->config.kernel) { pl_fatal(log, "Invalid params: missing lut_entries or config.kernel"); return NULL; } if (params->config.kernel->opaque) { pl_err(log, "Trying to use opaque kernel '%s' in non-opaque context!", params->config.kernel->name); return NULL; } if (params->config.window && params->config.window->opaque) { pl_err(log, "Trying to use opaque window '%s' in non-opaque context!", params->config.window->name); return NULL; } struct pl_filter_t *f = pl_zalloc_ptr(NULL, f); f->params = *params; f->params.config.kernel = dupfilter(f, params->config.kernel); f->params.config.window = dupfilter(f, params->config.window); // Compute main lobe and total filter size filter_cutoffs(¶ms->config, params->cutoff, &f->radius, &f->radius_zero); f->radius_cutoff = f->radius; // backwards compatibility float *weights; if (params->config.polar) { // Compute a 1D array indexed by radius weights = pl_alloc(f, params->lut_entries * sizeof(float)); for (int i = 0; i < params->lut_entries; i++) { double x = f->radius * i / (params->lut_entries - 1); weights[i] = pl_filter_sample(¶ms->config, x); } } else { // Pick the most appropriate row size f->row_size = ceilf(f->radius) * 2; if (params->max_row_size && f->row_size > params->max_row_size) { pl_info(log, "Required filter size %d exceeds the maximum allowed " "size of %d. 
This may result in adverse effects (aliasing, " "or moiré artifacts).", f->row_size, params->max_row_size); f->row_size = params->max_row_size; f->insufficient = true; } f->row_stride = PL_ALIGN(f->row_size, params->row_stride_align); // Compute a 2D array indexed by the subpixel position weights = pl_calloc(f, params->lut_entries * f->row_stride, sizeof(float)); for (int i = 0; i < params->lut_entries; i++) { compute_row(f, i / (double)(params->lut_entries - 1), weights + f->row_stride * i); } } f->weights = weights; return f; } void pl_filter_free(pl_filter *filter) { pl_free_ptr((void **) filter); } // Built-in filter functions static double box(const struct pl_filter_ctx *f, double x) { return 1.0; } const struct pl_filter_function pl_filter_function_box = { .weight = box, .name = "box", .radius = 1.0, .resizable = true, }; static const struct pl_filter_function filter_function_dirichlet = { .name = "dirichlet", // alias .weight = box, .radius = 1.0, .resizable = true, }; static double triangle(const struct pl_filter_ctx *f, double x) { return 1.0 - x / f->radius; } const struct pl_filter_function pl_filter_function_triangle = { .name = "triangle", .weight = triangle, .radius = 1.0, .resizable = true, }; static double cosine(const struct pl_filter_ctx *f, double x) { return cos(x); } const struct pl_filter_function pl_filter_function_cosine = { .name = "cosine", .weight = cosine, .radius = M_PI / 2.0, }; static double hann(const struct pl_filter_ctx *f, double x) { return 0.5 + 0.5 * cos(M_PI * x); } const struct pl_filter_function pl_filter_function_hann = { .name = "hann", .weight = hann, .radius = 1.0, }; static const struct pl_filter_function filter_function_hanning = { .name = "hanning", // alias .weight = hann, .radius = 1.0, }; static double hamming(const struct pl_filter_ctx *f, double x) { return 0.54 + 0.46 * cos(M_PI * x); } const struct pl_filter_function pl_filter_function_hamming = { .name = "hamming", .weight = hamming, .radius = 1.0, }; static double welch(const struct pl_filter_ctx *f, double x) { return 1.0 - x * x; } const struct pl_filter_function pl_filter_function_welch = { .name = "welch", .weight = welch, .radius = 1.0, }; static double bessel_i0(double x) { double s = 1.0; double y = x * x / 4.0; double t = y; int i = 2; while (t > 1e-12) { s += t; t *= y / (i * i); i += 1; } return s; } static double kaiser(const struct pl_filter_ctx *f, double x) { double alpha = fmax(f->params[0], 0.0); double scale = bessel_i0(alpha); return bessel_i0(alpha * sqrt(1.0 - x * x)) / scale; } const struct pl_filter_function pl_filter_function_kaiser = { .name = "kaiser", .weight = kaiser, .radius = 1.0, .params = {2.0}, .tunable = {true}, }; static double blackman(const struct pl_filter_ctx *f, double x) { double a = f->params[0]; double a0 = (1 - a) / 2.0, a1 = 1 / 2.0, a2 = a / 2.0; x *= M_PI; return a0 + a1 * cos(x) + a2 * cos(2 * x); } const struct pl_filter_function pl_filter_function_blackman = { .name = "blackman", .weight = blackman, .radius = 1.0, .params = {0.16}, .tunable = {true}, }; static double bohman(const struct pl_filter_ctx *f, double x) { double pix = M_PI * x; return (1.0 - x) * cos(pix) + sin(pix) / M_PI; } const struct pl_filter_function pl_filter_function_bohman = { .name = "bohman", .weight = bohman, .radius = 1.0, }; static double gaussian(const struct pl_filter_ctx *f, double x) { return exp(-2.0 * x * x / f->params[0]); } const struct pl_filter_function pl_filter_function_gaussian = { .name = "gaussian", .weight = gaussian, .radius = 2.0, .resizable 
= true, .params = {1.0}, .tunable = {true}, }; static double quadratic(const struct pl_filter_ctx *f, double x) { if (x < 0.5) { return 1.0 - 4.0/3.0 * (x * x); } else { return 2.0 / 3.0 * (x - 1.5) * (x - 1.5); } } const struct pl_filter_function pl_filter_function_quadratic = { .name = "quadratic", .weight = quadratic, .radius = 1.5, }; static const struct pl_filter_function filter_function_quadric = { .name = "quadric", // alias .weight = quadratic, .radius = 1.5, }; static double sinc(const struct pl_filter_ctx *f, double x) { if (x < 1e-8) return 1.0; x *= M_PI; return sin(x) / x; } const struct pl_filter_function pl_filter_function_sinc = { .name = "sinc", .weight = sinc, .radius = 1.0, .resizable = true, }; static double jinc(const struct pl_filter_ctx *f, double x) { if (x < 1e-8) return 1.0; x *= M_PI; return 2.0 * j1(x) / x; } const struct pl_filter_function pl_filter_function_jinc = { .name = "jinc", .weight = jinc, .radius = 1.2196698912665045, // first zero .resizable = true, }; static double sphinx(const struct pl_filter_ctx *f, double x) { if (x < 1e-8) return 1.0; x *= M_PI; return 3.0 * (sin(x) - x * cos(x)) / (x * x * x); } const struct pl_filter_function pl_filter_function_sphinx = { .name = "sphinx", .weight = sphinx, .radius = 1.4302966531242027, // first zero .resizable = true, }; static double cubic(const struct pl_filter_ctx *f, double x) { const double b = f->params[0], c = f->params[1]; double p0 = 6.0 - 2.0 * b, p2 = -18.0 + 12.0 * b + 6.0 * c, p3 = 12.0 - 9.0 * b - 6.0 * c, q0 = 8.0 * b + 24.0 * c, q1 = -12.0 * b - 48.0 * c, q2 = 6.0 * b + 30.0 * c, q3 = -b - 6.0 * c; if (x < 1.0) { return (p0 + x * x * (p2 + x * p3)) / p0; } else { return (q0 + x * (q1 + x * (q2 + x * q3))) / p0; } } const struct pl_filter_function pl_filter_function_cubic = { .name = "cubic", .weight = cubic, .radius = 2.0, .params = {1.0, 0.0}, .tunable = {true, true}, }; const struct pl_filter_function pl_filter_function_hermite = { .name = "hermite", .weight = cubic, .radius = 1.0, .params = {0.0, 0.0}, }; const struct pl_filter_function pl_filter_function_bicubic = { .name = "bicubic", .weight = cubic, .radius = 2.0, .params = {1.0, 0.0}, .tunable = {true, true}, }; const struct pl_filter_function pl_filter_function_bcspline = { .name = "bcspline", .weight = cubic, .radius = 2.0, .params = {1.0, 0.0}, .tunable = {true, true}, }; const struct pl_filter_function pl_filter_function_catmull_rom = { .name = "catmull_rom", .weight = cubic, .radius = 2.0, .params = {0.0, 0.5}, .tunable = {true, true}, }; const struct pl_filter_function pl_filter_function_mitchell = { .name = "mitchell", .weight = cubic, .radius = 2.0, .params = {1/3.0, 1/3.0}, .tunable = {true, true}, }; const struct pl_filter_function pl_filter_function_robidoux = { .name = "robidoux", .weight = cubic, .radius = 2.0, .params = {12 / (19 + 9 * M_SQRT2), 113 / (58 + 216 * M_SQRT2)}, .tunable = {true, true}, }; const struct pl_filter_function pl_filter_function_robidouxsharp = { .name = "robidouxsharp", .weight = cubic, .radius = 2.0, .params = {6 / (13 + 7 * M_SQRT2), 7 / (2 + 12 * M_SQRT2)}, .tunable = {true, true}, }; static double spline16(const struct pl_filter_ctx *f, double x) { if (x < 1.0) { return ((x - 9.0/5.0 ) * x - 1.0/5.0 ) * x + 1.0; } else { return ((-1.0/3.0 * (x-1) + 4.0/5.0) * (x-1) - 7.0/15.0 ) * (x-1); } } const struct pl_filter_function pl_filter_function_spline16 = { .name = "spline16", .weight = spline16, .radius = 2.0, }; static double spline36(const struct pl_filter_ctx *f, double x) { if (x < 1.0) { 
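        // The three branches below are the spline36 kernel's cubic pieces for
        // x in [0,1), [1,2) and [2,3), each written in Horner form around its
        // left knot (x, x-1 and x-2 respectively).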
return ((13.0/11.0 * x - 453.0/209.0) * x - 3.0/209.0) * x + 1.0; } else if (x < 2.0) { return ((-6.0/11.0 * (x-1) + 270.0/209.0) * (x-1) - 156.0/ 209.0) * (x-1); } else { return ((1.0/11.0 * (x-2) - 45.0/209.0) * (x-2) + 26.0/209.0) * (x-2); } } const struct pl_filter_function pl_filter_function_spline36 = { .name = "spline36", .weight = spline36, .radius = 3.0, }; static double spline64(const struct pl_filter_ctx *f, double x) { if (x < 1.0) { return ((49.0/41.0 * x - 6387.0/2911.0) * x - 3.0/2911.0) * x + 1.0; } else if (x < 2.0) { return ((-24.0/41.0 * (x-1) + 4032.0/2911.0) * (x-1) - 2328.0/2911.0) * (x-1); } else if (x < 3.0) { return ((6.0/41.0 * (x-2) - 1008.0/2911.0) * (x-2) + 582.0/2911.0) * (x-2); } else { return ((-1.0/41.0 * (x-3) + 168.0/2911.0) * (x-3) - 97.0/2911.0) * (x-3); } } const struct pl_filter_function pl_filter_function_spline64 = { .name = "spline64", .weight = spline64, .radius = 4.0, }; static double oversample(const struct pl_filter_ctx *f, double x) { return 0.0; } const struct pl_filter_function pl_filter_function_oversample = { .name = "oversample", .weight = oversample, .params = {0.0}, .tunable = {true}, .opaque = true, }; const struct pl_filter_function * const pl_filter_functions[] = { &pl_filter_function_box, &filter_function_dirichlet, // alias &pl_filter_function_triangle, &pl_filter_function_cosine, &pl_filter_function_hann, &filter_function_hanning, // alias &pl_filter_function_hamming, &pl_filter_function_welch, &pl_filter_function_kaiser, &pl_filter_function_blackman, &pl_filter_function_bohman, &pl_filter_function_gaussian, &pl_filter_function_quadratic, &filter_function_quadric, // alias &pl_filter_function_sinc, &pl_filter_function_jinc, &pl_filter_function_sphinx, &pl_filter_function_cubic, &pl_filter_function_hermite, &pl_filter_function_bicubic, &pl_filter_function_bcspline, &pl_filter_function_catmull_rom, &pl_filter_function_mitchell, &pl_filter_function_robidoux, &pl_filter_function_robidouxsharp, &pl_filter_function_spline16, &pl_filter_function_spline36, &pl_filter_function_spline64, &pl_filter_function_oversample, NULL, }; const int pl_num_filter_functions = PL_ARRAY_SIZE(pl_filter_functions) - 1; const struct pl_filter_function *pl_find_filter_function(const char *name) { if (!name) return NULL; for (int i = 0; i < pl_num_filter_functions; i++) { if (strcmp(name, pl_filter_functions[i]->name) == 0) return pl_filter_functions[i]; } return NULL; } // Built-in filter function configs const struct pl_filter_config pl_filter_spline16 = { .name = "spline16", .description = "Spline (2 taps)", .kernel = &pl_filter_function_spline16, .allowed = PL_FILTER_ALL, }; const struct pl_filter_config pl_filter_spline36 = { .name = "spline36", .description = "Spline (3 taps)", .kernel = &pl_filter_function_spline36, .allowed = PL_FILTER_ALL, }; const struct pl_filter_config pl_filter_spline64 = { .name = "spline64", .description = "Spline (4 taps)", .kernel = &pl_filter_function_spline64, .allowed = PL_FILTER_ALL, }; const struct pl_filter_config pl_filter_nearest = { .name = "nearest", .description = "Nearest neighbor", .kernel = &pl_filter_function_box, .radius = 0.5, .allowed = PL_FILTER_UPSCALING, .recommended = PL_FILTER_UPSCALING, }; const struct pl_filter_config pl_filter_box = { .name = "box", .description = "Box averaging", .kernel = &pl_filter_function_box, .radius = 0.5, .allowed = PL_FILTER_SCALING, .recommended = PL_FILTER_DOWNSCALING, }; const struct pl_filter_config pl_filter_bilinear = { .name = "bilinear", .description = "Bilinear", 
.kernel = &pl_filter_function_triangle, .allowed = PL_FILTER_ALL, .recommended = PL_FILTER_SCALING, }; const struct pl_filter_config filter_linear = { .name = "linear", .description = "Linear mixing", .kernel = &pl_filter_function_triangle, .allowed = PL_FILTER_FRAME_MIXING, .recommended = PL_FILTER_FRAME_MIXING, }; static const struct pl_filter_config filter_triangle = { .name = "triangle", .kernel = &pl_filter_function_triangle, .allowed = PL_FILTER_SCALING, }; const struct pl_filter_config pl_filter_gaussian = { .name = "gaussian", .description = "Gaussian", .kernel = &pl_filter_function_gaussian, .params = {1.0}, .allowed = PL_FILTER_ALL, .recommended = PL_FILTER_SCALING, }; const struct pl_filter_config pl_filter_sinc = { .name = "sinc", .description = "Sinc (unwindowed)", .kernel = &pl_filter_function_sinc, .radius = 3.0, .allowed = PL_FILTER_ALL, }; const struct pl_filter_config pl_filter_lanczos = { .name = "lanczos", .description = "Lanczos", .kernel = &pl_filter_function_sinc, .window = &pl_filter_function_sinc, .radius = 3.0, .allowed = PL_FILTER_ALL, .recommended = PL_FILTER_SCALING, }; const struct pl_filter_config pl_filter_ginseng = { .name = "ginseng", .description = "Ginseng (Jinc-Sinc)", .kernel = &pl_filter_function_sinc, .window = &pl_filter_function_jinc, .radius = 3.0, .allowed = PL_FILTER_ALL, }; #define JINC_ZERO3 3.2383154841662362076499 #define JINC_ZERO4 4.2410628637960698819573 const struct pl_filter_config pl_filter_ewa_jinc = { .name = "ewa_jinc", .description = "EWA Jinc (unwindowed)", .kernel = &pl_filter_function_jinc, .radius = JINC_ZERO3, .polar = true, .allowed = PL_FILTER_SCALING, }; const struct pl_filter_config pl_filter_ewa_lanczos = { .name = "ewa_lanczos", .description = "Jinc (EWA Lanczos)", .kernel = &pl_filter_function_jinc, .window = &pl_filter_function_jinc, .radius = JINC_ZERO3, .polar = true, .allowed = PL_FILTER_SCALING, .recommended = PL_FILTER_UPSCALING, }; const struct pl_filter_config pl_filter_ewa_lanczossharp = { .name = "ewa_lanczossharp", .description = "Sharpened Jinc", .kernel = &pl_filter_function_jinc, .window = &pl_filter_function_jinc, .radius = JINC_ZERO3, .blur = 0.98125058372237073562493, .polar = true, .allowed = PL_FILTER_SCALING, .recommended = PL_FILTER_UPSCALING, }; const struct pl_filter_config pl_filter_ewa_lanczos4sharpest = { .name = "ewa_lanczos4sharpest", .description = "Sharpened Jinc-AR, 4 taps", .kernel = &pl_filter_function_jinc, .window = &pl_filter_function_jinc, .radius = JINC_ZERO4, .blur = 0.88451209326050047745788, .antiring = 0.8, .polar = true, .allowed = PL_FILTER_SCALING, .recommended = PL_FILTER_UPSCALING, }; const struct pl_filter_config pl_filter_ewa_ginseng = { .name = "ewa_ginseng", .description = "EWA Ginseng", .kernel = &pl_filter_function_jinc, .window = &pl_filter_function_sinc, .radius = JINC_ZERO3, .polar = true, .allowed = PL_FILTER_SCALING, }; const struct pl_filter_config pl_filter_ewa_hann = { .name = "ewa_hann", .description = "EWA Hann", .kernel = &pl_filter_function_jinc, .window = &pl_filter_function_hann, .radius = JINC_ZERO3, .polar = true, .allowed = PL_FILTER_SCALING, }; static const struct pl_filter_config filter_ewa_hanning = { .name = "ewa_hanning", .kernel = &pl_filter_function_jinc, .window = &pl_filter_function_hann, .radius = JINC_ZERO3, .polar = true, .allowed = PL_FILTER_SCALING, }; // Spline family const struct pl_filter_config pl_filter_bicubic = { .name = "bicubic", .description = "Bicubic", .kernel = &pl_filter_function_cubic, .params = {1.0, 0.0}, .allowed = 
PL_FILTER_SCALING, .recommended = PL_FILTER_SCALING, }; static const struct pl_filter_config filter_cubic = { .name = "cubic", .description = "Cubic", .kernel = &pl_filter_function_cubic, .params = {1.0, 0.0}, .allowed = PL_FILTER_FRAME_MIXING, }; const struct pl_filter_config pl_filter_hermite = { .name = "hermite", .description = "Hermite", .kernel = &pl_filter_function_hermite, .allowed = PL_FILTER_ALL, .recommended = PL_FILTER_DOWNSCALING | PL_FILTER_FRAME_MIXING, }; const struct pl_filter_config pl_filter_catmull_rom = { .name = "catmull_rom", .description = "Catmull-Rom", .kernel = &pl_filter_function_cubic, .params = {0.0, 0.5}, .allowed = PL_FILTER_ALL, .recommended = PL_FILTER_SCALING, }; const struct pl_filter_config pl_filter_mitchell = { .name = "mitchell", .description = "Mitchell-Netravali", .kernel = &pl_filter_function_cubic, .params = {1/3.0, 1/3.0}, .allowed = PL_FILTER_ALL, .recommended = PL_FILTER_DOWNSCALING, }; const struct pl_filter_config pl_filter_mitchell_clamp = { .name = "mitchell_clamp", .description = "Mitchell (clamped)", .kernel = &pl_filter_function_cubic, .params = {1/3.0, 1/3.0}, .clamp = 1.0, .allowed = PL_FILTER_ALL, }; const struct pl_filter_config pl_filter_robidoux = { .name = "robidoux", .description = "Robidoux", .kernel = &pl_filter_function_cubic, .params = {12 / (19 + 9 * M_SQRT2), 113 / (58 + 216 * M_SQRT2)}, .allowed = PL_FILTER_ALL, }; const struct pl_filter_config pl_filter_robidouxsharp = { .name = "robidouxsharp", .description = "RobidouxSharp", .kernel = &pl_filter_function_cubic, .params = {6 / (13 + 7 * M_SQRT2), 7 / (2 + 12 * M_SQRT2)}, .allowed = PL_FILTER_ALL, }; const struct pl_filter_config pl_filter_ewa_robidoux = { .name = "ewa_robidoux", .description = "EWA Robidoux", .kernel = &pl_filter_function_cubic, .params = {12 / (19 + 9 * M_SQRT2), 113 / (58 + 216 * M_SQRT2)}, .polar = true, .allowed = PL_FILTER_SCALING, }; const struct pl_filter_config pl_filter_ewa_robidouxsharp = { .name = "ewa_robidouxsharp", .description = "EWA RobidouxSharp", .kernel = &pl_filter_function_cubic, .params = {6 / (13 + 7 * M_SQRT2), 7 / (2 + 12 * M_SQRT2)}, .polar = true, .allowed = PL_FILTER_SCALING, }; const struct pl_filter_config pl_filter_oversample = { .name = "oversample", .description = "Oversampling", .kernel = &pl_filter_function_oversample, .params = {0.0}, .allowed = PL_FILTER_UPSCALING | PL_FILTER_FRAME_MIXING, .recommended = PL_FILTER_UPSCALING | PL_FILTER_FRAME_MIXING, }; const struct pl_filter_config * const pl_filter_configs[] = { // Sorted roughly in terms of priority / relevance &pl_filter_bilinear, &filter_triangle, // alias &filter_linear, // pseudo-alias (frame mixing only) &pl_filter_nearest, &pl_filter_spline16, &pl_filter_spline36, &pl_filter_spline64, &pl_filter_lanczos, &pl_filter_ewa_lanczos, &pl_filter_ewa_lanczossharp, &pl_filter_ewa_lanczos4sharpest, &pl_filter_bicubic, &filter_cubic, // pseudo-alias (frame mixing only) &pl_filter_hermite, &pl_filter_gaussian, &pl_filter_oversample, &pl_filter_mitchell, &pl_filter_mitchell_clamp, &pl_filter_sinc, &pl_filter_ginseng, &pl_filter_ewa_jinc, &pl_filter_ewa_ginseng, &pl_filter_ewa_hann, &filter_ewa_hanning, // alias &pl_filter_catmull_rom, &pl_filter_robidoux, &pl_filter_robidouxsharp, &pl_filter_ewa_robidoux, &pl_filter_ewa_robidouxsharp, NULL, }; const int pl_num_filter_configs = PL_ARRAY_SIZE(pl_filter_configs) - 1; const struct pl_filter_config * pl_find_filter_config(const char *name, enum pl_filter_usage usage) { if (!name) return NULL; for (int i = 0; i < 
pl_num_filter_configs; i++) { if ((pl_filter_configs[i]->allowed & usage) != usage) continue; if (strcmp(name, pl_filter_configs[i]->name) == 0) return pl_filter_configs[i]; } return NULL; } // Backwards compatibility with older API const struct pl_filter_function_preset pl_filter_function_presets[] = { {"none", NULL}, {"box", &pl_filter_function_box}, {"dirichlet", &filter_function_dirichlet}, // alias {"triangle", &pl_filter_function_triangle}, {"cosine", &pl_filter_function_cosine}, {"hann", &pl_filter_function_hann}, {"hanning", &filter_function_hanning}, // alias {"hamming", &pl_filter_function_hamming}, {"welch", &pl_filter_function_welch}, {"kaiser", &pl_filter_function_kaiser}, {"blackman", &pl_filter_function_blackman}, {"bohman", &pl_filter_function_bohman}, {"gaussian", &pl_filter_function_gaussian}, {"quadratic", &pl_filter_function_quadratic}, {"quadric", &filter_function_quadric}, // alias {"sinc", &pl_filter_function_sinc}, {"jinc", &pl_filter_function_jinc}, {"sphinx", &pl_filter_function_sphinx}, {"cubic", &pl_filter_function_cubic}, {"hermite", &pl_filter_function_hermite}, {"bicubic", &pl_filter_function_bicubic}, {"bcspline", &pl_filter_function_bcspline}, {"catmull_rom", &pl_filter_function_catmull_rom}, // alias {"mitchell", &pl_filter_function_mitchell}, {"robidoux", &pl_filter_function_robidoux}, {"robidouxsharp", &pl_filter_function_robidouxsharp}, {"spline16", &pl_filter_function_spline16}, {"spline36", &pl_filter_function_spline36}, {"spline64", &pl_filter_function_spline64}, {0}, }; const int pl_num_filter_function_presets = PL_ARRAY_SIZE(pl_filter_function_presets) - 1; const struct pl_filter_function_preset *pl_find_filter_function_preset(const char *name) { if (!name) return NULL; for (int i = 0; pl_filter_function_presets[i].name; i++) { if (strcmp(pl_filter_function_presets[i].name, name) == 0) return &pl_filter_function_presets[i]; } return NULL; } const struct pl_filter_preset *pl_find_filter_preset(const char *name) { if (!name) return NULL; for (int i = 0; pl_filter_presets[i].name; i++) { if (strcmp(pl_filter_presets[i].name, name) == 0) return &pl_filter_presets[i]; } return NULL; } const struct pl_filter_preset pl_filter_presets[] = { {"none", NULL, "Built-in sampling"}, COMMON_FILTER_PRESETS, {0} }; const int pl_num_filter_presets = PL_ARRAY_SIZE(pl_filter_presets) - 1; libplacebo-v7.349.0/src/filters.h000066400000000000000000000066411463457750100166130ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #pragma once #include static inline float pl_filter_radius_bound(const struct pl_filter_config *c) { const float r = c->radius && c->kernel->resizable ? c->radius : c->kernel->radius; return c->blur > 0.0 ? 
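           // (pl_filter_sample() divides the kernel argument by `blur`, so a blur
           //  factor stretches the effective support by the same amount)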
r * c->blur : r; } #define COMMON_FILTER_PRESETS \ /* Highest priority / recommended filters */ \ {"bilinear", &pl_filter_bilinear, "Bilinear"}, \ {"nearest", &pl_filter_nearest, "Nearest neighbour"}, \ {"bicubic", &pl_filter_bicubic, "Bicubic"}, \ {"lanczos", &pl_filter_lanczos, "Lanczos"}, \ {"ewa_lanczos", &pl_filter_ewa_lanczos, "Jinc (EWA Lanczos)"}, \ {"ewa_lanczossharp", &pl_filter_ewa_lanczossharp, "Sharpened Jinc"}, \ {"ewa_lanczos4sharpest",&pl_filter_ewa_lanczos4sharpest, "Sharpened Jinc-AR, 4 taps"},\ {"gaussian", &pl_filter_gaussian, "Gaussian"}, \ {"spline16", &pl_filter_spline16, "Spline (2 taps)"}, \ {"spline36", &pl_filter_spline36, "Spline (3 taps)"}, \ {"spline64", &pl_filter_spline64, "Spline (4 taps)"}, \ {"mitchell", &pl_filter_mitchell, "Mitchell-Netravali"}, \ \ /* Remaining filters */ \ {"sinc", &pl_filter_sinc, "Sinc (unwindowed)"}, \ {"ginseng", &pl_filter_ginseng, "Ginseng (Jinc-Sinc)"}, \ {"ewa_jinc", &pl_filter_ewa_jinc, "EWA Jinc (unwindowed)"}, \ {"ewa_ginseng", &pl_filter_ewa_ginseng, "EWA Ginseng"}, \ {"ewa_hann", &pl_filter_ewa_hann, "EWA Hann"}, \ {"hermite", &pl_filter_hermite, "Hermite"}, \ {"catmull_rom", &pl_filter_catmull_rom, "Catmull-Rom"}, \ {"robidoux", &pl_filter_robidoux, "Robidoux"}, \ {"robidouxsharp", &pl_filter_robidouxsharp, "RobidouxSharp"}, \ {"ewa_robidoux", &pl_filter_ewa_robidoux, "EWA Robidoux"}, \ {"ewa_robidouxsharp", &pl_filter_ewa_robidouxsharp, "EWA RobidouxSharp"}, \ \ /* Aliases */ \ {"triangle", &pl_filter_bilinear}, \ {"ewa_hanning", &pl_filter_ewa_hann} libplacebo-v7.349.0/src/format.c000066400000000000000000000141411463457750100164200ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #include #include "common.h" void pl_str_append_asprintf_c(void *alloc, pl_str *str, const char *fmt, ...) 
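// (thin va_list wrapper around pl_str_append_vasprintf_c below, which
// implements the small printf-style subset supported here:
// %%, %s, %.*s, %c, %d, %hx, %u, %lld/%llu, %zu and %f)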
{ va_list ap; va_start(ap, fmt); pl_str_append_vasprintf_c(alloc, str, fmt, ap); va_end(ap); } void pl_str_append_vasprintf_c(void *alloc, pl_str *str, const char *fmt, va_list ap) { for (const char *c; (c = strchr(fmt, '%')) != NULL; fmt = c + 1) { // Append the preceding string literal pl_str_append_raw(alloc, str, fmt, c - fmt); c++; // skip '%' char buf[32]; int len; // The format character follows the % sign switch (c[0]) { case '%': pl_str_append_raw(alloc, str, c, 1); continue; case 's': { const char *arg = va_arg(ap, const char *); pl_str_append_raw(alloc, str, arg, strlen(arg)); continue; } case '.': { // only used for %.*s assert(c[1] == '*'); assert(c[2] == 's'); len = va_arg(ap, int); pl_str_append_raw(alloc, str, va_arg(ap, char *), len); c += 2; // skip '*s' continue; } case 'c': buf[0] = (char) va_arg(ap, int); len = 1; break; case 'd': len = pl_str_print_int(buf, sizeof(buf), va_arg(ap, int)); break; case 'h': ; // only used for %hx assert(c[1] == 'x'); len = pl_str_print_hex(buf, sizeof(buf), (unsigned short) va_arg(ap, unsigned int)); c++; break; case 'u': len = pl_str_print_uint(buf, sizeof(buf), va_arg(ap, unsigned int)); break; case 'l': assert(c[1] == 'l'); switch (c[2]) { case 'u': len = pl_str_print_uint64(buf, sizeof(buf), va_arg(ap, unsigned long long)); break; case 'd': len = pl_str_print_int64(buf, sizeof(buf), va_arg(ap, long long)); break; default: pl_unreachable(); } c += 2; break; case 'z': assert(c[1] == 'u'); len = pl_str_print_uint64(buf, sizeof(buf), va_arg(ap, size_t)); c++; break; case 'f': len = pl_str_print_double(buf, sizeof(buf), va_arg(ap, double)); break; default: fprintf(stderr, "Invalid conversion character: '%c'!\n", c[0]); abort(); } pl_str_append_raw(alloc, str, buf, len); } // Append the remaining string literal pl_str_append(alloc, str, pl_str0(fmt)); } size_t pl_str_append_memprintf_c(void *alloc, pl_str *str, const char *fmt, const void *args) { const uint8_t *ptr = args; for (const char *c; (c = strchr(fmt, '%')) != NULL; fmt = c + 1) { pl_str_append_raw(alloc, str, fmt, c - fmt); c++; char buf[32]; int len; #define LOAD(var) \ do { \ memcpy(&(var), ptr, sizeof(var)); \ ptr += sizeof(var); \ } while (0) switch (c[0]) { case '%': pl_str_append_raw(alloc, str, c, 1); continue; case 's': { len = strlen((const char *) ptr); pl_str_append_raw(alloc, str, ptr, len); ptr += len + 1; // also skip \0 continue; } case '.': { assert(c[1] == '*'); assert(c[2] == 's'); LOAD(len); pl_str_append_raw(alloc, str, ptr, len); ptr += len; // no trailing \0 c += 2; continue; } case 'c': LOAD(buf[0]); len = 1; break; case 'd': ; int d; LOAD(d); len = pl_str_print_int(buf, sizeof(buf), d); break; case 'h': ; assert(c[1] == 'x'); unsigned short hx; LOAD(hx); len = pl_str_print_hex(buf, sizeof(buf), hx); c++; break; case 'u': ; unsigned u; LOAD(u); len = pl_str_print_uint(buf, sizeof(buf), u); break; case 'l': assert(c[1] == 'l'); switch (c[2]) { case 'u': ; long long unsigned llu; LOAD(llu); len = pl_str_print_uint64(buf, sizeof(buf), llu); break; case 'd': ; long long int lld; LOAD(lld); len = pl_str_print_int64(buf, sizeof(buf), lld); break; default: pl_unreachable(); } c += 2; break; case 'z': ; assert(c[1] == 'u'); size_t zu; LOAD(zu); len = pl_str_print_uint64(buf, sizeof(buf), zu); c++; break; case 'f': ; double f; LOAD(f); len = pl_str_print_double(buf, sizeof(buf), f); break; default: fprintf(stderr, "Invalid conversion character: '%c'!\n", c[0]); abort(); } pl_str_append_raw(alloc, str, buf, len); } #undef LOAD pl_str_append(alloc, str, pl_str0(fmt)); 
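    // The value returned below is the number of bytes consumed from the packed
    // `args` blob, i.e. how far `ptr` has advanced past `args`.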
return (uintptr_t) ptr - (uintptr_t) args; } libplacebo-v7.349.0/src/gamut_mapping.c000066400000000000000000001304511463457750100177630ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #include #include "common.h" #include "colorspace.h" #include "pl_thread.h" #include #define fclampf(x, lo, hi) fminf(fmaxf(x, lo), hi) static void fix_constants(struct pl_gamut_map_constants *c) { c->perceptual_deadzone = fclampf(c->perceptual_deadzone, 0.0f, 1.0f); c->perceptual_strength = fclampf(c->perceptual_strength, 0.0f, 1.0f); c->colorimetric_gamma = fclampf(c->colorimetric_gamma, 0.0f, 10.0f); c->softclip_knee = fclampf(c->softclip_knee, 0.0f, 1.0f); c->softclip_desat = fclampf(c->softclip_desat, 0.0f, 1.0f); } static inline bool constants_equal(const struct pl_gamut_map_constants *a, const struct pl_gamut_map_constants *b) { pl_static_assert(sizeof(*a) % sizeof(float) == 0); return !memcmp(a, b, sizeof(*a)); } bool pl_gamut_map_params_equal(const struct pl_gamut_map_params *a, const struct pl_gamut_map_params *b) { return a->function == b->function && a->min_luma == b->min_luma && a->max_luma == b->max_luma && a->lut_size_I == b->lut_size_I && a->lut_size_C == b->lut_size_C && a->lut_size_h == b->lut_size_h && a->lut_stride == b->lut_stride && constants_equal(&a->constants, &b->constants) && pl_raw_primaries_equal(&a->input_gamut, &b->input_gamut) && pl_raw_primaries_equal(&a->output_gamut, &b->output_gamut); } #define FUN(params) (params->function ? 
*params->function : pl_gamut_map_clip) static void noop(float *lut, const struct pl_gamut_map_params *params); bool pl_gamut_map_params_noop(const struct pl_gamut_map_params *params) { if (FUN(params).map == &noop) return true; struct pl_raw_primaries src = params->input_gamut, dst = params->output_gamut; if (!pl_primaries_compatible(&dst, &src)) return true; bool need_map = !pl_primaries_superset(&dst, &src); need_map |= !pl_cie_xy_equal(&src.white, &dst.white); if (FUN(params).bidirectional) need_map |= !pl_raw_primaries_equal(&dst, &src); return !need_map; } // For some minimal type safety, and code cleanliness struct RGB { float R, G, B; }; struct IPT { float I, P, T; }; struct ICh { float I, C, h; }; static inline struct ICh ipt2ich(struct IPT c) { return (struct ICh) { .I = c.I, .C = sqrtf(c.P * c.P + c.T * c.T), .h = atan2f(c.T, c.P), }; } static inline struct IPT ich2ipt(struct ICh c) { return (struct IPT) { .I = c.I, .P = c.C * cosf(c.h), .T = c.C * sinf(c.h), }; } enum { PQ_LUT_SIZE = 1024 }; static const float pq_eotf_lut[1024+1] = { 0.0000000e+00f, 4.0422718e-09f, 1.3111372e-08f, 2.6236826e-08f, 4.3151495e-08f, 6.3746885e-08f, 8.7982383e-08f, 1.1585362e-07f, 1.4737819e-07f, 1.8258818e-07f, 2.2152586e-07f, 2.6424098e-07f, 3.1078907e-07f, 3.6123021e-07f, 4.1562821e-07f, 4.7405001e-07f, 5.3656521e-07f, 6.0324583e-07f, 6.7416568e-07f, 7.4940095e-07f, 8.2902897e-07f, 9.1312924e-07f, 1.0017822e-06f, 1.0950702e-06f, 1.1930764e-06f, 1.2958861e-06f, 1.4035847e-06f, 1.5162600e-06f, 1.6340000e-06f, 1.7568948e-06f, 1.8850346e-06f, 2.0185119e-06f, 2.1574192e-06f, 2.3018509e-06f, 2.4519029e-06f, 2.6076704e-06f, 2.7692516e-06f, 2.9367449e-06f, 3.1102509e-06f, 3.2898690e-06f, 3.4757019e-06f, 3.6678526e-06f, 3.8664261e-06f, 4.0715262e-06f, 4.2832601e-06f, 4.5017354e-06f, 4.7270617e-06f, 4.9593473e-06f, 5.1987040e-06f, 5.4452441e-06f, 5.6990819e-06f, 5.9603301e-06f, 6.2291055e-06f, 6.5055251e-06f, 6.7897080e-06f, 7.0817717e-06f, 7.3818379e-06f, 7.6900283e-06f, 8.0064675e-06f, 8.3312774e-06f, 8.6645849e-06f, 9.0065169e-06f, 9.3572031e-06f, 9.7167704e-06f, 1.0085351e-05f, 1.0463077e-05f, 1.0850082e-05f, 1.1246501e-05f, 1.1652473e-05f, 1.2068130e-05f, 1.2493614e-05f, 1.2929066e-05f, 1.3374626e-05f, 1.3830439e-05f, 1.4296648e-05f, 1.4773401e-05f, 1.5260848e-05f, 1.5759132e-05f, 1.6268405e-05f, 1.6788821e-05f, 1.7320534e-05f, 1.7863697e-05f, 1.8418467e-05f, 1.8985004e-05f, 1.9563470e-05f, 2.0154019e-05f, 2.0756818e-05f, 2.1372031e-05f, 2.1999824e-05f, 2.2640365e-05f, 2.3293824e-05f, 2.3960372e-05f, 2.4640186e-05f, 2.5333431e-05f, 2.6040288e-05f, 2.6760935e-05f, 2.7495552e-05f, 2.8244319e-05f, 2.9007421e-05f, 2.9785041e-05f, 3.0577373e-05f, 3.1384594e-05f, 3.2206899e-05f, 3.3044481e-05f, 3.3897533e-05f, 3.4766253e-05f, 3.5650838e-05f, 3.6551487e-05f, 3.7468409e-05f, 3.8401794e-05f, 3.9351855e-05f, 4.0318799e-05f, 4.1302836e-05f, 4.2304177e-05f, 4.3323036e-05f, 4.4359629e-05f, 4.5414181e-05f, 4.6486897e-05f, 4.7578006e-05f, 4.8687732e-05f, 4.9816302e-05f, 5.0963944e-05f, 5.2130889e-05f, 5.3317369e-05f, 5.4523628e-05f, 5.5749886e-05f, 5.6996391e-05f, 5.8263384e-05f, 5.9551111e-05f, 6.0859816e-05f, 6.2189750e-05f, 6.3541162e-05f, 6.4914307e-05f, 6.6309439e-05f, 6.7726819e-05f, 6.9166705e-05f, 7.0629384e-05f, 7.2115077e-05f, 7.3624074e-05f, 7.5156646e-05f, 7.6713065e-05f, 7.8293608e-05f, 7.9898553e-05f, 8.1528181e-05f, 8.3182776e-05f, 8.4862623e-05f, 8.6568012e-05f, 8.8299235e-05f, 9.0056585e-05f, 9.1840360e-05f, 9.3650860e-05f, 9.5488388e-05f, 9.7353277e-05f, 9.9245779e-05f, 1.0116623e-04f, 
1.0311496e-04f, 1.0509226e-04f, 1.0709847e-04f, 1.0913391e-04f, 1.1119889e-04f, 1.1329376e-04f, 1.1541885e-04f, 1.1757448e-04f, 1.1976100e-04f, 1.2197875e-04f, 1.2422807e-04f, 1.2650931e-04f, 1.2882282e-04f, 1.3116900e-04f, 1.3354812e-04f, 1.3596059e-04f, 1.3840676e-04f, 1.4088701e-04f, 1.4340170e-04f, 1.4595121e-04f, 1.4853593e-04f, 1.5115622e-04f, 1.5381247e-04f, 1.5650507e-04f, 1.5923442e-04f, 1.6200090e-04f, 1.6480492e-04f, 1.6764688e-04f, 1.7052718e-04f, 1.7344629e-04f, 1.7640451e-04f, 1.7940233e-04f, 1.8244015e-04f, 1.8551840e-04f, 1.8863752e-04f, 1.9179792e-04f, 1.9500006e-04f, 1.9824437e-04f, 2.0153130e-04f, 2.0486129e-04f, 2.0823479e-04f, 2.1165227e-04f, 2.1511419e-04f, 2.1862101e-04f, 2.2217319e-04f, 2.2577128e-04f, 2.2941563e-04f, 2.3310679e-04f, 2.3684523e-04f, 2.4063146e-04f, 2.4446597e-04f, 2.4834925e-04f, 2.5228182e-04f, 2.5626417e-04f, 2.6029683e-04f, 2.6438031e-04f, 2.6851514e-04f, 2.7270184e-04f, 2.7694094e-04f, 2.8123299e-04f, 2.8557852e-04f, 2.8997815e-04f, 2.9443230e-04f, 2.9894159e-04f, 3.0350657e-04f, 3.0812783e-04f, 3.1280593e-04f, 3.1754144e-04f, 3.2233495e-04f, 3.2718705e-04f, 3.3209833e-04f, 3.3706938e-04f, 3.4210082e-04f, 3.4719324e-04f, 3.5234727e-04f, 3.5756351e-04f, 3.6284261e-04f, 3.6818526e-04f, 3.7359195e-04f, 3.7906340e-04f, 3.8460024e-04f, 3.9020315e-04f, 3.9587277e-04f, 4.0160977e-04f, 4.0741483e-04f, 4.1328861e-04f, 4.1923181e-04f, 4.2524511e-04f, 4.3132921e-04f, 4.3748480e-04f, 4.4371260e-04f, 4.5001332e-04f, 4.5638768e-04f, 4.6283650e-04f, 4.6936032e-04f, 4.7595999e-04f, 4.8263624e-04f, 4.8938982e-04f, 4.9622151e-04f, 5.0313205e-04f, 5.1012223e-04f, 5.1719283e-04f, 5.2434463e-04f, 5.3157843e-04f, 5.3889502e-04f, 5.4629521e-04f, 5.5377982e-04f, 5.6134968e-04f, 5.6900560e-04f, 5.7674843e-04f, 5.8457900e-04f, 5.9249818e-04f, 6.0050682e-04f, 6.0860578e-04f, 6.1679595e-04f, 6.2507819e-04f, 6.3345341e-04f, 6.4192275e-04f, 6.5048661e-04f, 6.5914616e-04f, 6.6790231e-04f, 6.7675600e-04f, 6.8570816e-04f, 6.9475975e-04f, 7.0391171e-04f, 7.1316500e-04f, 7.2252060e-04f, 7.3197948e-04f, 7.4154264e-04f, 7.5121107e-04f, 7.6098577e-04f, 7.7086777e-04f, 7.8085807e-04f, 7.9095772e-04f, 8.0116775e-04f, 8.1148922e-04f, 8.2192318e-04f, 8.3247071e-04f, 8.4313287e-04f, 8.5391076e-04f, 8.6480548e-04f, 8.7581812e-04f, 8.8694982e-04f, 8.9820168e-04f, 9.0957485e-04f, 9.2107048e-04f, 9.3268971e-04f, 9.4443372e-04f, 9.5630368e-04f, 9.6830115e-04f, 9.8042658e-04f, 9.9268155e-04f, 1.0050673e-03f, 1.0175850e-03f, 1.0302359e-03f, 1.0430213e-03f, 1.0559425e-03f, 1.0690006e-03f, 1.0821970e-03f, 1.0955331e-03f, 1.1090100e-03f, 1.1226290e-03f, 1.1363917e-03f, 1.1502992e-03f, 1.1643529e-03f, 1.1785542e-03f, 1.1929044e-03f, 1.2074050e-03f, 1.2220573e-03f, 1.2368628e-03f, 1.2518229e-03f, 1.2669390e-03f, 1.2822125e-03f, 1.2976449e-03f, 1.3132377e-03f, 1.3289925e-03f, 1.3449105e-03f, 1.3609935e-03f, 1.3772429e-03f, 1.3936602e-03f, 1.4102470e-03f, 1.4270054e-03f, 1.4439360e-03f, 1.4610407e-03f, 1.4783214e-03f, 1.4957794e-03f, 1.5134166e-03f, 1.5312345e-03f, 1.5492348e-03f, 1.5674192e-03f, 1.5857894e-03f, 1.6043471e-03f, 1.6230939e-03f, 1.6420317e-03f, 1.6611622e-03f, 1.6804871e-03f, 1.7000083e-03f, 1.7197275e-03f, 1.7396465e-03f, 1.7597672e-03f, 1.7800914e-03f, 1.8006210e-03f, 1.8213578e-03f, 1.8423038e-03f, 1.8634608e-03f, 1.8848308e-03f, 1.9064157e-03f, 1.9282175e-03f, 1.9502381e-03f, 1.9724796e-03f, 1.9949439e-03f, 2.0176331e-03f, 2.0405492e-03f, 2.0636950e-03f, 2.0870711e-03f, 2.1106805e-03f, 2.1345250e-03f, 2.1586071e-03f, 2.1829286e-03f, 2.2074919e-03f, 2.2322992e-03f, 2.2573525e-03f, 
2.2826542e-03f, 2.3082066e-03f, 2.3340118e-03f, 2.3600721e-03f, 2.3863900e-03f, 2.4129676e-03f, 2.4398074e-03f, 2.4669117e-03f, 2.4942828e-03f, 2.5219233e-03f, 2.5498355e-03f, 2.5780219e-03f, 2.6064849e-03f, 2.6352271e-03f, 2.6642509e-03f, 2.6935589e-03f, 2.7231536e-03f, 2.7530377e-03f, 2.7832137e-03f, 2.8136843e-03f, 2.8444520e-03f, 2.8755196e-03f, 2.9068898e-03f, 2.9385662e-03f, 2.9705496e-03f, 3.0028439e-03f, 3.0354517e-03f, 3.0683758e-03f, 3.1016192e-03f, 3.1351846e-03f, 3.1690750e-03f, 3.2032932e-03f, 3.2378422e-03f, 3.2727250e-03f, 3.3079445e-03f, 3.3435038e-03f, 3.3794058e-03f, 3.4156537e-03f, 3.4522505e-03f, 3.4891993e-03f, 3.5265034e-03f, 3.5641658e-03f, 3.6021897e-03f, 3.6405785e-03f, 3.6793353e-03f, 3.7184634e-03f, 3.7579661e-03f, 3.7978468e-03f, 3.8381088e-03f, 3.8787555e-03f, 3.9197904e-03f, 3.9612169e-03f, 4.0030385e-03f, 4.0452587e-03f, 4.0878810e-03f, 4.1309104e-03f, 4.1743478e-03f, 4.2181981e-03f, 4.2624651e-03f, 4.3071525e-03f, 4.3522639e-03f, 4.3978031e-03f, 4.4437739e-03f, 4.4901803e-03f, 4.5370259e-03f, 4.5843148e-03f, 4.6320508e-03f, 4.6802379e-03f, 4.7288801e-03f, 4.7779815e-03f, 4.8275461e-03f, 4.8775780e-03f, 4.9280813e-03f, 4.9790603e-03f, 5.0305191e-03f, 5.0824620e-03f, 5.1348933e-03f, 5.1878172e-03f, 5.2412382e-03f, 5.2951607e-03f, 5.3495890e-03f, 5.4045276e-03f, 5.4599811e-03f, 5.5159540e-03f, 5.5724510e-03f, 5.6294765e-03f, 5.6870353e-03f, 5.7451339e-03f, 5.8037735e-03f, 5.8629606e-03f, 5.9227001e-03f, 5.9829968e-03f, 6.0438557e-03f, 6.1052818e-03f, 6.1672799e-03f, 6.2298552e-03f, 6.2930128e-03f, 6.3567578e-03f, 6.4210953e-03f, 6.4860306e-03f, 6.5515690e-03f, 6.6177157e-03f, 6.6844762e-03f, 6.7518558e-03f, 6.8198599e-03f, 6.8884942e-03f, 6.9577641e-03f, 7.0276752e-03f, 7.0982332e-03f, 7.1694438e-03f, 7.2413127e-03f, 7.3138457e-03f, 7.3870486e-03f, 7.4609273e-03f, 7.5354878e-03f, 7.6107361e-03f, 7.6866782e-03f, 7.7633203e-03f, 7.8406684e-03f, 7.9187312e-03f, 7.9975101e-03f, 8.0770139e-03f, 8.1572490e-03f, 8.2382216e-03f, 8.3199385e-03f, 8.4024059e-03f, 8.4856307e-03f, 8.5696193e-03f, 8.6543786e-03f, 8.7399153e-03f, 8.8262362e-03f, 8.9133482e-03f, 9.0012582e-03f, 9.0899733e-03f, 9.1795005e-03f, 9.2698470e-03f, 9.3610199e-03f, 9.4530265e-03f, 9.5458741e-03f, 9.6395701e-03f, 9.7341219e-03f, 9.8295370e-03f, 9.9258231e-03f, 1.0022988e-02f, 1.0121039e-02f, 1.0219984e-02f, 1.0319830e-02f, 1.0420587e-02f, 1.0522261e-02f, 1.0624862e-02f, 1.0728396e-02f, 1.0832872e-02f, 1.0938299e-02f, 1.1044684e-02f, 1.1152036e-02f, 1.1260365e-02f, 1.1369677e-02f, 1.1479982e-02f, 1.1591288e-02f, 1.1703605e-02f, 1.1816941e-02f, 1.1931305e-02f, 1.2046706e-02f, 1.2163153e-02f, 1.2280656e-02f, 1.2399223e-02f, 1.2518864e-02f, 1.2639596e-02f, 1.2761413e-02f, 1.2884333e-02f, 1.3008365e-02f, 1.3133519e-02f, 1.3259804e-02f, 1.3387231e-02f, 1.3515809e-02f, 1.3645549e-02f, 1.3776461e-02f, 1.3908555e-02f, 1.4041841e-02f, 1.4176331e-02f, 1.4312034e-02f, 1.4448961e-02f, 1.4587123e-02f, 1.4726530e-02f, 1.4867194e-02f, 1.5009126e-02f, 1.5152336e-02f, 1.5296837e-02f, 1.5442638e-02f, 1.5589753e-02f, 1.5738191e-02f, 1.5887965e-02f, 1.6039087e-02f, 1.6191567e-02f, 1.6345419e-02f, 1.6500655e-02f, 1.6657285e-02f, 1.6815323e-02f, 1.6974781e-02f, 1.7135672e-02f, 1.7298007e-02f, 1.7461800e-02f, 1.7627063e-02f, 1.7793810e-02f, 1.7962053e-02f, 1.8131805e-02f, 1.8303080e-02f, 1.8475891e-02f, 1.8650252e-02f, 1.8826176e-02f, 1.9003676e-02f, 1.9182767e-02f, 1.9363463e-02f, 1.9545777e-02f, 1.9729724e-02f, 1.9915319e-02f, 2.0102575e-02f, 2.0291507e-02f, 2.0482131e-02f, 2.0674460e-02f, 2.0868510e-02f, 2.1064296e-02f, 
2.1261833e-02f, 2.1461136e-02f, 2.1662222e-02f, 2.1865105e-02f, 2.2069802e-02f, 2.2276328e-02f, 2.2484700e-02f, 2.2694934e-02f, 2.2907045e-02f, 2.3121064e-02f, 2.3336982e-02f, 2.3554827e-02f, 2.3774618e-02f, 2.3996370e-02f, 2.4220102e-02f, 2.4445831e-02f, 2.4673574e-02f, 2.4903349e-02f, 2.5135174e-02f, 2.5369067e-02f, 2.5605046e-02f, 2.5843129e-02f, 2.6083336e-02f, 2.6325684e-02f, 2.6570192e-02f, 2.6816880e-02f, 2.7065767e-02f, 2.7316872e-02f, 2.7570215e-02f, 2.7825815e-02f, 2.8083692e-02f, 2.8343867e-02f, 2.8606359e-02f, 2.8871189e-02f, 2.9138378e-02f, 2.9407946e-02f, 2.9679914e-02f, 2.9954304e-02f, 3.0231137e-02f, 3.0510434e-02f, 3.0792217e-02f, 3.1076508e-02f, 3.1363330e-02f, 3.1652704e-02f, 3.1944653e-02f, 3.2239199e-02f, 3.2536367e-02f, 3.2836178e-02f, 3.3138657e-02f, 3.3443826e-02f, 3.3751710e-02f, 3.4062333e-02f, 3.4375718e-02f, 3.4691890e-02f, 3.5010874e-02f, 3.5332694e-02f, 3.5657377e-02f, 3.5984946e-02f, 3.6315428e-02f, 3.6648848e-02f, 3.6985233e-02f, 3.7324608e-02f, 3.7667000e-02f, 3.8012436e-02f, 3.8360942e-02f, 3.8712547e-02f, 3.9067276e-02f, 3.9425159e-02f, 3.9786223e-02f, 4.0150496e-02f, 4.0518006e-02f, 4.0888783e-02f, 4.1262855e-02f, 4.1640274e-02f, 4.2021025e-02f, 4.2405159e-02f, 4.2792707e-02f, 4.3183699e-02f, 4.3578166e-02f, 4.3976138e-02f, 4.4377647e-02f, 4.4782724e-02f, 4.5191401e-02f, 4.5603709e-02f, 4.6019681e-02f, 4.6439350e-02f, 4.6862749e-02f, 4.7289910e-02f, 4.7720867e-02f, 4.8155654e-02f, 4.8594305e-02f, 4.9036854e-02f, 4.9483336e-02f, 4.9933787e-02f, 5.0388240e-02f, 5.0846733e-02f, 5.1309301e-02f, 5.1775981e-02f, 5.2246808e-02f, 5.2721821e-02f, 5.3201056e-02f, 5.3684551e-02f, 5.4172344e-02f, 5.4664473e-02f, 5.5160978e-02f, 5.5661897e-02f, 5.6167269e-02f, 5.6677135e-02f, 5.7191535e-02f, 5.7710508e-02f, 5.8234097e-02f, 5.8762342e-02f, 5.9295285e-02f, 5.9832968e-02f, 6.0375433e-02f, 6.0922723e-02f, 6.1474882e-02f, 6.2031952e-02f, 6.2593979e-02f, 6.3161006e-02f, 6.3733078e-02f, 6.4310241e-02f, 6.4892540e-02f, 6.5480021e-02f, 6.6072730e-02f, 6.6670715e-02f, 6.7274023e-02f, 6.7882702e-02f, 6.8496800e-02f, 6.9116365e-02f, 6.9741447e-02f, 7.0372096e-02f, 7.1008361e-02f, 7.1650293e-02f, 7.2297942e-02f, 7.2951361e-02f, 7.3610602e-02f, 7.4275756e-02f, 7.4946797e-02f, 7.5623818e-02f, 7.6306873e-02f, 7.6996016e-02f, 7.7691302e-02f, 7.8392787e-02f, 7.9100526e-02f, 7.9814576e-02f, 8.0534993e-02f, 8.1261837e-02f, 8.1995163e-02f, 8.2735032e-02f, 8.3481501e-02f, 8.4234632e-02f, 8.4994483e-02f, 8.5761116e-02f, 8.6534592e-02f, 8.7314974e-02f, 8.8102323e-02f, 8.8896702e-02f, 8.9698176e-02f, 9.0506809e-02f, 9.1322665e-02f, 9.2145810e-02f, 9.2976310e-02f, 9.3814232e-02f, 9.4659643e-02f, 9.5512612e-02f, 9.6373206e-02f, 9.7241496e-02f, 9.8117550e-02f, 9.9001441e-02f, 9.9893238e-02f, 1.0079301e-01f, 1.0170084e-01f, 1.0261679e-01f, 1.0354094e-01f, 1.0447337e-01f, 1.0541414e-01f, 1.0636334e-01f, 1.0732104e-01f, 1.0828731e-01f, 1.0926225e-01f, 1.1024592e-01f, 1.1123841e-01f, 1.1223979e-01f, 1.1325016e-01f, 1.1426958e-01f, 1.1529814e-01f, 1.1633594e-01f, 1.1738304e-01f, 1.1843954e-01f, 1.1950552e-01f, 1.2058107e-01f, 1.2166627e-01f, 1.2276122e-01f, 1.2386601e-01f, 1.2498072e-01f, 1.2610544e-01f, 1.2724027e-01f, 1.2838531e-01f, 1.2954063e-01f, 1.3070635e-01f, 1.3188262e-01f, 1.3306940e-01f, 1.3426686e-01f, 1.3547509e-01f, 1.3669420e-01f, 1.3792428e-01f, 1.3916544e-01f, 1.4041778e-01f, 1.4168140e-01f, 1.4295640e-01f, 1.4424289e-01f, 1.4554098e-01f, 1.4685078e-01f, 1.4817238e-01f, 1.4950591e-01f, 1.5085147e-01f, 1.5220916e-01f, 1.5357912e-01f, 1.5496144e-01f, 1.5635624e-01f, 1.5776364e-01f, 
1.5918375e-01f, 1.6061670e-01f, 1.6206260e-01f, 1.6352156e-01f, 1.6499372e-01f, 1.6647920e-01f, 1.6797811e-01f, 1.6949059e-01f, 1.7101676e-01f, 1.7255674e-01f, 1.7411067e-01f, 1.7567867e-01f, 1.7726087e-01f, 1.7885742e-01f, 1.8046844e-01f, 1.8209406e-01f, 1.8373443e-01f, 1.8538967e-01f, 1.8705994e-01f, 1.8874536e-01f, 1.9044608e-01f, 1.9216225e-01f, 1.9389401e-01f, 1.9564150e-01f, 1.9740486e-01f, 1.9918426e-01f, 2.0097984e-01f, 2.0279175e-01f, 2.0462014e-01f, 2.0646517e-01f, 2.0832699e-01f, 2.1020577e-01f, 2.1210165e-01f, 2.1401481e-01f, 2.1594540e-01f, 2.1789359e-01f, 2.1985954e-01f, 2.2184342e-01f, 2.2384540e-01f, 2.2586565e-01f, 2.2790434e-01f, 2.2996165e-01f, 2.3203774e-01f, 2.3413293e-01f, 2.3624714e-01f, 2.3838068e-01f, 2.4053372e-01f, 2.4270646e-01f, 2.4489908e-01f, 2.4711177e-01f, 2.4934471e-01f, 2.5159811e-01f, 2.5387214e-01f, 2.5616702e-01f, 2.5848293e-01f, 2.6082007e-01f, 2.6317866e-01f, 2.6555888e-01f, 2.6796095e-01f, 2.7038507e-01f, 2.7283145e-01f, 2.7530031e-01f, 2.7779186e-01f, 2.8030631e-01f, 2.8284388e-01f, 2.8540479e-01f, 2.8798927e-01f, 2.9059754e-01f, 2.9322983e-01f, 2.9588635e-01f, 2.9856736e-01f, 3.0127308e-01f, 3.0400374e-01f, 3.0675959e-01f, 3.0954086e-01f, 3.1234780e-01f, 3.1518066e-01f, 3.1803969e-01f, 3.2092512e-01f, 3.2383723e-01f, 3.2677625e-01f, 3.2974246e-01f, 3.3273611e-01f, 3.3575747e-01f, 3.3880680e-01f, 3.4188437e-01f, 3.4499045e-01f, 3.4812533e-01f, 3.5128926e-01f, 3.5448255e-01f, 3.5770546e-01f, 3.6095828e-01f, 3.6424131e-01f, 3.6755483e-01f, 3.7089914e-01f, 3.7427454e-01f, 3.7768132e-01f, 3.8111979e-01f, 3.8459027e-01f, 3.8809304e-01f, 3.9162844e-01f, 3.9519678e-01f, 3.9879837e-01f, 4.0243354e-01f, 4.0610261e-01f, 4.0980592e-01f, 4.1354380e-01f, 4.1731681e-01f, 4.2112483e-01f, 4.2496844e-01f, 4.2884798e-01f, 4.3276381e-01f, 4.3671627e-01f, 4.4070572e-01f, 4.4473253e-01f, 4.4879706e-01f, 4.5289968e-01f, 4.5704076e-01f, 4.6122068e-01f, 4.6543981e-01f, 4.6969854e-01f, 4.7399727e-01f, 4.7833637e-01f, 4.8271625e-01f, 4.8713731e-01f, 4.9159995e-01f, 4.9610458e-01f, 5.0065162e-01f, 5.0524147e-01f, 5.0987457e-01f, 5.1455133e-01f, 5.1927219e-01f, 5.2403759e-01f, 5.2884795e-01f, 5.3370373e-01f, 5.3860537e-01f, 5.4355333e-01f, 5.4854807e-01f, 5.5359004e-01f, 5.5867972e-01f, 5.6381757e-01f, 5.6900408e-01f, 5.7423972e-01f, 5.7952499e-01f, 5.8486037e-01f, 5.9024637e-01f, 5.9568349e-01f, 6.0117223e-01f, 6.0671311e-01f, 6.1230664e-01f, 6.1795336e-01f, 6.2365379e-01f, 6.2940847e-01f, 6.3521793e-01f, 6.4108273e-01f, 6.4700342e-01f, 6.5298056e-01f, 6.5901471e-01f, 6.6510643e-01f, 6.7125632e-01f, 6.7746495e-01f, 6.8373290e-01f, 6.9006078e-01f, 6.9644918e-01f, 7.0289872e-01f, 7.0941001e-01f, 7.1598366e-01f, 7.2262031e-01f, 7.2932059e-01f, 7.3608513e-01f, 7.4291460e-01f, 7.4981006e-01f, 7.5677134e-01f, 7.6379952e-01f, 7.7089527e-01f, 7.7805929e-01f, 7.8529226e-01f, 7.9259489e-01f, 7.9996786e-01f, 8.0741191e-01f, 8.1492774e-01f, 8.2251609e-01f, 8.3017769e-01f, 8.3791329e-01f, 8.4572364e-01f, 8.5360950e-01f, 8.6157163e-01f, 8.6961082e-01f, 8.7772786e-01f, 8.8592352e-01f, 8.9419862e-01f, 9.0255397e-01f, 9.1099038e-01f, 9.1950869e-01f, 9.2810973e-01f, 9.3679435e-01f, 9.4556340e-01f, 9.5441776e-01f, 9.6335829e-01f, 9.7238588e-01f, 9.8150143e-01f, 9.9070583e-01f, 1.0000000e+00f, 1.0f, // extra padding to avoid out of bounds access }; static inline float pq_eotf(float x) { float idxf = fminf(fmaxf(x, 0.0f), 1.0f) * (PQ_LUT_SIZE - 1); int ipart = floorf(idxf); float fpart = idxf - ipart; return PL_MIX(pq_eotf_lut[ipart], pq_eotf_lut[ipart + 1], fpart); } static inline float 
pq_oetf(float x) { x = powf(fmaxf(x, 0.0f), PQ_M1); x = (PQ_C1 + PQ_C2 * x) / (1.0f + PQ_C3 * x); return powf(x, PQ_M2); } // Helper struct containing pre-computed cached values describing a gamut struct gamut { pl_matrix3x3 lms2rgb; pl_matrix3x3 rgb2lms; float min_luma, max_luma; // pq float min_rgb, max_rgb; // 10k normalized struct ICh *peak_cache; // 1-item cache for computed peaks (per hue) }; struct cache { struct ICh src_cache; struct ICh dst_cache; }; static void get_gamuts(struct gamut *dst, struct gamut *src, struct cache *cache, const struct pl_gamut_map_params *params) { const float epsilon = 1e-6; memset(cache, 0, sizeof(*cache)); struct gamut base = { .min_luma = params->min_luma, .max_luma = params->max_luma, .min_rgb = pq_eotf(params->min_luma) - epsilon, .max_rgb = pq_eotf(params->max_luma) + epsilon, }; if (dst) { *dst = base; dst->lms2rgb = dst->rgb2lms = pl_ipt_rgb2lms(¶ms->output_gamut); dst->peak_cache = &cache->dst_cache; pl_matrix3x3_invert(&dst->lms2rgb); } if (src) { *src = base; src->lms2rgb = src->rgb2lms = pl_ipt_rgb2lms(¶ms->input_gamut); src->peak_cache = &cache->src_cache; pl_matrix3x3_invert(&src->lms2rgb); } } static inline struct IPT rgb2ipt(struct RGB c, struct gamut gamut) { const float L = gamut.rgb2lms.m[0][0] * c.R + gamut.rgb2lms.m[0][1] * c.G + gamut.rgb2lms.m[0][2] * c.B; const float M = gamut.rgb2lms.m[1][0] * c.R + gamut.rgb2lms.m[1][1] * c.G + gamut.rgb2lms.m[1][2] * c.B; const float S = gamut.rgb2lms.m[2][0] * c.R + gamut.rgb2lms.m[2][1] * c.G + gamut.rgb2lms.m[2][2] * c.B; const float Lp = pq_oetf(L); const float Mp = pq_oetf(M); const float Sp = pq_oetf(S); return (struct IPT) { .I = 0.4000f * Lp + 0.4000f * Mp + 0.2000f * Sp, .P = 4.4550f * Lp - 4.8510f * Mp + 0.3960f * Sp, .T = 0.8056f * Lp + 0.3572f * Mp - 1.1628f * Sp, }; } static inline struct RGB ipt2rgb(struct IPT c, struct gamut gamut) { const float Lp = c.I + 0.0975689f * c.P + 0.205226f * c.T; const float Mp = c.I - 0.1138760f * c.P + 0.133217f * c.T; const float Sp = c.I + 0.0326151f * c.P - 0.676887f * c.T; const float L = pq_eotf(Lp); const float M = pq_eotf(Mp); const float S = pq_eotf(Sp); return (struct RGB) { .R = gamut.lms2rgb.m[0][0] * L + gamut.lms2rgb.m[0][1] * M + gamut.lms2rgb.m[0][2] * S, .G = gamut.lms2rgb.m[1][0] * L + gamut.lms2rgb.m[1][1] * M + gamut.lms2rgb.m[1][2] * S, .B = gamut.lms2rgb.m[2][0] * L + gamut.lms2rgb.m[2][1] * M + gamut.lms2rgb.m[2][2] * S, }; } static inline bool ingamut(struct IPT c, struct gamut gamut) { const float Lp = c.I + 0.0975689f * c.P + 0.205226f * c.T; const float Mp = c.I - 0.1138760f * c.P + 0.133217f * c.T; const float Sp = c.I + 0.0326151f * c.P - 0.676887f * c.T; if (Lp < gamut.min_luma || Lp > gamut.max_luma || Mp < gamut.min_luma || Mp > gamut.max_luma || Sp < gamut.min_luma || Sp > gamut.max_luma) { // Early exit for values outside legal LMS range return false; } const float L = pq_eotf(Lp); const float M = pq_eotf(Mp); const float S = pq_eotf(Sp); struct RGB rgb = { .R = gamut.lms2rgb.m[0][0] * L + gamut.lms2rgb.m[0][1] * M + gamut.lms2rgb.m[0][2] * S, .G = gamut.lms2rgb.m[1][0] * L + gamut.lms2rgb.m[1][1] * M + gamut.lms2rgb.m[1][2] * S, .B = gamut.lms2rgb.m[2][0] * L + gamut.lms2rgb.m[2][1] * M + gamut.lms2rgb.m[2][2] * S, }; return rgb.R >= gamut.min_rgb && rgb.R <= gamut.max_rgb && rgb.G >= gamut.min_rgb && rgb.G <= gamut.max_rgb && rgb.B >= gamut.min_rgb && rgb.B <= gamut.max_rgb; } struct generate_args { const struct pl_gamut_map_params *params; float *out; int start; int count; }; static PL_THREAD_VOID generate(void 
*priv) { const struct generate_args *args = priv; const struct pl_gamut_map_params *params = args->params; float *in = args->out; const int end = args->start + args->count; for (int h = args->start; h < end; h++) { for (int C = 0; C < params->lut_size_C; C++) { for (int I = 0; I < params->lut_size_I; I++) { float Ix = (float) I / (params->lut_size_I - 1); float Cx = (float) C / (params->lut_size_C - 1); float hx = (float) h / (params->lut_size_h - 1); struct IPT ipt = ich2ipt((struct ICh) { .I = PL_MIX(params->min_luma, params->max_luma, Ix), .C = PL_MIX(0.0f, 0.5f, Cx), .h = PL_MIX(-M_PI, M_PI, hx), }); in[0] = ipt.I; in[1] = ipt.P; in[2] = ipt.T; in += params->lut_stride; } } } struct pl_gamut_map_params fixed = *params; fix_constants(&fixed.constants); fixed.lut_size_h = args->count; FUN(params).map(args->out, &fixed); PL_THREAD_RETURN(); } void pl_gamut_map_generate(float *out, const struct pl_gamut_map_params *params) { enum { MAX_WORKERS = 32 }; struct generate_args args[MAX_WORKERS]; const int num_per_worker = PL_DIV_UP(params->lut_size_h, MAX_WORKERS); const int num_workers = PL_DIV_UP(params->lut_size_h, num_per_worker); for (int i = 0; i < num_workers; i++) { const int start = i * num_per_worker; const int count = PL_MIN(num_per_worker, params->lut_size_h - start); args[i] = (struct generate_args) { .params = params, .out = out, .start = start, .count = count, }; out += count * params->lut_size_C * params->lut_size_I * params->lut_stride; } pl_thread workers[MAX_WORKERS] = {0}; for (int i = 0; i < num_workers; i++) { if (pl_thread_create(&workers[i], generate, &args[i]) != 0) generate(&args[i]); // fallback } for (int i = 0; i < num_workers; i++) { if (!workers[i]) continue; if (pl_thread_join(workers[i]) != 0) generate(&args[i]); // fallback } } void pl_gamut_map_sample(float x[3], const struct pl_gamut_map_params *params) { struct pl_gamut_map_params fixed = *params; fix_constants(&fixed.constants); fixed.lut_size_I = fixed.lut_size_C = fixed.lut_size_h = 1; fixed.lut_stride = 3; FUN(params).map(x, &fixed); } #define LUT_SIZE(p) (p->lut_size_I * p->lut_size_C * p->lut_size_h * p->lut_stride) #define FOREACH_LUT(lut, C) \ for (struct IPT *_i = (struct IPT *) lut, \ *_end = (struct IPT *) (lut + LUT_SIZE(params)), \ C; \ _i < _end && ( C = *_i, 1 ); \ *_i = C, _i = (struct IPT *) ((float *) _i + params->lut_stride)) // Something like PL_MIX(base, c, x) but follows an exponential curve, note // that this can be used to extend 'c' outwards for x > 1 static inline struct ICh mix_exp(struct ICh c, float x, float gamma, float base) { return (struct ICh) { .I = base + (c.I - base) * powf(x, gamma), .C = c.C * x, .h = c.h, }; } // Drop gamma for colors approaching black and achromatic to avoid numerical // instabilities, and excessive brightness boosting of grain, while also // strongly boosting gamma for values exceeding the target peak static inline float scale_gamma(float gamma, struct ICh ich, struct ICh peak, struct gamut gamut) { const float Imin = gamut.min_luma; const float Irel = fmaxf((ich.I - Imin) / (peak.I - Imin), 0.0f); return gamma * powf(Irel, 3) * fminf(ich.C / peak.C, 1.0f); } static const float maxDelta = 5e-5f; // Find gamut intersection using specified bounds static inline struct ICh desat_bounded(float I, float h, float Cmin, float Cmax, struct gamut gamut) { if (I <= gamut.min_luma) return (struct ICh) { .I = gamut.min_luma, .C = 0, .h = h }; if (I >= gamut.max_luma) return (struct ICh) { .I = gamut.max_luma, .C = 0, .h = h }; const float maxDI = I * maxDelta; 
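// Descriptive note (editorial): the loop below bisects the chroma interval
// [Cmin, Cmax] at fixed intensity I and hue h, narrowing it around the gamut
// boundary on each iteration until the interval is smaller than maxDI; the
// final midpoint approximates the maximum in-gamut chroma within the bounds.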
struct ICh res = { .I = I, .C = (Cmin + Cmax) / 2, .h = h }; do { if (ingamut(ich2ipt(res), gamut)) { Cmin = res.C; } else { Cmax = res.C; } res.C = (Cmin + Cmax) / 2; } while (Cmax - Cmin > maxDI); return res; } // Finds maximally saturated in-gamut color (for given hue) static inline struct ICh saturate(float hue, struct gamut gamut) { if (gamut.peak_cache->I && fabsf(gamut.peak_cache->h - hue) < 1e-3) return *gamut.peak_cache; static const float invphi = 0.6180339887498948f; static const float invphi2 = 0.38196601125010515f; struct ICh lo = { .I = gamut.min_luma, .h = hue }; struct ICh hi = { .I = gamut.max_luma, .h = hue }; float de = hi.I - lo.I; struct ICh a = { .I = lo.I + invphi2 * de }; struct ICh b = { .I = lo.I + invphi * de }; a = desat_bounded(a.I, hue, 0.0f, 0.5f, gamut); b = desat_bounded(b.I, hue, 0.0f, 0.5f, gamut); while (de > maxDelta) { de *= invphi; if (a.C > b.C) { hi = b; b = a; a.I = lo.I + invphi2 * de; a = desat_bounded(a.I, hue, lo.C - maxDelta, 0.5f, gamut); } else { lo = a; a = b; b.I = lo.I + invphi * de; b = desat_bounded(b.I, hue, hi.C - maxDelta, 0.5f, gamut); } } struct ICh peak = a.C > b.C ? a : b; *gamut.peak_cache = peak; return peak; } // Clip a color along the exponential curve given by `gamma` static inline struct IPT clip_gamma(struct IPT ipt, float gamma, struct gamut gamut) { if (ipt.I <= gamut.min_luma) return (struct IPT) { .I = gamut.min_luma }; if (ingamut(ipt, gamut)) return ipt; struct ICh ich = ipt2ich(ipt); if (!gamma) return ich2ipt(desat_bounded(ich.I, ich.h, 0.0f, ich.C, gamut)); const float maxDI = fmaxf(ich.I * maxDelta, 1e-7f); struct ICh peak = saturate(ich.h, gamut); gamma = scale_gamma(gamma, ich, peak, gamut); float lo = 0.0f, hi = 1.0f, x = 0.5f; do { struct ICh test = mix_exp(ich, x, gamma, peak.I); if (ingamut(ich2ipt(test), gamut)) { lo = x; } else { hi = x; } x = (lo + hi) / 2.0f; } while (hi - lo > maxDI); return ich2ipt(mix_exp(ich, x, gamma, peak.I)); } static float softclip(float value, float source, float target, const struct pl_gamut_map_constants *c) { if (!target) return 0.0f; const float peak = source / target; const float x = fminf(value / target, peak); const float j = c->softclip_knee; if (x <= j || peak <= 1.0) return value; // Apply simple mobius function const float a = -j*j * (peak - 1.0f) / (j*j - 2.0f * j + peak); const float b = (j*j - 2.0f * j * peak + peak) / fmaxf(1e-6f, peak - 1.0f); const float scale = (b*b + 2.0f * b*j + j*j) / (b - a); return scale * (x + a) / (x + b) * target; } static int cmp_float(const void *a, const void *b) { float fa = *(const float*) a; float fb = *(const float*) b; return PL_CMP(fa, fb); } static float wrap(float h) { if (h > M_PI) { return h - 2 * M_PI; } else if (h < -M_PI) { return h + 2 * M_PI; } else { return h; } } enum { S = 12, // number of hue shift vertices N = S + 2, // +2 for the endpoints }; // Hue-shift helper struct struct hueshift { float dh[N]; float dddh[N]; float K[N]; float prev_hue; float prev_shift; struct { float hue, delta; } hueshift[N]; }; static void hueshift_prepare(struct hueshift *s, struct gamut src, struct gamut dst) { const float O = pq_eotf(src.min_luma), X = pq_eotf(src.max_luma); const float M = (O + X) / 2.0f; const struct RGB refpoints[S] = { {X, O, O}, {O, X, O}, {O, O, X}, {O, X, X}, {X, O, X}, {X, X, O}, {O, X, M}, {X, O, M}, {X, M, O}, {O, M, X}, {M, O, X}, {M, X, O}, }; memset(s, 0, sizeof(*s)); for (int i = 0; i < S; i++) { struct ICh ich_src = ipt2ich(rgb2ipt(refpoints[i], src)); struct ICh ich_dst = ipt2ich(rgb2ipt(refpoints[i], 
dst)); const float delta = wrap(ich_dst.h - ich_src.h); s->hueshift[i+1].hue = ich_src.h; s->hueshift[i+1].delta = delta; } // Sort and wrap endpoints qsort(s->hueshift + 1, S, sizeof(*s->hueshift), cmp_float); s->hueshift[0] = s->hueshift[S]; s->hueshift[S+1] = s->hueshift[1]; s->hueshift[0].hue -= 2 * M_PI; s->hueshift[S+1].hue += 2 * M_PI; // Construction of cubic spline coefficients float tmp[N][N] = {0}; for (int i = N - 1; i > 0; i--) { s->dh[i-1] = s->hueshift[i].hue - s->hueshift[i-1].hue; s->dddh[i] = (s->hueshift[i].delta - s->hueshift[i-1].delta) / s->dh[i-1]; } for (int i = 1; i < N - 1; i++) { tmp[i][i] = 2 * (s->dh[i-1] + s->dh[i]); if (i != 1) tmp[i][i-1] = tmp[i-1][i] = s->dh[i-1]; tmp[i][N-1] = 6 * (s->dddh[i+1] - s->dddh[i]); } for (int i = 1; i < N - 2; i++) { const float q = (tmp[i+1][i] / tmp[i][i]); for (int j = 1; j <= N - 1; j++) tmp[i+1][j] -= q * tmp[i][j]; } for (int i = N - 2; i > 0; i--) { float sum = 0.0f; for (int j = i; j <= N - 2; j++) sum += tmp[i][j] * s->K[j]; s->K[i] = (tmp[i][N-1] - sum) / tmp[i][i]; } s->prev_hue = -10.0f; } static struct ICh hueshift_apply(struct hueshift *s, struct ICh ich) { if (fabsf(ich.h - s->prev_hue) < 1e-6f) goto done; // Determine perceptual hue shift delta by interpolation of refpoints for (int i = 0; i < N - 1; i++) { if (s->hueshift[i+1].hue > ich.h) { pl_assert(s->hueshift[i].hue <= ich.h); float a = (s->K[i+1] - s->K[i]) / (6 * s->dh[i]); float b = s->K[i] / 2; float c = s->dddh[i+1] - (2 * s->dh[i] * s->K[i] + s->K[i+1] * s->dh[i]) / 6; float d = s->hueshift[i].delta; float x = ich.h - s->hueshift[i].hue; float delta = ((a * x + b) * x + c) * x + d; s->prev_shift = ich.h + delta; s->prev_hue = ich.h; break; } } done: return (struct ICh) { .I = ich.I, .C = ich.C, .h = s->prev_shift, }; } static void perceptual(float *lut, const struct pl_gamut_map_params *params) { const struct pl_gamut_map_constants *c = ¶ms->constants; struct cache cache; struct gamut dst, src; get_gamuts(&dst, &src, &cache, params); FOREACH_LUT(lut, ipt) { struct ICh ich = ipt2ich(ipt); struct ICh src_peak = saturate(ich.h, src); struct ICh dst_peak = saturate(ich.h, dst); struct IPT mapped = rgb2ipt(ipt2rgb(ipt, src), dst); // Protect in gamut region const float maxC = fmaxf(src_peak.C, dst_peak.C); float k = pl_smoothstep(c->perceptual_deadzone, 1.0f, ich.C / maxC); k *= c->perceptual_strength; ipt.I = PL_MIX(ipt.I, mapped.I, k); ipt.P = PL_MIX(ipt.P, mapped.P, k); ipt.T = PL_MIX(ipt.T, mapped.T, k); struct RGB rgb = ipt2rgb(ipt, dst); const float maxRGB = fmaxf(rgb.R, fmaxf(rgb.G, rgb.B)); rgb.R = fmaxf(softclip(rgb.R, maxRGB, dst.max_rgb, c), dst.min_rgb); rgb.G = fmaxf(softclip(rgb.G, maxRGB, dst.max_rgb, c), dst.min_rgb); rgb.B = fmaxf(softclip(rgb.B, maxRGB, dst.max_rgb, c), dst.min_rgb); ipt = rgb2ipt(rgb, dst); } } const struct pl_gamut_map_function pl_gamut_map_perceptual = { .name = "perceptual", .description = "Perceptual mapping", .bidirectional = true, .map = perceptual, }; static void softclip_map(float *lut, const struct pl_gamut_map_params *params) { const struct pl_gamut_map_constants *c = ¶ms->constants; // Separate cache after hueshift, because this invalidates previous cache struct cache cache_pre, cache_post; struct gamut dst_pre, src_pre, src_post, dst_post; struct hueshift hueshift; get_gamuts(&dst_pre, &src_pre, &cache_pre, params); get_gamuts(&dst_post, &src_post, &cache_post, params); hueshift_prepare(&hueshift, src_pre, dst_pre); FOREACH_LUT(lut, ipt) { struct gamut src = src_pre; struct gamut dst = dst_pre; if (ipt.I <= 
dst.min_luma) { ipt.P = ipt.T = 0.0f; continue; } struct ICh ich = ipt2ich(ipt); if (ich.C <= 1e-2f) continue; // Fast path for achromatic colors float margin = 1.0f; struct ICh shifted = hueshift_apply(&hueshift, ich); if (fabsf(shifted.h - ich.h) >= 1e-3f) { struct ICh src_border = desat_bounded(ich.I, ich.h, 0.0f, 0.5f, src); struct ICh dst_border = desat_bounded(ich.I, ich.h, 0.0f, 0.5f, dst); const float k = pl_smoothstep(dst_border.C * c->softclip_knee, src_border.C, ich.C); ich.h = PL_MIX(ich.h, shifted.h, k); src = src_post; dst = dst_post; // Expand/contract chromaticity margin to correspond to the altered // size of the hue leaf after applying the hue delta struct ICh shift_border = desat_bounded(ich.I, ich.h, 0.0f, 0.5f, src); margin *= fmaxf(1.0f, src_border.C / shift_border.C); } // Determine intersections with source and target gamuts, and // apply softclip to the chromaticity struct ICh source = saturate(ich.h, src); struct ICh target = saturate(ich.h, dst); struct ICh border = desat_bounded(ich.I, ich.h, 0.0f, target.C, dst); const float chromaticity = PL_MIX(target.C, border.C, c->softclip_desat); ich.C = softclip(ich.C, margin * source.C, chromaticity, c); // Soft-clip the resulting RGB color. This will generally distort // hues slightly, but hopefully in an aesthetically pleasing way. struct ICh saturated = { ich.I, chromaticity, ich.h }; struct RGB peak = ipt2rgb(ich2ipt(saturated), dst); struct RGB rgb = ipt2rgb(ich2ipt(ich), dst); rgb.R = fmaxf(softclip(rgb.R, peak.R, dst.max_rgb, c), dst.min_rgb); rgb.G = fmaxf(softclip(rgb.G, peak.G, dst.max_rgb, c), dst.min_rgb); rgb.B = fmaxf(softclip(rgb.B, peak.B, dst.max_rgb, c), dst.min_rgb); ipt = rgb2ipt(rgb, dst); } } const struct pl_gamut_map_function pl_gamut_map_softclip = { .name = "softclip", .description = "Soft clipping", .map = softclip_map, }; static void relative(float *lut, const struct pl_gamut_map_params *params) { const struct pl_gamut_map_constants *c = ¶ms->constants; struct cache cache; struct gamut dst; get_gamuts(&dst, NULL, &cache, params); FOREACH_LUT(lut, ipt) ipt = clip_gamma(ipt, c->colorimetric_gamma, dst); } const struct pl_gamut_map_function pl_gamut_map_relative = { .name = "relative", .description = "Colorimetric clip", .map = relative, }; static void desaturate(float *lut, const struct pl_gamut_map_params *params) { struct cache cache; struct gamut dst; get_gamuts(&dst, NULL, &cache, params); FOREACH_LUT(lut, ipt) ipt = clip_gamma(ipt, 0.0f, dst); } const struct pl_gamut_map_function pl_gamut_map_desaturate = { .name = "desaturate", .description = "Desaturating clip", .map = desaturate, }; static void saturation(float *lut, const struct pl_gamut_map_params *params) { struct cache cache; struct gamut dst, src; get_gamuts(&dst, &src, &cache, params); FOREACH_LUT(lut, ipt) ipt = rgb2ipt(ipt2rgb(ipt, src), dst); } const struct pl_gamut_map_function pl_gamut_map_saturation = { .name = "saturation", .description = "Saturation mapping", .bidirectional = true, .map = saturation, }; static void absolute(float *lut, const struct pl_gamut_map_params *params) { const struct pl_gamut_map_constants *c = ¶ms->constants; struct cache cache; struct gamut dst; get_gamuts(&dst, NULL, &cache, params); pl_matrix3x3 m = pl_get_adaptation_matrix(params->output_gamut.white, params->input_gamut.white); FOREACH_LUT(lut, ipt) { struct RGB rgb = ipt2rgb(ipt, dst); pl_matrix3x3_apply(&m, (float *) &rgb); ipt = rgb2ipt(rgb, dst); ipt = clip_gamma(ipt, c->colorimetric_gamma, dst); } } const struct pl_gamut_map_function 
pl_gamut_map_absolute = { .name = "absolute", .description = "Absolute colorimetric clip", .map = absolute, }; static void highlight(float *lut, const struct pl_gamut_map_params *params) { struct cache cache; struct gamut dst; get_gamuts(&dst, NULL, &cache, params); FOREACH_LUT(lut, ipt) { if (!ingamut(ipt, dst)) { ipt.I = fminf(ipt.I + 0.1f, 1.0f); ipt.P = fclampf(-1.2f * ipt.P, -0.5f, 0.5f); ipt.T = fclampf(-1.2f * ipt.T, -0.5f, 0.5f); } } } const struct pl_gamut_map_function pl_gamut_map_highlight = { .name = "highlight", .description = "Highlight out-of-gamut pixels", .map = highlight, }; static void linear(float *lut, const struct pl_gamut_map_params *params) { struct cache cache; struct gamut dst, src; get_gamuts(&dst, &src, &cache, params); float gain = 1.0f; for (float hue = -M_PI; hue < M_PI; hue += 0.1f) gain = fminf(gain, saturate(hue, dst).C / saturate(hue, src).C); FOREACH_LUT(lut, ipt) { struct ICh ich = ipt2ich(ipt); ich.C *= gain; ipt = ich2ipt(ich); } } const struct pl_gamut_map_function pl_gamut_map_linear = { .name = "linear", .description = "Linear desaturate", .map = linear, }; static void darken(float *lut, const struct pl_gamut_map_params *params) { const struct pl_gamut_map_constants *c = ¶ms->constants; struct cache cache; struct gamut dst, src; get_gamuts(&dst, &src, &cache, params); static const struct RGB points[6] = { {1, 0, 0}, {0, 1, 0}, {0, 0, 1}, {0, 1, 1}, {1, 0, 1}, {1, 1, 0}, }; float gain = 1.0f; for (int i = 0; i < PL_ARRAY_SIZE(points); i++) { const struct RGB p = ipt2rgb(rgb2ipt(points[i], src), dst); const float maxRGB = PL_MAX3(p.R, p.G, p.B); gain = fminf(gain, 1.0 / maxRGB); } FOREACH_LUT(lut, ipt) { struct RGB rgb = ipt2rgb(ipt, dst); rgb.R *= gain; rgb.G *= gain; rgb.B *= gain; ipt = rgb2ipt(rgb, dst); ipt = clip_gamma(ipt, c->colorimetric_gamma, dst); } } const struct pl_gamut_map_function pl_gamut_map_darken = { .name = "darken", .description = "Darken and clip", .map = darken, }; static void noop(float *lut, const struct pl_gamut_map_params *params) { return; } const struct pl_gamut_map_function pl_gamut_map_clip = { .name = "clip", .description = "No gamut mapping (hard clip)", .map = noop, }; const struct pl_gamut_map_function * const pl_gamut_map_functions[] = { &pl_gamut_map_clip, &pl_gamut_map_perceptual, &pl_gamut_map_softclip, &pl_gamut_map_relative, &pl_gamut_map_saturation, &pl_gamut_map_absolute, &pl_gamut_map_desaturate, &pl_gamut_map_darken, &pl_gamut_map_highlight, &pl_gamut_map_linear, NULL }; const int pl_num_gamut_map_functions = PL_ARRAY_SIZE(pl_gamut_map_functions) - 1; const struct pl_gamut_map_function *pl_find_gamut_map_function(const char *name) { for (int i = 0; i < pl_num_gamut_map_functions; i++) { if (strcmp(name, pl_gamut_map_functions[i]->name) == 0) return pl_gamut_map_functions[i]; } return NULL; } libplacebo-v7.349.0/src/glsl/000077500000000000000000000000001463457750100157245ustar00rootroot00000000000000libplacebo-v7.349.0/src/glsl/glslang.cc000066400000000000000000000073571463457750100176760ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #include "config_internal.h" #include extern "C" { #include "pl_alloc.h" #include "pl_thread.h" } #include #include #include #include "glslang.h" #if (GLSLANG_VERSION_MAJOR * 1000 + GLSLANG_VERSION_MINOR) >= 11013 #include #define DefaultTBuiltInResource *GetDefaultResources() #endif using namespace glslang; static pl_static_mutex pl_glslang_mutex = PL_STATIC_MUTEX_INITIALIZER; static int pl_glslang_refcount; bool pl_glslang_init(void) { bool ret = true; pl_static_mutex_lock(&pl_glslang_mutex); if (pl_glslang_refcount++ == 0) ret = InitializeProcess(); pl_static_mutex_unlock(&pl_glslang_mutex); return ret; } void pl_glslang_uninit(void) { pl_static_mutex_lock(&pl_glslang_mutex); if (--pl_glslang_refcount == 0) FinalizeProcess(); pl_static_mutex_unlock(&pl_glslang_mutex); } struct pl_glslang_res *pl_glslang_compile(struct pl_glsl_version glsl_ver, struct pl_spirv_version spirv_ver, enum glsl_shader_stage stage, const char *text) { assert(pl_glslang_refcount); struct pl_glslang_res *res = pl_zalloc_ptr(NULL, res); EShLanguage lang; switch (stage) { case GLSL_SHADER_VERTEX: lang = EShLangVertex; break; case GLSL_SHADER_FRAGMENT: lang = EShLangFragment; break; case GLSL_SHADER_COMPUTE: lang = EShLangCompute; break; default: abort(); } TShader *shader = new TShader(lang); shader->setEnvClient(EShClientVulkan, (EShTargetClientVersion) spirv_ver.env_version); shader->setEnvTarget(EShTargetSpv, (EShTargetLanguageVersion) spirv_ver.spv_version); shader->setStrings(&text, 1); TBuiltInResource limits = DefaultTBuiltInResource; limits.maxComputeWorkGroupSizeX = glsl_ver.max_group_size[0]; limits.maxComputeWorkGroupSizeY = glsl_ver.max_group_size[1]; limits.maxComputeWorkGroupSizeZ = glsl_ver.max_group_size[2]; limits.minProgramTexelOffset = glsl_ver.min_gather_offset; limits.maxProgramTexelOffset = glsl_ver.max_gather_offset; if (!shader->parse(&limits, 0, true, EShMsgDefault)) { res->error_msg = pl_str0dup0(res, shader->getInfoLog()); delete shader; return res; } TProgram *prog = new TProgram(); prog->addShader(shader); if (!prog->link(EShMsgDefault)) { res->error_msg = pl_str0dup0(res, prog->getInfoLog()); delete shader; delete prog; return res; } SpvOptions options; options.disableOptimizer = false; options.stripDebugInfo = true; options.optimizeSize = true; options.validate = true; std::vector spirv; GlslangToSpv(*prog->getIntermediate(lang), spirv, &options); res->success = true; res->size = spirv.size() * sizeof(unsigned int); res->data = pl_memdup(res, spirv.data(), res->size), delete shader; delete prog; return res; } libplacebo-v7.349.0/src/glsl/glslang.h000066400000000000000000000032231463457750100175240ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . 
*/ #pragma once #include #include typedef struct TLimits TLimits; typedef struct TBuiltInResource TBuiltInResource; #include #ifdef __cplusplus extern "C" { #endif #include "utils.h" bool pl_glslang_init(void); void pl_glslang_uninit(void); struct pl_glslang_res { // Compilation status bool success; const char *error_msg; // Compiled shader memory, or NULL void *data; size_t size; }; // Compile GLSL into a SPIRV stream, if possible. The resulting // pl_glslang_res can simply be freed with pl_free() when done. struct pl_glslang_res *pl_glslang_compile(struct pl_glsl_version glsl_ver, struct pl_spirv_version spirv_ver, enum glsl_shader_stage stage, const char *shader); extern const TBuiltInResource DefaultTBuiltInResource; #ifdef __cplusplus } #endif libplacebo-v7.349.0/src/glsl/glslang_resources.c000066400000000000000000000114461463457750100216170ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #include "glslang.h" // Taken from glslang's examples, which apparently generally bases the choices // on OpenGL specification limits // // Note: This lives in a separate file so we can compile this struct using C99 // designated initializers instead of using C++ struct initializers, because // the latter will break on every upstream struct extension. 
const TBuiltInResource DefaultTBuiltInResource = { .maxLights = 32, .maxClipPlanes = 6, .maxTextureUnits = 32, .maxTextureCoords = 32, .maxVertexAttribs = 64, .maxVertexUniformComponents = 4096, .maxVaryingFloats = 64, .maxVertexTextureImageUnits = 32, .maxCombinedTextureImageUnits = 80, .maxTextureImageUnits = 32, .maxFragmentUniformComponents = 4096, .maxDrawBuffers = 32, .maxVertexUniformVectors = 128, .maxVaryingVectors = 8, .maxFragmentUniformVectors = 16, .maxVertexOutputVectors = 16, .maxFragmentInputVectors = 15, .minProgramTexelOffset = -8, .maxProgramTexelOffset = 7, .maxClipDistances = 8, .maxComputeWorkGroupCountX = 65535, .maxComputeWorkGroupCountY = 65535, .maxComputeWorkGroupCountZ = 65535, .maxComputeWorkGroupSizeX = 1024, .maxComputeWorkGroupSizeY = 1024, .maxComputeWorkGroupSizeZ = 64, .maxComputeUniformComponents = 1024, .maxComputeTextureImageUnits = 16, .maxComputeImageUniforms = 8, .maxComputeAtomicCounters = 8, .maxComputeAtomicCounterBuffers = 1, .maxVaryingComponents = 60, .maxVertexOutputComponents = 64, .maxGeometryInputComponents = 64, .maxGeometryOutputComponents = 128, .maxFragmentInputComponents = 128, .maxImageUnits = 8, .maxCombinedImageUnitsAndFragmentOutputs = 8, .maxCombinedShaderOutputResources = 8, .maxImageSamples = 0, .maxVertexImageUniforms = 0, .maxTessControlImageUniforms = 0, .maxTessEvaluationImageUniforms = 0, .maxGeometryImageUniforms = 0, .maxFragmentImageUniforms = 8, .maxCombinedImageUniforms = 8, .maxGeometryTextureImageUnits = 16, .maxGeometryOutputVertices = 256, .maxGeometryTotalOutputComponents = 1024, .maxGeometryUniformComponents = 1024, .maxGeometryVaryingComponents = 64, .maxTessControlInputComponents = 128, .maxTessControlOutputComponents = 128, .maxTessControlTextureImageUnits = 16, .maxTessControlUniformComponents = 1024, .maxTessControlTotalOutputComponents = 4096, .maxTessEvaluationInputComponents = 128, .maxTessEvaluationOutputComponents = 128, .maxTessEvaluationTextureImageUnits = 16, .maxTessEvaluationUniformComponents = 1024, .maxTessPatchComponents = 120, .maxPatchVertices = 32, .maxTessGenLevel = 64, .maxViewports = 16, .maxVertexAtomicCounters = 0, .maxTessControlAtomicCounters = 0, .maxTessEvaluationAtomicCounters = 0, .maxGeometryAtomicCounters = 0, .maxFragmentAtomicCounters = 8, .maxCombinedAtomicCounters = 8, .maxAtomicCounterBindings = 1, .maxVertexAtomicCounterBuffers = 0, .maxTessControlAtomicCounterBuffers = 0, .maxTessEvaluationAtomicCounterBuffers = 0, .maxGeometryAtomicCounterBuffers = 0, .maxFragmentAtomicCounterBuffers = 1, .maxCombinedAtomicCounterBuffers = 1, .maxAtomicCounterBufferSize = 16384, .maxTransformFeedbackBuffers = 4, .maxTransformFeedbackInterleavedComponents = 64, .maxCullDistances = 8, .maxCombinedClipAndCullDistances = 8, .maxSamples = 4, .maxMeshOutputVerticesNV = 256, .maxMeshOutputPrimitivesNV = 512, .maxMeshWorkGroupSizeX_NV = 32, .maxMeshWorkGroupSizeY_NV = 1, .maxMeshWorkGroupSizeZ_NV = 1, .maxTaskWorkGroupSizeX_NV = 32, .maxTaskWorkGroupSizeY_NV = 1, .maxTaskWorkGroupSizeZ_NV = 1, .maxMeshViewCountNV = 4, .maxDualSourceDrawBuffersEXT = 1, .limits = { .nonInductiveForLoops = 1, .whileLoops = 1, .doWhileLoops = 1, .generalUniformIndexing = 1, .generalAttributeMatrixVectorIndexing = 1, .generalVaryingIndexing = 1, .generalSamplerIndexing = 1, .generalVariableIndexing = 1, .generalConstantMatrixVectorIndexing = 1, }, }; libplacebo-v7.349.0/src/glsl/meson.build000066400000000000000000000042451463457750100200730ustar00rootroot00000000000000# shaderc shaderc = dependency('shaderc', 
version: '>=2019.1', required: get_option('shaderc')) components.set('shaderc', shaderc.found()) if shaderc.found() build_deps += shaderc sources += 'glsl/spirv_shaderc.c' endif # glslang glslang = disabler() glslang_req = get_option('glslang') if glslang_req.auto() and shaderc.found() # we only need one or the other, and shaderc is preferred message('Skipping `glslang` because `shaderc` is available') elif not glslang_req.disabled() glslang_deps = [ cxx.find_library('glslang-default-resource-limits', required: false) ] # meson doesn't respect generator expressions in INTERFACE_LINK_LIBRARIES # https://github.com/mesonbuild/meson/issues/8232 # TODO: Use the following once it's fixed # glslang = dependency('glslang', method: 'cmake', modules: ['glslang::SPIRV']) prefer_static = get_option('prefer_static') found_lib = false foreach arg : [[prefer_static, false], [not prefer_static, glslang_req]] static = arg[0] required = arg[1] spirv = cxx.find_library('SPIRV', required: required, static: static) if not spirv.found() continue endif glslang_deps += spirv if static glslang_deps += [ # Always required for static linking cxx.find_library('MachineIndependent', required: false, static: true), cxx.find_library('OSDependent', required: false, static: true), cxx.find_library('OGLCompiler', required: false, static: true), cxx.find_library('GenericCodeGen', required: false, static: true), # SPIRV-Tools are required only if optimizer is enabled in glslang build cxx.find_library('SPIRV-Tools', required: false, static: true), cxx.find_library('SPIRV-Tools-opt', required: false, static: true), ] endif found_lib = true break endforeach if found_lib and cc.has_header('glslang/build_info.h') glslang = declare_dependency(dependencies: glslang_deps) endif endif components.set('glslang', glslang.found()) if glslang.found() build_deps += glslang sources += [ 'glsl/glslang.cc', 'glsl/glslang_resources.c', 'glsl/spirv_glslang.c', ] endif libplacebo-v7.349.0/src/glsl/spirv.c000066400000000000000000000036671463457750100172470ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #include "spirv.h" extern const struct spirv_compiler pl_spirv_shaderc; extern const struct spirv_compiler pl_spirv_glslang; static const struct spirv_compiler *compilers[] = { #ifdef PL_HAVE_SHADERC &pl_spirv_shaderc, #endif #ifdef PL_HAVE_GLSLANG &pl_spirv_glslang, #endif }; pl_spirv pl_spirv_create(pl_log log, struct pl_spirv_version spirv_ver) { for (int i = 0; i < PL_ARRAY_SIZE(compilers); i++) { pl_spirv spirv = compilers[i]->create(log, spirv_ver); if (!spirv) continue; pl_info(log, "Initialized SPIR-V compiler '%s'", compilers[i]->name); return spirv; } pl_fatal(log, "Failed initializing any SPIR-V compiler! 
Maybe libplacebo " "was built without support for either libshaderc or glslang?"); return NULL; } void pl_spirv_destroy(pl_spirv *pspirv) { pl_spirv spirv = *pspirv; if (!spirv) return; spirv->impl->destroy(spirv); *pspirv = NULL; } pl_str pl_spirv_compile_glsl(pl_spirv spirv, void *alloc, struct pl_glsl_version glsl, enum glsl_shader_stage stage, const char *shader) { return spirv->impl->compile(spirv, alloc, glsl, stage, shader); } libplacebo-v7.349.0/src/glsl/spirv.h000066400000000000000000000033131463457750100172400ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #pragma once #include "log.h" #include "utils.h" typedef const struct pl_spirv_t { const struct spirv_compiler *impl; pl_log log; // SPIR-V version specified at creation time. struct pl_spirv_version version; // For cache invalidation, should uniquely identify everything about this // spirv compiler and its configuration. uint64_t signature; } *pl_spirv; // Initialize a SPIR-V compiler instance, or returns NULL on failure. pl_spirv pl_spirv_create(pl_log log, struct pl_spirv_version spirv_ver); void pl_spirv_destroy(pl_spirv *spirv); // Compile GLSL to SPIR-V. Returns {0} on failure. pl_str pl_spirv_compile_glsl(pl_spirv spirv, void *alloc, struct pl_glsl_version glsl_ver, enum glsl_shader_stage stage, const char *shader); struct spirv_compiler { const char *name; void (*destroy)(pl_spirv spirv); __typeof__(pl_spirv_create) *create; __typeof__(pl_spirv_compile_glsl) *compile; }; libplacebo-v7.349.0/src/glsl/spirv_glslang.c000066400000000000000000000073621463457750100207520ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . 
*/ #include "hash.h" #include "spirv.h" #include "utils.h" #include "glsl/glslang.h" // This header contains only preprocessor definitions #include // This is awkward, but we cannot use upstream macro, it was fixed in 11.11.0 #define PL_GLSLANG_VERSION_GREATER_THAN(major, minor, patch) \ ((GLSLANG_VERSION_MAJOR) > (major) || ((major) == GLSLANG_VERSION_MAJOR && \ ((GLSLANG_VERSION_MINOR) > (minor) || ((minor) == GLSLANG_VERSION_MINOR && \ (GLSLANG_VERSION_PATCH) > (patch))))) #if PL_GLSLANG_VERSION_GREATER_THAN(11, 8, 0) #define GLSLANG_SPV_MAX PL_SPV_VERSION(1, 6) #elif PL_GLSLANG_VERSION_GREATER_THAN(7, 13, 3496) #define GLSLANG_SPV_MAX PL_SPV_VERSION(1, 5) #elif PL_GLSLANG_VERSION_GREATER_THAN(6, 2, 2596) #define GLSLANG_SPV_MAX PL_SPV_VERSION(1, 3) #else #define GLSLANG_SPV_MAX PL_SPV_VERSION(1, 0) #endif const struct spirv_compiler pl_spirv_glslang; static void glslang_destroy(pl_spirv spirv) { pl_glslang_uninit(); pl_free((void *) spirv); } static pl_spirv glslang_create(pl_log log, struct pl_spirv_version spirv_ver) { if (!pl_glslang_init()) { pl_fatal(log, "Failed initializing glslang SPIR-V compiler!"); return NULL; } struct pl_spirv_t *spirv = pl_alloc_ptr(NULL, spirv); *spirv = (struct pl_spirv_t) { .signature = pl_str0_hash(pl_spirv_glslang.name), .impl = &pl_spirv_glslang, .version = spirv_ver, .log = log, }; PL_INFO(spirv, "glslang version: %d.%d.%d", GLSLANG_VERSION_MAJOR, GLSLANG_VERSION_MINOR, GLSLANG_VERSION_PATCH); // Clamp to supported version by glslang if (GLSLANG_SPV_MAX < spirv->version.spv_version) { spirv->version.spv_version = GLSLANG_SPV_MAX; spirv->version.env_version = pl_spirv_version_to_vulkan(GLSLANG_SPV_MAX); } pl_hash_merge(&spirv->signature, (uint64_t) spirv->version.spv_version << 32 | spirv->version.env_version); pl_hash_merge(&spirv->signature, (GLSLANG_VERSION_MAJOR & 0xFF) << 24 | (GLSLANG_VERSION_MINOR & 0xFF) << 16 | (GLSLANG_VERSION_PATCH & 0xFFFF)); return spirv; } static pl_str glslang_compile(pl_spirv spirv, void *alloc, struct pl_glsl_version glsl_ver, enum glsl_shader_stage stage, const char *shader) { struct pl_glslang_res *res; res = pl_glslang_compile(glsl_ver, spirv->version, stage, shader); if (!res || !res->success) { PL_ERR(spirv, "glslang failed: %s", res ? res->error_msg : "(null)"); pl_free(res); return (struct pl_str) {0}; } struct pl_str ret = { .buf = pl_steal(alloc, res->data), .len = res->size, }; pl_free(res); return ret; } const struct spirv_compiler pl_spirv_glslang = { .name = "glslang", .destroy = glslang_destroy, .create = glslang_create, .compile = glslang_compile, }; libplacebo-v7.349.0/src/glsl/spirv_shaderc.c000066400000000000000000000141351463457750100207300ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . 
*/ #include #include #include "hash.h" #include "spirv.h" #include "utils.h" const struct spirv_compiler pl_spirv_shaderc; struct priv { shaderc_compiler_t compiler; }; static void shaderc_destroy(pl_spirv spirv) { struct priv *p = PL_PRIV(spirv); shaderc_compiler_release(p->compiler); pl_free((void *) spirv); } static pl_spirv shaderc_create(pl_log log, struct pl_spirv_version spirv_ver) { struct pl_spirv_t *spirv = pl_alloc_obj(NULL, spirv, struct priv); *spirv = (struct pl_spirv_t) { .signature = pl_str0_hash(pl_spirv_shaderc.name), .impl = &pl_spirv_shaderc, .version = spirv_ver, .log = log, }; struct priv *p = PL_PRIV(spirv); p->compiler = shaderc_compiler_initialize(); if (!p->compiler) goto error; unsigned int ver = 0, rev = 0; shaderc_get_spv_version(&ver, &rev); PL_INFO(spirv, "shaderc SPIR-V version %u.%u rev %u", ver >> 16, (ver >> 8) & 0xff, rev); // Clamp to supported version by shaderc if (ver < spirv->version.spv_version) { spirv->version.spv_version = ver; spirv->version.env_version = pl_spirv_version_to_vulkan(ver); } pl_hash_merge(&spirv->signature, (uint64_t) spirv->version.spv_version << 32 | spirv->version.env_version); pl_hash_merge(&spirv->signature, (uint64_t) ver << 32 | rev); return spirv; error: shaderc_destroy(spirv); return NULL; } static pl_str shaderc_compile(pl_spirv spirv, void *alloc, struct pl_glsl_version glsl_ver, enum glsl_shader_stage stage, const char *shader) { struct priv *p = PL_PRIV(spirv); const size_t len = strlen(shader); shaderc_compile_options_t opts = shaderc_compile_options_initialize(); if (!opts) return (pl_str) {0}; shaderc_compile_options_set_optimization_level(opts, shaderc_optimization_level_performance); shaderc_compile_options_set_target_spirv(opts, spirv->version.spv_version); shaderc_compile_options_set_target_env(opts, shaderc_target_env_vulkan, spirv->version.env_version); for (int i = 0; i < 3; i++) { shaderc_compile_options_set_limit(opts, shaderc_limit_max_compute_work_group_size_x + i, glsl_ver.max_group_size[i]); } shaderc_compile_options_set_limit(opts, shaderc_limit_min_program_texel_offset, glsl_ver.min_gather_offset); shaderc_compile_options_set_limit(opts, shaderc_limit_max_program_texel_offset, glsl_ver.max_gather_offset); static const shaderc_shader_kind kinds[] = { [GLSL_SHADER_VERTEX] = shaderc_glsl_vertex_shader, [GLSL_SHADER_FRAGMENT] = shaderc_glsl_fragment_shader, [GLSL_SHADER_COMPUTE] = shaderc_glsl_compute_shader, }; static const char * const file_name = "input"; static const char * const entry_point = "main"; shaderc_compilation_result_t res; res = shaderc_compile_into_spv(p->compiler, shader, len, kinds[stage], file_name, entry_point, opts); int errs = shaderc_result_get_num_errors(res), warn = shaderc_result_get_num_warnings(res); enum pl_log_level lev = errs ? PL_LOG_ERR : warn ? PL_LOG_INFO : PL_LOG_DEBUG; int s = shaderc_result_get_compilation_status(res); bool success = s == shaderc_compilation_status_success; if (!success) lev = PL_LOG_ERR; const char *msg = shaderc_result_get_error_message(res); if (msg[0]) PL_MSG(spirv, lev, "shaderc output:\n%s", msg); static const char *results[] = { [shaderc_compilation_status_success] = "success", [shaderc_compilation_status_invalid_stage] = "invalid stage", [shaderc_compilation_status_compilation_error] = "error", [shaderc_compilation_status_internal_error] = "internal error", [shaderc_compilation_status_null_result_object] = "no result", [shaderc_compilation_status_invalid_assembly] = "invalid assembly", }; const char *status = s < PL_ARRAY_SIZE(results) ? 
results[s] : "unknown"; PL_MSG(spirv, lev, "shaderc compile status '%s' (%d errors, %d warnings)", status, errs, warn); pl_str ret = {0}; if (success) { void *bytes = (void *) shaderc_result_get_bytes(res); pl_assert(bytes); ret.len = shaderc_result_get_length(res); ret.buf = pl_memdup(alloc, bytes, ret.len); if (pl_msg_test(spirv->log, PL_LOG_TRACE)) { shaderc_compilation_result_t dis; dis = shaderc_compile_into_spv_assembly(p->compiler, shader, len, kinds[stage], file_name, entry_point, opts); PL_TRACE(spirv, "Generated SPIR-V:\n%.*s", (int) shaderc_result_get_length(dis), shaderc_result_get_bytes(dis)); shaderc_result_release(dis); } } shaderc_result_release(res); shaderc_compile_options_release(opts); return ret; } const struct spirv_compiler pl_spirv_shaderc = { .name = "shaderc", .destroy = shaderc_destroy, .create = shaderc_create, .compile = shaderc_compile, }; libplacebo-v7.349.0/src/glsl/utils.h000066400000000000000000000031071463457750100172360ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #pragma once #include #include #include #define PL_SPV_VERSION(major, minor) ((major) << 16 | (minor) << 8) #define PL_VLK_VERSION(major, minor) ((major) << 22 | (minor) << 12) // Max version that can be used #define PL_MAX_SPIRV_VER PL_SPV_VERSION(1, 6) struct pl_spirv_version { uint32_t env_version; uint32_t spv_version; }; // Returns minimum Vulkan version for given SPIR-V version static inline uint32_t pl_spirv_version_to_vulkan(uint32_t spirv_ver) { if (spirv_ver >= PL_SPV_VERSION(1, 6)) return PL_VLK_VERSION(1, 3); if (spirv_ver >= PL_SPV_VERSION(1, 4)) return PL_VLK_VERSION(1, 2); if (spirv_ver >= PL_SPV_VERSION(1, 1)) return PL_VLK_VERSION(1, 1); return PL_VLK_VERSION(1, 0); } enum glsl_shader_stage { GLSL_SHADER_VERTEX = 0, GLSL_SHADER_FRAGMENT, GLSL_SHADER_COMPUTE, }; libplacebo-v7.349.0/src/gpu.c000066400000000000000000001224701463457750100157300ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . 
*/ #include "common.h" #include "gpu.h" #define require(expr) pl_require(gpu, expr) void pl_gpu_destroy(pl_gpu gpu) { if (!gpu) return; struct pl_gpu_fns *impl = PL_PRIV(gpu); pl_dispatch_destroy(&impl->dp); impl->destroy(gpu); } pl_dispatch pl_gpu_dispatch(pl_gpu gpu) { const struct pl_gpu_fns *impl = PL_PRIV(gpu); return impl->dp; } pl_cache pl_gpu_cache(pl_gpu gpu) { if (!gpu) return NULL; const struct pl_gpu_fns *impl = PL_PRIV(gpu); return atomic_load(&impl->cache); } void pl_gpu_set_cache(pl_gpu gpu, pl_cache cache) { struct pl_gpu_fns *impl = PL_PRIV(gpu); atomic_store(&impl->cache, cache); } bool pl_fmt_is_ordered(pl_fmt fmt) { bool ret = !fmt->opaque; for (int i = 0; i < fmt->num_components; i++) ret &= fmt->sample_order[i] == i; return ret; } bool pl_fmt_is_float(pl_fmt fmt) { switch (fmt->type) { case PL_FMT_UNKNOWN: // more likely than not case PL_FMT_FLOAT: case PL_FMT_UNORM: case PL_FMT_SNORM: return true; case PL_FMT_UINT: case PL_FMT_SINT: return false; case PL_FMT_TYPE_COUNT: break; } pl_unreachable(); } bool pl_fmt_has_modifier(pl_fmt fmt, uint64_t modifier) { if (!fmt) return false; for (int i = 0; i < fmt->num_modifiers; i++) { if (fmt->modifiers[i] == modifier) return true; } return false; } pl_fmt pl_find_fmt(pl_gpu gpu, enum pl_fmt_type type, int num_components, int min_depth, int host_bits, enum pl_fmt_caps caps) { for (int n = 0; n < gpu->num_formats; n++) { pl_fmt fmt = gpu->formats[n]; if (fmt->type != type || fmt->num_components != num_components) continue; if ((fmt->caps & caps) != caps) continue; // When specifying some particular host representation, ensure the // format is non-opaque, ordered and unpadded if (host_bits && fmt->opaque) continue; if (host_bits && fmt->texel_size * 8 != host_bits * num_components) continue; if (host_bits && !pl_fmt_is_ordered(fmt)) continue; for (int i = 0; i < fmt->num_components; i++) { if (fmt->component_depth[i] < min_depth) goto next_fmt; if (host_bits && fmt->host_bits[i] != host_bits) goto next_fmt; } return fmt; next_fmt: ; // equivalent to `continue` } // ran out of formats PL_TRACE(gpu, "No matching format found"); return NULL; } pl_fmt pl_find_vertex_fmt(pl_gpu gpu, enum pl_fmt_type type, int comps) { static const size_t sizes[] = { [PL_FMT_FLOAT] = sizeof(float), [PL_FMT_UNORM] = sizeof(unsigned), [PL_FMT_UINT] = sizeof(unsigned), [PL_FMT_SNORM] = sizeof(int), [PL_FMT_SINT] = sizeof(int), }; return pl_find_fmt(gpu, type, comps, 0, 8 * sizes[type], PL_FMT_CAP_VERTEX); } pl_fmt pl_find_named_fmt(pl_gpu gpu, const char *name) { if (!name) return NULL; for (int i = 0; i < gpu->num_formats; i++) { pl_fmt fmt = gpu->formats[i]; if (strcmp(name, fmt->name) == 0) return fmt; } // ran out of formats return NULL; } pl_fmt pl_find_fourcc(pl_gpu gpu, uint32_t fourcc) { if (!fourcc) return NULL; for (int i = 0; i < gpu->num_formats; i++) { pl_fmt fmt = gpu->formats[i]; if (fourcc == fmt->fourcc) return fmt; } // ran out of formats return NULL; } static inline bool check_mod(pl_gpu gpu, pl_fmt fmt, uint64_t mod) { for (int i = 0; i < fmt->num_modifiers; i++) { if (fmt->modifiers[i] == mod) return true; } PL_ERR(gpu, "DRM modifier %s not available for format %s. 
Available modifiers:", PRINT_DRM_MOD(mod), fmt->name); for (int i = 0; i < fmt->num_modifiers; i++) PL_ERR(gpu, " %s", PRINT_DRM_MOD(fmt->modifiers[i])); return false; } pl_tex pl_tex_create(pl_gpu gpu, const struct pl_tex_params *params) { require(params->format); require(!params->import_handle || !params->export_handle); require(!params->import_handle || !params->initial_data); if (params->export_handle) { require(params->export_handle & gpu->export_caps.tex); require(PL_ISPOT(params->export_handle)); } if (params->import_handle) { require(params->import_handle & gpu->import_caps.tex); require(PL_ISPOT(params->import_handle)); if (params->import_handle == PL_HANDLE_DMA_BUF) { if (!check_mod(gpu, params->format, params->shared_mem.drm_format_mod)) goto error; if (params->shared_mem.stride_w) require(params->w && params->shared_mem.stride_w >= params->w); if (params->shared_mem.stride_h) require(params->h && params->shared_mem.stride_h >= params->h); } else if (params->import_handle == PL_HANDLE_MTL_TEX) { require(params->shared_mem.plane <= 2); } } switch (pl_tex_params_dimension(*params)) { case 1: require(params->w > 0); require(params->w <= gpu->limits.max_tex_1d_dim); require(!params->renderable); require(!params->blit_src || gpu->limits.blittable_1d_3d); require(!params->blit_dst || gpu->limits.blittable_1d_3d); require(!params->format->num_planes); break; case 2: require(params->w > 0 && params->h > 0); require(params->w <= gpu->limits.max_tex_2d_dim); require(params->h <= gpu->limits.max_tex_2d_dim); break; case 3: require(params->w > 0 && params->h > 0 && params->d > 0); require(params->w <= gpu->limits.max_tex_3d_dim); require(params->h <= gpu->limits.max_tex_3d_dim); require(params->d <= gpu->limits.max_tex_3d_dim); require(!params->renderable); require(!params->blit_src || gpu->limits.blittable_1d_3d); require(!params->blit_dst || gpu->limits.blittable_1d_3d); require(!params->format->num_planes); break; } enum pl_fmt_caps fmt_caps = params->format->caps; bool fmt_opaque = params->format->opaque; for (int i = 0; i < params->format->num_planes; i++) { pl_fmt pfmt = params->format->planes[i].format; fmt_caps |= pfmt->caps; fmt_opaque &= pfmt->opaque; } require(!params->host_readable || fmt_caps & PL_FMT_CAP_HOST_READABLE); require(!params->host_writable || !fmt_opaque); require(!params->sampleable || fmt_caps & PL_FMT_CAP_SAMPLEABLE); require(!params->renderable || fmt_caps & PL_FMT_CAP_RENDERABLE); require(!params->storable || fmt_caps & PL_FMT_CAP_STORABLE); require(!params->blit_src || fmt_caps & PL_FMT_CAP_BLITTABLE); require(!params->blit_dst || fmt_caps & PL_FMT_CAP_BLITTABLE); const struct pl_gpu_fns *impl = PL_PRIV(gpu); return impl->tex_create(gpu, params); error: if (params->debug_tag) PL_ERR(gpu, " for texture: %s", params->debug_tag); return NULL; } void pl_tex_destroy(pl_gpu gpu, pl_tex *tex) { if (!*tex) return; const struct pl_gpu_fns *impl = PL_PRIV(gpu); impl->tex_destroy(gpu, *tex); *tex = NULL; } static bool pl_tex_params_superset(struct pl_tex_params a, struct pl_tex_params b) { return a.w == b.w && a.h == b.h && a.d == b.d && a.format == b.format && (a.sampleable || !b.sampleable) && (a.renderable || !b.renderable) && (a.storable || !b.storable) && (a.blit_src || !b.blit_src) && (a.blit_dst || !b.blit_dst) && (a.host_writable || !b.host_writable) && (a.host_readable || !b.host_readable); } bool pl_tex_recreate(pl_gpu gpu, pl_tex *tex, const struct pl_tex_params *params) { if (params->initial_data) { PL_ERR(gpu, "pl_tex_recreate may not be used with 
`initial_data`!"); return false; } if (params->import_handle) { PL_ERR(gpu, "pl_tex_recreate may not be used with `import_handle`!"); return false; } if (*tex && pl_tex_params_superset((*tex)->params, *params)) { pl_tex_invalidate(gpu, *tex); return true; } PL_DEBUG(gpu, "(Re)creating %dx%dx%d texture with format %s: %s", params->w, params->h, params->d, params->format->name, PL_DEF(params->debug_tag, "unknown")); pl_tex_destroy(gpu, tex); *tex = pl_tex_create(gpu, params); return !!*tex; } void pl_tex_clear_ex(pl_gpu gpu, pl_tex dst, const union pl_clear_color color) { require(dst->params.blit_dst); const struct pl_gpu_fns *impl = PL_PRIV(gpu); if (impl->tex_invalidate) impl->tex_invalidate(gpu, dst); impl->tex_clear_ex(gpu, dst, color); return; error: if (dst->params.debug_tag) PL_ERR(gpu, " for texture: %s", dst->params.debug_tag); } void pl_tex_clear(pl_gpu gpu, pl_tex dst, const float color[4]) { if (!pl_fmt_is_float(dst->params.format)) { PL_ERR(gpu, "Cannot call `pl_tex_clear` on integer textures, please " "use `pl_tex_clear_ex` instead."); return; } const union pl_clear_color col = { .f = { color[0], color[1], color[2], color[3] }, }; pl_tex_clear_ex(gpu, dst, col); } void pl_tex_invalidate(pl_gpu gpu, pl_tex tex) { const struct pl_gpu_fns *impl = PL_PRIV(gpu); if (impl->tex_invalidate) impl->tex_invalidate(gpu, tex); } static void strip_coords(pl_tex tex, pl_rect3d *rc) { if (!tex->params.d) { rc->z0 = 0; rc->z1 = 1; } if (!tex->params.h) { rc->y0 = 0; rc->y1 = 1; } } static void infer_rc(pl_tex tex, pl_rect3d *rc) { if (!rc->x0 && !rc->x1) rc->x1 = tex->params.w; if (!rc->y0 && !rc->y1) rc->y1 = tex->params.h; if (!rc->z0 && !rc->z1) rc->z1 = tex->params.d; } void pl_tex_blit(pl_gpu gpu, const struct pl_tex_blit_params *params) { pl_tex src = params->src, dst = params->dst; require(src && dst); pl_fmt src_fmt = src->params.format; pl_fmt dst_fmt = dst->params.format; require(src_fmt->internal_size == dst_fmt->internal_size); require((src_fmt->type == PL_FMT_UINT) == (dst_fmt->type == PL_FMT_UINT)); require((src_fmt->type == PL_FMT_SINT) == (dst_fmt->type == PL_FMT_SINT)); require(src->params.blit_src); require(dst->params.blit_dst); require(params->sample_mode != PL_TEX_SAMPLE_LINEAR || (src_fmt->caps & PL_FMT_CAP_LINEAR)); struct pl_tex_blit_params fixed = *params; infer_rc(src, &fixed.src_rc); infer_rc(dst, &fixed.dst_rc); strip_coords(src, &fixed.src_rc); strip_coords(dst, &fixed.dst_rc); require(fixed.src_rc.x0 >= 0 && fixed.src_rc.x0 < src->params.w); require(fixed.src_rc.x1 > 0 && fixed.src_rc.x1 <= src->params.w); require(fixed.dst_rc.x0 >= 0 && fixed.dst_rc.x0 < dst->params.w); require(fixed.dst_rc.x1 > 0 && fixed.dst_rc.x1 <= dst->params.w); if (src->params.h) { require(fixed.src_rc.y0 >= 0 && fixed.src_rc.y0 < src->params.h); require(fixed.src_rc.y1 > 0 && fixed.src_rc.y1 <= src->params.h); } if (dst->params.h) { require(fixed.dst_rc.y0 >= 0 && fixed.dst_rc.y0 < dst->params.h); require(fixed.dst_rc.y1 > 0 && fixed.dst_rc.y1 <= dst->params.h); } if (src->params.d) { require(fixed.src_rc.z0 >= 0 && fixed.src_rc.z0 < src->params.d); require(fixed.src_rc.z1 > 0 && fixed.src_rc.z1 <= src->params.d); } if (dst->params.d) { require(fixed.dst_rc.z0 >= 0 && fixed.dst_rc.z0 < dst->params.d); require(fixed.dst_rc.z1 > 0 && fixed.dst_rc.z1 <= dst->params.d); } pl_rect3d full = {0, 0, 0, dst->params.w, dst->params.h, dst->params.d}; strip_coords(dst, &full); pl_rect3d rcnorm = fixed.dst_rc; pl_rect3d_normalize(&rcnorm); if (pl_rect3d_eq(rcnorm, full)) pl_tex_invalidate(gpu, dst); 
const struct pl_gpu_fns *impl = PL_PRIV(gpu); impl->tex_blit(gpu, &fixed); return; error: if (src->params.debug_tag || dst->params.debug_tag) { PL_ERR(gpu, " for textures: src %s, dst %s", PL_DEF(src->params.debug_tag, "(unknown)"), PL_DEF(dst->params.debug_tag, "(unknown)")); } } static bool fix_tex_transfer(pl_gpu gpu, struct pl_tex_transfer_params *params) { pl_tex tex = params->tex; pl_fmt fmt = tex->params.format; pl_rect3d rc = params->rc; // Infer the default values infer_rc(tex, &rc); strip_coords(tex, &rc); if (!params->row_pitch || !tex->params.w) params->row_pitch = pl_rect_w(rc) * fmt->texel_size; if (!params->depth_pitch || !tex->params.d) params->depth_pitch = pl_rect_h(rc) * params->row_pitch; require(params->row_pitch); require(params->depth_pitch); params->rc = rc; // Check the parameters for sanity switch (pl_tex_params_dimension(tex->params)) { case 3: require(rc.z1 > rc.z0); require(rc.z0 >= 0 && rc.z0 < tex->params.d); require(rc.z1 > 0 && rc.z1 <= tex->params.d); require(params->depth_pitch >= pl_rect_h(rc) * params->row_pitch); require(params->depth_pitch % params->row_pitch == 0); // fall through case 2: require(rc.y1 > rc.y0); require(rc.y0 >= 0 && rc.y0 < tex->params.h); require(rc.y1 > 0 && rc.y1 <= tex->params.h); require(params->row_pitch >= pl_rect_w(rc) * fmt->texel_size); require(params->row_pitch % fmt->texel_align == 0); // fall through case 1: require(rc.x1 > rc.x0); require(rc.x0 >= 0 && rc.x0 < tex->params.w); require(rc.x1 > 0 && rc.x1 <= tex->params.w); break; } require(!params->buf ^ !params->ptr); // exactly one if (params->buf) { pl_buf buf = params->buf; size_t size = pl_tex_transfer_size(params); require(params->buf_offset + size >= params->buf_offset); // overflow check require(params->buf_offset + size <= buf->params.size); require(gpu->limits.buf_transfer); } require(!params->callback || gpu->limits.callbacks); return true; error: if (tex->params.debug_tag) PL_ERR(gpu, " for texture: %s", tex->params.debug_tag); return false; } bool pl_tex_upload(pl_gpu gpu, const struct pl_tex_transfer_params *params) { pl_tex tex = params->tex; require(tex->params.host_writable); struct pl_tex_transfer_params fixed = *params; if (!fix_tex_transfer(gpu, &fixed)) goto error; const struct pl_gpu_fns *impl = PL_PRIV(gpu); return impl->tex_upload(gpu, &fixed); error: if (tex->params.debug_tag) PL_ERR(gpu, " for texture: %s", tex->params.debug_tag); return false; } bool pl_tex_download(pl_gpu gpu, const struct pl_tex_transfer_params *params) { pl_tex tex = params->tex; require(tex->params.host_readable); struct pl_tex_transfer_params fixed = *params; if (!fix_tex_transfer(gpu, &fixed)) goto error; const struct pl_gpu_fns *impl = PL_PRIV(gpu); return impl->tex_download(gpu, &fixed); error: if (tex->params.debug_tag) PL_ERR(gpu, " for texture: %s", tex->params.debug_tag); return false; } bool pl_tex_poll(pl_gpu gpu, pl_tex tex, uint64_t t) { const struct pl_gpu_fns *impl = PL_PRIV(gpu); return impl->tex_poll ? 
impl->tex_poll(gpu, tex, t) : false; } pl_buf pl_buf_create(pl_gpu gpu, const struct pl_buf_params *params) { struct pl_buf_params params_rounded; require(!params->import_handle || !params->export_handle); if (params->export_handle) { require(PL_ISPOT(params->export_handle)); require(params->export_handle & gpu->export_caps.buf); } if (params->import_handle) { require(PL_ISPOT(params->import_handle)); require(params->import_handle & gpu->import_caps.buf); const struct pl_shared_mem *shmem = ¶ms->shared_mem; require(shmem->offset + params->size <= shmem->size); require(params->import_handle != PL_HANDLE_DMA_BUF || !shmem->drm_format_mod); // Fix misalignment on host pointer imports if (params->import_handle == PL_HANDLE_HOST_PTR) { uintptr_t page_mask = ~(gpu->limits.align_host_ptr - 1); uintptr_t ptr_base = (uintptr_t) shmem->handle.ptr & page_mask; size_t ptr_offset = (uintptr_t) shmem->handle.ptr - ptr_base; size_t buf_offset = ptr_offset + shmem->offset; size_t ptr_size = PL_ALIGN2(ptr_offset + shmem->size, gpu->limits.align_host_ptr); if (ptr_base != (uintptr_t) shmem->handle.ptr || ptr_size > shmem->size) { static bool warned_rounding = false; if (!warned_rounding) { warned_rounding = true; PL_WARN(gpu, "Imported host pointer is not page-aligned. " "This should normally be fine on most platforms, " "but may cause issues in some rare circumstances."); } PL_TRACE(gpu, "Rounding imported host pointer %p + %zu -> %zu to " "nearest page boundaries: %p + %zu -> %zu", shmem->handle.ptr, shmem->offset, shmem->size, (void *) ptr_base, buf_offset, ptr_size); } params_rounded = *params; params_rounded.shared_mem.handle.ptr = (void *) ptr_base; params_rounded.shared_mem.offset = buf_offset; params_rounded.shared_mem.size = ptr_size; params = ¶ms_rounded; } } require(params->size > 0 && params->size <= gpu->limits.max_buf_size); require(!params->uniform || params->size <= gpu->limits.max_ubo_size); require(!params->storable || params->size <= gpu->limits.max_ssbo_size); require(!params->drawable || params->size <= gpu->limits.max_vbo_size); if (params->host_mapped) { require(params->size <= gpu->limits.max_mapped_size); require(params->memory_type != PL_BUF_MEM_DEVICE || params->size <= gpu->limits.max_mapped_vram); } if (params->format) { pl_fmt fmt = params->format; require(params->size <= gpu->limits.max_buffer_texels * fmt->texel_size); require(!params->uniform || (fmt->caps & PL_FMT_CAP_TEXEL_UNIFORM)); require(!params->storable || (fmt->caps & PL_FMT_CAP_TEXEL_STORAGE)); } const struct pl_gpu_fns *impl = PL_PRIV(gpu); pl_buf buf = impl->buf_create(gpu, params); if (buf) require(!params->host_mapped || buf->data); return buf; error: if (params->debug_tag) PL_ERR(gpu, " for buffer: %s", params->debug_tag); return NULL; } void pl_buf_destroy(pl_gpu gpu, pl_buf *buf) { if (!*buf) return; const struct pl_gpu_fns *impl = PL_PRIV(gpu); impl->buf_destroy(gpu, *buf); *buf = NULL; } static bool pl_buf_params_superset(struct pl_buf_params a, struct pl_buf_params b) { return a.size >= b.size && a.memory_type == b.memory_type && a.format == b.format && (a.host_writable || !b.host_writable) && (a.host_readable || !b.host_readable) && (a.host_mapped || !b.host_mapped) && (a.uniform || !b.uniform) && (a.storable || !b.storable) && (a.drawable || !b.drawable); } bool pl_buf_recreate(pl_gpu gpu, pl_buf *buf, const struct pl_buf_params *params) { if (params->initial_data) { PL_ERR(gpu, "pl_buf_recreate may not be used with `initial_data`!"); return false; } if (*buf && pl_buf_params_superset((*buf)->params, 
*params)) return true; PL_INFO(gpu, "(Re)creating %zu buffer", params->size); pl_buf_destroy(gpu, buf); *buf = pl_buf_create(gpu, params); return !!*buf; } void pl_buf_write(pl_gpu gpu, pl_buf buf, size_t buf_offset, const void *data, size_t size) { require(buf->params.host_writable); require(buf_offset + size <= buf->params.size); require(buf_offset == PL_ALIGN2(buf_offset, 4)); const struct pl_gpu_fns *impl = PL_PRIV(gpu); impl->buf_write(gpu, buf, buf_offset, data, size); return; error: if (buf->params.debug_tag) PL_ERR(gpu, " for buffer: %s", buf->params.debug_tag); } bool pl_buf_read(pl_gpu gpu, pl_buf buf, size_t buf_offset, void *dest, size_t size) { require(buf->params.host_readable); require(buf_offset + size <= buf->params.size); const struct pl_gpu_fns *impl = PL_PRIV(gpu); return impl->buf_read(gpu, buf, buf_offset, dest, size); error: if (buf->params.debug_tag) PL_ERR(gpu, " for buffer: %s", buf->params.debug_tag); return false; } void pl_buf_copy(pl_gpu gpu, pl_buf dst, size_t dst_offset, pl_buf src, size_t src_offset, size_t size) { require(src_offset + size <= src->params.size); require(dst_offset + size <= dst->params.size); require(src != dst); const struct pl_gpu_fns *impl = PL_PRIV(gpu); impl->buf_copy(gpu, dst, dst_offset, src, src_offset, size); return; error: if (src->params.debug_tag || dst->params.debug_tag) { PL_ERR(gpu, " for buffers: src %s, dst %s", src->params.debug_tag, dst->params.debug_tag); } } bool pl_buf_export(pl_gpu gpu, pl_buf buf) { require(buf->params.export_handle || buf->params.import_handle); const struct pl_gpu_fns *impl = PL_PRIV(gpu); return impl->buf_export(gpu, buf); error: if (buf->params.debug_tag) PL_ERR(gpu, " for buffer: %s", buf->params.debug_tag); return false; } bool pl_buf_poll(pl_gpu gpu, pl_buf buf, uint64_t t) { const struct pl_gpu_fns *impl = PL_PRIV(gpu); return impl->buf_poll ? 
impl->buf_poll(gpu, buf, t) : false; } size_t pl_var_type_size(enum pl_var_type type) { switch (type) { case PL_VAR_SINT: return sizeof(int); case PL_VAR_UINT: return sizeof(unsigned int); case PL_VAR_FLOAT: return sizeof(float); case PL_VAR_INVALID: // fall through case PL_VAR_TYPE_COUNT: break; } pl_unreachable(); } #define PL_VAR(TYPE, NAME, M, V) \ struct pl_var pl_var_##NAME(const char *name) { \ return (struct pl_var) { \ .name = name, \ .type = PL_VAR_##TYPE, \ .dim_m = M, \ .dim_v = V, \ .dim_a = 1, \ }; \ } PL_VAR(FLOAT, float, 1, 1) PL_VAR(FLOAT, vec2, 1, 2) PL_VAR(FLOAT, vec3, 1, 3) PL_VAR(FLOAT, vec4, 1, 4) PL_VAR(FLOAT, mat2, 2, 2) PL_VAR(FLOAT, mat2x3, 2, 3) PL_VAR(FLOAT, mat2x4, 2, 4) PL_VAR(FLOAT, mat3, 3, 3) PL_VAR(FLOAT, mat3x4, 3, 4) PL_VAR(FLOAT, mat4x2, 4, 2) PL_VAR(FLOAT, mat4x3, 4, 3) PL_VAR(FLOAT, mat4, 4, 4) PL_VAR(SINT, int, 1, 1) PL_VAR(SINT, ivec2, 1, 2) PL_VAR(SINT, ivec3, 1, 3) PL_VAR(SINT, ivec4, 1, 4) PL_VAR(UINT, uint, 1, 1) PL_VAR(UINT, uvec2, 1, 2) PL_VAR(UINT, uvec3, 1, 3) PL_VAR(UINT, uvec4, 1, 4) #undef PL_VAR const struct pl_named_var pl_var_glsl_types[] = { // float vectors { "float", { .type = PL_VAR_FLOAT, .dim_m = 1, .dim_v = 1, .dim_a = 1, }}, { "vec2", { .type = PL_VAR_FLOAT, .dim_m = 1, .dim_v = 2, .dim_a = 1, }}, { "vec3", { .type = PL_VAR_FLOAT, .dim_m = 1, .dim_v = 3, .dim_a = 1, }}, { "vec4", { .type = PL_VAR_FLOAT, .dim_m = 1, .dim_v = 4, .dim_a = 1, }}, // float matrices { "mat2", { .type = PL_VAR_FLOAT, .dim_m = 2, .dim_v = 2, .dim_a = 1, }}, { "mat2x3", { .type = PL_VAR_FLOAT, .dim_m = 2, .dim_v = 3, .dim_a = 1, }}, { "mat2x4", { .type = PL_VAR_FLOAT, .dim_m = 2, .dim_v = 4, .dim_a = 1, }}, { "mat3", { .type = PL_VAR_FLOAT, .dim_m = 3, .dim_v = 3, .dim_a = 1, }}, { "mat3x4", { .type = PL_VAR_FLOAT, .dim_m = 3, .dim_v = 4, .dim_a = 1, }}, { "mat4x2", { .type = PL_VAR_FLOAT, .dim_m = 4, .dim_v = 2, .dim_a = 1, }}, { "mat4x3", { .type = PL_VAR_FLOAT, .dim_m = 4, .dim_v = 3, .dim_a = 1, }}, { "mat4", { .type = PL_VAR_FLOAT, .dim_m = 4, .dim_v = 4, .dim_a = 1, }}, // integer vectors { "int", { .type = PL_VAR_SINT, .dim_m = 1, .dim_v = 1, .dim_a = 1, }}, { "ivec2", { .type = PL_VAR_SINT, .dim_m = 1, .dim_v = 2, .dim_a = 1, }}, { "ivec3", { .type = PL_VAR_SINT, .dim_m = 1, .dim_v = 3, .dim_a = 1, }}, { "ivec4", { .type = PL_VAR_SINT, .dim_m = 1, .dim_v = 4, .dim_a = 1, }}, // unsigned integer vectors { "uint", { .type = PL_VAR_UINT, .dim_m = 1, .dim_v = 1, .dim_a = 1, }}, { "uvec2", { .type = PL_VAR_UINT, .dim_m = 1, .dim_v = 2, .dim_a = 1, }}, { "uvec3", { .type = PL_VAR_UINT, .dim_m = 1, .dim_v = 3, .dim_a = 1, }}, { "uvec4", { .type = PL_VAR_UINT, .dim_m = 1, .dim_v = 4, .dim_a = 1, }}, {0}, }; #define MAX_DIM 4 const char *pl_var_glsl_type_name(struct pl_var var) { static const char *types[PL_VAR_TYPE_COUNT][MAX_DIM+1][MAX_DIM+1] = { // float vectors [PL_VAR_FLOAT][1][1] = "float", [PL_VAR_FLOAT][1][2] = "vec2", [PL_VAR_FLOAT][1][3] = "vec3", [PL_VAR_FLOAT][1][4] = "vec4", // float matrices [PL_VAR_FLOAT][2][2] = "mat2", [PL_VAR_FLOAT][2][3] = "mat2x3", [PL_VAR_FLOAT][2][4] = "mat2x4", [PL_VAR_FLOAT][3][2] = "mat3x2", [PL_VAR_FLOAT][3][3] = "mat3", [PL_VAR_FLOAT][3][4] = "mat3x4", [PL_VAR_FLOAT][4][2] = "mat4x2", [PL_VAR_FLOAT][4][3] = "mat4x3", [PL_VAR_FLOAT][4][4] = "mat4", // integer vectors [PL_VAR_SINT][1][1] = "int", [PL_VAR_SINT][1][2] = "ivec2", [PL_VAR_SINT][1][3] = "ivec3", [PL_VAR_SINT][1][4] = "ivec4", // unsigned integer vectors [PL_VAR_UINT][1][1] = "uint", [PL_VAR_UINT][1][2] = "uvec2", [PL_VAR_UINT][1][3] = "uvec3", 
[PL_VAR_UINT][1][4] = "uvec4", }; if (var.dim_v > MAX_DIM || var.dim_m > MAX_DIM) return NULL; return types[var.type][var.dim_m][var.dim_v]; } struct pl_var pl_var_from_fmt(pl_fmt fmt, const char *name) { static const enum pl_var_type vartypes[] = { [PL_FMT_FLOAT] = PL_VAR_FLOAT, [PL_FMT_UNORM] = PL_VAR_FLOAT, [PL_FMT_SNORM] = PL_VAR_FLOAT, [PL_FMT_UINT] = PL_VAR_UINT, [PL_FMT_SINT] = PL_VAR_SINT, }; pl_assert(fmt->type < PL_ARRAY_SIZE(vartypes)); return (struct pl_var) { .type = vartypes[fmt->type], .name = name, .dim_v = fmt->num_components, .dim_m = 1, .dim_a = 1, }; } struct pl_var_layout pl_var_host_layout(size_t offset, const struct pl_var *var) { size_t col_size = pl_var_type_size(var->type) * var->dim_v; return (struct pl_var_layout) { .offset = offset, .stride = col_size, .size = col_size * var->dim_m * var->dim_a, }; } struct pl_var_layout pl_std140_layout(size_t offset, const struct pl_var *var) { size_t el_size = pl_var_type_size(var->type); // std140 packing rules: // 1. The size of generic values is their size in bytes // 2. The size of vectors is the vector length * the base count // 3. Matrices are treated like arrays of column vectors // 4. The size of array rows is that of the element size rounded up to // the nearest multiple of vec4 // 5. All values are aligned to a multiple of their size (stride for arrays), // with the exception of vec3 which is aligned like vec4 size_t stride = el_size * var->dim_v; size_t align = stride; if (var->dim_v == 3) align += el_size; if (var->dim_m * var->dim_a > 1) stride = align = PL_ALIGN2(align, sizeof(float[4])); return (struct pl_var_layout) { .offset = PL_ALIGN2(offset, align), .stride = stride, .size = stride * var->dim_m * var->dim_a, }; } struct pl_var_layout pl_std430_layout(size_t offset, const struct pl_var *var) { size_t el_size = pl_var_type_size(var->type); // std430 packing rules: like std140, except arrays/matrices are always // "tightly" packed, even arrays/matrices of vec3s size_t stride = el_size * var->dim_v; size_t align = stride; if (var->dim_v == 3) align += el_size; if (var->dim_m * var->dim_a > 1) stride = align; return (struct pl_var_layout) { .offset = PL_ALIGN2(offset, align), .stride = stride, .size = stride * var->dim_m * var->dim_a, }; } void memcpy_layout(void *dst_p, struct pl_var_layout dst_layout, const void *src_p, struct pl_var_layout src_layout) { uintptr_t src = (uintptr_t) src_p + src_layout.offset; uintptr_t dst = (uintptr_t) dst_p + dst_layout.offset; if (src_layout.stride == dst_layout.stride) { pl_assert(dst_layout.size == src_layout.size); memcpy((void *) dst, (const void *) src, src_layout.size); return; } size_t stride = PL_MIN(src_layout.stride, dst_layout.stride); uintptr_t end = src + src_layout.size; while (src < end) { pl_assert(dst < dst + dst_layout.size); memcpy((void *) dst, (const void *) src, stride); src += src_layout.stride; dst += dst_layout.stride; } } int pl_desc_namespace(pl_gpu gpu, enum pl_desc_type type) { const struct pl_gpu_fns *impl = PL_PRIV(gpu); int ret = impl->desc_namespace(gpu, type); pl_assert(ret >= 0 && ret < PL_DESC_TYPE_COUNT); return ret; } const char *pl_desc_access_glsl_name(enum pl_desc_access mode) { switch (mode) { case PL_DESC_ACCESS_READWRITE: return ""; case PL_DESC_ACCESS_READONLY: return "readonly"; case PL_DESC_ACCESS_WRITEONLY: return "writeonly"; case PL_DESC_ACCESS_COUNT: break; } pl_unreachable(); } const struct pl_blend_params pl_alpha_overlay = { .src_rgb = PL_BLEND_SRC_ALPHA, .dst_rgb = PL_BLEND_ONE_MINUS_SRC_ALPHA, .src_alpha = 
PL_BLEND_ONE, .dst_alpha = PL_BLEND_ONE_MINUS_SRC_ALPHA, }; static inline void log_shader_sources(pl_log log, enum pl_log_level level, const struct pl_pass_params *params) { if (!pl_msg_test(log, level) || !params->glsl_shader) return; switch (params->type) { case PL_PASS_RASTER: if (!params->vertex_shader) return; pl_msg(log, level, "vertex shader source:"); pl_msg_source(log, level, params->vertex_shader); pl_msg(log, level, "fragment shader source:"); pl_msg_source(log, level, params->glsl_shader); return; case PL_PASS_COMPUTE: pl_msg(log, level, "compute shader source:"); pl_msg_source(log, level, params->glsl_shader); return; case PL_PASS_INVALID: case PL_PASS_TYPE_COUNT: break; } pl_unreachable(); } static void log_spec_constants(pl_log log, enum pl_log_level lev, const struct pl_pass_params *params, const void *constant_data) { if (!constant_data || !params->num_constants || !pl_msg_test(log, lev)) return; pl_msg(log, lev, "Specialization constant values:"); uintptr_t data_base = (uintptr_t) constant_data; for (int i = 0; i < params->num_constants; i++) { union { int i; unsigned u; float f; } *data = (void *) (data_base + params->constants[i].offset); int id = params->constants[i].id; switch (params->constants[i].type) { case PL_VAR_SINT: pl_msg(log, lev, " constant_id=%d: %d", id, data->i); break; case PL_VAR_UINT: pl_msg(log, lev, " constant_id=%d: %u", id, data->u); break; case PL_VAR_FLOAT: pl_msg(log, lev, " constant_id=%d: %f", id, data->f); break; default: pl_unreachable(); } } } pl_pass pl_pass_create(pl_gpu gpu, const struct pl_pass_params *params) { require(params->glsl_shader); switch(params->type) { case PL_PASS_RASTER: require(params->vertex_shader); require(params->vertex_stride % gpu->limits.align_vertex_stride == 0); for (int i = 0; i < params->num_vertex_attribs; i++) { struct pl_vertex_attrib va = params->vertex_attribs[i]; require(va.name); require(va.fmt); require(va.fmt->caps & PL_FMT_CAP_VERTEX); require(va.offset + va.fmt->texel_size <= params->vertex_stride); } require(params->target_format); require(params->target_format->caps & PL_FMT_CAP_RENDERABLE); require(!params->blend_params || params->target_format->caps & PL_FMT_CAP_BLENDABLE); require(!params->blend_params || params->load_target); break; case PL_PASS_COMPUTE: require(gpu->glsl.compute); break; case PL_PASS_INVALID: case PL_PASS_TYPE_COUNT: pl_unreachable(); } size_t num_var_comps = 0; for (int i = 0; i < params->num_variables; i++) { struct pl_var var = params->variables[i]; num_var_comps += var.dim_v * var.dim_m * var.dim_a; require(var.name); require(pl_var_glsl_type_name(var)); } require(num_var_comps <= gpu->limits.max_variable_comps); require(params->num_constants <= gpu->limits.max_constants); for (int i = 0; i < params->num_constants; i++) require(params->constants[i].type); for (int i = 0; i < params->num_descriptors; i++) { struct pl_desc desc = params->descriptors[i]; require(desc.name); // enforce disjoint descriptor bindings for each namespace int namespace = pl_desc_namespace(gpu, desc.type); for (int j = i+1; j < params->num_descriptors; j++) { struct pl_desc other = params->descriptors[j]; require(desc.binding != other.binding || namespace != pl_desc_namespace(gpu, other.type)); } } require(params->push_constants_size <= gpu->limits.max_pushc_size); require(params->push_constants_size == PL_ALIGN2(params->push_constants_size, 4)); log_shader_sources(gpu->log, PL_LOG_DEBUG, params); log_spec_constants(gpu->log, PL_LOG_DEBUG, params, params->constant_data); const struct pl_gpu_fns 
*impl = PL_PRIV(gpu); pl_pass pass = impl->pass_create(gpu, params); if (!pass) goto error; return pass; error: log_shader_sources(gpu->log, PL_LOG_ERR, params); pl_log_stack_trace(gpu->log, PL_LOG_ERR); pl_debug_abort(); return NULL; } void pl_pass_destroy(pl_gpu gpu, pl_pass *pass) { if (!*pass) return; const struct pl_gpu_fns *impl = PL_PRIV(gpu); impl->pass_destroy(gpu, *pass); *pass = NULL; } void pl_pass_run(pl_gpu gpu, const struct pl_pass_run_params *params) { pl_pass pass = params->pass; struct pl_pass_run_params new = *params; for (int i = 0; i < pass->params.num_descriptors; i++) { struct pl_desc desc = pass->params.descriptors[i]; struct pl_desc_binding db = params->desc_bindings[i]; require(db.object); switch (desc.type) { case PL_DESC_SAMPLED_TEX: { pl_tex tex = db.object; pl_fmt fmt = tex->params.format; require(tex->params.sampleable); require(db.sample_mode != PL_TEX_SAMPLE_LINEAR || (fmt->caps & PL_FMT_CAP_LINEAR)); break; } case PL_DESC_STORAGE_IMG: { pl_tex tex = db.object; pl_fmt fmt = tex->params.format; require(tex->params.storable); require(desc.access != PL_DESC_ACCESS_READWRITE || (fmt->caps & PL_FMT_CAP_READWRITE)); break; } case PL_DESC_BUF_UNIFORM: { pl_buf buf = db.object; require(buf->params.uniform); break; } case PL_DESC_BUF_STORAGE: { pl_buf buf = db.object; require(buf->params.storable); break; } case PL_DESC_BUF_TEXEL_UNIFORM: { pl_buf buf = db.object; require(buf->params.uniform && buf->params.format); break; } case PL_DESC_BUF_TEXEL_STORAGE: { pl_buf buf = db.object; pl_fmt fmt = buf->params.format; require(buf->params.storable && buf->params.format); require(desc.access != PL_DESC_ACCESS_READWRITE || (fmt->caps & PL_FMT_CAP_READWRITE)); break; } case PL_DESC_INVALID: case PL_DESC_TYPE_COUNT: pl_unreachable(); } } for (int i = 0; i < params->num_var_updates; i++) { struct pl_var_update vu = params->var_updates[i]; require(vu.index >= 0 && vu.index < pass->params.num_variables); require(vu.data); } require(params->push_constants || !pass->params.push_constants_size); switch (pass->params.type) { case PL_PASS_RASTER: { switch (pass->params.vertex_type) { case PL_PRIM_TRIANGLE_LIST: require(params->vertex_count % 3 == 0); // fall through case PL_PRIM_TRIANGLE_STRIP: require(params->vertex_count >= 3); break; case PL_PRIM_TYPE_COUNT: pl_unreachable(); } require(!params->vertex_data ^ !params->vertex_buf); if (params->vertex_buf) { pl_buf vertex_buf = params->vertex_buf; require(vertex_buf->params.drawable); if (!params->index_data && !params->index_buf) { // Cannot bounds check indexed draws size_t vert_size = params->vertex_count * pass->params.vertex_stride; require(params->buf_offset + vert_size <= vertex_buf->params.size); } } require(!params->index_data || !params->index_buf); if (params->index_buf) { pl_buf index_buf = params->index_buf; require(!params->vertex_data); require(index_buf->params.drawable); size_t index_size = pl_index_buf_size(params); require(params->index_offset + index_size <= index_buf->params.size); } pl_tex target = params->target; require(target); require(pl_tex_params_dimension(target->params) == 2); require(target->params.format->signature == pass->params.target_format->signature); require(target->params.renderable); pl_rect2d *vp = &new.viewport; pl_rect2d *sc = &new.scissors; // Sanitize viewport/scissors if (!vp->x0 && !vp->x1) vp->x1 = target->params.w; if (!vp->y0 && !vp->y1) vp->y1 = target->params.h; if (!sc->x0 && !sc->x1) sc->x1 = target->params.w; if (!sc->y0 && !sc->y1) sc->y1 = target->params.h; // Constrain the 
scissors to the target dimension (to sanitize the // underlying graphics API calls) sc->x0 = PL_CLAMP(sc->x0, 0, target->params.w); sc->y0 = PL_CLAMP(sc->y0, 0, target->params.h); sc->x1 = PL_CLAMP(sc->x1, 0, target->params.w); sc->y1 = PL_CLAMP(sc->y1, 0, target->params.h); // Scissors wholly outside target -> silently drop pass (also needed // to ensure we don't cause UB by specifying invalid scissors) if (!pl_rect_w(*sc) || !pl_rect_h(*sc)) return; require(pl_rect_w(*vp) > 0); require(pl_rect_h(*vp) > 0); require(pl_rect_w(*sc) > 0); require(pl_rect_h(*sc) > 0); if (!pass->params.load_target) pl_tex_invalidate(gpu, target); break; } case PL_PASS_COMPUTE: for (int i = 0; i < PL_ARRAY_SIZE(params->compute_groups); i++) { require(params->compute_groups[i] >= 0); require(params->compute_groups[i] <= gpu->limits.max_dispatch[i]); } break; case PL_PASS_INVALID: case PL_PASS_TYPE_COUNT: pl_unreachable(); } const struct pl_gpu_fns *impl = PL_PRIV(gpu); impl->pass_run(gpu, &new); error: return; } void pl_gpu_flush(pl_gpu gpu) { const struct pl_gpu_fns *impl = PL_PRIV(gpu); if (impl->gpu_flush) impl->gpu_flush(gpu); } void pl_gpu_finish(pl_gpu gpu) { const struct pl_gpu_fns *impl = PL_PRIV(gpu); impl->gpu_finish(gpu); } bool pl_gpu_is_failed(pl_gpu gpu) { const struct pl_gpu_fns *impl = PL_PRIV(gpu); if (!impl->gpu_is_failed) return false; return impl->gpu_is_failed(gpu); } pl_timer pl_timer_create(pl_gpu gpu) { const struct pl_gpu_fns *impl = PL_PRIV(gpu); if (!impl->timer_create) return NULL; return impl->timer_create(gpu); } void pl_timer_destroy(pl_gpu gpu, pl_timer *timer) { if (!*timer) return; const struct pl_gpu_fns *impl = PL_PRIV(gpu); impl->timer_destroy(gpu, *timer); *timer = NULL; } uint64_t pl_timer_query(pl_gpu gpu, pl_timer timer) { if (!timer) return 0; const struct pl_gpu_fns *impl = PL_PRIV(gpu); return impl->timer_query(gpu, timer); } libplacebo-v7.349.0/src/gpu.h000066400000000000000000000174511463457750100157370ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #pragma once #include "common.h" #include "log.h" #include #include // To avoid having to include drm_fourcc.h #ifndef DRM_FORMAT_MOD_LINEAR #define DRM_FORMAT_MOD_LINEAR UINT64_C(0x0) #define DRM_FORMAT_MOD_INVALID ((UINT64_C(1) << 56) - 1) #endif // This struct must be the first member of the gpu's priv struct. The `pl_gpu` // helpers will cast the priv struct to this struct! #define GPU_PFN(name) __typeof__(pl_##name) *name struct pl_gpu_fns { // This is a pl_dispatch used (on the pl_gpu itself!) for the purposes of // dispatching compute shaders for performing various emulation tasks (e.g. // partial clears, blits or emulated texture transfers, see below). // // Warning: Care must be taken to avoid recursive calls. pl_dispatch dp; // Internal cache, or NULL. Set by the user (via pl_gpu_set_cache). 
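    // (Stored atomically so that pl_gpu_set_cache and pl_gpu_cache remain
    // safe to call from different threads; see the atomic_load/atomic_store
    // pair in gpu.c.)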
_Atomic(pl_cache) cache; // Destructors: These also free the corresponding objects, but they // must not be called on NULL. (The NULL checks are done by the pl_*_destroy // wrappers) void (*destroy)(pl_gpu gpu); void (*tex_destroy)(pl_gpu, pl_tex); void (*buf_destroy)(pl_gpu, pl_buf); void (*pass_destroy)(pl_gpu, pl_pass); void (*timer_destroy)(pl_gpu, pl_timer); GPU_PFN(tex_create); GPU_PFN(tex_invalidate); // optional GPU_PFN(tex_clear_ex); // optional if no blittable formats GPU_PFN(tex_blit); // optional if no blittable formats GPU_PFN(tex_upload); GPU_PFN(tex_download); GPU_PFN(tex_poll); // optional: if NULL, textures are always free to use GPU_PFN(buf_create); GPU_PFN(buf_write); GPU_PFN(buf_read); GPU_PFN(buf_copy); GPU_PFN(buf_export); // optional if !gpu->export_caps.buf GPU_PFN(buf_poll); // optional: if NULL, buffers are always free to use GPU_PFN(desc_namespace); GPU_PFN(pass_create); GPU_PFN(pass_run); GPU_PFN(timer_create); // optional GPU_PFN(timer_query); // optional GPU_PFN(gpu_flush); // optional GPU_PFN(gpu_finish); GPU_PFN(gpu_is_failed); // optional }; #undef GPU_PFN // All resources such as textures and buffers allocated from the GPU must be // destroyed before calling pl_destroy. void pl_gpu_destroy(pl_gpu gpu); // Returns true if the device supports interop. This is considered to be // the case if at least one of `gpu->export/import_caps` is nonzero. static inline bool pl_gpu_supports_interop(pl_gpu gpu) { return gpu->export_caps.tex || gpu->import_caps.tex || gpu->export_caps.buf || gpu->import_caps.buf || gpu->export_caps.sync || gpu->import_caps.sync; } // Returns the GPU-internal `pl_dispatch` and `pl_cache` objects. pl_dispatch pl_gpu_dispatch(pl_gpu gpu); pl_cache pl_gpu_cache(pl_gpu gpu); // GPU-internal helpers: these should not be used outside of GPU implementations // This performs several tasks. It sorts the format list, logs GPU metadata, // performs verification and fixes up backwards compatibility fields. This // should be returned as the last step when creating a `pl_gpu`. pl_gpu pl_gpu_finalize(struct pl_gpu_t *gpu); // Look up the right GLSL image format qualifier from a partially filled-in // pl_fmt, or NULL if the format does not have a legal matching GLSL name. // // `components` may differ from fmt->num_components (for emulated formats) const char *pl_fmt_glsl_format(pl_fmt fmt, int components); // Look up the right fourcc from a partially filled-in pl_fmt, or 0 if the // format does not have a legal matching fourcc format. uint32_t pl_fmt_fourcc(pl_fmt fmt); // Compute the total size (in bytes) of a texture transfer operation size_t pl_tex_transfer_size(const struct pl_tex_transfer_params *par); // Split a tex transfer into slices. For emulated formats, `texel_fmt` gives // the format of the underlying texel buffer. // // Returns the number of slices, or 0 on error (e.g. no SSBOs available). // `out_slices` must be freed by caller (on success). int pl_tex_transfer_slices(pl_gpu gpu, pl_fmt texel_fmt, const struct pl_tex_transfer_params *params, struct pl_tex_transfer_params **out_slices); // Helper that wraps pl_tex_upload/download using texture upload buffers to // ensure that params->buf is always set. 
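// If params->buf is already set, these forward directly to
// pl_tex_upload/pl_tex_download; otherwise the host pointer is staged
// through an internal transfer buffer (using host pointer import where
// available to avoid an extra memcpy).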
bool pl_tex_upload_pbo(pl_gpu gpu, const struct pl_tex_transfer_params *params); bool pl_tex_download_pbo(pl_gpu gpu, const struct pl_tex_transfer_params *params); // This requires that params.buf has been set and is of type PL_BUF_TEXEL_* bool pl_tex_upload_texel(pl_gpu gpu, const struct pl_tex_transfer_params *params); bool pl_tex_download_texel(pl_gpu gpu, const struct pl_tex_transfer_params *params); // Both `src` and `dst must be storable. `src` must also be sampleable, if the // blit requires linear sampling. Returns false if these conditions are unmet. bool pl_tex_blit_compute(pl_gpu gpu, const struct pl_tex_blit_params *params); // Helper to do a 2D blit with stretch and scale using a raster pass void pl_tex_blit_raster(pl_gpu gpu, const struct pl_tex_blit_params *params); // Helper for GPU-accelerated endian swapping // // Note: `src` and `dst` can be the same buffer, for an in-place operation. In // this case, `src_offset` and `dst_offset` must be the same. struct pl_buf_copy_swap_params { // Source of the copy operation. Must be `storable`. pl_buf src; size_t src_offset; // Destination of the copy operation. Must be `storable`. pl_buf dst; size_t dst_offset; // Number of bytes to copy. Must be a multiple of 4. size_t size; // Underlying word size. Must be 2 (for 16-bit swap) or 4 (for 32-bit swap) int wordsize; }; bool pl_buf_copy_swap(pl_gpu gpu, const struct pl_buf_copy_swap_params *params); void pl_pass_run_vbo(pl_gpu gpu, const struct pl_pass_run_params *params); // Make a deep-copy of the pass params. Note: cached_program etc. are not // copied, but cleared explicitly. struct pl_pass_params pl_pass_params_copy(void *alloc, const struct pl_pass_params *params); // Helper to compute the size of an index buffer static inline size_t pl_index_buf_size(const struct pl_pass_run_params *params) { switch (params->index_fmt) { case PL_INDEX_UINT16: return params->vertex_count * sizeof(uint16_t); case PL_INDEX_UINT32: return params->vertex_count * sizeof(uint32_t); case PL_INDEX_FORMAT_COUNT: break; } pl_unreachable(); } // Helper to compute the size of a vertex buffer required to fit all indices size_t pl_vertex_buf_size(const struct pl_pass_run_params *params); // Utility function for pretty-printing UUIDs #define UUID_SIZE 16 #define PRINT_UUID(uuid) (print_uuid((char[3 * UUID_SIZE]){0}, (uuid))) const char *print_uuid(char buf[3 * UUID_SIZE], const uint8_t uuid[UUID_SIZE]); // Helper to pretty-print fourcc codes #define PRINT_FOURCC(fcc) \ (!(fcc) ? "" : (char[5]) { \ (fcc) & 0xFF, \ ((fcc) >> 8) & 0xFF, \ ((fcc) >> 16) & 0xFF, \ ((fcc) >> 24) & 0xFF \ }) #define DRM_MOD_SIZE 26 #define PRINT_DRM_MOD(mod) (print_drm_mod((char[DRM_MOD_SIZE]){0}, (mod))) const char *print_drm_mod(char buf[DRM_MOD_SIZE], uint64_t mod); libplacebo-v7.349.0/src/gpu/000077500000000000000000000000001463457750100155565ustar00rootroot00000000000000libplacebo-v7.349.0/src/gpu/utils.c000066400000000000000000001275101463457750100170700ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. 
* * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #include #include "common.h" #include "shaders.h" #include "gpu.h" // GPU-internal helpers static int cmp_fmt(const void *pa, const void *pb) { pl_fmt a = *(pl_fmt *)pa; pl_fmt b = *(pl_fmt *)pb; // Always prefer non-opaque formats if (a->opaque != b->opaque) return PL_CMP(a->opaque, b->opaque); // Always prefer non-emulated formats if (a->emulated != b->emulated) return PL_CMP(a->emulated, b->emulated); // Prefer formats with many optional rendering capabilities const enum pl_fmt_caps caps_whitelist = PL_FMT_CAP_SAMPLEABLE | PL_FMT_CAP_STORABLE | PL_FMT_CAP_LINEAR | PL_FMT_CAP_RENDERABLE | PL_FMT_CAP_BLENDABLE | PL_FMT_CAP_BLITTABLE; enum pl_fmt_caps a_caps = a->caps & caps_whitelist, b_caps = b->caps & caps_whitelist; int ca = __builtin_popcount(a_caps), cb = __builtin_popcount(b_caps); if (ca != cb) return -PL_CMP(ca, cb); // invert to sort higher values first // If the population count is the same but the caps are different, prefer // the caps with a "lower" value (which tend to be more fundamental caps) if (a_caps != b_caps) return PL_CMP(a_caps, b_caps); // If the capabilities are equal, sort based on the component attributes for (int i = 0; i < PL_ARRAY_SIZE(a->component_depth); i++) { int da = a->component_depth[i], db = b->component_depth[i]; if (da != db) return PL_CMP(da, db); int ha = a->host_bits[i], hb = b->host_bits[i]; if (ha != hb) return PL_CMP(ha, hb); int oa = a->sample_order[i], ob = b->sample_order[i]; if (oa != ob) return PL_CMP(oa, ob); } // Fall back to sorting by the name (for stability) return strcmp(a->name, b->name); } #define FMT_BOOL(letter, cap) ((cap) ? (letter) : '-') #define FMT_IDX4(f) (f)[0], (f)[1], (f)[2], (f)[3] static void print_formats(pl_gpu gpu) { if (!pl_msg_test(gpu->log, PL_LOG_DEBUG)) return; #define CAP_HEADER "%-12s" #define CAP_FIELDS "%c%c%c%c%c%c%c%c%c%c%c%c" #define CAP_VALUES \ FMT_BOOL('S', fmt->caps & PL_FMT_CAP_SAMPLEABLE), \ FMT_BOOL('s', fmt->caps & PL_FMT_CAP_STORABLE), \ FMT_BOOL('L', fmt->caps & PL_FMT_CAP_LINEAR), \ FMT_BOOL('R', fmt->caps & PL_FMT_CAP_RENDERABLE), \ FMT_BOOL('b', fmt->caps & PL_FMT_CAP_BLENDABLE), \ FMT_BOOL('B', fmt->caps & PL_FMT_CAP_BLITTABLE), \ FMT_BOOL('V', fmt->caps & PL_FMT_CAP_VERTEX), \ FMT_BOOL('u', fmt->caps & PL_FMT_CAP_TEXEL_UNIFORM), \ FMT_BOOL('t', fmt->caps & PL_FMT_CAP_TEXEL_STORAGE), \ FMT_BOOL('H', fmt->caps & PL_FMT_CAP_HOST_READABLE), \ FMT_BOOL('W', fmt->caps & PL_FMT_CAP_READWRITE), \ FMT_BOOL('G', fmt->gatherable) PL_DEBUG(gpu, "GPU texture formats:"); PL_DEBUG(gpu, " %-20s %-6s %-4s %-4s " CAP_HEADER " %-3s %-13s %-13s %-10s %-10s %-6s", "NAME", "TYPE", "SIZE", "COMP", "CAPS", "EMU", "DEPTH", "HOST_BITS", "GLSL_TYPE", "GLSL_FMT", "FOURCC"); for (int n = 0; n < gpu->num_formats; n++) { pl_fmt fmt = gpu->formats[n]; static const char *types[] = { [PL_FMT_UNKNOWN] = "UNKNOWN", [PL_FMT_UNORM] = "UNORM", [PL_FMT_SNORM] = "SNORM", [PL_FMT_UINT] = "UINT", [PL_FMT_SINT] = "SINT", [PL_FMT_FLOAT] = "FLOAT", }; static const char idx_map[4] = {'R', 'G', 'B', 'A'}; char indices[4] = {' ', ' ', ' ', ' '}; if (!fmt->opaque) { for (int i = 0; i < fmt->num_components; i++) indices[i] = idx_map[fmt->sample_order[i]]; } PL_DEBUG(gpu, " %-20s %-6s %-4zu %c%c%c%c " CAP_FIELDS " %-3s " "{%-2d %-2d %-2d %-2d} {%-2d %-2d %-2d %-2d} %-10s %-10s %-6s", fmt->name, types[fmt->type], fmt->texel_size, FMT_IDX4(indices), CAP_VALUES, fmt->emulated ? 
"y" : "n", FMT_IDX4(fmt->component_depth), FMT_IDX4(fmt->host_bits), PL_DEF(fmt->glsl_type, ""), PL_DEF(fmt->glsl_format, ""), PRINT_FOURCC(fmt->fourcc)); #undef CAP_HEADER #undef CAP_FIELDS #undef CAP_VALUES for (int i = 0; i < fmt->num_modifiers; i++) { PL_TRACE(gpu, " modifiers[%d]: %s", i, PRINT_DRM_MOD(fmt->modifiers[i])); } } } pl_gpu pl_gpu_finalize(struct pl_gpu_t *gpu) { // Sort formats qsort(gpu->formats, gpu->num_formats, sizeof(pl_fmt), cmp_fmt); // Verification pl_assert(gpu->limits.max_tex_2d_dim); pl_assert(gpu->limits.max_variable_comps || gpu->limits.max_ubo_size); pl_assert(gpu->limits.max_ubo_size <= gpu->limits.max_buf_size); pl_assert(gpu->limits.max_ssbo_size <= gpu->limits.max_buf_size); pl_assert(gpu->limits.max_vbo_size <= gpu->limits.max_buf_size); pl_assert(gpu->limits.max_mapped_size <= gpu->limits.max_buf_size); pl_assert(gpu->limits.max_mapped_vram <= gpu->limits.max_mapped_size); for (int n = 0; n < gpu->num_formats; n++) { pl_fmt fmt = gpu->formats[n]; pl_assert(fmt->name); pl_assert(fmt->type); pl_assert(fmt->num_components); pl_assert(fmt->internal_size); pl_assert(fmt->opaque ? !fmt->texel_size : fmt->texel_size); pl_assert(!fmt->gatherable || (fmt->caps & PL_FMT_CAP_SAMPLEABLE)); for (int i = 0; i < fmt->num_components; i++) { pl_assert(fmt->component_depth[i]); pl_assert(fmt->opaque ? !fmt->host_bits[i] : fmt->host_bits[i]); } for (int i = 0; i < fmt->num_planes; i++) pl_assert(fmt->planes[i].format); enum pl_fmt_caps texel_caps = PL_FMT_CAP_VERTEX | PL_FMT_CAP_TEXEL_UNIFORM | PL_FMT_CAP_TEXEL_STORAGE; if (fmt->caps & texel_caps) { pl_assert(fmt->glsl_type); pl_assert(!fmt->opaque); } if (!fmt->opaque) { pl_assert(fmt->texel_size && fmt->texel_align); pl_assert((fmt->texel_size % fmt->texel_align) == 0); pl_assert(fmt->internal_size == fmt->texel_size || fmt->emulated); } else { pl_assert(!fmt->texel_size && !fmt->texel_align); pl_assert(!(fmt->caps & PL_FMT_CAP_HOST_READABLE)); } // Assert uniqueness of name for (int o = n + 1; o < gpu->num_formats; o++) pl_assert(strcmp(fmt->name, gpu->formats[o]->name) != 0); } // Print info PL_INFO(gpu, "GPU information:"); #define LOG(fmt, field) \ PL_INFO(gpu, " %-26s %" fmt, #field ":", gpu->LOG_STRUCT.field) #define LOG_STRUCT glsl PL_INFO(gpu, " GLSL version: %d%s", gpu->glsl.version, gpu->glsl.vulkan ? " (vulkan)" : gpu->glsl.gles ? 
" es" : ""); if (gpu->glsl.compute) { LOG("zu", max_shmem_size); LOG(PRIu32, max_group_threads); LOG(PRIu32, max_group_size[0]); LOG(PRIu32, max_group_size[1]); LOG(PRIu32, max_group_size[2]); } LOG(PRIu32, subgroup_size); LOG(PRIi16, min_gather_offset); LOG(PRIi16, max_gather_offset); #undef LOG_STRUCT #define LOG_STRUCT limits PL_INFO(gpu, " Limits:"); // pl_gpu LOG("d", thread_safe); LOG("d", callbacks); // pl_buf LOG("zu", max_buf_size); LOG("zu", max_ubo_size); LOG("zu", max_ssbo_size); LOG("zu", max_vbo_size); LOG("zu", max_mapped_size); LOG(PRIu64, max_buffer_texels); LOG("zu", align_host_ptr); LOG("d", host_cached); // pl_tex LOG(PRIu32, max_tex_1d_dim); LOG(PRIu32, max_tex_2d_dim); LOG(PRIu32, max_tex_3d_dim); LOG("d", blittable_1d_3d); LOG("d", buf_transfer); LOG("zu", align_tex_xfer_pitch); LOG("zu", align_tex_xfer_offset); // pl_pass LOG("zu", max_variable_comps); LOG("zu", max_constants); LOG("zu", max_pushc_size); LOG("zu", align_vertex_stride); if (gpu->glsl.compute) { LOG(PRIu32, max_dispatch[0]); LOG(PRIu32, max_dispatch[1]); LOG(PRIu32, max_dispatch[2]); } LOG(PRIu32, fragment_queues); LOG(PRIu32, compute_queues); #undef LOG_STRUCT #undef LOG if (pl_gpu_supports_interop(gpu)) { PL_INFO(gpu, " External API interop:"); PL_INFO(gpu, " UUID: %s", PRINT_UUID(gpu->uuid)); PL_INFO(gpu, " PCI: %04x:%02x:%02x:%x", gpu->pci.domain, gpu->pci.bus, gpu->pci.device, gpu->pci.function); PL_INFO(gpu, " buf export caps: 0x%x", (unsigned int) gpu->export_caps.buf); PL_INFO(gpu, " buf import caps: 0x%x", (unsigned int) gpu->import_caps.buf); PL_INFO(gpu, " tex export caps: 0x%x", (unsigned int) gpu->export_caps.tex); PL_INFO(gpu, " tex import caps: 0x%x", (unsigned int) gpu->import_caps.tex); PL_INFO(gpu, " sync export caps: 0x%x", (unsigned int) gpu->export_caps.sync); PL_INFO(gpu, " sync import caps: 0x%x", (unsigned int) gpu->import_caps.sync); } print_formats(gpu); // Finally, create a `pl_dispatch` object for internal operations struct pl_gpu_fns *impl = PL_PRIV(gpu); atomic_init(&impl->cache, NULL); impl->dp = pl_dispatch_create(gpu->log, gpu); return gpu; } struct glsl_fmt { enum pl_fmt_type type; int num_components; int depth[4]; const char *glsl_format; }; // List taken from the GLSL specification. 
(Yes, GLSL supports only exactly // these formats with exactly these names) static const struct glsl_fmt pl_glsl_fmts[] = { {PL_FMT_FLOAT, 1, {16}, "r16f"}, {PL_FMT_FLOAT, 1, {32}, "r32f"}, {PL_FMT_FLOAT, 2, {16, 16}, "rg16f"}, {PL_FMT_FLOAT, 2, {32, 32}, "rg32f"}, {PL_FMT_FLOAT, 4, {16, 16, 16, 16}, "rgba16f"}, {PL_FMT_FLOAT, 4, {32, 32, 32, 32}, "rgba32f"}, {PL_FMT_FLOAT, 3, {11, 11, 10}, "r11f_g11f_b10f"}, {PL_FMT_UNORM, 1, {8}, "r8"}, {PL_FMT_UNORM, 1, {16}, "r16"}, {PL_FMT_UNORM, 2, {8, 8}, "rg8"}, {PL_FMT_UNORM, 2, {16, 16}, "rg16"}, {PL_FMT_UNORM, 4, {8, 8, 8, 8}, "rgba8"}, {PL_FMT_UNORM, 4, {16, 16, 16, 16}, "rgba16"}, {PL_FMT_UNORM, 4, {10, 10, 10, 2}, "rgb10_a2"}, {PL_FMT_SNORM, 1, {8}, "r8_snorm"}, {PL_FMT_SNORM, 1, {16}, "r16_snorm"}, {PL_FMT_SNORM, 2, {8, 8}, "rg8_snorm"}, {PL_FMT_SNORM, 2, {16, 16}, "rg16_snorm"}, {PL_FMT_SNORM, 4, {8, 8, 8, 8}, "rgba8_snorm"}, {PL_FMT_SNORM, 4, {16, 16, 16, 16}, "rgba16_snorm"}, {PL_FMT_UINT, 1, {8}, "r8ui"}, {PL_FMT_UINT, 1, {16}, "r16ui"}, {PL_FMT_UINT, 1, {32}, "r32ui"}, {PL_FMT_UINT, 2, {8, 8}, "rg8ui"}, {PL_FMT_UINT, 2, {16, 16}, "rg16ui"}, {PL_FMT_UINT, 2, {32, 32}, "rg32ui"}, {PL_FMT_UINT, 4, {8, 8, 8, 8}, "rgba8ui"}, {PL_FMT_UINT, 4, {16, 16, 16, 16}, "rgba16ui"}, {PL_FMT_UINT, 4, {32, 32, 32, 32}, "rgba32ui"}, {PL_FMT_UINT, 4, {10, 10, 10, 2}, "rgb10_a2ui"}, {PL_FMT_SINT, 1, {8}, "r8i"}, {PL_FMT_SINT, 1, {16}, "r16i"}, {PL_FMT_SINT, 1, {32}, "r32i"}, {PL_FMT_SINT, 2, {8, 8}, "rg8i"}, {PL_FMT_SINT, 2, {16, 16}, "rg16i"}, {PL_FMT_SINT, 2, {32, 32}, "rg32i"}, {PL_FMT_SINT, 4, {8, 8, 8, 8}, "rgba8i"}, {PL_FMT_SINT, 4, {16, 16, 16, 16}, "rgba16i"}, {PL_FMT_SINT, 4, {32, 32, 32, 32}, "rgba32i"}, }; const char *pl_fmt_glsl_format(pl_fmt fmt, int components) { if (fmt->opaque) return NULL; for (int n = 0; n < PL_ARRAY_SIZE(pl_glsl_fmts); n++) { const struct glsl_fmt *gfmt = &pl_glsl_fmts[n]; if (fmt->type != gfmt->type) continue; if (components != gfmt->num_components) continue; // The component order is irrelevant, so we need to sort the depth // based on the component's index int depth[4] = {0}; for (int i = 0; i < fmt->num_components; i++) depth[fmt->sample_order[i]] = fmt->component_depth[i]; // Copy over any emulated components for (int i = fmt->num_components; i < components; i++) depth[i] = gfmt->depth[i]; for (int i = 0; i < PL_ARRAY_SIZE(depth); i++) { if (depth[i] != gfmt->depth[i]) goto next_fmt; } return gfmt->glsl_format; next_fmt: ; // equivalent to `continue` } return NULL; } #define FOURCC(a,b,c,d) ((uint32_t)(a) | ((uint32_t)(b) << 8) | \ ((uint32_t)(c) << 16) | ((uint32_t)(d) << 24)) struct pl_fmt_fourcc { const char *name; uint32_t fourcc; }; static const struct pl_fmt_fourcc pl_fmt_fourccs[] = { // 8 bpp red {"r8", FOURCC('R','8',' ',' ')}, // 16 bpp red {"r16", FOURCC('R','1','6',' ')}, // 16 bpp rg {"rg8", FOURCC('G','R','8','8')}, {"gr8", FOURCC('R','G','8','8')}, // 32 bpp rg {"rg16", FOURCC('G','R','3','2')}, {"gr16", FOURCC('R','G','3','2')}, // 8 bpp rgb: N/A // 16 bpp rgb {"argb4", FOURCC('B','A','1','2')}, {"abgr4", FOURCC('R','A','1','2')}, {"rgba4", FOURCC('A','B','1','2')}, {"bgra4", FOURCC('A','R','1','2')}, {"a1rgb5", FOURCC('B','A','1','5')}, {"a1bgr5", FOURCC('R','A','1','5')}, {"rgb5a1", FOURCC('A','B','1','5')}, {"bgr5a1", FOURCC('A','R','1','5')}, {"rgb565", FOURCC('B','G','1','6')}, {"bgr565", FOURCC('R','G','1','6')}, // 24 bpp rgb {"rgb8", FOURCC('B','G','2','4')}, {"bgr8", FOURCC('R','G','2','4')}, // 32 bpp rgb {"argb8", FOURCC('B','A','2','4')}, {"abgr8", FOURCC('R','A','2','4')}, {"rgba8", 
FOURCC('A','B','2','4')}, {"bgra8", FOURCC('A','R','2','4')}, {"a2rgb10", FOURCC('B','A','3','0')}, {"a2bgr10", FOURCC('R','A','3','0')}, {"rgb10a2", FOURCC('A','B','3','0')}, {"bgr10a2", FOURCC('A','R','3','0')}, // 64bpp rgb {"rgba16", FOURCC('A','B','4','8')}, {"bgra16", FOURCC('A','R','4','8')}, {"rgba16hf", FOURCC('A','B','4','H')}, {"bgra16hf", FOURCC('A','R','4','H')}, // packed 16-bit formats // rx10: N/A // rxgx10: N/A {"rxgxbxax10", FOURCC('A','B','1','0')}, // rx12: N/A // rxgx12: N/A // rxgxbxax12: N/A // planar formats {"g8_b8_r8_420", FOURCC('Y','U','1','2')}, {"g8_b8_r8_422", FOURCC('Y','U','1','6')}, {"g8_b8_r8_444", FOURCC('Y','U','2','4')}, // g16_b18_r8_*: N/A // gx10_bx10_rx10_42*: N/A {"gx10_bx10_rx10_444", FOURCC('Q','4','1','0')}, // gx12_bx12_rx12_*:N/A {"g8_br8_420", FOURCC('N','V','1','2')}, {"g8_br8_422", FOURCC('N','V','1','6')}, {"g8_br8_444", FOURCC('N','V','2','4')}, {"g16_br16_420", FOURCC('P','0','1','6')}, // g16_br16_422: N/A // g16_br16_444: N/A {"gx10_bxrx10_420", FOURCC('P','0','1','0')}, {"gx10_bxrx10_422", FOURCC('P','2','1','0')}, // gx10_bxrx10_444: N/A {"gx12_bxrx12_420", FOURCC('P','0','1','2')}, // gx12_bxrx12_422: N/A // gx12_bxrx12_444: N/A }; uint32_t pl_fmt_fourcc(pl_fmt fmt) { for (int n = 0; n < PL_ARRAY_SIZE(pl_fmt_fourccs); n++) { const struct pl_fmt_fourcc *fourcc = &pl_fmt_fourccs[n]; if (strcmp(fmt->name, fourcc->name) == 0) return fourcc->fourcc; } return 0; // no matching format } size_t pl_tex_transfer_size(const struct pl_tex_transfer_params *par) { int w = pl_rect_w(par->rc), h = pl_rect_h(par->rc), d = pl_rect_d(par->rc); size_t pixel_pitch = par->tex->params.format->texel_size; // This generates the absolute bare minimum size of a buffer required to // hold the data of a texture upload/download, by including stride padding // only where strictly necessary. return (d - 1) * par->depth_pitch + (h - 1) * par->row_pitch + w * pixel_pitch; } int pl_tex_transfer_slices(pl_gpu gpu, pl_fmt texel_fmt, const struct pl_tex_transfer_params *params, struct pl_tex_transfer_params **out_slices) { PL_ARRAY(struct pl_tex_transfer_params) slices = {0}; size_t max_size = params->buf ? 
gpu->limits.max_buf_size : SIZE_MAX; pl_fmt fmt = params->tex->params.format; if (fmt->emulated && texel_fmt) { size_t max_texel = gpu->limits.max_buffer_texels * texel_fmt->texel_size; max_size = PL_MIN(gpu->limits.max_ssbo_size, max_texel); } int slice_w = pl_rect_w(params->rc); int slice_h = pl_rect_h(params->rc); int slice_d = pl_rect_d(params->rc); slice_d = PL_MIN(slice_d, max_size / params->depth_pitch); if (!slice_d) { slice_d = 1; slice_h = PL_MIN(slice_h, max_size / params->row_pitch); if (!slice_h) { slice_h = 1; slice_w = PL_MIN(slice_w, max_size / fmt->texel_size); pl_assert(slice_w); } } for (int z = 0; z < pl_rect_d(params->rc); z += slice_d) { for (int y = 0; y < pl_rect_h(params->rc); y += slice_h) { for (int x = 0; x < pl_rect_w(params->rc); x += slice_w) { struct pl_tex_transfer_params slice = *params; slice.callback = NULL; slice.rc.x0 = params->rc.x0 + x; slice.rc.y0 = params->rc.y0 + y; slice.rc.z0 = params->rc.z0 + z; slice.rc.x1 = PL_MIN(slice.rc.x0 + slice_w, params->rc.x1); slice.rc.y1 = PL_MIN(slice.rc.y0 + slice_h, params->rc.y1); slice.rc.z1 = PL_MIN(slice.rc.z0 + slice_d, params->rc.z1); const size_t offset = z * params->depth_pitch + y * params->row_pitch + x * fmt->texel_size; if (slice.ptr) { slice.ptr = (uint8_t *) slice.ptr + offset; } else { slice.buf_offset += offset; } PL_ARRAY_APPEND(NULL, slices, slice); } } } *out_slices = slices.elem; return slices.num; } bool pl_tex_upload_pbo(pl_gpu gpu, const struct pl_tex_transfer_params *params) { if (params->buf) return pl_tex_upload(gpu, params); const size_t size = pl_tex_transfer_size(params); struct pl_tex_transfer_params fixed = *params; fixed.ptr = NULL; // If we can import host pointers directly, and the function is being used // asynchronously, then we can use host pointer import to skip a memcpy. In // the synchronous case, we still force a host memcpy to avoid stalling the // host until the GPU memcpy completes. bool can_import = gpu->import_caps.buf & PL_HANDLE_HOST_PTR; can_import &= !params->no_import; can_import &= params->callback != NULL; can_import &= size > (32 << 10); // 32 KiB if (can_import) { // Suppress errors for this test because it may fail, in which case we // want to silently fall back. pl_log_level_cap(gpu->log, PL_LOG_DEBUG); fixed.buf = pl_buf_create(gpu, pl_buf_params( .size = size, .import_handle = PL_HANDLE_HOST_PTR, .shared_mem = (struct pl_shared_mem) { .handle.ptr = params->ptr, .size = size, .offset = 0, }, )); pl_log_level_cap(gpu->log, PL_LOG_NONE); } if (!fixed.buf) { fixed.buf = pl_buf_create(gpu, pl_buf_params( .size = size, .host_writable = true, )); if (!fixed.buf) return false; pl_buf_write(gpu, fixed.buf, 0, params->ptr, size); if (params->callback) params->callback(params->priv); fixed.callback = NULL; } bool ok = pl_tex_upload(gpu, &fixed); pl_buf_destroy(gpu, &fixed.buf); return ok; } struct pbo_cb_ctx { pl_gpu gpu; pl_buf buf; void *ptr; void (*callback)(void *priv); void *priv; }; static void pbo_download_cb(void *priv) { struct pbo_cb_ctx *p = priv; pl_buf_read(p->gpu, p->buf, 0, p->ptr, p->buf->params.size); pl_buf_destroy(p->gpu, &p->buf); // Run the original callback p->callback(p->priv); pl_free(priv); }; bool pl_tex_download_pbo(pl_gpu gpu, const struct pl_tex_transfer_params *params) { if (params->buf) return pl_tex_download(gpu, params); const size_t size = pl_tex_transfer_size(params); pl_buf buf = NULL; // If we can import host pointers directly, we can avoid an extra memcpy // (sometimes). 
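// Illustrative sketch (hedged, not part of the original source): how a backend
// might split an oversized transfer with pl_tex_transfer_slices and upload
// each slice individually. Error handling is elided, and freeing the returned
// array with pl_free is an assumption about its ownership.
#if 0
static bool example_upload_sliced(pl_gpu gpu, pl_fmt texel_fmt,
                                  const struct pl_tex_transfer_params *params)
{
    struct pl_tex_transfer_params *slices = NULL;
    int num = pl_tex_transfer_slices(gpu, texel_fmt, params, &slices);
    bool ok = true;
    for (int i = 0; i < num; i++)
        ok &= pl_tex_upload(gpu, &slices[i]);
    pl_free(slices); // assumed: slices array is owned by the caller
    return ok;
}
#endif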
In the cases where it isn't avoidable, the extra memcpy // will happen inside VRAM, which is typically faster anyway. bool can_import = gpu->import_caps.buf & PL_HANDLE_HOST_PTR; can_import &= !params->no_import; can_import &= size > (32 << 10); // 32 KiB if (can_import) { // Suppress errors for this test because it may fail, in which case we // want to silently fall back. pl_log_level_cap(gpu->log, PL_LOG_DEBUG); buf = pl_buf_create(gpu, pl_buf_params( .size = size, .import_handle = PL_HANDLE_HOST_PTR, .shared_mem = (struct pl_shared_mem) { .handle.ptr = params->ptr, .size = size, .offset = 0, }, )); pl_log_level_cap(gpu->log, PL_LOG_NONE); } if (!buf) { // Fallback when host pointer import is not supported buf = pl_buf_create(gpu, pl_buf_params( .size = size, .host_readable = true, )); } if (!buf) return false; struct pl_tex_transfer_params newparams = *params; newparams.ptr = NULL; newparams.buf = buf; bool import_handle = buf->params.import_handle; // If the transfer is asynchronous, propagate our host read asynchronously if (params->callback && !import_handle) { newparams.callback = pbo_download_cb; newparams.priv = pl_alloc_struct(NULL, struct pbo_cb_ctx, { .gpu = gpu, .buf = buf, .ptr = params->ptr, .callback = params->callback, .priv = params->priv, }); } if (!pl_tex_download(gpu, &newparams)) { pl_buf_destroy(gpu, &buf); return false; } if (!params->callback) { while (pl_buf_poll(gpu, buf, 10000000)) // 10 ms PL_TRACE(gpu, "pl_tex_download: synchronous/blocking (slow path)"); } bool ok; if (import_handle) { // Buffer download completion already means the host pointer contains // the valid data, no more need to copy. (Note: this applies even for // asynchronous downloads) ok = true; pl_buf_destroy(gpu, &buf); } else if (!params->callback) { // Synchronous read back to the host pointer ok = pl_buf_read(gpu, buf, 0, params->ptr, size); pl_buf_destroy(gpu, &buf); } else { // Nothing left to do here, the rest will be done by pbo_download_cb ok = true; } return ok; } bool pl_tex_upload_texel(pl_gpu gpu, const struct pl_tex_transfer_params *params) { const int threads = PL_MIN(256, pl_rect_w(params->rc)); pl_tex tex = params->tex; pl_fmt fmt = tex->params.format; pl_require(gpu, params->buf); pl_dispatch dp = pl_gpu_dispatch(gpu); pl_shader sh = pl_dispatch_begin(dp); if (!sh_try_compute(sh, threads, 1, false, 0)) { PL_ERR(gpu, "Failed emulating texture transfer!"); pl_dispatch_abort(dp, &sh); return false; } ident_t buf = sh_desc(sh, (struct pl_shader_desc) { .binding.object = params->buf, .desc = { .name = "data", .type = PL_DESC_BUF_TEXEL_STORAGE, }, }); ident_t img = sh_desc(sh, (struct pl_shader_desc) { .binding.object = params->tex, .desc = { .name = "image", .type = PL_DESC_STORAGE_IMG, .access = PL_DESC_ACCESS_WRITEONLY, }, }); // If the transfer width is a natural multiple of the thread size, we // can skip the bounds check. Otherwise, make sure we aren't blitting out // of the range since this would read out of bounds. 
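// Illustrative sketch (hedged, not part of the original source): turning an
// already-prepared host download into an asynchronous one by attaching a
// completion callback before handing it to pl_tex_download_pbo. `params` is
// assumed to already describe a valid transfer (tex, rc, pitches, ptr).
#if 0
static void example_on_download_done(void *priv)
{
    // The host memory passed as `params->ptr` is now safe to read
    (void) priv;
}

static bool example_async_download(pl_gpu gpu, struct pl_tex_transfer_params params)
{
    params.callback = example_on_download_done;
    params.priv     = NULL;
    return pl_tex_download_pbo(gpu, &params);
}
#endif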
int groups_x = PL_DIV_UP(pl_rect_w(params->rc), threads); if (groups_x * threads != pl_rect_w(params->rc)) { GLSL("if (gl_GlobalInvocationID.x >= %d) \n" " return; \n", pl_rect_w(params->rc)); } // fmt->texel_align contains the size of an individual color value assert(fmt->texel_size == fmt->num_components * fmt->texel_align); GLSL("vec4 color = vec4(0.0, 0.0, 0.0, 1.0); \n" "ivec3 pos = ivec3(gl_GlobalInvocationID); \n" "ivec3 tex_pos = pos + ivec3("$", "$", "$"); \n" "int base = "$" + pos.z * "$" + pos.y * "$" + pos.x * "$"; \n", SH_INT_DYN(params->rc.x0), SH_INT_DYN(params->rc.y0), SH_INT_DYN(params->rc.z0), SH_INT_DYN(params->buf_offset), SH_INT(params->depth_pitch / fmt->texel_align), SH_INT(params->row_pitch / fmt->texel_align), SH_INT(fmt->texel_size / fmt->texel_align)); for (int i = 0; i < fmt->num_components; i++) GLSL("color[%d] = imageLoad("$", base + %d).r; \n", i, buf, i); int dims = pl_tex_params_dimension(tex->params); static const char *coord_types[] = { [1] = "int", [2] = "ivec2", [3] = "ivec3", }; GLSL("imageStore("$", %s(tex_pos), color);\n", img, coord_types[dims]); return pl_dispatch_compute(dp, pl_dispatch_compute_params( .shader = &sh, .dispatch_size = { groups_x, pl_rect_h(params->rc), pl_rect_d(params->rc), }, )); error: return false; } bool pl_tex_download_texel(pl_gpu gpu, const struct pl_tex_transfer_params *params) { const int threads = PL_MIN(256, pl_rect_w(params->rc)); pl_tex tex = params->tex; pl_fmt fmt = tex->params.format; pl_require(gpu, params->buf); pl_dispatch dp = pl_gpu_dispatch(gpu); pl_shader sh = pl_dispatch_begin(dp); if (!sh_try_compute(sh, threads, 1, false, 0)) { PL_ERR(gpu, "Failed emulating texture transfer!"); pl_dispatch_abort(dp, &sh); return false; } ident_t buf = sh_desc(sh, (struct pl_shader_desc) { .binding.object = params->buf, .desc = { .name = "data", .type = PL_DESC_BUF_TEXEL_STORAGE, }, }); ident_t img = sh_desc(sh, (struct pl_shader_desc) { .binding.object = params->tex, .desc = { .name = "image", .type = PL_DESC_STORAGE_IMG, .access = PL_DESC_ACCESS_READONLY, }, }); int groups_x = PL_DIV_UP(pl_rect_w(params->rc), threads); if (groups_x * threads != pl_rect_w(params->rc)) { GLSL("if (gl_GlobalInvocationID.x >= %d) \n" " return; \n", pl_rect_w(params->rc)); } int dims = pl_tex_params_dimension(tex->params); static const char *coord_types[] = { [1] = "int", [2] = "ivec2", [3] = "ivec3", }; assert(fmt->texel_size == fmt->num_components * fmt->texel_align); GLSL("ivec3 pos = ivec3(gl_GlobalInvocationID); \n" "ivec3 tex_pos = pos + ivec3("$", "$", "$"); \n" "int base = "$" + pos.z * "$" + pos.y * "$" + pos.x * "$"; \n" "vec4 color = imageLoad("$", %s(tex_pos)); \n", SH_INT_DYN(params->rc.x0), SH_INT_DYN(params->rc.y0), SH_INT_DYN(params->rc.z0), SH_INT_DYN(params->buf_offset), SH_INT(params->depth_pitch / fmt->texel_align), SH_INT(params->row_pitch / fmt->texel_align), SH_INT(fmt->texel_size / fmt->texel_align), img, coord_types[dims]); for (int i = 0; i < fmt->num_components; i++) GLSL("imageStore("$", base + %d, vec4(color[%d])); \n", buf, i, i); return pl_dispatch_compute(dp, pl_dispatch_compute_params( .shader = &sh, .dispatch_size = { groups_x, pl_rect_h(params->rc), pl_rect_d(params->rc), }, )); error: return false; } bool pl_tex_blit_compute(pl_gpu gpu, const struct pl_tex_blit_params *params) { if (!params->dst->params.storable) return false; // Normalize `dst_rc`, moving all flipping to `src_rc` instead. 
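// Illustrative note (hedged): in the emulated texel path above, the storage
// texel buffer is addressed in units of one color component, i.e. in steps of
// fmt->texel_align bytes. For example, with a hypothetical rg16 format
// (texel_size = 4, texel_align = 2) and row_pitch = 1024 bytes, the texel at
// (x, y, 0) starts, relative to the shader's base offset, at component index
//   y * (1024 / 2) + x * (4 / 2)
// and its two components occupy that index and the one following it.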
pl_rect3d src_rc = params->src_rc; pl_rect3d dst_rc = params->dst_rc; if (pl_rect_w(dst_rc) < 0) { PL_SWAP(src_rc.x0, src_rc.x1); PL_SWAP(dst_rc.x0, dst_rc.x1); } if (pl_rect_h(dst_rc) < 0) { PL_SWAP(src_rc.y0, src_rc.y1); PL_SWAP(dst_rc.y0, dst_rc.y1); } if (pl_rect_d(dst_rc) < 0) { PL_SWAP(src_rc.z0, src_rc.z1); PL_SWAP(dst_rc.z0, dst_rc.z1); } bool needs_scaling = false; needs_scaling |= pl_rect_w(dst_rc) != abs(pl_rect_w(src_rc)); needs_scaling |= pl_rect_h(dst_rc) != abs(pl_rect_h(src_rc)); needs_scaling |= pl_rect_d(dst_rc) != abs(pl_rect_d(src_rc)); // Exception: fast path for 1-pixel blits, which don't require scaling bool is_1pixel = abs(pl_rect_w(src_rc)) == 1 && abs(pl_rect_h(src_rc)) == 1; needs_scaling &= !is_1pixel; // Manual trilinear interpolation would be too slow to justify bool needs_sampling = needs_scaling && params->sample_mode != PL_TEX_SAMPLE_NEAREST; needs_sampling |= !params->src->params.storable; if (needs_sampling && !params->src->params.sampleable) return false; const int threads = 256; int bw = PL_MIN(32, pl_rect_w(dst_rc)); int bh = PL_MIN(threads / bw, pl_rect_h(dst_rc)); pl_dispatch dp = pl_gpu_dispatch(gpu); pl_shader sh = pl_dispatch_begin(dp); if (!sh_try_compute(sh, bw, bh, false, 0)) { pl_dispatch_abort(dp, &sh); return false; } // Avoid over-writing into `dst` int groups_x = PL_DIV_UP(pl_rect_w(dst_rc), bw); if (groups_x * bw != pl_rect_w(dst_rc)) { GLSL("if (gl_GlobalInvocationID.x >= %d) \n" " return; \n", pl_rect_w(dst_rc)); } int groups_y = PL_DIV_UP(pl_rect_h(dst_rc), bh); if (groups_y * bh != pl_rect_h(dst_rc)) { GLSL("if (gl_GlobalInvocationID.y >= %d) \n" " return; \n", pl_rect_h(dst_rc)); } ident_t dst = sh_desc(sh, (struct pl_shader_desc) { .binding.object = params->dst, .desc = { .name = "dst", .type = PL_DESC_STORAGE_IMG, .access = PL_DESC_ACCESS_WRITEONLY, }, }); static const char *vecs[] = { [1] = "float", [2] = "vec2", [3] = "vec3", [4] = "vec4", }; static const char *ivecs[] = { [1] = "int", [2] = "ivec2", [3] = "ivec3", [4] = "ivec4", }; int src_dims = pl_tex_params_dimension(params->src->params); int dst_dims = pl_tex_params_dimension(params->dst->params); GLSL("ivec3 pos = ivec3(gl_GlobalInvocationID); \n" "%s dst_pos = %s(pos + ivec3(%d, %d, %d)); \n", ivecs[dst_dims], ivecs[dst_dims], params->dst_rc.x0, params->dst_rc.y0, params->dst_rc.z0); if (needs_sampling || (needs_scaling && params->src->params.sampleable)) { ident_t src = sh_desc(sh, (struct pl_shader_desc) { .desc = { .name = "src", .type = PL_DESC_SAMPLED_TEX, }, .binding = { .object = params->src, .address_mode = PL_TEX_ADDRESS_CLAMP, .sample_mode = params->sample_mode, } }); if (is_1pixel) { GLSL("%s fpos = %s(0.5); \n", vecs[src_dims], vecs[src_dims]); } else { GLSL("vec3 fpos = (vec3(pos) + vec3(0.5)) / vec3(%d.0, %d.0, %d.0); \n", pl_rect_w(dst_rc), pl_rect_h(dst_rc), pl_rect_d(dst_rc)); } GLSL("%s src_pos = %s(0.5); \n" "src_pos.x = mix(%f, %f, fpos.x); \n", vecs[src_dims], vecs[src_dims], (float) src_rc.x0 / params->src->params.w, (float) src_rc.x1 / params->src->params.w); if (params->src->params.h) { GLSL("src_pos.y = mix(%f, %f, fpos.y); \n", (float) src_rc.y0 / params->src->params.h, (float) src_rc.y1 / params->src->params.h); } if (params->src->params.d) { GLSL("src_pos.z = mix(%f, %f, fpos.z); \n", (float) src_rc.z0 / params->src->params.d, (float) src_rc.z1 / params->src->params.d); } GLSL("imageStore("$", dst_pos, textureLod("$", src_pos, 0.0)); \n", dst, src); } else { ident_t src = sh_desc(sh, (struct pl_shader_desc) { .binding.object = params->src, .desc 
= { .name = "src", .type = PL_DESC_STORAGE_IMG, .access = PL_DESC_ACCESS_READONLY, }, }); if (is_1pixel) { GLSL("ivec3 src_pos = ivec3(0); \n"); } else if (needs_scaling) { GLSL("ivec3 src_pos = ivec3(vec3(%f, %f, %f) * vec3(pos)); \n", fabs((float) pl_rect_w(src_rc) / pl_rect_w(dst_rc)), fabs((float) pl_rect_h(src_rc) / pl_rect_h(dst_rc)), fabs((float) pl_rect_d(src_rc) / pl_rect_d(dst_rc))); } else { GLSL("ivec3 src_pos = pos; \n"); } GLSL("src_pos = ivec3(%d, %d, %d) * src_pos + ivec3(%d, %d, %d); \n" "imageStore("$", dst_pos, imageLoad("$", %s(src_pos))); \n", src_rc.x1 < src_rc.x0 ? -1 : 1, src_rc.y1 < src_rc.y0 ? -1 : 1, src_rc.z1 < src_rc.z0 ? -1 : 1, src_rc.x0, src_rc.y0, src_rc.z0, dst, src, ivecs[src_dims]); } return pl_dispatch_compute(dp, pl_dispatch_compute_params( .shader = &sh, .dispatch_size = { groups_x, groups_y, pl_rect_d(dst_rc), }, )); } void pl_tex_blit_raster(pl_gpu gpu, const struct pl_tex_blit_params *params) { enum pl_fmt_type src_type = params->src->params.format->type; enum pl_fmt_type dst_type = params->dst->params.format->type; // Only for 2D textures pl_assert(params->src->params.h && !params->src->params.d); pl_assert(params->dst->params.h && !params->dst->params.d); // Integer textures are not supported pl_assert(src_type != PL_FMT_UINT && src_type != PL_FMT_SINT); pl_assert(dst_type != PL_FMT_UINT && dst_type != PL_FMT_SINT); pl_rect2df src_rc = { .x0 = params->src_rc.x0, .x1 = params->src_rc.x1, .y0 = params->src_rc.y0, .y1 = params->src_rc.y1, }; pl_rect2d dst_rc = { .x0 = params->dst_rc.x0, .x1 = params->dst_rc.x1, .y0 = params->dst_rc.y0, .y1 = params->dst_rc.y1, }; pl_dispatch dp = pl_gpu_dispatch(gpu); pl_shader sh = pl_dispatch_begin(dp); sh->output = PL_SHADER_SIG_COLOR; ident_t pos, src = sh_bind(sh, params->src, PL_TEX_ADDRESS_CLAMP, params->sample_mode, "src_tex", &src_rc, &pos, NULL); GLSL("vec4 color = textureLod("$", "$", 0.0); \n", src, pos); pl_dispatch_finish(dp, pl_dispatch_params( .shader = &sh, .target = params->dst, .rect = dst_rc, )); } bool pl_buf_copy_swap(pl_gpu gpu, const struct pl_buf_copy_swap_params *params) { pl_buf src = params->src, dst = params->dst; pl_require(gpu, src->params.storable && dst->params.storable); pl_require(gpu, params->src_offset % sizeof(unsigned) == 0); pl_require(gpu, params->dst_offset % sizeof(unsigned) == 0); pl_require(gpu, params->src_offset + params->size <= src->params.size); pl_require(gpu, params->dst_offset + params->size <= dst->params.size); pl_require(gpu, src != dst || params->src_offset == params->dst_offset); pl_require(gpu, params->size % sizeof(unsigned) == 0); pl_require(gpu, params->wordsize == sizeof(uint16_t) || params->wordsize == sizeof(uint32_t)); const size_t words = params->size / sizeof(unsigned); const size_t src_off = params->src_offset / sizeof(unsigned); const size_t dst_off = params->dst_offset / sizeof(unsigned); const int threads = PL_MIN(256, words); pl_dispatch dp = pl_gpu_dispatch(gpu); pl_shader sh = pl_dispatch_begin(dp); if (!sh_try_compute(sh, threads, 1, false, 0)) { pl_dispatch_abort(dp, &sh); return false; } const size_t groups = PL_DIV_UP(words, threads); if (groups * threads > words) { GLSL("if (gl_GlobalInvocationID.x >= %zu) \n" " return; \n", words); } sh_desc(sh, (struct pl_shader_desc) { .binding.object = src, .desc = { .name = "SrcBuf", .type = PL_DESC_BUF_STORAGE, .access = src == dst ? 
PL_DESC_ACCESS_READWRITE : PL_DESC_ACCESS_READONLY, }, .num_buffer_vars = 1, .buffer_vars = &(struct pl_buffer_var) { .var = { .name = "src", .type = PL_VAR_UINT, .dim_v = 1, .dim_m = 1, .dim_a = src_off + words, }, }, }); if (src != dst) { sh_desc(sh, (struct pl_shader_desc) { .binding.object = dst, .desc = { .name = "DstBuf", .type = PL_DESC_BUF_STORAGE, .access = PL_DESC_ACCESS_WRITEONLY, }, .num_buffer_vars = 1, .buffer_vars = &(struct pl_buffer_var) { .var = { .name = "dst", .type = PL_VAR_UINT, .dim_v = 1, .dim_m = 1, .dim_a = dst_off + words, }, }, }); } else { GLSL("#define dst src \n"); } GLSL("// pl_buf_copy_swap \n" "{ \n" "uint word = src["$" + gl_GlobalInvocationID.x]; \n" "word = (word & 0xFF00FF00u) >> 8 | \n" " (word & 0x00FF00FFu) << 8; \n", SH_UINT(src_off)); if (params->wordsize > 2) { GLSL("word = (word & 0xFFFF0000u) >> 16 | \n" " (word & 0x0000FFFFu) << 16; \n"); } GLSL("dst["$" + gl_GlobalInvocationID.x] = word; \n" "} \n", SH_UINT(dst_off)); return pl_dispatch_compute(dp, pl_dispatch_compute_params( .shader = &sh, .dispatch_size = {groups, 1, 1}, )); error: if (src->params.debug_tag || dst->params.debug_tag) { PL_ERR(gpu, " for buffers: src %s, dst %s", src->params.debug_tag, dst->params.debug_tag); } return false; } void pl_pass_run_vbo(pl_gpu gpu, const struct pl_pass_run_params *params) { if (!params->vertex_data && !params->index_data) return pl_pass_run(gpu, params); struct pl_pass_run_params newparams = *params; pl_buf vert = NULL, index = NULL; if (params->vertex_data) { vert = pl_buf_create(gpu, pl_buf_params( .size = pl_vertex_buf_size(params), .initial_data = params->vertex_data, .drawable = true, )); if (!vert) { PL_ERR(gpu, "Failed allocating vertex buffer!"); return; } newparams.vertex_buf = vert; newparams.vertex_data = NULL; } if (params->index_data) { index = pl_buf_create(gpu, pl_buf_params( .size = pl_index_buf_size(params), .initial_data = params->index_data, .drawable = true, )); if (!index) { PL_ERR(gpu, "Failed allocating index buffer!"); return; } newparams.index_buf = index; newparams.index_data = NULL; } pl_pass_run(gpu, &newparams); pl_buf_destroy(gpu, &vert); pl_buf_destroy(gpu, &index); } struct pl_pass_params pl_pass_params_copy(void *alloc, const struct pl_pass_params *params) { struct pl_pass_params new = *params; new.glsl_shader = pl_str0dup0(alloc, new.glsl_shader); new.vertex_shader = pl_str0dup0(alloc, new.vertex_shader); if (new.blend_params) new.blend_params = pl_memdup_ptr(alloc, new.blend_params); #define DUPNAMES(field) \ do { \ size_t _size = new.num_##field * sizeof(new.field[0]); \ new.field = pl_memdup(alloc, new.field, _size); \ for (int j = 0; j < new.num_##field; j++) \ new.field[j].name = pl_str0dup0(alloc, new.field[j].name); \ } while (0) DUPNAMES(variables); DUPNAMES(descriptors); DUPNAMES(vertex_attribs); #undef DUPNAMES new.constant_data = NULL; new.constants = pl_memdup(alloc, new.constants, new.num_constants * sizeof(new.constants[0])); return new; } size_t pl_vertex_buf_size(const struct pl_pass_run_params *params) { if (!params->index_data) return params->vertex_count * params->pass->params.vertex_stride; int num_vertices = 0; const void *idx = params->index_data; switch (params->index_fmt) { case PL_INDEX_UINT16: for (int i = 0; i < params->vertex_count; i++) num_vertices = PL_MAX(num_vertices, ((const uint16_t *) idx)[i]); break; case PL_INDEX_UINT32: for (int i = 0; i < params->vertex_count; i++) num_vertices = PL_MAX(num_vertices, ((const uint32_t *) idx)[i]); break; case PL_INDEX_FORMAT_COUNT: 
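// Illustrative sketch (hedged, not part of the original source): using
// pl_buf_copy_swap to byte-swap a buffer of 16-bit samples in place, e.g. to
// convert big-endian pixel data to the host byte order. `my_buf` is
// hypothetical; it must be storable and its size a multiple of 4 bytes.
#if 0
static bool example_swap16_inplace(pl_gpu gpu, pl_buf my_buf)
{
    return pl_buf_copy_swap(gpu, &(struct pl_buf_copy_swap_params) {
        .src        = my_buf,
        .dst        = my_buf,
        .src_offset = 0,
        .dst_offset = 0,
        .size       = my_buf->params.size, // assumed to be a multiple of 4
        .wordsize   = sizeof(uint16_t),
    });
}
#endif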
pl_unreachable(); } return (num_vertices + 1) * params->pass->params.vertex_stride; } const char *print_uuid(char buf[3 * UUID_SIZE], const uint8_t uuid[UUID_SIZE]) { static const char *hexdigits = "0123456789ABCDEF"; for (int i = 0; i < UUID_SIZE; i++) { uint8_t x = uuid[i]; buf[3 * i + 0] = hexdigits[x >> 4]; buf[3 * i + 1] = hexdigits[x & 0xF]; buf[3 * i + 2] = i == UUID_SIZE - 1 ? '\0' : ':'; } return buf; } const char *print_drm_mod(char buf[DRM_MOD_SIZE], uint64_t mod) { switch (mod) { case DRM_FORMAT_MOD_LINEAR: return "LINEAR"; case DRM_FORMAT_MOD_INVALID: return "INVALID"; } uint8_t vendor = mod >> 56; uint64_t val = mod & ((1ULL << 56) - 1); const char *name = NULL; switch (vendor) { case 0x00: name = "NONE"; break; case 0x01: name = "INTEL"; break; case 0x02: name = "AMD"; break; case 0x03: name = "NVIDIA"; break; case 0x04: name = "SAMSUNG"; break; case 0x08: name = "ARM"; break; } if (name) { snprintf(buf, DRM_MOD_SIZE, "%s 0x%"PRIx64, name, val); } else { snprintf(buf, DRM_MOD_SIZE, "0x%02x 0x%"PRIx64, vendor, val); } return buf; } libplacebo-v7.349.0/src/hash.h000066400000000000000000000124701463457750100160630ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #pragma once #include "common.h" #define GOLDEN_RATIO_64 UINT64_C(0x9e3779b97f4a7c15) static inline void pl_hash_merge(uint64_t *accum, uint64_t hash) { *accum ^= hash + GOLDEN_RATIO_64 + (*accum << 6) + (*accum >> 2); } static inline uint64_t pl_mem_hash(const void *mem, size_t size); #define pl_var_hash(x) pl_mem_hash(&(x), sizeof(x)) static inline uint64_t pl_str_hash(pl_str str) { return pl_mem_hash(str.buf, str.len); } static inline uint64_t pl_str0_hash(const char *str) { return pl_mem_hash(str, str ? strlen(str) : 0); } #ifdef PL_HAVE_XXHASH #define XXH_NAMESPACE pl_ #define XXH_INLINE_ALL #define XXH_NO_STREAM #include XXH_FORCE_INLINE uint64_t pl_mem_hash(const void *mem, size_t size) { return XXH3_64bits(mem, size); } #else // !PL_HAVE_XXHASH /* SipHash reference C implementation Modified for use by libplacebo: - Hard-coded a fixed key (k0 and k1) - Hard-coded the output size to 64 bits - Return the result vector directly Copyright (c) 2012-2016 Jean-Philippe Aumasson Copyright (c) 2012-2014 Daniel J. Bernstein To the extent possible under law, the author(s) have dedicated all copyright and related and neighboring rights to this software to the public domain worldwide. This software is distributed without any warranty. . 
*/ /* default: SipHash-2-4 */ #define cROUNDS 2 #define dROUNDS 4 #define ROTL(x, b) (uint64_t)(((x) << (b)) | ((x) >> (64 - (b)))) #define U8TO64_LE(p) \ (((uint64_t)((p)[0])) | ((uint64_t)((p)[1]) << 8) | \ ((uint64_t)((p)[2]) << 16) | ((uint64_t)((p)[3]) << 24) | \ ((uint64_t)((p)[4]) << 32) | ((uint64_t)((p)[5]) << 40) | \ ((uint64_t)((p)[6]) << 48) | ((uint64_t)((p)[7]) << 56)) #define SIPROUND \ do { \ v0 += v1; \ v1 = ROTL(v1, 13); \ v1 ^= v0; \ v0 = ROTL(v0, 32); \ v2 += v3; \ v3 = ROTL(v3, 16); \ v3 ^= v2; \ v0 += v3; \ v3 = ROTL(v3, 21); \ v3 ^= v0; \ v2 += v1; \ v1 = ROTL(v1, 17); \ v1 ^= v2; \ v2 = ROTL(v2, 32); \ } while (0) static inline uint64_t pl_mem_hash(const void *mem, size_t size) { if (!size) return 0x8533321381b8254bULL; uint64_t v0 = 0x736f6d6570736575ULL; uint64_t v1 = 0x646f72616e646f6dULL; uint64_t v2 = 0x6c7967656e657261ULL; uint64_t v3 = 0x7465646279746573ULL; uint64_t k0 = 0xfe9f075098ddb0faULL; uint64_t k1 = 0x68f7f03510e5285cULL; uint64_t m; int i; const uint8_t *buf = mem; const uint8_t *end = buf + size - (size % sizeof(uint64_t)); const int left = size & 7; uint64_t b = ((uint64_t) size) << 56; v3 ^= k1; v2 ^= k0; v1 ^= k1; v0 ^= k0; for (; buf != end; buf += 8) { m = U8TO64_LE(buf); v3 ^= m; for (i = 0; i < cROUNDS; ++i) SIPROUND; v0 ^= m; } switch (left) { case 7: b |= ((uint64_t) buf[6]) << 48; // fall through case 6: b |= ((uint64_t) buf[5]) << 40; // fall through case 5: b |= ((uint64_t) buf[4]) << 32; // fall through case 4: b |= ((uint64_t) buf[3]) << 24; // fall through case 3: b |= ((uint64_t) buf[2]) << 16; // fall through case 2: b |= ((uint64_t) buf[1]) << 8; // fall through case 1: b |= ((uint64_t) buf[0]); break; case 0: break; } v3 ^= b; for (i = 0; i < cROUNDS; ++i) SIPROUND; v0 ^= b; v2 ^= 0xff; for (i = 0; i < dROUNDS; ++i) SIPROUND; b = v0 ^ v1 ^ v2 ^ v3; return b; } #endif // PL_HAVE_XXHASH libplacebo-v7.349.0/src/include/000077500000000000000000000000001463457750100164065ustar00rootroot00000000000000libplacebo-v7.349.0/src/include/libplacebo/000077500000000000000000000000001463457750100205025ustar00rootroot00000000000000libplacebo-v7.349.0/src/include/libplacebo/cache.h000066400000000000000000000175501463457750100217260ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #ifndef LIBPLACEBO_CACHE_H_ #define LIBPLACEBO_CACHE_H_ #include #include #include #include #include #include PL_API_BEGIN typedef struct pl_cache_obj { // Cache object key. This will uniquely identify this cached object. uint64_t key; // Cache data pointer and length. 0-length cached objects are invalid // and will be silently dropped. You can explicitly remove a cached // object by overwriting it with a length 0 object. void *data; size_t size; // Free callback, to free memory associated with `data`. (Optional) // Will be called when the object is either explicitly deleted, culled // due to hitting size limits, or on pl_cache_destroy(). 
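// Illustrative sketch (hedged, not part of the original source): deriving a
// pl_cache_obj.key by combining per-field hashes with the helpers from
// src/hash.h above. The struct and its fields are purely hypothetical.
#if 0
struct example_key_src {
    int width, height;
    const char *name;
};

static uint64_t example_cache_key(const struct example_key_src *p)
{
    uint64_t key = pl_var_hash(p->width);
    pl_hash_merge(&key, pl_var_hash(p->height));
    pl_hash_merge(&key, pl_str0_hash(p->name));
    return key;
}
#endif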
void (*free)(void *data); } pl_cache_obj; struct pl_cache_params { // Optional `pl_log` that is used for logging internal events related // to the cache, such as insertions, saving and loading. pl_log log; // Size limits. If 0, no limit is imposed. // // Note: libplacebo will never detect or invalidate stale cache entries, so // setting an upper size limit is strongly recommended size_t max_object_size; size_t max_total_size; // Optional external callback to call after a cached object is modified // (including deletion and (re-)insertion). Note that this is not called on // objects which are merely pruned from the cache due to `max_total_size`, // so users must rely on some external mechanism to prune stale entries or // enforce size limits. // // Note: `pl_cache_load` does not trigger this callback. // Note: Ownership of `obj` does *not* pass to the caller. // Note: This function must be thread safe. void (*set)(void *priv, pl_cache_obj obj); // Optional external callback to call on a cache miss. Ownership of the // returned object passes to the `pl_cache`. Objects returned by this // callback *should* have a valid `free` callback, unless lifetime can be // externally managed and guaranteed to outlive the `pl_cache`. // // Note: This function must be thread safe. pl_cache_obj (*get)(void *priv, uint64_t key); // External context for insert/lookup. void *priv; }; #define pl_cache_params(...) (&(struct pl_cache_params) { __VA_ARGS__ }) PL_API extern const struct pl_cache_params pl_cache_default_params; // Thread-safety: Safe // // Note: In any context in which `pl_cache` is used, users may also pass NULL // to disable caching. In other words, NULL is a valid `pl_cache`. typedef const struct pl_cache_t { struct pl_cache_params params; } *pl_cache; // Create a new cache. This function will never fail. PL_API pl_cache pl_cache_create(const struct pl_cache_params *params); // Destroy a `pl_cache` object, including all underlying objects. PL_API void pl_cache_destroy(pl_cache *cache); // Explicitly clear all objects in the cache without destroying it. This is // similar to `pl_cache_destroy`, but the cache remains valid afterwards. // // Note: Objects destroyed in this way *not* propagated to the `set` callback. PL_API void pl_cache_reset(pl_cache cache); // Return the current internal number of objects and total size (bytes) PL_API int pl_cache_objects(pl_cache cache); PL_API size_t pl_cache_size(pl_cache cache); // Return a lightweight, order-independent hash of all objects currently stored // in the `pl_cache`. Can be used to avoid re-saving unmodified caches. PL_API uint64_t pl_cache_signature(pl_cache cache); // --- Cache saving and loading APIs // Serialize the internal state of a `pl_cache` into an abstract cache // object that can be e.g. saved to disk and loaded again later. Returns the // number of objects saved. // // Note: Using `save/load` is largely redundant with using `insert/lookup` // callbacks, and the user should decide whether to use the explicit API or the // callback-based API. PL_API int pl_cache_save_ex(pl_cache cache, void (*write)(void *priv, size_t size, const void *ptr), void *priv); // Load the result of a previous `pl_cache_save` call. Any duplicate entries in // the `pl_cache` will be overwritten. Returns the number of objects loaded, or // a negative number on serious error (e.g. corrupt header) // // Note: This does not trigger the `update` callback. 
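// Illustrative sketch (hedged, not part of the original source): creating a
// cache with a total size limit and pre-populating it from disk using the
// FILE helpers declared further below. The path is hypothetical and error
// handling is elided.
#if 0
static pl_cache example_load_cache(pl_log log)
{
    pl_cache cache = pl_cache_create(pl_cache_params(
        .log            = log,
        .max_total_size = 10 << 20, // 10 MiB
    ));

    FILE *f = fopen("/tmp/example_cache.bin", "rb");
    if (f) {
        pl_cache_load_file(cache, f);
        fclose(f);
    }
    return cache;
}
#endif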
PL_API int pl_cache_load_ex(pl_cache cache, bool (*read)(void *priv, size_t size, void *ptr), void *priv); // --- Convenience wrappers around pl_cache_save/load_ex // Writes data directly to a pointer. Returns the number of bytes that *would* // have been written, so this can be used on a size 0 buffer to get the required // total size. PL_API size_t pl_cache_save(pl_cache cache, uint8_t *data, size_t size); // Reads data directly from a pointer. This still reads from `data`, so it does // not avoid a copy. PL_API int pl_cache_load(pl_cache cache, const uint8_t *data, size_t size); // Writes/loads data to/from a FILE stream at the current position. #define pl_cache_save_file(c, file) pl_cache_save_ex(c, pl_write_file_cb, file) #define pl_cache_load_file(c, file) pl_cache_load_ex(c, pl_read_file_cb, file) static inline void pl_write_file_cb(void *priv, size_t size, const void *ptr) { (void) fwrite(ptr, 1, size, (FILE *) priv); } static inline bool pl_read_file_cb(void *priv, size_t size, void *ptr) { return fread(ptr, 1, size, (FILE *) priv) == size; } // --- Object modification API. Mostly intended for internal use. // Insert a new cached object into a `pl_cache`. Returns whether successful. // Overwrites any existing cached object with that signature, so this can be // used to e.g. delete objects as well (set their size to 0). On success, // ownership of `obj` passes to the `pl_cache`. // // Note: If `object.free` is NULL, this will perform an internal memdup. To // bypass this (e.g. when directly adding externally managed memory), you can // set the `free` callback to an explicit noop function. // // Note: `obj->data/free` will be reset to NULL on successful insertion. PL_API bool pl_cache_try_set(pl_cache cache, pl_cache_obj *obj); // Variant of `pl_cache_try_set` that simply frees `obj` on failure. PL_API void pl_cache_set(pl_cache cache, pl_cache_obj *obj); // Looks up `obj->key` in the object cache. If successful, `obj->data` is // set to memory owned by the caller, which must be either explicitly // re-inserted, or explicitly freed (using obj->free). // // Note: On failure, `obj->data/size/free` are reset to NULL. PL_API bool pl_cache_get(pl_cache cache, pl_cache_obj *obj); // Run a callback on every object currently stored in `cache`. // // Note: Running any `pl_cache_*` function on `cache` from this callback is // undefined behavior. PL_API void pl_cache_iterate(pl_cache cache, void (*cb)(void *priv, pl_cache_obj obj), void *priv); // Utility wrapper to free a `pl_cache_obj` if necessary (and sanitize it) static inline void pl_cache_obj_free(pl_cache_obj *obj) { if (obj->free) obj->free(obj->data); obj->data = NULL; obj->free = NULL; obj->size = 0; } PL_API_END #endif // LIBPLACEBO_CACHE_H_ libplacebo-v7.349.0/src/include/libplacebo/colorspace.h000066400000000000000000001024411463457750100230070ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . 
*/ #ifndef LIBPLACEBO_COLORSPACE_H_ #define LIBPLACEBO_COLORSPACE_H_ #include #include #include #include PL_API_BEGIN // The underlying color representation (e.g. RGB, XYZ or YCbCr) enum pl_color_system { PL_COLOR_SYSTEM_UNKNOWN = 0, // YCbCr-like color systems: PL_COLOR_SYSTEM_BT_601, // ITU-R Rec. BT.601 (SD) PL_COLOR_SYSTEM_BT_709, // ITU-R Rec. BT.709 (HD) PL_COLOR_SYSTEM_SMPTE_240M, // SMPTE-240M PL_COLOR_SYSTEM_BT_2020_NC, // ITU-R Rec. BT.2020 (non-constant luminance) PL_COLOR_SYSTEM_BT_2020_C, // ITU-R Rec. BT.2020 (constant luminance) PL_COLOR_SYSTEM_BT_2100_PQ, // ITU-R Rec. BT.2100 ICtCp PQ variant PL_COLOR_SYSTEM_BT_2100_HLG, // ITU-R Rec. BT.2100 ICtCp HLG variant PL_COLOR_SYSTEM_DOLBYVISION, // Dolby Vision (see pl_dovi_metadata) PL_COLOR_SYSTEM_YCGCO, // YCgCo (derived from RGB) // Other color systems: PL_COLOR_SYSTEM_RGB, // Red, Green and Blue PL_COLOR_SYSTEM_XYZ, // Digital Cinema Distribution Master (XYZ) PL_COLOR_SYSTEM_COUNT }; PL_API bool pl_color_system_is_ycbcr_like(enum pl_color_system sys); // Returns the human-readable, friendly name of the color system. PL_API const char *pl_color_system_name(enum pl_color_system sys); PL_API extern const char *const pl_color_system_names[PL_COLOR_SYSTEM_COUNT]; // Returns true for color systems that are linear transformations of the RGB // equivalent, i.e. are simple matrix multiplications. For color systems with // this property, `pl_color_repr_decode` is sufficient for conversion to RGB. PL_API bool pl_color_system_is_linear(enum pl_color_system sys); // Guesses the best YCbCr-like colorspace based on a image given resolution. // This only picks conservative values. (In particular, BT.2020 is never // auto-guessed, even for 4K resolution content) PL_API enum pl_color_system pl_color_system_guess_ycbcr(int width, int height); // Friendly names for the canonical channel names and order. enum pl_channel { PL_CHANNEL_NONE = -1, PL_CHANNEL_A = 3, // alpha // RGB system PL_CHANNEL_R = 0, PL_CHANNEL_G = 1, PL_CHANNEL_B = 2, // YCbCr-like systems PL_CHANNEL_Y = 0, PL_CHANNEL_CB = 1, PL_CHANNEL_CR = 2, // Aliases for Cb/Cr PL_CHANNEL_U = 1, PL_CHANNEL_V = 2 // There are deliberately no names for the XYZ system to avoid // confusion due to PL_CHANNEL_Y. }; // The numerical range of the representation (where applicable). enum pl_color_levels { PL_COLOR_LEVELS_UNKNOWN = 0, PL_COLOR_LEVELS_LIMITED, // Limited/TV range, e.g. 16-235 PL_COLOR_LEVELS_FULL, // Full/PC range, e.g. 0-255 PL_COLOR_LEVELS_COUNT, // Compatibility aliases PL_COLOR_LEVELS_TV = PL_COLOR_LEVELS_LIMITED, PL_COLOR_LEVELS_PC = PL_COLOR_LEVELS_FULL, }; // The alpha representation mode. enum pl_alpha_mode { PL_ALPHA_UNKNOWN = 0, PL_ALPHA_INDEPENDENT, // alpha channel is separate from the video PL_ALPHA_PREMULTIPLIED, // alpha channel is multiplied into the colors PL_ALPHA_NONE, // alpha channel explicitly ignored (or absent) PL_ALPHA_MODE_COUNT, }; // The underlying bit-wise representation of a color sample. For example, // a 10-bit TV-range YCbCr value uploaded to a 16 bit texture would have // sample_depth=16 color_depth=10 bit_shift=0. // // For another example, a 12-bit XYZ full range sample shifted to 16-bits with // the lower 4 bits all set to 0 would have sample_depth=16 color_depth=12 // bit_shift=4. 
(libavcodec likes outputting this type of `xyz12`) // // To explain the meaning of `sample_depth` further; the consideration factor // here is the fact that GPU sampling will normalized the sampled color to the // range 0.0 - 1.0 in a manner dependent on the number of bits in the texture // format. So if you upload a 10-bit YCbCr value unpadded as 16-bit color // samples, all of the sampled values will be extremely close to 0.0. In such a // case, `pl_color_repr_normalize` would return a high scaling factor, which // would pull the color up to their 16-bit range. struct pl_bit_encoding { int sample_depth; // the number of bits the color is stored/sampled as int color_depth; // the effective number of bits of the color information int bit_shift; // a representational bit shift applied to the color }; // Returns whether two bit encodings are exactly identical. PL_API bool pl_bit_encoding_equal(const struct pl_bit_encoding *b1, const struct pl_bit_encoding *b2); // Parsed metadata from the Dolby Vision RPU struct pl_dovi_metadata { // Colorspace transformation metadata float nonlinear_offset[3]; // input offset ("ycc_to_rgb_offset") pl_matrix3x3 nonlinear; // before PQ, also called "ycc_to_rgb" pl_matrix3x3 linear; // after PQ, also called "rgb_to_lms" // Reshape data, grouped by component struct pl_reshape_data { uint8_t num_pivots; float pivots[9]; // normalized to [0.0, 1.0] based on BL bit depth uint8_t method[8]; // 0 = polynomial, 1 = MMR // Note: these must be normalized (divide by coefficient_log2_denom) float poly_coeffs[8][3]; // x^0, x^1, x^2, unused must be 0 uint8_t mmr_order[8]; // 1, 2 or 3 float mmr_constant[8]; float mmr_coeffs[8][3 /* order */][7]; } comp[3]; }; // Struct describing the underlying color system and representation. This // information is needed to convert an encoded color to a normalized RGB triple // in the range 0-1. struct pl_color_repr { enum pl_color_system sys; enum pl_color_levels levels; enum pl_alpha_mode alpha; struct pl_bit_encoding bits; // or {0} if unknown // Metadata for PL_COLOR_SYSTEM_DOLBYVISION. Note that, for the sake of // efficiency, this is treated purely as an opaque reference - functions // like pl_color_repr_equal will merely do a pointer equality test. // // The only functions that actually dereference it in any way are // pl_color_repr_decode, pl_shader_decode_color and pl_render_image(_mix). const struct pl_dovi_metadata *dovi; }; // Some common color representations. It's worth pointing out that all of these // presets leave `alpha` and `bits` as unknown - that is, only the system and // levels are predefined PL_API extern const struct pl_color_repr pl_color_repr_unknown; PL_API extern const struct pl_color_repr pl_color_repr_rgb; PL_API extern const struct pl_color_repr pl_color_repr_sdtv; PL_API extern const struct pl_color_repr pl_color_repr_hdtv; // also Blu-ray PL_API extern const struct pl_color_repr pl_color_repr_uhdtv; // SDR, NCL system PL_API extern const struct pl_color_repr pl_color_repr_jpeg; // Returns whether two colorspace representations are exactly identical. PL_API bool pl_color_repr_equal(const struct pl_color_repr *c1, const struct pl_color_repr *c2); // Replaces unknown values in the first struct by those of the second struct. 
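// Illustrative sketch (hedged, not part of the original source): a plausible
// pl_color_repr for 10-bit, limited-range BT.2020 (non-constant luminance)
// video stored in a 16-bit texture, as commonly found in HDR10 streams. The
// exact values are only an example. For this representation,
// pl_color_repr_normalize (declared below) would return a scale factor of
// roughly 65535/1023 ≈ 64, pulling the sampled 10-bit range up to full scale.
#if 0
static const struct pl_color_repr example_hdr10_repr = {
    .sys    = PL_COLOR_SYSTEM_BT_2020_NC,
    .levels = PL_COLOR_LEVELS_LIMITED,
    .alpha  = PL_ALPHA_UNKNOWN,
    .bits   = {
        .sample_depth = 16, // stored/sampled as 16-bit texels
        .color_depth  = 10, // 10-bit coded video
        .bit_shift    = 0,
    },
};
#endif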
PL_API void pl_color_repr_merge(struct pl_color_repr *orig, const struct pl_color_repr *update); // This function normalizes the color representation such that // color_depth=sample_depth and bit_shift=0; and returns the scaling factor // that must be multiplied into the color value to accomplish this, assuming // it has already been sampled by the GPU. If unknown, the color and sample // depth will both be inferred as 8 bits for the purposes of this conversion. PL_API float pl_color_repr_normalize(struct pl_color_repr *repr); // Guesses the best color levels based on the specified color levels and // falling back to using the color system instead. YCbCr-like systems are // assumed to be TV range, otherwise this defaults to PC range. PL_API enum pl_color_levels pl_color_levels_guess(const struct pl_color_repr *repr); // The colorspace's primaries (gamut) enum pl_color_primaries { PL_COLOR_PRIM_UNKNOWN = 0, // Standard gamut: PL_COLOR_PRIM_BT_601_525, // ITU-R Rec. BT.601 (525-line = NTSC, SMPTE-C) PL_COLOR_PRIM_BT_601_625, // ITU-R Rec. BT.601 (625-line = PAL, SECAM) PL_COLOR_PRIM_BT_709, // ITU-R Rec. BT.709 (HD), also sRGB PL_COLOR_PRIM_BT_470M, // ITU-R Rec. BT.470 M PL_COLOR_PRIM_EBU_3213, // EBU Tech. 3213-E / JEDEC P22 phosphors // Wide gamut: PL_COLOR_PRIM_BT_2020, // ITU-R Rec. BT.2020 (UltraHD) PL_COLOR_PRIM_APPLE, // Apple RGB PL_COLOR_PRIM_ADOBE, // Adobe RGB (1998) PL_COLOR_PRIM_PRO_PHOTO, // ProPhoto RGB (ROMM) PL_COLOR_PRIM_CIE_1931, // CIE 1931 RGB primaries PL_COLOR_PRIM_DCI_P3, // DCI-P3 (Digital Cinema) PL_COLOR_PRIM_DISPLAY_P3, // DCI-P3 (Digital Cinema) with D65 white point PL_COLOR_PRIM_V_GAMUT, // Panasonic V-Gamut (VARICAM) PL_COLOR_PRIM_S_GAMUT, // Sony S-Gamut PL_COLOR_PRIM_FILM_C, // Traditional film primaries with Illuminant C PL_COLOR_PRIM_ACES_AP0, // ACES Primaries #0 (ultra wide) PL_COLOR_PRIM_ACES_AP1, // ACES Primaries #1 PL_COLOR_PRIM_COUNT }; PL_API bool pl_color_primaries_is_wide_gamut(enum pl_color_primaries prim); // Returns the human-readable, friendly name of the color primaries. PL_API const char *pl_color_primaries_name(enum pl_color_primaries prim); PL_API extern const char *const pl_color_primaries_names[PL_COLOR_PRIM_COUNT]; // Guesses the best primaries based on a resolution. This always guesses // conservatively, i.e. it will never return a wide gamut color space even if // the resolution is 4K. PL_API enum pl_color_primaries pl_color_primaries_guess(int width, int height); // The colorspace's transfer function (gamma / EOTF) enum pl_color_transfer { PL_COLOR_TRC_UNKNOWN = 0, // Standard dynamic range: PL_COLOR_TRC_BT_1886, // ITU-R Rec. BT.1886 (CRT emulation + OOTF) PL_COLOR_TRC_SRGB, // IEC 61966-2-4 sRGB (CRT emulation) PL_COLOR_TRC_LINEAR, // Linear light content PL_COLOR_TRC_GAMMA18, // Pure power gamma 1.8 PL_COLOR_TRC_GAMMA20, // Pure power gamma 2.0 PL_COLOR_TRC_GAMMA22, // Pure power gamma 2.2 PL_COLOR_TRC_GAMMA24, // Pure power gamma 2.4 PL_COLOR_TRC_GAMMA26, // Pure power gamma 2.6 PL_COLOR_TRC_GAMMA28, // Pure power gamma 2.8 PL_COLOR_TRC_PRO_PHOTO, // ProPhoto RGB (ROMM) PL_COLOR_TRC_ST428, // Digital Cinema Distribution Master (XYZ) // High dynamic range: PL_COLOR_TRC_PQ, // ITU-R BT.2100 PQ (perceptual quantizer), aka SMPTE ST2048 PL_COLOR_TRC_HLG, // ITU-R BT.2100 HLG (hybrid log-gamma), aka ARIB STD-B67 PL_COLOR_TRC_V_LOG, // Panasonic V-Log (VARICAM) PL_COLOR_TRC_S_LOG1, // Sony S-Log1 PL_COLOR_TRC_S_LOG2, // Sony S-Log2 PL_COLOR_TRC_COUNT }; // Returns the human-readable, friendly name of the color transfer. 
PL_API const char *pl_color_transfer_name(enum pl_color_transfer trc); PL_API extern const char *const pl_color_transfer_names[PL_COLOR_TRC_COUNT]; // Returns the nominal peak of a given transfer function, relative to the // reference white. This refers to the highest encodable signal level. // Always equal to 1.0 for SDR curves. // // Note: For HLG in particular, which is scene-referred, this returns the // highest nominal peak in scene-referred space (3.77), which may be different // from the actual peak in display space after application of the HLG OOTF. PL_API float pl_color_transfer_nominal_peak(enum pl_color_transfer trc); static inline bool pl_color_transfer_is_hdr(enum pl_color_transfer trc) { return pl_color_transfer_nominal_peak(trc) > 1.0; } // This defines the display-space standard reference white level (in cd/m^2) // that is assumed for SDR content, for use when mapping between HDR and SDR in // display space. See ITU-R Report BT.2408 for more information. #define PL_COLOR_SDR_WHITE 203.0f // This defines the assumed contrast level of an unknown SDR display. This // will be used to determine the black point in the absence of any tagged // minimum luminance, relative to the tagged maximum luminance (or // PL_COLOR_SDR_WHITE in the absence of all tagging) #define PL_COLOR_SDR_CONTRAST 1000.0f // This defines the default black point assumed for "infinite contrast" HDR // displays. This is not exactly 0.0 because a value of 0.0 is interpreted // as "unknown / missing metadata" inside struct pl_hdr_metadata, and also // to avoid numerical issues in a variety of tone mapping functions. // Essentially, a black level below this number is functionally meaningless // inside libplacebo, and will be clamped to this value regardless. // // The value used here (1e-6) is about one 13-bit PQ step above absolute zero, // which is a small fraction of the human JND at this brightness level, and also // about 3 bits above the floating point machine epsilon. #define PL_COLOR_HDR_BLACK 1e-6f // This defines the assumed peak brightness of a HLG display with no HDR10 // metadata. This is set to the brightness of a "nominal" HLG reference display. #define PL_COLOR_HLG_PEAK 1000.0f // Represents a single CIE xy coordinate (e.g. CIE Yxy with Y = 1.0) struct pl_cie_xy { float x, y; }; // Creates a pl_cie_xyz from raw XYZ values static inline struct pl_cie_xy pl_cie_from_XYZ(float X, float Y, float Z) { float k = 1.0f / (X + Y + Z); struct pl_cie_xy xy = { k * X, k * Y }; return xy; } // Recovers (X / Y) from a CIE xy value. static inline float pl_cie_X(struct pl_cie_xy xy) { return xy.x / xy.y; } // Recovers (Z / Y) from a CIE xy value. static inline float pl_cie_Z(struct pl_cie_xy xy) { return (1 - xy.x - xy.y) / xy.y; } static inline bool pl_cie_xy_equal(const struct pl_cie_xy *a, const struct pl_cie_xy *b) { return a->x == b->x && a->y == b->y; } // Computes the CIE xy chromaticity coordinates of a CIE D-series illuminant // with the given correlated color temperature. // // `temperature` must be between 2500 K and 25000 K, inclusive. PL_API struct pl_cie_xy pl_white_from_temp(float temperature); // Represents the raw physical primaries corresponding to a color space. struct pl_raw_primaries { struct pl_cie_xy red, green, blue, white; }; // Returns whether two raw primaries are exactly identical. 
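// Illustrative sketch (hedged, not part of the original source): recovering a
// relative XYZ triple (with Y = 1) from a chromaticity coordinate, e.g. a
// white point produced by pl_white_from_temp(). The 6504 K input is only an
// example value within the supported 2500 K - 25000 K range.
#if 0
static void example_white_to_xyz(void)
{
    struct pl_cie_xy wp = pl_white_from_temp(6504.0f);
    float X = pl_cie_X(wp); // X / Y
    float Y = 1.0f;
    float Z = pl_cie_Z(wp); // Z / Y
    (void) X; (void) Y; (void) Z;
}
#endif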
PL_API bool pl_raw_primaries_equal(const struct pl_raw_primaries *a, const struct pl_raw_primaries *b); // Returns whether two raw primaries are approximately equal PL_API bool pl_raw_primaries_similar(const struct pl_raw_primaries *a, const struct pl_raw_primaries *b); // Replaces unknown values in the first struct by those of the second struct. PL_API void pl_raw_primaries_merge(struct pl_raw_primaries *orig, const struct pl_raw_primaries *update); // Returns the raw primaries for a given color space. PL_API const struct pl_raw_primaries *pl_raw_primaries_get(enum pl_color_primaries prim); enum pl_hdr_scaling { PL_HDR_NORM = 0, // 0.0 is absolute black, 1.0 is PL_COLOR_SDR_WHITE PL_HDR_SQRT, // sqrt() of PL_HDR_NORM values PL_HDR_NITS, // absolute brightness in raw cd/m² PL_HDR_PQ, // absolute brightness in PQ (0.0 to 1.0) PL_HDR_SCALING_COUNT, }; // Generic helper for performing HDR scale conversions. PL_API float pl_hdr_rescale(enum pl_hdr_scaling from, enum pl_hdr_scaling to, float x); enum pl_hdr_metadata_type { PL_HDR_METADATA_ANY = 0, PL_HDR_METADATA_NONE, PL_HDR_METADATA_HDR10, // HDR10 static mastering display metadata PL_HDR_METADATA_HDR10PLUS, // HDR10+ dynamic metadata PL_HDR_METADATA_CIE_Y, // CIE Y derived dynamic luminance metadata PL_HDR_METADATA_TYPE_COUNT, }; // Bezier curve for HDR metadata struct pl_hdr_bezier { float target_luma; // target luminance (cd/m²) for this OOTF float knee_x, knee_y; // cross-over knee point (0-1) float anchors[15]; // intermediate bezier curve control points (0-1) uint8_t num_anchors; }; // Represents raw HDR metadata as defined by SMPTE 2086 / CTA 861.3, which is // often attached to HDR sources and can be forwarded to HDR-capable displays, // or used to guide the libplacebo built-in tone mapping. Values left as 0 // are treated as unknown by libplacebo. // // Note: This means that a value of `min_luma == 0.0` gets treated as "minimum // luminance not known", which in practice may end up inferring a default // contrast of 1000:1 for SDR transfer functions. To avoid this, the user should // set these fields to a low positive value, e.g. PL_COLOR_HDR_BLACK, to signal // a "zero" black point (i.e. infinite contrast display). struct pl_hdr_metadata { // --- PL_HDR_METADATA_HDR10 // Mastering display metadata. struct pl_raw_primaries prim; // mastering display primaries float min_luma, max_luma; // min/max luminance (in cd/m²) // Content light level. (Note: this is ignored by libplacebo itself) float max_cll; // max content light level (in cd/m²) float max_fall; // max frame average light level (in cd/m²) // --- PL_HDR_METADATA_HDR10PLUS float scene_max[3]; // maxSCL in cd/m² per component (RGB) float scene_avg; // average of maxRGB in cd/m² struct pl_hdr_bezier ootf; // reference OOTF (optional) // --- PL_HDR_METADATA_CIE_Y float max_pq_y; // maximum PQ luminance (in PQ, 0-1) float avg_pq_y; // averaged PQ luminance (in PQ, 0-1) }; PL_API extern const struct pl_hdr_metadata pl_hdr_metadata_empty; // equal to {0} PL_API extern const struct pl_hdr_metadata pl_hdr_metadata_hdr10; // generic HDR10 display // Returns whether two sets of HDR metadata are exactly identical. PL_API bool pl_hdr_metadata_equal(const struct pl_hdr_metadata *a, const struct pl_hdr_metadata *b); // Replaces unknown values in the first struct by those of the second struct. PL_API void pl_hdr_metadata_merge(struct pl_hdr_metadata *orig, const struct pl_hdr_metadata *update); // Returns `true` if `data` contains a complete set of a given metadata type. 
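// Illustrative sketch (hedged, not part of the original source): converting an
// absolute luminance in cd/m² to a normalized PQ value and back with
// pl_hdr_rescale, e.g. when filling pl_hdr_metadata from measured values.
// The 1000 cd/m² input is only an example.
#if 0
static void example_hdr_rescale(void)
{
    float pq   = pl_hdr_rescale(PL_HDR_NITS, PL_HDR_PQ, 1000.0f);
    float nits = pl_hdr_rescale(PL_HDR_PQ, PL_HDR_NITS, pq); // ~1000 again
    (void) nits;
}
#endif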
// Note: for PL_HDR_METADATA_HDR10, only `min_luma` and `max_luma` are // considered - CLL/FALL and primaries are irrelevant for HDR tone-mapping. PL_API bool pl_hdr_metadata_contains(const struct pl_hdr_metadata *data, enum pl_hdr_metadata_type type); // Rendering intent for colorspace transformations. These constants match the // ICC specification (Table 23) enum pl_rendering_intent { PL_INTENT_AUTO = -1, // not a valid ICC intent, but used to auto-infer PL_INTENT_PERCEPTUAL = 0, PL_INTENT_RELATIVE_COLORIMETRIC = 1, PL_INTENT_SATURATION = 2, PL_INTENT_ABSOLUTE_COLORIMETRIC = 3 }; // Struct describing a physical color space. This information is needed to // turn a normalized RGB triple into its physical meaning, as well as to convert // between color spaces. struct pl_color_space { enum pl_color_primaries primaries; enum pl_color_transfer transfer; // HDR metadata for this color space, if present. (Optional) struct pl_hdr_metadata hdr; }; #define pl_color_space(...) (&(struct pl_color_space) { __VA_ARGS__ }) // Returns whether or not a color space is considered as effectively HDR. // This is true when the effective signal peak is greater than the SDR // reference white (1.0), taking into account `csp->hdr`. PL_API bool pl_color_space_is_hdr(const struct pl_color_space *csp); // Returns whether or not a color space is "black scaled", in which case 0.0 is // the true black point. This is true for SDR signals other than BT.1886, as // well as for HLG. PL_API bool pl_color_space_is_black_scaled(const struct pl_color_space *csp); // Linearize/delinearize input color, given a specified color space. In essence, // this corresponds to the ITU-R EOTF and its inverse (not the OETF). // The linear color will be scaled so that 1.0 is the diffuse white. The // non-linear color will be scaled so that 1.0 is the maximum representable // value. // // Note: This is a no-op if csp->transfer == PL_COLOR_TRC_LINEAR. PL_API void pl_color_linearize(const struct pl_color_space *csp, float color[3]); PL_API void pl_color_delinearize(const struct pl_color_space *csp, float color[3]); struct pl_nominal_luma_params { // The color space to infer luminance from const struct pl_color_space *color; // Which type of metadata to draw values from enum pl_hdr_metadata_type metadata; // This field controls the scaling of `out_*` enum pl_hdr_scaling scaling; // Fields to write the detected nominal luminance to. (Optional) // // For SDR displays, this will default to a contrast level of 1000:1 unless // indicated otherwise in the `min/max_luma` static HDR10 metadata fields. float *out_min; float *out_max; // Field to write the detected average luminance to, or 0.0 in the absence // of dynamic metadata. (Optional) float *out_avg; }; #define pl_nominal_luma_params(...) \ (&(struct pl_nominal_luma_params) { __VA_ARGS__ }) // Returns the effective luminance described by a pl_color_space. PL_API void pl_color_space_nominal_luma_ex(const struct pl_nominal_luma_params *params); // Replaces unknown values in the first struct by those of the second struct. PL_API void pl_color_space_merge(struct pl_color_space *orig, const struct pl_color_space *update); // Returns whether two colorspaces are exactly identical. PL_API bool pl_color_space_equal(const struct pl_color_space *c1, const struct pl_color_space *c2); // Go through a color-space and explicitly default all unknown fields to // reasonable values. After this function is called, none of the values will be // PL_COLOR_*_UNKNOWN or 0.0, except for the dynamic HDR metadata fields. 
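// Illustrative sketch (hedged, not part of the original source): querying the
// effective peak and black level of a color space in cd/m², drawing from the
// static HDR10 metadata if present. `csp` is assumed to be a valid color space.
#if 0
static void example_nominal_luma(const struct pl_color_space *csp)
{
    float min_nits = 0.0f, max_nits = 0.0f;
    pl_color_space_nominal_luma_ex(pl_nominal_luma_params(
        .color    = csp,
        .metadata = PL_HDR_METADATA_HDR10,
        .scaling  = PL_HDR_NITS,
        .out_min  = &min_nits,
        .out_max  = &max_nits,
    ));
    (void) min_nits; (void) max_nits;
}
#endif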
PL_API void pl_color_space_infer(struct pl_color_space *space); // Like `pl_color_space_infer`, but takes default values from the reference // color space (excluding certain special cases like HDR or wide gamut). PL_API void pl_color_space_infer_ref(struct pl_color_space *space, const struct pl_color_space *ref); // Infer both the source and destination gamut simultaneously, and also adjust // values for optimal display. This is mostly the same as // `pl_color_space_infer(src)` followed by `pl_color_space_infer_ref`, but also // takes into account the SDR contrast levels and PQ black points. This is // basically the logic used by `pl_shader_color_map` and `pl_renderer` to // decide the output color space in a conservative way and compute the final // end-to-end color transformation that needs to be done. PL_API void pl_color_space_infer_map(struct pl_color_space *src, struct pl_color_space *dst); // Some common color spaces. Note: These don't necessarily have all fields // filled, in particular `hdr` is left unset. PL_API extern const struct pl_color_space pl_color_space_unknown; PL_API extern const struct pl_color_space pl_color_space_srgb; PL_API extern const struct pl_color_space pl_color_space_bt709; PL_API extern const struct pl_color_space pl_color_space_hdr10; PL_API extern const struct pl_color_space pl_color_space_bt2020_hlg; PL_API extern const struct pl_color_space pl_color_space_monitor; // typical display // This represents metadata about extra operations to perform during colorspace // conversion, which correspond to artistic adjustments of the color. struct pl_color_adjustment { // Brightness boost. 0.0 = neutral, 1.0 = solid white, -1.0 = solid black float brightness; // Contrast boost. 1.0 = neutral, 0.0 = solid black float contrast; // Saturation gain. 1.0 = neutral, 0.0 = grayscale float saturation; // Hue shift, corresponding to a rotation around the [U, V] subvector, in // radians. 0.0 = neutral float hue; // Gamma adjustment. 1.0 = neutral, 0.0 = solid black float gamma; // Color temperature shift. 0.0 = 6500 K, -1.0 = 3000 K, 1.0 = 10000 K float temperature; }; #define PL_COLOR_ADJUSTMENT_NEUTRAL \ .contrast = 1.0, \ .saturation = 1.0, \ .gamma = 1.0, #define pl_color_adjustment(...) (&(struct pl_color_adjustment) { PL_COLOR_ADJUSTMENT_NEUTRAL __VA_ARGS__ }) PL_API extern const struct pl_color_adjustment pl_color_adjustment_neutral; // Represents the chroma placement with respect to the luma samples. This is // only relevant for YCbCr-like colorspaces with chroma subsampling. enum pl_chroma_location { PL_CHROMA_UNKNOWN = 0, PL_CHROMA_LEFT, // MPEG2/4, H.264 PL_CHROMA_CENTER, // MPEG1, JPEG PL_CHROMA_TOP_LEFT, PL_CHROMA_TOP_CENTER, PL_CHROMA_BOTTOM_LEFT, PL_CHROMA_BOTTOM_CENTER, PL_CHROMA_COUNT, }; // Fills *x and *y with the offset in luma pixels corresponding to a given // chroma location. // // Note: PL_CHROMA_UNKNOWN defaults to PL_CHROMA_LEFT PL_API void pl_chroma_location_offset(enum pl_chroma_location loc, float *x, float *y); // Returns an RGB->XYZ conversion matrix for a given set of primaries. // Multiplying this into the RGB color transforms it to CIE XYZ, centered // around the color space's white point. PL_API pl_matrix3x3 pl_get_rgb2xyz_matrix(const struct pl_raw_primaries *prim); // Similar to pl_get_rgb2xyz_matrix, but gives the inverse transformation. PL_API pl_matrix3x3 pl_get_xyz2rgb_matrix(const struct pl_raw_primaries *prim); // Returns a primary adaptation matrix, which converts from one set of // primaries to another. 
This is an RGB->RGB transformation. For rendering // intents other than PL_INTENT_ABSOLUTE_COLORIMETRIC, the white point is // adapted using the Bradford matrix. PL_API pl_matrix3x3 pl_get_color_mapping_matrix(const struct pl_raw_primaries *src, const struct pl_raw_primaries *dst, enum pl_rendering_intent intent); // Return a chromatic adaptation matrix, which converts from one white point to // another, using the CAT16 matrix. This is an RGB->RGB transformation. PL_API pl_matrix3x3 pl_get_adaptation_matrix(struct pl_cie_xy src, struct pl_cie_xy dst); // Returns true if 'b' is entirely contained in 'a'. Useful for figuring out if // colorimetric clipping will occur or not. PL_API bool pl_primaries_superset(const struct pl_raw_primaries *a, const struct pl_raw_primaries *b); // Returns true if `prim` forms a nominally valid set of primaries. This does // not check whether or not these primaries are actually physically realisable, // merely that they satisfy the requirements for colorspace math (to avoid NaN). PL_API bool pl_primaries_valid(const struct pl_raw_primaries *prim); // Returns true if two primaries are 'compatible', which is the case if // they preserve the relationship between primaries (red=red, green=green, // blue=blue). In other words, this is false for synthetic primaries that have // channels misordered from the convention (e.g. for some test ICC profiles). PL_API bool pl_primaries_compatible(const struct pl_raw_primaries *a, const struct pl_raw_primaries *b); // Clip points in the first gamut (src) to be fully contained inside the second // gamut (dst). Only works on compatible primaries (pl_primaries_compatible). PL_API struct pl_raw_primaries pl_primaries_clip(const struct pl_raw_primaries *src, const struct pl_raw_primaries *dst); // Primary-dependent RGB->LMS matrix for the IPTPQc4 color system. This is // derived from the HPE XYZ->LMS matrix with 4% crosstalk added. PL_API pl_matrix3x3 pl_ipt_rgb2lms(const struct pl_raw_primaries *prim); PL_API pl_matrix3x3 pl_ipt_lms2rgb(const struct pl_raw_primaries *prim); // Primary-independent L'M'S' -> IPT matrix for the IPTPQc4 color system, and // its inverse. This is identical to the Ebner & Fairchild (1998) IPT matrix. PL_API extern const pl_matrix3x3 pl_ipt_lms2ipt; PL_API extern const pl_matrix3x3 pl_ipt_ipt2lms; // Cone types involved in human vision enum pl_cone { PL_CONE_L = 1 << 0, PL_CONE_M = 1 << 1, PL_CONE_S = 1 << 2, // Convenience aliases PL_CONE_NONE = 0, PL_CONE_LM = PL_CONE_L | PL_CONE_M, PL_CONE_MS = PL_CONE_M | PL_CONE_S, PL_CONE_LS = PL_CONE_L | PL_CONE_S, PL_CONE_LMS = PL_CONE_L | PL_CONE_M | PL_CONE_S, }; // Structure describing parameters for simulating color blindness struct pl_cone_params { enum pl_cone cones; // Which cones are *affected* by the vision model float strength; // Coefficient for how strong the defect is // (1.0 = Unaffected, 0.0 = Full blindness) }; #define pl_cone_params(...) 
(&(struct pl_cone_params) { __VA_ARGS__ }) // Built-in color blindness models PL_API extern const struct pl_cone_params pl_vision_normal; // No distortion (92%) PL_API extern const struct pl_cone_params pl_vision_protanomaly; // Red deficiency (0.66%) PL_API extern const struct pl_cone_params pl_vision_protanopia; // Red absence (0.59%) PL_API extern const struct pl_cone_params pl_vision_deuteranomaly; // Green deficiency (2.7%) PL_API extern const struct pl_cone_params pl_vision_deuteranopia; // Green absence (0.56%) PL_API extern const struct pl_cone_params pl_vision_tritanomaly; // Blue deficiency (0.01%) PL_API extern const struct pl_cone_params pl_vision_tritanopia; // Blue absence (0.016%) PL_API extern const struct pl_cone_params pl_vision_monochromacy; // Blue cones only (<0.001%) PL_API extern const struct pl_cone_params pl_vision_achromatopsia; // Rods only (<0.0001%) // Returns a cone adaptation matrix. Applying this to an RGB color in the given // color space will apply the given cone adaptation coefficients for simulating // a type of color blindness. // // For the color blindness models which don't entail complete loss of a cone, // you can partially counteract the effect by using a similar model with the // `strength` set to its inverse. For example, to partially counteract // deuteranomaly, you could generate a cone matrix for PL_CONE_M with the // strength 2.0 (or some other number above 1.0). PL_API pl_matrix3x3 pl_get_cone_matrix(const struct pl_cone_params *params, const struct pl_raw_primaries *prim); // Returns a color decoding matrix for a given combination of source color // representation and adjustment parameters. This mutates `repr` to reflect the // change. If `params` is NULL, it defaults to &pl_color_adjustment_neutral. // // This function always performs a conversion to RGB. To convert to other // colorspaces (e.g. between YUV systems), obtain a second YUV->RGB matrix // and invert it using `pl_transform3x3_invert`. // // Note: For BT.2020 constant-luminance, this outputs chroma information in the // range [-0.5, 0.5]. Since the CL system conversion is non-linear, further // processing must be done by the caller. The channel order is CrYCb. // // Note: For BT.2100 ICtCp, this outputs in the color space L'M'S'. Further // non-linear processing must be done by the caller. // // Note: XYZ system is expected to be in DCDM X'Y'Z' encoding (ST 428-1), in // practice this means normalizing by a (48.0 / 52.37) factor and applying 2.6 gamma PL_API pl_transform3x3 pl_color_repr_decode(struct pl_color_repr *repr, const struct pl_color_adjustment *params); // Common struct to describe an ICC profile struct pl_icc_profile { // Points to the in-memory representation of the ICC profile. This is // allowed to be NULL, in which case the `pl_icc_profile` represents "no // profile". const void *data; size_t len; // If a profile is set, this signature must uniquely identify it (including // across restarts, for caching), ideally using a checksum of the profile // contents. The user is free to choose the method of determining this // signature, but note the existence of the // `pl_icc_profile_compute_signature` helper. uint64_t signature; }; #define pl_icc_profile(...) &(struct pl_icc_profile) { __VA_ARGS__ } // This doesn't do a comparison of the actual contents, only of the signature. PL_API bool pl_icc_profile_equal(const struct pl_icc_profile *p1, const struct pl_icc_profile *p2); // Sets `signature` to a hash of `profile->data`, if non-NULL.
Provided as a // convenience function for the sake of users ingesting arbitrary ICC profiles // from sources where they can't reliably detect profile changes. // // Note: This is based on a very fast hash, and will compute a signature for // even large (10 MB) ICC profiles in, typically, a fraction of a millisecond. PL_API void pl_icc_profile_compute_signature(struct pl_icc_profile *profile); PL_API_END #endif // LIBPLACEBO_COLORSPACE_H_ libplacebo-v7.349.0/src/include/libplacebo/common.h000066400000000000000000000212601463457750100221440ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #ifndef LIBPLACEBO_COMMON_H_ #define LIBPLACEBO_COMMON_H_ #include #include PL_API_BEGIN // Some common utility types. These are overloaded to support 2D, 3D and // integer/float variants. typedef struct pl_rect2d { int x0, y0; int x1, y1; } pl_rect2d; typedef struct pl_rect3d { int x0, y0, z0; int x1, y1, z1; } pl_rect3d; typedef struct pl_rect2df { float x0, y0; float x1, y1; } pl_rect2df; typedef struct pl_rect3df { float x0, y0, z0; float x1, y1, z1; } pl_rect3df; // These macros will work for any of the above pl_rect variants (with enough // dimensions). Careful: double-evaluation hazard #define pl_rect_w(r) ((r).x1 - (r).x0) #define pl_rect_h(r) ((r).y1 - (r).y0) #define pl_rect_d(r) ((r).z1 - (r).z0) #define pl_rect2d_eq(a, b) \ ((a).x0 == (b).x0 && (a).x1 == (b).x1 && \ (a).y0 == (b).y0 && (a).y1 == (b).y1) #define pl_rect3d_eq(a, b) \ ((a).x0 == (b).x0 && (a).x1 == (b).x1 && \ (a).y0 == (b).y0 && (a).y1 == (b).y1 && \ (a).z0 == (b).z0 && (a).z1 == (b).z1) // "Normalize" a rectangle: This ensures d1 >= d0 for all dimensions. PL_API void pl_rect2d_normalize(pl_rect2d *rc); PL_API void pl_rect3d_normalize(pl_rect3d *rc); PL_API void pl_rect2df_normalize(pl_rect2df *rc); PL_API void pl_rect3df_normalize(pl_rect3df *rc); // Return the rounded form of a rect. PL_API pl_rect2d pl_rect2df_round(const pl_rect2df *rc); PL_API pl_rect3d pl_rect3df_round(const pl_rect3df *rc); // Represents a row-major matrix, i.e. the following matrix // [ a11 a12 a13 ] // [ a21 a22 a23 ] // [ a31 a32 a33 ] // is represented in C like this: // { { a11, a12, a13 }, // { a21, a22, a23 }, // { a31, a32, a33 } }; typedef struct pl_matrix3x3 { float m[3][3]; } pl_matrix3x3; PL_API extern const pl_matrix3x3 pl_matrix3x3_identity; // Applies a matrix to a float vector in-place. PL_API void pl_matrix3x3_apply(const pl_matrix3x3 *mat, float vec[3]); // Applies a matrix to a pl_rect3df PL_API void pl_matrix3x3_apply_rc(const pl_matrix3x3 *mat, pl_rect3df *rc); // Scales a color matrix by a linear factor. PL_API void pl_matrix3x3_scale(pl_matrix3x3 *mat, float scale); // Inverts a matrix. Only use where precision is not that important. PL_API void pl_matrix3x3_invert(pl_matrix3x3 *mat); // Composes/multiplies two matrices. Multiplies B into A, i.e.
// A := A * B PL_API void pl_matrix3x3_mul(pl_matrix3x3 *a, const pl_matrix3x3 *b); // Flipped version of `pl_matrix3x3_mul`. // B := A * B PL_API void pl_matrix3x3_rmul(const pl_matrix3x3 *a, pl_matrix3x3 *b); // Represents an affine transformation, which is basically a 3x3 matrix // together with a column vector to add onto the output. typedef struct pl_transform3x3 { pl_matrix3x3 mat; float c[3]; } pl_transform3x3; PL_API extern const pl_transform3x3 pl_transform3x3_identity; // Applies a transform to a float vector in-place. PL_API void pl_transform3x3_apply(const pl_transform3x3 *t, float vec[3]); // Applies a transform to a pl_rect3df PL_API void pl_transform3x3_apply_rc(const pl_transform3x3 *t, pl_rect3df *rc); // Scales the output of a transform by a linear factor. Since an affine // transformation is non-linear, this does not commute. If you want to scale // the *input* of a transform, use pl_matrix3x3_scale on `t.mat`. PL_API void pl_transform3x3_scale(pl_transform3x3 *t, float scale); // Inverts a transform. Only use where precision is not that important. PL_API void pl_transform3x3_invert(pl_transform3x3 *t); // 2D analog of the above structs. Since these are featured less prominently, // we omit some of the other helper functions. typedef struct pl_matrix2x2 { float m[2][2]; } pl_matrix2x2; PL_API extern const pl_matrix2x2 pl_matrix2x2_identity; PL_API pl_matrix2x2 pl_matrix2x2_rotation(float angle); PL_API void pl_matrix2x2_apply(const pl_matrix2x2 *mat, float vec[2]); PL_API void pl_matrix2x2_apply_rc(const pl_matrix2x2 *mat, pl_rect2df *rc); PL_API void pl_matrix2x2_mul(pl_matrix2x2 *a, const pl_matrix2x2 *b); PL_API void pl_matrix2x2_rmul(const pl_matrix2x2 *a, pl_matrix2x2 *b); PL_API void pl_matrix2x2_scale(pl_matrix2x2 *mat, float scale); PL_API void pl_matrix2x2_invert(pl_matrix2x2 *mat); typedef struct pl_transform2x2 { pl_matrix2x2 mat; float c[2]; } pl_transform2x2; PL_API extern const pl_transform2x2 pl_transform2x2_identity; PL_API void pl_transform2x2_apply(const pl_transform2x2 *t, float vec[2]); PL_API void pl_transform2x2_apply_rc(const pl_transform2x2 *t, pl_rect2df *rc); PL_API void pl_transform2x2_mul(pl_transform2x2 *a, const pl_transform2x2 *b); PL_API void pl_transform2x2_rmul(const pl_transform2x2 *a, pl_transform2x2 *b); PL_API void pl_transform2x2_scale(pl_transform2x2 *t, float scale); PL_API void pl_transform2x2_invert(pl_transform2x2 *t); // Compute new bounding box of a transformation (as applied to a given rect). PL_API pl_rect2df pl_transform2x2_bounds(const pl_transform2x2 *t, const pl_rect2df *rc); // Helper functions for dealing with aspect ratios and stretched/scaled rects. // Return the (absolute) aspect ratio (width/height) of a given pl_rect2df. // This will always be a positive number, even if `rc` is flipped. PL_API float pl_rect2df_aspect(const pl_rect2df *rc); // Set the aspect of a `rc` to a given aspect ratio with an extra 'panscan' // factor choosing the balance between shrinking and growing the `rc` to meet // this aspect ratio. // // Notes: // - If `panscan` is 0.0, this function will only ever shrink the `rc`. // - If `panscan` is 1.0, this function will only ever grow the `rc`. // - If `panscan` is 0.5, this function is area-preserving. PL_API void pl_rect2df_aspect_set(pl_rect2df *rc, float aspect, float panscan); // Set one rect's aspect to that of another #define pl_rect2df_aspect_copy(rc, src, panscan) \ pl_rect2df_aspect_set((rc), pl_rect2df_aspect(src), (panscan)) // 'Fit' one rect inside another. 
`rc` will be set to the same size and aspect // ratio as `src`, but with the size limited to fit inside the original `rc`. // Like `pl_rect2df_aspect_set`, `panscan` controls the pan&scan factor. PL_API void pl_rect2df_aspect_fit(pl_rect2df *rc, const pl_rect2df *src, float panscan); // Scale rect in each direction while keeping it centered. PL_API void pl_rect2df_stretch(pl_rect2df *rc, float stretch_x, float stretch_y); // Offset rect by an arbitrary offset factor. If the corresponding dimension // of a rect is flipped, so too is the applied offset. PL_API void pl_rect2df_offset(pl_rect2df *rc, float offset_x, float offset_y); // Scale a rect uniformly in both dimensions. #define pl_rect2df_zoom(rc, zoom) pl_rect2df_stretch((rc), (zoom), (zoom)) // Rotation in degrees clockwise typedef int pl_rotation; enum { PL_ROTATION_0 = 0, PL_ROTATION_90 = 1, PL_ROTATION_180 = 2, PL_ROTATION_270 = 3, PL_ROTATION_360 = 4, // equivalent to PL_ROTATION_0 // Note: Values outside the range [0,4) are legal, including negatives. }; // Constrains to the interval [PL_ROTATION_0, PL_ROTATION_360). static inline pl_rotation pl_rotation_normalize(pl_rotation rot) { return (rot % PL_ROTATION_360 + PL_ROTATION_360) % PL_ROTATION_360; } // Rotates the coordinate system of a `pl_rect2d(f)` in a certain direction. // For example, calling this with PL_ROTATION_90 will correspond to rotating // the coordinate system 90° to the right (so the x axis becomes the y axis). // // The resulting rect is re-normalized in the same coordinate system. PL_API void pl_rect2df_rotate(pl_rect2df *rc, pl_rotation rot); // Returns the aspect ratio in a rotated frame of reference. static inline float pl_aspect_rotate(float aspect, pl_rotation rot) { return (rot % PL_ROTATION_180) ? 1.0 / aspect : aspect; } #define pl_rect2df_aspect_set_rot(rc, aspect, rot, panscan) \ pl_rect2df_aspect_set((rc), pl_aspect_rotate((aspect), (rot)), (panscan)) #define pl_rect2df_aspect_copy_rot(rc, src, panscan, rot) \ pl_rect2df_aspect_set_rot((rc), pl_rect2df_aspect(src), (rot), (panscan)) PL_API_END #endif // LIBPLACEBO_COMMON_H_ libplacebo-v7.349.0/src/include/libplacebo/config.h.in000066400000000000000000000053401463457750100225270ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #ifndef LIBPLACEBO_CONFIG_H_ #define LIBPLACEBO_CONFIG_H_ // Increased any time the library changes in a fundamental/major way. #define PL_MAJOR_VER @majorver@ // Increased any time the API changes. (Note: Does not reset when PL_MAJOR_VER // is increased) #define PL_API_VER @apiver@ // Increased any time a fix is made to a given API version. #define PL_FIX_VER (pl_fix_ver()) // Friendly name (`git describe`) for the overall version of the library #define PL_VERSION (pl_version()) // Feature tests. 
These aren't described in further detail, but may be useful // for programmers wanting to programmatically check for feature support // in their compiled libplacebo versions. @extra_defs@ // Extra compiler-specific stuff #ifndef PL_DEPRECATED_IN # if defined(_MSC_VER) # define PL_DEPRECATED_IN(VER) # else # define PL_DEPRECATED_IN(VER) __attribute__((deprecated)) # endif #endif #ifndef __has_feature #define __has_feature(x) 0 #endif #ifndef PL_DEPRECATED_ENUM_IN # if (defined(__GNUC__) && (__GNUC__ >= 6)) || __has_feature(enumerator_attributes) # define PL_DEPRECATED_ENUM_IN(VER) PL_DEPRECATED_IN(VER) # else # define PL_DEPRECATED_ENUM_IN(VER) # endif #endif #if defined(_WIN32) || defined(__CYGWIN__) # ifdef PL_EXPORT # define PL_API __declspec(dllexport) # else # ifndef PL_STATIC # define PL_API __declspec(dllimport) # else # define PL_API # endif # endif #else # define PL_API __attribute__ ((visibility ("default"))) #endif // C++ compatibility #ifdef __cplusplus # define PL_API_BEGIN extern "C" { # define PL_API_END } #else # define PL_API_BEGIN # define PL_API_END #endif #ifndef __cplusplus // Disable this warning because libplacebo's params macros override fields # pragma GCC diagnostic ignored "-Woverride-init" #endif // Extra helper macros #define PL_TOSTRING_INNER(x) #x #define PL_TOSTRING(x) PL_TOSTRING_INNER(x) // Deprecated macro for back-compatibility #define PL_STRUCT(name) struct name##_t PL_API_BEGIN PL_API int pl_fix_ver(void); PL_API const char *pl_version(void); PL_API_END #endif // LIBPLACEBO_CONFIG_H_ libplacebo-v7.349.0/src/include/libplacebo/d3d11.h000066400000000000000000000254251463457750100214770ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #ifndef LIBPLACEBO_D3D11_H_ #define LIBPLACEBO_D3D11_H_ #include #include #include #include #include PL_API_BEGIN // Structure representing the actual D3D11 device and associated GPU instance typedef const struct pl_d3d11_t { pl_gpu gpu; // The D3D11 device in use. The user is free to use this for their own // purposes, including taking a reference to the device (with AddRef) and // using it beyond the lifetime of the pl_d3d11 that created it (though if // this is done with debug enabled, it will confuse the leak checker.) ID3D11Device *device; // True if the device is using a software (WARP) adapter bool software; } *pl_d3d11; struct pl_d3d11_params { // The Direct3D 11 device to use. Optional, if NULL then libplacebo will // create its own ID3D11Device using the options below. If set, all the // options below will be ignored. ID3D11Device *device; // --- Adapter selection options // The adapter to use. This overrides adapter_luid. IDXGIAdapter *adapter; // The LUID of the adapter to use. If adapter and adapter_luid are unset, // the default adapter will be used instead. LUID adapter_luid; // Allow a software (WARP) adapter when selecting the adapter automatically. 
// Note that sometimes the default adapter will be a software adapter. This // is because, on Windows 8 and up, if there are no hardware adapters, // Windows will pretend the WARP adapter is the default hardware adapter. bool allow_software; // Always use a software adapter. This is mainly for testing purposes. bool force_software; // --- Device creation options // Enable the debug layer (D3D11_CREATE_DEVICE_DEBUG) // Also logs IDXGIInfoQueue messages bool debug; // Extra flags to pass to D3D11CreateDevice (D3D11_CREATE_DEVICE_FLAG). // libplacebo should be compatible with any flags passed here. UINT flags; // The minimum and maximum allowable feature levels for the created device. // libplacebo will attempt to create a device with the highest feature level // between min_feature_level and max_feature_level (inclusive.) If there are // no supported feature levels in this range, `pl_d3d11_create` will either // return NULL or fall back to the software adapter, depending on whether // `allow_software` is set. // // Normally there is no reason to set `max_feature_level` other than to test // if a program works at lower feature levels. // // Note that D3D_FEATURE_LEVEL_9_3 and below (known as 10level9) are highly // restrictive. These feature levels are supported on a best-effort basis. // They represent very old DirectX 9 compatible PC and laptop hardware // (2001-2007, GeForce FX, 6, 7, ATI R300-R500, GMA 950-X3000) and some // less-old mobile devices (Surface RT, Surface 2.) Basic video rendering // should work, but the full pl_gpu API will not be available and advanced // shaders will probably fail. The hardware is probably too slow for these // anyway. // // Known restrictions of 10level9 devices include: // D3D_FEATURE_LEVEL_9_3 and below: // - `pl_pass_run_params->index_buf` will not work (but `index_data` will) // - Dimensions of 3D textures must be powers of two // - Shaders cannot use gl_FragCoord // - Shaders cannot use texelFetch // D3D_FEATURE_LEVEL_9_2 and below: // - Fragment shaders have no dynamic flow control and very strict limits // on the number of constants, temporary registers and instructions. // Whether a shader meets the requirements will depend on how it's // compiled and optimized, but it's likely that only simple shaders will // work. // D3D_FEATURE_LEVEL_9_1: // - No high-bit-depth formats with PL_FMT_CAP_RENDERABLE or // PL_FMT_CAP_LINEAR // // If these restrictions are undesirable and you don't need to support // ancient hardware, set `min_feature_level` to D3D_FEATURE_LEVEL_10_0. int min_feature_level; // Defaults to D3D_FEATURE_LEVEL_9_1 if unset int max_feature_level; // Defaults to D3D_FEATURE_LEVEL_12_1 if unset // Allow up to N in-flight frames. Similar to swapchain_depth for Vulkan and // OpenGL, though with DXGI this is a device-wide setting that affects all // swapchains (except for waitable swapchains.) See the documentation for // `pl_swapchain_latency` for more information. int max_frame_latency; }; // Default/recommended parameters. Should generally be safe and efficient. #define PL_D3D11_DEFAULTS \ .allow_software = true, #define pl_d3d11_params(...) (&(struct pl_d3d11_params) { PL_D3D11_DEFAULTS __VA_ARGS__ }) PL_API extern const struct pl_d3d11_params pl_d3d11_default_params; // Creates a new Direct3D 11 device based on the given parameters, or wraps an // existing device, and initializes a new GPU instance. If params is left as // NULL, it defaults to &pl_d3d11_default_params. 
If an existing device is // provided in params->device, `pl_d3d11_create` will take a reference to it // that will be released in `pl_d3d11_destroy`. PL_API pl_d3d11 pl_d3d11_create(pl_log log, const struct pl_d3d11_params *params); // Release the D3D11 device. // // Note that all libplacebo objects allocated from this pl_d3d11 object (e.g. // via `d3d11->gpu` or using `pl_d3d11_create_swapchain`) *must* be explicitly // destroyed by the user before calling this. PL_API void pl_d3d11_destroy(pl_d3d11 *d3d11); // For a `pl_gpu` backed by `pl_d3d11`, this function can be used to retrieve // the underlying `pl_d3d11`. Returns NULL for any other type of `gpu`. PL_API pl_d3d11 pl_d3d11_get(pl_gpu gpu); struct pl_d3d11_swapchain_params { // The Direct3D 11 swapchain to wrap. Optional. If NULL, libplacebo will // create its own swapchain using the options below. If set, all the // swapchain creation options will be ignored. // // The provided swapchain must have been created by the same device used // by `gpu` and must not have multisampled backbuffers. IDXGISwapChain *swapchain; // --- Swapchain creation options // Initial framebuffer width and height. If both width and height are set to // 0 and window is non-NULL, the client area of the window is used instead. // For convenience, if either component would be 0, it is set to 1 instead. // This is because Windows can have 0-sized windows, but not 0-sized // swapchains. int width; int height; // The handle of the output window. In Windows 8 and up this is optional // because you can output to a CoreWindow or create a composition swapchain // instead. HWND window; // A pointer to the CoreWindow to output to. If both this and `window` are // NULL, CreateSwapChainForComposition will be used to create the swapchain. IUnknown *core_window; // If set, libplacebo will create a swapchain that uses the legacy bitblt // presentation model (with the DXGI_SWAP_EFFECT_DISCARD swap effect.) This // tends to give worse performance and frame pacing in windowed mode and it // prevents borderless fullscreen optimizations, but it might be necessary // to work around buggy drivers, especially with DXGI 1.2 in the Platform // Update for Windows 7. When unset, libplacebo will try to use the flip // presentation model and only fall back to bitblt if flip is unavailable. bool blit; // additional swapchain flags // No validation on these flags is being performed, and swapchain creation // may fail if an unsupported combination is requested. UINT flags; // --- Swapchain usage behavior options // Disable using a 10-bit swapchain format for SDR output bool disable_10bit_sdr; }; #define pl_d3d11_swapchain_params(...) (&(struct pl_d3d11_swapchain_params) { __VA_ARGS__ }) // Creates a new Direct3D 11 swapchain, or wraps an existing one. If an existing // swapchain is provided in params->swapchain, `pl_d3d11_create_swapchain` will // take a reference to it that will be released in `pl_swapchain_destroy`. PL_API pl_swapchain pl_d3d11_create_swapchain(pl_d3d11 d3d11, const struct pl_d3d11_swapchain_params *params); // Takes a `pl_swapchain` created by pl_d3d11_create_swapchain and returns a // reference to the underlying IDXGISwapChain. This increments the refcount, so // call IDXGISwapChain::Release when finished with it. PL_API IDXGISwapChain *pl_d3d11_swapchain_unwrap(pl_swapchain sw); struct pl_d3d11_wrap_params { // The D3D11 texture to wrap, or a texture array containing the texture to // wrap. 
Must be a ID3D11Texture1D, ID3D11Texture2D or ID3D11Texture3D // created by the same device used by `gpu`, must have D3D11_USAGE_DEFAULT, // and must not be mipmapped or multisampled. ID3D11Resource *tex; // If tex is a texture array, this is the array member to use as the pl_tex. int array_slice; // If tex is a video resource (eg. DXGI_FORMAT_AYUV, DXGI_FORMAT_NV12, // DXGI_FORMAT_P010, etc.,) it can be wrapped as a pl_tex by specifying the // type and size of the shader view. For planar video formats, the plane // that is wrapped depends on the chosen format. // // If tex is not a video resource, these fields are unnecessary. The correct // format will be determined automatically. If tex is not 2D, these fields // are ignored. // // For a list of supported video formats and their corresponding view // formats and sizes, see: // https://microsoft.github.io/DirectX-Specs/d3d/archive/D3D11_3_FunctionalSpec.htm#VideoViews DXGI_FORMAT fmt; int w; int h; }; #define pl_d3d11_wrap_params(...) (&(struct pl_d3d11_wrap_params) { __VA_ARGS__ }) // Wraps an external texture into a pl_tex abstraction. `pl_d3d11_wrap` takes a // reference to the texture, which is released when `pl_tex_destroy` is called. // // This function may fail due to incompatible formats, incompatible flags or // other reasons, in which case it will return NULL. PL_API pl_tex pl_d3d11_wrap(pl_gpu gpu, const struct pl_d3d11_wrap_params *params); PL_API_END #endif // LIBPLACEBO_D3D11_H_ libplacebo-v7.349.0/src/include/libplacebo/dispatch.h000066400000000000000000000232071463457750100224560ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #ifndef LIBPLACEBO_DISPATCH_H_ #define LIBPLACEBO_DISPATCH_H_ #include #include PL_API_BEGIN // Thread-safety: Safe typedef struct pl_dispatch_t *pl_dispatch; // Creates a new shader dispatch object. This object provides a translation // layer between generated shaders (pl_shader) and the ra context such that it // can be used to execute shaders. This dispatch object will also provide // shader caching (for efficient re-use). PL_API pl_dispatch pl_dispatch_create(pl_log log, pl_gpu gpu); PL_API void pl_dispatch_destroy(pl_dispatch *dp); // Reset/increments the internal counters of the pl_dispatch. This must be // called whenever the user is going to begin with a new frame, in order to // perform garbage collection and advance the state of the internal PRNG. // // Note that shaders generated by `pl_dispatch` are therefore entirely // deterministic, as long as the sequence of calls (and inputs to the shader) // are the same. PL_API void pl_dispatch_reset_frame(pl_dispatch dp); // Returns a blank pl_shader object, suitable for recording rendering commands. // For more information, see the header documentation in `shaders/*.h`. PL_API pl_shader pl_dispatch_begin(pl_dispatch dp); // Struct passed to `info_callback`. Only valid until that function returns. 
struct pl_dispatch_info { // Information about the shader for this shader execution, as well as a // 64-bit signature uniquely identifying it. pl_shader_info shader; uint64_t signature; // A list of execution times for this pass, in nanoseconds. May be empty. uint64_t samples[256]; int num_samples; // As a convenience, this contains the last, average and peak of the above // list of samples. If `num_samples` is 0, these values are also 0. uint64_t last; uint64_t peak; uint64_t average; }; // Helper function to make a copy of `pl_dispatch_info`, while overriding // (and dereferencing) whatever was previously stored there. static inline void pl_dispatch_info_move(struct pl_dispatch_info *dst, const struct pl_dispatch_info *src) { pl_shader_info_deref(&dst->shader); *dst = *src; dst->shader = pl_shader_info_ref(src->shader); } // Set up a dispatch callback for this `pl_dispatch` object. The given callback // will be run for every successfully dispatched shader. Call this again with // `cb == NULL` to disable. PL_API void pl_dispatch_callback(pl_dispatch dp, void *priv, void (*cb)(void *priv, const struct pl_dispatch_info *)); struct pl_dispatch_params { // The shader to execute. The pl_dispatch will take over ownership // of this shader, and return it back to the internal pool. // // This shader must have a compatible signature, i.e. inputs // `PL_SHADER_SIG_NONE` and outputs `PL_SHADER_SIG_COLOR`. pl_shader *shader; // The texture to render to. This must have params compatible with the // shader, i.e. `target->params.renderable` for fragment shaders and // `target->params.storable` for compute shaders. // // Note: Even when not using compute shaders, users are advised to always // set `target->params.storable` if permitted by the `pl_fmt`, since this // allows the use of compute shaders instead of full-screen quads, which is // faster on some platforms. pl_tex target; // The target rect to render to. Optional, if left as {0}, then the // entire texture will be rendered to. pl_rect2d rect; // If set, enables and controls the blending for this pass. Optional. When // using this with fragment shaders, `target->params.fmt->caps` must // include `PL_FMT_CAP_BLENDABLE`. const struct pl_blend_params *blend_params; // If set, records the execution time of this dispatch into the given // timer object. Optional. // // Note: If this is set, `pl_dispatch` cannot internally measure the // execution time of the shader, which means `pl_dispatch_info.samples` may // be empty as a result. pl_timer timer; }; #define pl_dispatch_params(...) (&(struct pl_dispatch_params) { __VA_ARGS__ }) // Dispatch a generated shader (via the pl_shader mechanism). Returns whether // or not the dispatch was successful. PL_API bool pl_dispatch_finish(pl_dispatch dp, const struct pl_dispatch_params *params); struct pl_dispatch_compute_params { // The shader to execute. This must be a compute shader with the input // set to PL_SHADER_SIG_NONE. The output, if it has any, is ignored. pl_shader *shader; // The number of work groups to dispatch in each dimension. If this is left // as {0} and `width/height` are both set, the number of work groups will // be inferred from the shader's `compute_group_sizes`. int dispatch_size[3]; // If set, simulate vertex attributes (similar to `pl_dispatch_finish`) // according to the given dimensions. The first two components of the // thread's global ID will be interpreted as the X and Y locations. // // Optional, ignored if either component is left as 0.
int width, height; // If set, records the execution time of this dispatch into the given // timer object. Optional. // // Note: If this is set, `pl_dispatch` cannot internally measure the // execution time of the shader, which means `pl_dispatch_info.samples` may // be empty as a result. pl_timer timer; }; #define pl_dispatch_compute_params(...) (&(struct pl_dispatch_compute_params) { __VA_ARGS__ }) // A variant of `pl_dispatch_finish`, this one only dispatches a compute shader // while ignoring its output (if it has one). It's only useful for shaders // which have otherwise observable side effects (such as updating state // objects). PL_API bool pl_dispatch_compute(pl_dispatch dp, const struct pl_dispatch_compute_params *params); enum pl_vertex_coords { PL_COORDS_ABSOLUTE, // Absolute/integer `target` coordinates PL_COORDS_RELATIVE, // Relative `target` coordinates in range [0, 1] PL_COORDS_NORMALIZED, // GL-normalized coordinates in range [-1, 1] }; struct pl_dispatch_vertex_params { // The shader to execute. This must be a raster shader with the input set // to `PL_SHADER_SIG_NONE` and the output set to `PL_SHADER_SIG_COLOR`. // // Additionally, the shader must not have any attached vertex attributes. pl_shader *shader; // The texture to render to. Requires `target->params.renderable`. pl_tex target; // The target rect to clip the rendering to. (Optional) pl_rect2d scissors; // If set, enables and controls the blending for this pass. Optional. When // enabled, `target->params.fmt->caps` must include `PL_FMT_CAP_BLENDABLE`. const struct pl_blend_params *blend_params; // The description of the vertex format, including offsets. // // Note: `location` is ignored and can safely be left unset. const struct pl_vertex_attrib *vertex_attribs; int num_vertex_attribs; size_t vertex_stride; // The index of the vertex position in `vertex_attribs`, as well as the // interpretation of its contents. int vertex_position_idx; enum pl_vertex_coords vertex_coords; bool vertex_flipped; // flip all vertex y coordinates // Type and number of vertices to render. enum pl_prim_type vertex_type; int vertex_count; // Vertex data. See `pl_pass_run_params.vertex_data`. const void *vertex_data; pl_buf vertex_buf; size_t buf_offset; // Index data. See `pl_pass_run_params.index_data`. Optional. const void *index_data; enum pl_index_format index_fmt; pl_buf index_buf; size_t index_offset; // If set, records the execution time of this dispatch into the given // timer object. Optional. // // Note: If this is set, `pl_dispatch` cannot internally measure the // execution time of the shader, which means `pl_dispatch_info.samples` may // be empty as a result. pl_timer timer; }; #define pl_dispatch_vertex_params(...) (&(struct pl_dispatch_vertex_params) { __VA_ARGS__ }) // Dispatch a generated shader using custom vertices, rather than using a quad // generated by the dispatch. This allows the use of e.g. custom fragment // shaders for things like rendering custom UI elements, or possibly doing // advanced things like sampling from a cube map or spherical video. PL_API bool pl_dispatch_vertex(pl_dispatch dp, const struct pl_dispatch_vertex_params *params); // Cancel an active shader without submitting anything. Useful, for example, // if the shader was instead merged into a different shader. PL_API void pl_dispatch_abort(pl_dispatch dp, pl_shader *sh); // Deprecated in favor of `pl_cache_save/pl_cache_load` on the `pl_cache` // associated with the `pl_gpu` this dispatch is using. 
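// Illustrative sketch, not part of the libplacebo API: a minimal per-frame
// dispatch skeleton combining `pl_dispatch_reset_frame`, `pl_dispatch_begin`
// and `pl_dispatch_finish`. The helper name `example_dispatch_frame` is
// hypothetical, and the shader-building step is elided (it would use the
// pl_shader_* API from `shaders/*.h` and must leave the shader with output
// signature PL_SHADER_SIG_COLOR). The target texture is assumed to be
// renderable (or storable, for compute shaders).
static inline bool example_dispatch_frame(pl_dispatch dp, pl_tex target)
{
    pl_dispatch_reset_frame(dp);

    pl_shader sh = pl_dispatch_begin(dp);
    // ... record sampling/color operations into `sh` here ...

    return pl_dispatch_finish(dp, pl_dispatch_params(
        .shader = &sh,
        .target = target,
    ));
}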
PL_DEPRECATED_IN(v6.323) PL_API size_t pl_dispatch_save(pl_dispatch dp, uint8_t *out_cache); PL_DEPRECATED_IN(v6.323) PL_API void pl_dispatch_load(pl_dispatch dp, const uint8_t *cache); PL_API_END #endif // LIBPLACEBO_DISPATCH_H libplacebo-v7.349.0/src/include/libplacebo/dither.h000066400000000000000000000071231463457750100221350ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #ifndef LIBPLACEBO_DITHER_H_ #define LIBPLACEBO_DITHER_H_ #include PL_API_BEGIN // Generates a deterministic NxN bayer (ordered) dither matrix, storing the // result in `data`. `size` must be a power of two. The resulting matrix will // be roughly uniformly distributed within the range [0,1). PL_API void pl_generate_bayer_matrix(float *data, int size); // Generates a random NxN blue noise texture, storing the result in `data`. // `size` must be a positive power of two no larger than 256. The resulting // texture will be roughly uniformly distributed within the range [0,1). // // Note: This function is very, *very* slow for large sizes. Generating a // dither matrix with size 256 can take several seconds on a modern processor. PL_API void pl_generate_blue_noise(float *data, int size); // Defines the border of all error diffusion kernels #define PL_EDF_MIN_DX (-2) #define PL_EDF_MAX_DX (2) #define PL_EDF_MAX_DY (2) struct pl_error_diffusion_kernel { const char *name; // Short and concise identifier const char *description; // Longer / friendly name // The minimum value such that a (y, x) -> (y, x + y * shift) mapping will // make all error pushing operations affect next column (and after it) // only. // // Higher shift values are significantly more computationally intensive. int shift; // The diffusion factor for (y, x) is pattern[y][x - PL_EDF_MIN_DX] / divisor.
int pattern[PL_EDF_MAX_DY + 1][PL_EDF_MAX_DX - PL_EDF_MIN_DX + 1]; int divisor; }; // Algorithms with shift=1: PL_API extern const struct pl_error_diffusion_kernel pl_error_diffusion_simple; PL_API extern const struct pl_error_diffusion_kernel pl_error_diffusion_false_fs; // Algorithms with shift=2: PL_API extern const struct pl_error_diffusion_kernel pl_error_diffusion_sierra_lite; PL_API extern const struct pl_error_diffusion_kernel pl_error_diffusion_floyd_steinberg; PL_API extern const struct pl_error_diffusion_kernel pl_error_diffusion_atkinson; // Algorithms with shift=3, probably too heavy for low end GPUs: PL_API extern const struct pl_error_diffusion_kernel pl_error_diffusion_jarvis_judice_ninke; PL_API extern const struct pl_error_diffusion_kernel pl_error_diffusion_stucki; PL_API extern const struct pl_error_diffusion_kernel pl_error_diffusion_burkes; PL_API extern const struct pl_error_diffusion_kernel pl_error_diffusion_sierra2; PL_API extern const struct pl_error_diffusion_kernel pl_error_diffusion_sierra3; // A list of built-in error diffusion kernels, terminated by NULL PL_API extern const struct pl_error_diffusion_kernel * const pl_error_diffusion_kernels[]; PL_API extern const int pl_num_error_diffusion_kernels; // excluding trailing NULL // Find the error diffusion kernel with the given name, or NULL on failure. PL_API const struct pl_error_diffusion_kernel *pl_find_error_diffusion_kernel(const char *name); PL_API_END #endif // LIBPLACEBO_DITHER_H_ libplacebo-v7.349.0/src/include/libplacebo/dummy.h000066400000000000000000000151631463457750100220140ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #ifndef LIBPLACEBO_DUMMY_H_ #define LIBPLACEBO_DUMMY_H_ #include PL_API_BEGIN // The functions in this file allow creating and manipulating "dummy" contexts. // A dummy context isn't actually mapped by the GPU, all data exists purely on // the CPU. It also isn't capable of compiling or executing any shaders, any // attempts to do so will simply fail. // // The main use case for this dummy context is for users who want to generate // advanced shaders that depend on specific GLSL features or support for // certain types of GPU resources (e.g. LUTs). This dummy context allows such // shaders to be generated, with all of the referenced shader objects and // textures simply containing their data in a host-accessible way. struct pl_gpu_dummy_params { // These GPU parameters correspond to their equivalents in `pl_gpu`, and // must obey the same rules as documented there. The values from // `pl_gpu_dummy_default_params` are set to support pretty much everything // and are set for GLSL version 450. // // Individual fields such as `glsl.compute` or `glsl.version` description // can and should be overridden by the user based on their requirements. // Individual limits should ideally be set based on the corresponding // `glGet` queries etc. 
struct pl_glsl_version glsl; struct pl_gpu_limits limits; }; #define PL_GPU_DUMMY_DEFAULTS \ .glsl = { \ .version = 450, \ .gles = false, \ .vulkan = false, \ .compute = true, \ .max_shmem_size = SIZE_MAX, \ .max_group_threads = 1024, \ .max_group_size = { 1024, 1024, 1024 }, \ .subgroup_size = 32, \ .min_gather_offset = INT16_MIN, \ .max_gather_offset = INT16_MAX, \ }, \ .limits = { \ /* pl_gpu */ \ .callbacks = false, \ .thread_safe = true, \ /* pl_buf */ \ .max_buf_size = SIZE_MAX, \ .max_ubo_size = SIZE_MAX, \ .max_ssbo_size = SIZE_MAX, \ .max_vbo_size = SIZE_MAX, \ .max_mapped_size = SIZE_MAX, \ .max_buffer_texels = UINT64_MAX, \ /* pl_tex */ \ .max_tex_1d_dim = UINT32_MAX, \ .max_tex_2d_dim = UINT32_MAX, \ .max_tex_3d_dim = UINT32_MAX, \ .buf_transfer = true, \ .align_tex_xfer_pitch = 1, \ .align_tex_xfer_offset = 1, \ /* pl_pass */ \ .max_variable_comps = SIZE_MAX, \ .max_constants = SIZE_MAX, \ .max_pushc_size = SIZE_MAX, \ .max_dispatch = { UINT32_MAX, UINT32_MAX, UINT32_MAX }, \ .fragment_queues = 0, \ .compute_queues = 0, \ }, #define pl_gpu_dummy_params(...) (&(struct pl_gpu_dummy_params) { PL_GPU_DUMMY_DEFAULTS __VA_ARGS__ }) PL_API extern const struct pl_gpu_dummy_params pl_gpu_dummy_default_params; // Create a dummy GPU context based on the given parameters. This GPU will have // a format for each host-representable type (i.e. intN_t, floats and doubles), // in the canonical channel order RGBA. These formats will have every possible // capability activated, respectively. // // If `params` is left as NULL, it defaults to `&pl_gpu_dummy_params`. PL_API pl_gpu pl_gpu_dummy_create(pl_log log, const struct pl_gpu_dummy_params *params); PL_API void pl_gpu_dummy_destroy(pl_gpu *gpu); // Back-doors into the `pl_tex` and `pl_buf` representations. These allow you // to access the raw data backing this object. Textures are always laid out in // a tightly packed manner. // // For "placeholder" dummy textures, this always returns NULL. PL_API uint8_t *pl_buf_dummy_data(pl_buf buf); PL_API uint8_t *pl_tex_dummy_data(pl_tex tex); // Skeleton of `pl_tex_params` containing only the fields relevant to // `pl_tex_dummy_create`, plus the extra `sampler_type` field. struct pl_tex_dummy_params { int w, h, d; pl_fmt format; enum pl_sampler_type sampler_type; void *user_data; }; #define pl_tex_dummy_params(...) (&(struct pl_tex_dummy_params) { __VA_ARGS__ }) // Allows creating a "placeholder" dummy texture. This is basically a texture // that isn't even backed by anything. All `pl_tex_*` operations (other than // `pl_tex_destroy`) performed on it will simply fail. // // All of the permissions will be set to `false`, except `sampleable`, which is // set to `true`. (So you can use it as an input to shader sampling functions) PL_API pl_tex pl_tex_dummy_create(pl_gpu gpu, const struct pl_tex_dummy_params *params); PL_API_END #endif // LIBPLACEBO_DUMMY_H_ libplacebo-v7.349.0/src/include/libplacebo/filters.h000066400000000000000000000460641463457750100223350ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #ifndef LIBPLACEBO_FILTER_KERNELS_H_ #define LIBPLACEBO_FILTER_KERNELS_H_ #include #include PL_API_BEGIN #define PL_FILTER_MAX_PARAMS 2 // Invocation parameters for a given kernel struct pl_filter_ctx { float radius; float params[PL_FILTER_MAX_PARAMS]; }; // Represents a single filter function, i.e. kernel or windowing function. struct pl_filter_function { // The cosmetic name associated with this filter function. const char *name; // The radius of the filter function. For resizable filters, this gives // the radius needed to represent a single filter lobe (tap). float radius; // If true, the filter function is resizable (see pl_filter_config.radius) bool resizable; // If true, the filter function is tunable (see pl_filter_config.params) bool tunable[PL_FILTER_MAX_PARAMS]; // If the relevant parameter is tunable, this contains the default values. float params[PL_FILTER_MAX_PARAMS]; // The underlying filter function itself: Computes the weight as a function // of the offset. All filter functions must be normalized such that x=0 is // the center point, and in particular weight(0) = 1.0. The functions may // be undefined for values of x outside [0, radius]. double (*weight)(const struct pl_filter_ctx *f, double x); // If true, this filter represents an opaque placeholder for a more // sophisticated filter function which does not fit into the pl_filter // framework. `weight()` will always return 0.0. bool opaque; }; // Deprecated function, merely checks a->weight == b->weight PL_DEPRECATED_IN(v6.303) PL_API bool pl_filter_function_eq(const struct pl_filter_function *a, const struct pl_filter_function *b); // Box filter: Entirely 1.0 within the radius, entirely 0.0 outside of it. // This is also sometimes called a Dirichlet window PL_API extern const struct pl_filter_function pl_filter_function_box; // Triangle filter: Linear transitions from 1.0 at x=0 to 0.0 at x=radius. // This is also sometimes called a Bartlett window. PL_API extern const struct pl_filter_function pl_filter_function_triangle; // Cosine filter: Ordinary cosine function, single lobe. PL_API extern const struct pl_filter_function pl_filter_function_cosine; // Hann function: Cosine filter named after Julius von Hann. Also commonly // mislabeled as a "Hanning" function, due to its similarity to the Hamming // function. PL_API extern const struct pl_filter_function pl_filter_function_hann; // Hamming function: Cosine filter named after Richard Hamming. PL_API extern const struct pl_filter_function pl_filter_function_hamming; // Welch filter: Polynomial function consisting of a single parabolic section. PL_API extern const struct pl_filter_function pl_filter_function_welch; // Kaiser filter: Approximation of the DPSS window using Bessel functions. // Also sometimes called a Kaiser-Bessel window. // Parameter [0]: Shape (alpha). Determines the trade-off between the main lobe // and the side lobes. PL_API extern const struct pl_filter_function pl_filter_function_kaiser; // Blackman filter: Cosine filter named after Ralph Beebe Blackman. // Parameter [0]: Scale (alpha). Influences the shape. The defaults result in // zeros at the third and fourth sidelobes. PL_API extern const struct pl_filter_function pl_filter_function_blackman; // Bohman filter: 2nd order Cosine filter.
PL_API extern const struct pl_filter_function pl_filter_function_bohman; // Gaussian function: Similar to the Gaussian distribution, this defines a // bell curve function. // Parameter [0]: Scale (t), increasing makes the result blurrier. PL_API extern const struct pl_filter_function pl_filter_function_gaussian; // Quadratic function: 2nd order approximation of the gaussian function. Also // sometimes called a "quadric" window. PL_API extern const struct pl_filter_function pl_filter_function_quadratic; // Sinc function: Widely used for both kernels and windows, sinc(x) = sin(x)/x. PL_API extern const struct pl_filter_function pl_filter_function_sinc; // Jinc function: Similar to sinc, but extended to the 2D domain. Widely // used as the kernel of polar (EWA) filters. Also sometimes called a Sombrero // function. PL_API extern const struct pl_filter_function pl_filter_function_jinc; // Sphinx function: Similar to sinc and jinc, but extended to the 3D domain. // The name is derived from "spherical" sinc. Can be used to filter 3D signals // in theory. PL_API extern const struct pl_filter_function pl_filter_function_sphinx; // B/C-tunable Spline function: This is a family of commonly used spline // functions with two tunable parameters. Does not need to be windowed. // Parameter [0]: "B" // Parameter [1]: "C" // Some popular variants of this function are: // B = 1.0, C = 0.0: "base" Cubic (blurry) // B = 0.0, C = 0.0: Hermite filter (blocky) // B = 0.0, C = 0.5: Catmull-Rom filter (sharp) // B = 1/3, C = 1/3: Mitchell-Netravali filter (soft, doesn't ring) // B ≈ 0.37, C ≈ 0.31: Robidoux filter (used by ImageMagick) // B ≈ 0.26, C ≈ 0.37: RobidouxSharp filter (sharper variant of Robidoux) PL_API extern const struct pl_filter_function pl_filter_function_cubic; PL_API extern const struct pl_filter_function pl_filter_function_hermite; // Deprecated aliases of pl_filter_function_cubic (see the table above) PL_DEPRECATED_IN(v6.341) PL_API extern const struct pl_filter_function pl_filter_function_bicubic; PL_DEPRECATED_IN(v6.341) PL_API extern const struct pl_filter_function pl_filter_function_bcspline; PL_DEPRECATED_IN(v6.341) PL_API extern const struct pl_filter_function pl_filter_function_catmull_rom; PL_DEPRECATED_IN(v6.341) PL_API extern const struct pl_filter_function pl_filter_function_mitchell; PL_DEPRECATED_IN(v6.341) PL_API extern const struct pl_filter_function pl_filter_function_robidoux; PL_DEPRECATED_IN(v6.341) PL_API extern const struct pl_filter_function pl_filter_function_robidouxsharp; // Cubic splines with 2/3/4 taps. Referred to as "spline16", "spline36", and // "spline64" mainly for historical reasons, based on the number of pixels in // their window when using them as 2D orthogonal filters. Do not need to be // windowed. PL_API extern const struct pl_filter_function pl_filter_function_spline16; PL_API extern const struct pl_filter_function pl_filter_function_spline36; PL_API extern const struct pl_filter_function pl_filter_function_spline64; // Special filter function for the built-in oversampling algorithm. This is an // opaque filter with no meaningful representation, though it has one tunable // parameter controlling the threshold at which to switch back to ordinary // nearest neighbour sampling. (See `pl_shader_sample_oversample`) PL_API extern const struct pl_filter_function pl_filter_function_oversample; // A list of built-in filter functions, terminated by NULL // // Note: May contain extra aliases for the above functions.
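// Illustrative sketch, not part of the libplacebo API: evaluating the tunable
// B/C spline directly through its `weight` callback, with the parameters set
// to the Catmull-Rom values (B = 0.0, C = 0.5) from the table above. The
// helper name `example_catmull_rom_weight` is hypothetical.
static inline double example_catmull_rom_weight(double x)
{
    const struct pl_filter_function *fun = &pl_filter_function_cubic;
    struct pl_filter_ctx ctx = {
        .radius = fun->radius,
        .params = { 0.0, 0.5 }, // B, C
    };

    // weight() is only defined on [0, radius]
    if (x < 0.0 || x > ctx.radius)
        return 0.0;
    return fun->weight(&ctx, x);
}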
PL_API extern const struct pl_filter_function * const pl_filter_functions[]; PL_API extern const int pl_num_filter_functions; // excluding trailing NULL // Find the filter function with the given name, or NULL on failure. PL_API const struct pl_filter_function *pl_find_filter_function(const char *name); // Backwards compatibility with the older configuration API. Redundant with // `pl_filter_function.name`. May be formally deprecated in the future. struct pl_filter_function_preset { const char *name; const struct pl_filter_function *function; }; // A list of built-in filter function presets, terminated by {0} PL_API extern const struct pl_filter_function_preset pl_filter_function_presets[]; PL_API extern const int pl_num_filter_function_presets; // excluding trailing {0} // Find the filter function preset with the given name, or NULL on failure. PL_API const struct pl_filter_function_preset *pl_find_filter_function_preset(const char *name); // Different usage domains for a filter enum pl_filter_usage { PL_FILTER_UPSCALING = (1 << 0), PL_FILTER_DOWNSCALING = (1 << 1), PL_FILTER_FRAME_MIXING = (1 << 2), PL_FILTER_SCALING = PL_FILTER_UPSCALING | PL_FILTER_DOWNSCALING, PL_FILTER_ALL = PL_FILTER_SCALING | PL_FILTER_FRAME_MIXING, }; // Represents a tuned combination of filter functions, plus parameters struct pl_filter_config { // The cosmetic name associated with this filter config. Optional for // user-provided configs, but always set by built-in configurations. const char *name; // Longer / friendly name. Always set for built-in configurations, // except for names which are merely aliases of other filters. const char *description; // Allowed and recommended usage domains (respectively) // // When it is desired to maintain a simpler user interface, it may be // recommended to include only scalers whose recommended usage domains // include the relevant context in which it will be used. enum pl_filter_usage allowed; enum pl_filter_usage recommended; // The kernel function and (optionally) windowing function. const struct pl_filter_function *kernel; const struct pl_filter_function *window; // The radius. Ignored if !kernel->resizable. Optional, defaults to // kernel->radius if unset. float radius; // Parameters for the respective filter function. Ignored if not tunable. float params[PL_FILTER_MAX_PARAMS]; float wparams[PL_FILTER_MAX_PARAMS]; // Represents a clamping coefficient for negative weights. A value of 0.0 // (the default) represents no clamping. A value of 1.0 represents full // clamping, i.e. all negative weights will be clamped to 0. Values in // between will be linearly scaled. float clamp; // Additional blur coefficient. This effectively stretches the kernel, // without changing the effective radius of the filter. Setting this // to a value of 0.0 is equivalent to disabling it. Values significantly // below 1.0 may seriously degrade the visual output, and should be used // with care. float blur; // Additional taper coefficient. This essentially flattens the function's // center. The values within [-taper, taper] will return 1.0, with the // actual function being squished into the remainder of [taper, radius]. // Defaults to 0.0. float taper; // If true, this filter is intended to be used as a polar/2D filter (EWA) // instead of a separable/1D filter. Does not affect the actual sampling, // but provides information about how the results are to be interpreted. bool polar; // Antiringing strength.
A value of 0.0 disables antiringing, and a value // of 1.0 enables full-strength antiringing. Defaults to 0.0 if // unspecified. // // Note: This is only included in `pl_filter_config` for convenience. Does // not affect the actual filter sampling, but provides information to the // downstream consumer of the `pl_filter`. float antiring; }; PL_API bool pl_filter_config_eq(const struct pl_filter_config *a, const struct pl_filter_config *b); // Samples a given filter configuration at a given x coordinate, while // respecting all parameters of the configuration. PL_API double pl_filter_sample(const struct pl_filter_config *c, double x); // A list of built-in filter configurations. Since they are just combinations // of the above filter functions, they are not described in much further // detail. PL_API extern const struct pl_filter_config pl_filter_spline16; // 2 taps PL_API extern const struct pl_filter_config pl_filter_spline36; // 3 taps PL_API extern const struct pl_filter_config pl_filter_spline64; // 4 taps PL_API extern const struct pl_filter_config pl_filter_nearest; PL_API extern const struct pl_filter_config pl_filter_box; PL_API extern const struct pl_filter_config pl_filter_bilinear; PL_API extern const struct pl_filter_config pl_filter_gaussian; // Sinc family (all configured to 3 taps): PL_API extern const struct pl_filter_config pl_filter_sinc; // unwindowed PL_API extern const struct pl_filter_config pl_filter_lanczos; // sinc-sinc PL_API extern const struct pl_filter_config pl_filter_ginseng; // sinc-jinc PL_API extern const struct pl_filter_config pl_filter_ewa_jinc; // unwindowed PL_API extern const struct pl_filter_config pl_filter_ewa_lanczos; // jinc-jinc PL_API extern const struct pl_filter_config pl_filter_ewa_lanczossharp; PL_API extern const struct pl_filter_config pl_filter_ewa_lanczos4sharpest; PL_API extern const struct pl_filter_config pl_filter_ewa_ginseng; // jinc-sinc PL_API extern const struct pl_filter_config pl_filter_ewa_hann; // jinc-hann // Spline family PL_API extern const struct pl_filter_config pl_filter_bicubic; PL_API extern const struct pl_filter_config pl_filter_hermite; PL_API extern const struct pl_filter_config pl_filter_catmull_rom; PL_API extern const struct pl_filter_config pl_filter_mitchell; PL_API extern const struct pl_filter_config pl_filter_mitchell_clamp; // clamp = 1.0 PL_API extern const struct pl_filter_config pl_filter_robidoux; PL_API extern const struct pl_filter_config pl_filter_robidouxsharp; PL_API extern const struct pl_filter_config pl_filter_ewa_robidoux; PL_API extern const struct pl_filter_config pl_filter_ewa_robidouxsharp; // Special/opaque filters PL_API extern const struct pl_filter_config pl_filter_oversample; // Backwards compatibility #define pl_filter_triangle pl_filter_bilinear #define pl_oversample_frame_mixer pl_filter_oversample // A list of built-in filter configs, terminated by NULL PL_API extern const struct pl_filter_config * const pl_filter_configs[]; PL_API extern const int pl_num_filter_configs; // excluding trailing NULL // Find the filter config with the given name, or NULL on failure. // `usage` restricts the valid usage (based on `pl_filter_config.allowed`). PL_API const struct pl_filter_config * pl_find_filter_config(const char *name, enum pl_filter_usage usage); // Backward compatibility with the previous filter configuration API. Redundant // with pl_filter_config.name/description. May be deprecated in the future. 
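//
// A minimal sketch of evaluating one of the configurations above on the CPU
// (illustrative only; the sample positions are arbitrary):
//
//   struct pl_filter_config cfg = pl_filter_lanczos;
//   double w0 = pl_filter_sample(&cfg, 0.0); // weight at the kernel center
//   double w1 = pl_filter_sample(&cfg, 1.5); // weight 1.5 texels from the center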
struct pl_filter_preset { const char *name; const struct pl_filter_config *filter; // Longer / friendly name, or NULL for aliases const char *description; }; // A list of built-in filter presets, terminated by {0} PL_API extern const struct pl_filter_preset pl_filter_presets[]; PL_API extern const int pl_num_filter_presets; // excluding trailing {0} // Find the filter preset with the given name, or NULL on failure. PL_API const struct pl_filter_preset *pl_find_filter_preset(const char *name); // Parameters for filter generation. struct pl_filter_params { // The particular filter configuration to be sampled. config.kernel must // be set to a valid pl_filter_function. struct pl_filter_config config; // The precision of the resulting LUT. A value of 64 should be fine for // most practical purposes, but higher or lower values may be justified // depending on the use case. This value must be set to something > 0. int lut_entries; // --- Polar filters only (config.polar) // As a micro-optimization, all samples below this cutoff value will be // ignored when updating the cutoff radius. Setting it to a value of 0.0 // disables this optimization. float cutoff; // --- Separable filters only (!config.polar) // Indicates the maximum row size that is supported by the calling code, or // 0 for no limit. int max_row_size; // Indicates the row stride alignment. For some use cases (e.g. uploading // the weights as a texture), there are certain alignment requirements for // each row. The chosen row_stride will always be a multiple of this value. // Specifying 0 indicates no alignment requirements. int row_stride_align; // --- Deprecated options PL_DEPRECATED_IN(v6.316) float filter_scale; // no effect, use `config.blur` instead }; #define pl_filter_params(...) (&(struct pl_filter_params) { __VA_ARGS__ }) // Represents an initialized instance of a particular filter, with a // precomputed LUT. The interpretation of the LUT depends on the type of the // filter (polar or separable). typedef const struct pl_filter_t { // Deep copy of the parameters, for convenience. struct pl_filter_params params; // Contains the true radius of the computed filter. This may be // smaller than the configured radius depending on the exact filter // parameters used. Mainly relevant for polar filters, since // it affects the value range of *weights. float radius; // Radius of the first zero crossing (main lobe size). float radius_zero; // The computed look-up table (LUT). For polar filters, this is interpreted // as a 1D array with dimensions [lut_entries] containing the raw filter // samples on the scale [0, radius]. For separable (non-polar) filters, // this is interpreted as a 2D array with dimensions // [lut_entries][row_stride]. The inner rows contain the `row_size` samples // to convolve with the corresponding input pixels. The outer coordinate is // used to vary the fractional offset (phase). So for example, if the // sample position to reconstruct is directly aligned with the source // texels, you would use the values from weights[0]. If the sample position // to reconstruct is exactly half-way between two source texels (180° out // of phase), you would use the values from weights[lut_entries/2]. const float *weights; // --- Separable filters only (!params.config.polar) // The number of source texels to convolve over for each row. This value // will never exceed the given `max_row_size`. If the filter ends up // cut off because of this, the bool `insufficient` will be set to true.
int row_size; bool insufficient; // The separation (in *weights) between each row of the filter. Always // a multiple of params.row_stride_align. int row_stride; // --- Deprecated / removed fields PL_DEPRECATED_IN(v6.336) float radius_cutoff; // identical to `radius` } *pl_filter; // Generate (compute) a filter instance based on a given filter configuration. // The resulting pl_filter must be freed with `pl_filter_free` when no longer // needed. Returns NULL if filter generation fails due to invalid parameters // (i.e. missing a required parameter). PL_API pl_filter pl_filter_generate(pl_log log, const struct pl_filter_params *params); PL_API void pl_filter_free(pl_filter *filter); PL_API_END #endif // LIBPLACEBO_FILTER_KERNELS_H_ libplacebo-v7.349.0/src/include/libplacebo/gamut_mapping.h000066400000000000000000000167161463457750100235160ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #ifndef LIBPLACEBO_GAMUT_MAPPING_H_ #define LIBPLACEBO_GAMUT_MAPPING_H_ #include #include PL_API_BEGIN struct pl_gamut_map_params; struct pl_gamut_map_function { const char *name; // Identifier const char *description; // Friendly / longer name // The gamut-mapping function itself. Iterates over all values in `lut`, // and adapts them as needed. void (*map)(float *lut, const struct pl_gamut_map_params *params); // Returns true if `map` supports both stretching and contracting the // gamut. In this case, `map` is always executed, even if the output gamut // is larger than the input gamut. bool bidirectional; // Private data. Unused by libplacebo, but may be accessed by `map`. void *priv; }; struct pl_gamut_map_constants { // (Relative) chromaticity protection zone for perceptual mapping [0,1] float perceptual_deadzone; // Strength of the perceptual saturation mapping component [0,1] float perceptual_strength; // I vs C curve gamma to use for colorimetric clipping [0,10] float colorimetric_gamma; // Knee point to use for softclipping methods (perceptual, softclip) [0,1] float softclip_knee; // Desaturation strength (for softclip only) [0,1] float softclip_desat; }; #define PL_GAMUT_MAP_CONSTANTS \ .colorimetric_gamma = 1.80f, \ .softclip_knee = 0.70f, \ .softclip_desat = 0.35f, \ .perceptual_deadzone = 0.30f, \ .perceptual_strength = 0.80f, struct pl_gamut_map_params { // If `function` is NULL, defaults to `pl_gamut_map_clip`. const struct pl_gamut_map_function *function; // The desired input/output primaries. This affects the subjective color // volume in which the desired mapping shall take place. struct pl_raw_primaries input_gamut; struct pl_raw_primaries output_gamut; // Minimum/maximum luminance (PQ) of the target display. Note that the same // value applies to both the input and output, since it's assumed that tone // mapping has already happened by this stage. This effectively defines the // legal gamut boundary in RGB space. 
// // This also defines the I channel value range, for `pl_gamut_map_generate` float min_luma; float max_luma; // Common constants, should be initialized to PL_GAMUT_MAP_CONSTANTS if // not intending to override them further. struct pl_gamut_map_constants constants; // -- LUT generation options (for `pl_gamut_map_generate` only) // The size of the resulting LUT, per channel. // // Note: For quality, it's generally best to increase h > I > C int lut_size_I; int lut_size_C; int lut_size_h; // The stride (in number of floats) between elements in the resulting LUT. int lut_stride; // -- Removed parameters PL_DEPRECATED_IN(v6.289) float chroma_margin; // non-functional }; #define pl_gamut_map_params(...) (&(struct pl_gamut_map_params) { \ .constants = { PL_GAMUT_MAP_CONSTANTS }, \ __VA_ARGS__ \ }) // Note: Only does pointer equality testing on `function` PL_API bool pl_gamut_map_params_equal(const struct pl_gamut_map_params *a, const struct pl_gamut_map_params *b); // Returns true if the given gamut mapping configuration effectively represents // a no-op configuration. Gamut mapping can be skipped in this case. PL_API bool pl_gamut_map_params_noop(const struct pl_gamut_map_params *params); // Generate a gamut-mapping LUT for a given configuration. LUT samples are // stored as IPTPQc4 values, but the LUT itself is indexed by IChPQc4, spanning // the effective range [min_luma, max_luma] × [0, 0.5] × [-pi,pi]. // // This ordering is designed to keep frequently co-occurring values close in // memory, while permitting simple wrapping of the 'h' component. PL_API void pl_gamut_map_generate(float *out, const struct pl_gamut_map_params *params); // Samples a gamut mapping function for a single IPTPQc4 value. The input // values are updated in-place. PL_API void pl_gamut_map_sample(float x[3], const struct pl_gamut_map_params *params); // Performs no gamut-mapping, just hard clips out-of-range colors per-channel. PL_API extern const struct pl_gamut_map_function pl_gamut_map_clip; // Performs a perceptually balanced (saturation) gamut mapping, using a soft // knee function to preserve in-gamut colors, followed by a final softclip // operation. This works bidirectionally, meaning it can both compress and // expand the gamut. Behaves similarly to a blend of `saturation` and `softclip`. PL_API extern const struct pl_gamut_map_function pl_gamut_map_perceptual; // Performs a perceptually balanced gamut mapping using a soft knee function to // roll off clipped regions, and a hue shifting function to preserve saturation. PL_API extern const struct pl_gamut_map_function pl_gamut_map_softclip; // Performs relative colorimetric clipping, while maintaining an exponential // relationship between brightness and chromaticity. PL_API extern const struct pl_gamut_map_function pl_gamut_map_relative; // Performs simple RGB->RGB saturation mapping. The input R/G/B channels are // mapped directly onto the output R/G/B channels. Will never clip, but will // distort all hues and/or result in a faded look. PL_API extern const struct pl_gamut_map_function pl_gamut_map_saturation; // Performs absolute colorimetric clipping. Like pl_gamut_map_relative, but // does not adapt the white point. PL_API extern const struct pl_gamut_map_function pl_gamut_map_absolute; // Performs constant-luminance colorimetric clipping, desaturating colors // towards white until they're in-range.
PL_API extern const struct pl_gamut_map_function pl_gamut_map_desaturate; // Uniformly darkens the input slightly to prevent clipping on blown-out // highlights, then clamps colorimetrically to the input gamut boundary, // biased slightly to preserve chromaticity over luminance. PL_API extern const struct pl_gamut_map_function pl_gamut_map_darken; // Performs no gamut mapping, but simply highlights out-of-gamut pixels. PL_API extern const struct pl_gamut_map_function pl_gamut_map_highlight; // Linearly/uniformly desaturates the image in order to bring the entire // image into the target gamut. PL_API extern const struct pl_gamut_map_function pl_gamut_map_linear; // A list of built-in gamut mapping functions, terminated by NULL PL_API extern const struct pl_gamut_map_function * const pl_gamut_map_functions[]; PL_API extern const int pl_num_gamut_map_functions; // excluding trailing NULL // Find the gamut mapping function with the given name, or NULL on failure. PL_API const struct pl_gamut_map_function *pl_find_gamut_map_function(const char *name); PL_API_END #endif // LIBPLACEBO_GAMUT_MAPPING_H_ libplacebo-v7.349.0/src/include/libplacebo/gpu.h000066400000000000000000001736701463457750100214640ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #ifndef LIBPLACEBO_GPU_H_ #define LIBPLACEBO_GPU_H_ #include #include #include #include #include #include PL_API_BEGIN // These are not memory managed, and should represent compile-time constants typedef const char *pl_debug_tag; #define PL_DEBUG_TAG (__FILE__ ":" PL_TOSTRING(__LINE__)) // Type of a shader input descriptor. enum pl_desc_type { PL_DESC_INVALID = 0, PL_DESC_SAMPLED_TEX, // C: pl_tex* GLSL: combined texture sampler // (`pl_tex->params.sampleable` must be set) PL_DESC_STORAGE_IMG, // C: pl_tex* GLSL: storage image // (`pl_tex->params.storable` must be set) PL_DESC_BUF_UNIFORM, // C: pl_buf* GLSL: uniform buffer // (`pl_buf->params.uniform` must be set) PL_DESC_BUF_STORAGE, // C: pl_buf* GLSL: storage buffer // (`pl_buf->params.storable` must be set) PL_DESC_BUF_TEXEL_UNIFORM,// C: pl_buf* GLSL: uniform samplerBuffer // (`pl_buf->params.uniform` and `format` must be set) PL_DESC_BUF_TEXEL_STORAGE,// C: pl_buf* GLSL: uniform imageBuffer // (`pl_buf->params.uniform` and `format` must be set) PL_DESC_TYPE_COUNT }; // This file contains the definition of an API which is designed to abstract // away from platform-specific APIs like the various OpenGL variants, Direct3D // and Vulkan in a common way. It is a much more limited API than those APIs, // since it tries targeting a very small common subset of features that is // needed to implement libplacebo's rendering. // // NOTE: Most, but not all, parameter conditions (phrases such as "must" or // "valid usage" are explicitly tested and result in error messages followed by // graceful failure. Exceptions are noted where they exist. 
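//
// For orientation (an illustrative sketch, not a normative mapping): a
// PL_DESC_SAMPLED_TEX descriptor is backed by a `pl_tex` on the C side and
// typically appears in (Vulkan-flavored) GLSL as something like
//
//   layout(binding = 0) uniform sampler2D my_tex;
//
// while a PL_DESC_BUF_UNIFORM descriptor is backed by a `pl_buf` and
// corresponds to a uniform block such as
//
//   layout(std140, binding = 1) uniform my_block { vec4 my_vec; };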
// Structure which wraps metadata describing GLSL capabilities. struct pl_glsl_version { int version; // GLSL version (e.g. 450), for #version bool gles; // GLSL ES semantics (ESSL) bool vulkan; // GL_KHR_vulkan_glsl semantics // Compute shader support and limits. If `compute` is false, then all // of the remaining fields in this section are {0}. bool compute; size_t max_shmem_size; // maximum compute shader shared memory size uint32_t max_group_threads; // maximum number of local threads per work group uint32_t max_group_size[3]; // maximum work group size per dimension // If nonzero, signals availability of shader subgroups. This guarantees // availability of all of the following extensions: // - GL_KHR_shader_subgroup_basic // - GL_KHR_shader_subgroup_vote // - GL_KHR_shader_subgroup_arithmetic // - GL_KHR_shader_subgroup_ballot // - GL_KHR_shader_subgroup_shuffle uint32_t subgroup_size; // Miscellaneous shader limits int16_t min_gather_offset; // minimum `textureGatherOffset` offset int16_t max_gather_offset; // maximum `textureGatherOffset` offset }; // Backwards compatibility alias #define pl_glsl_desc pl_glsl_version // Structure defining the physical limits and capabilities of this GPU // instance. If a limit is given as 0, that means that feature is unsupported. struct pl_gpu_limits { // --- pl_gpu bool thread_safe; // `pl_gpu` calls are thread-safe bool callbacks; // supports asynchronous GPU callbacks // --- pl_buf size_t max_buf_size; // maximum size of any buffer size_t max_ubo_size; // maximum size of a `uniform` buffer size_t max_ssbo_size; // maximum size of a `storable` buffer size_t max_vbo_size; // maximum size of a `drawable` buffer size_t max_mapped_size; // maximum size of a `host_mapped` buffer uint64_t max_buffer_texels; // maximum number of texels in a texel buffer bool host_cached; // if true, PL_BUF_MEM_HOST buffers are cached size_t max_mapped_vram; // maximum (known) size of a `host_mapped` // PL_BUF_MEM_DEVICE buffer, or 0 if this // combination is not supported // Required alignment for PL_HANDLE_HOST_PTR imports. This is provided // merely as a hint to the user. If the host pointer being imported is // misaligned, libplacebo will internally round (over-map) the region. size_t align_host_ptr; // --- pl_tex uint32_t max_tex_1d_dim; // maximum width for a 1D texture uint32_t max_tex_2d_dim; // maximum width/height for a 2D texture (required) uint32_t max_tex_3d_dim; // maximum width/height/depth for a 3D texture bool blittable_1d_3d; // supports blittable 1D/3D textures bool buf_transfer; // supports `pl_tex_transfer_params.buf` // These don't represent hard limits but indicate performance hints for // optimal alignment. For best performance, the corresponding field // should be aligned to a multiple of these. They will always be a power // of two. size_t align_tex_xfer_pitch; // optimal `pl_tex_transfer_params.row_pitch` size_t align_tex_xfer_offset; // optimal `pl_tex_transfer_params.buf_offset` // --- pl_pass size_t max_variable_comps; // maximum components passed in variables size_t max_constants; // maximum `pl_pass_params.num_constants` bool array_size_constants; // push constants can be used to size arrays size_t max_pushc_size; // maximum `push_constants_size` size_t align_vertex_stride; // alignment of `pl_pass_params.vertex_stride` uint32_t max_dispatch[3]; // maximum dispatch size per dimension // Note: At least one of `max_variable_comps` or `max_ubo_size` is // guaranteed to be nonzero.
// As a performance hint, the GPU may signal the number of command queues // it has for fragment and compute shaders, respectively. Users may use // this information to decide the appropriate type of shader to dispatch. uint32_t fragment_queues; uint32_t compute_queues; }; // Backwards compatibility aliases #define max_xfer_size max_buf_size #define align_tex_xfer_stride align_tex_xfer_pitch // Some `pl_gpu` operations allow sharing GPU resources with external APIs - // examples include interop with other graphics APIs such as CUDA, and also // various hardware decoding APIs. This defines the mechanism underpinning the // communication of such an interoperation. typedef uint64_t pl_handle_caps; enum pl_handle_type { PL_HANDLE_FD = (1 << 0), // `int fd` for POSIX-style APIs PL_HANDLE_WIN32 = (1 << 1), // `HANDLE` for win32 API PL_HANDLE_WIN32_KMT = (1 << 2), // `HANDLE` for pre-Windows-8 win32 API PL_HANDLE_DMA_BUF = (1 << 3), // 'int fd' for a dma_buf fd PL_HANDLE_HOST_PTR = (1 << 4), // `void *` for a host-allocated pointer PL_HANDLE_MTL_TEX = (1 << 5), // `MTLTexture*` for Apple platforms PL_HANDLE_IOSURFACE = (1 << 6), // `IOSurfaceRef` for Apple platforms }; struct pl_gpu_handle_caps { pl_handle_caps tex; // supported handles for `pl_tex` + `pl_shared_mem` pl_handle_caps buf; // supported handles for `pl_buf` + `pl_shared_mem` pl_handle_caps sync; // supported handles for semaphores }; // Wrapper for the handle used to communicate a shared resource externally. // This handle is owned by the `pl_gpu` - if a user wishes to use it in a way // that takes over ownership (e.g. importing into some APIs), they must clone // the handle before doing so (e.g. using `dup` for fds). It is important to // read the external API documentation _very_ carefully as different handle // types may be managed in different ways. (eg: CUDA takes ownership of an fd, // but does not take ownership of a win32 handle). union pl_handle { int fd; // PL_HANDLE_FD / PL_HANDLE_DMA_BUF void *handle; // PL_HANDLE_WIN32 / PL_HANDLE_WIN32_KMT / PL_HANDLE_MTL_TEX / PL_HANDLE_IOSURFACE void *ptr; // PL_HANDLE_HOST_PTR }; // Structure encapsulating memory that is shared between libplacebo and the // user. This memory can be imported into external APIs using the handle. // // If the object a `pl_shared_mem` belongs to is destroyed (e.g. via // `pl_buf_destroy`), the handle becomes undefined, as do the contents of the // memory it points to, as well as any external API objects imported from it. struct pl_shared_mem { union pl_handle handle; size_t size; // the total size of the memory referenced by this handle size_t offset; // the offset of the object within the referenced memory // Note: `size` is optional for some APIs and handle types, in particular // when importing DMABUFs or D3D11 textures. // For PL_HANDLE_DMA_BUF, this specifies the DRM format modifier that // describes this resource. Note that when importing `pl_buf`, this must // be DRM_FORMAT_MOD_LINEAR. For importing `pl_tex`, it can be any // format modifier supported by the implementation. uint64_t drm_format_mod; // When importing a `pl_tex` of type PL_HANDLE_DMA_BUF, this can be used to // set the image stride (AKA pitch) in memory. If left as 0, defaults to // the image width/height. size_t stride_w; size_t stride_h; // When importing a `pl_tex` of type PL_HANDLE_MTL_TEX, this determines // which plane is imported (0 - 2). 
unsigned plane; }; // Structure grouping PCI bus address fields for GPU devices struct pl_gpu_pci_address { uint32_t domain; uint32_t bus; uint32_t device; uint32_t function; }; typedef const struct pl_fmt_t *pl_fmt; // Abstract device context which wraps an underlying graphics context and can // be used to dispatch rendering commands. // // Thread-safety: Depends on `pl_gpu_limits.thread_safe` typedef const struct pl_gpu_t { pl_log log; struct pl_glsl_version glsl; // GLSL features supported by this GPU struct pl_gpu_limits limits; // physical device limits and capabilities // Fields relevant to external API interop. If the underlying device does // not support interop with other APIs, these will all be {0}. struct pl_gpu_handle_caps export_caps; // supported handles for exporting struct pl_gpu_handle_caps import_caps; // supported handles for importing uint8_t uuid[16]; // underlying device UUID // Supported texture formats, in preference order. (If there are multiple // similar formats, the "better" ones come first) pl_fmt *formats; int num_formats; // PCI Bus address of the underlying device, to help with interop. // This will only be filled in if interop is supported. struct pl_gpu_pci_address pci; } *pl_gpu; // Attach a pl_cache object to this GPU instance. This cache will be // used to cache all compiled shaders, as well as several other shader objects // (e.g. cached 3DLUTs). Calling this with `cache = NULL` disables the cache. // // Note: Calling this after shaders have already been compiled will not // retroactively add those shaders to the cache, so it's recommended to set // this early, before creating any passes. PL_API void pl_gpu_set_cache(pl_gpu gpu, pl_cache cache); enum pl_fmt_type { PL_FMT_UNKNOWN = 0, // also used for inconsistent multi-component formats PL_FMT_UNORM, // unsigned, normalized integer format (sampled as float) PL_FMT_SNORM, // signed, normalized integer format (sampled as float) PL_FMT_UINT, // unsigned integer format (sampled as integer) PL_FMT_SINT, // signed integer format (sampled as integer) PL_FMT_FLOAT, // (signed) float formats, any bit size PL_FMT_TYPE_COUNT, }; enum pl_fmt_caps { PL_FMT_CAP_SAMPLEABLE = 1 << 0, // may be sampled from (PL_DESC_SAMPLED_TEX) PL_FMT_CAP_STORABLE = 1 << 1, // may be used as storage image (PL_DESC_STORAGE_IMG) PL_FMT_CAP_LINEAR = 1 << 2, // may be linearly sampled from (PL_TEX_SAMPLE_LINEAR) PL_FMT_CAP_RENDERABLE = 1 << 3, // may be rendered to (pl_pass_params.target_fmt) PL_FMT_CAP_BLENDABLE = 1 << 4, // may be blended to (pl_pass_params.enable_blend) PL_FMT_CAP_BLITTABLE = 1 << 5, // may be blitted from/to (pl_tex_blit) PL_FMT_CAP_VERTEX = 1 << 6, // may be used as a vertex attribute PL_FMT_CAP_TEXEL_UNIFORM = 1 << 7, // may be used as a texel uniform buffer PL_FMT_CAP_TEXEL_STORAGE = 1 << 8, // may be used as a texel storage buffer PL_FMT_CAP_HOST_READABLE = 1 << 9, // may be used with `host_readable` textures PL_FMT_CAP_READWRITE = 1 << 10, // may be used with PL_DESC_ACCESS_READWRITE // Notes: // - PL_FMT_CAP_LINEAR also implies PL_FMT_CAP_SAMPLEABLE // - PL_FMT_CAP_STORABLE also implies `pl_gpu.glsl.compute` // - PL_FMT_CAP_BLENDABLE implies PL_FMT_CAP_RENDERABLE // - PL_FMT_CAP_VERTEX implies that the format is non-opaque // - PL_FMT_CAP_HOST_READABLE implies that the format is non-opaque }; struct pl_fmt_plane { // Underlying format of this particular sub-plane. This describes the // components, texel size and host representation for the purpose of // e.g. transfers, blits, and sampling.
pl_fmt format; // X/Y subsampling shift factor for this plane. uint8_t shift_x, shift_y; }; // Structure describing a texel/vertex format. struct pl_fmt_t { const char *name; // symbolic name for this format (e.g. rgba32f) uint64_t signature; // unique but stable signature (for pass reusability) enum pl_fmt_type type; // the format's data type and interpretation enum pl_fmt_caps caps; // the features supported by this format int num_components; // number of components for this format int component_depth[4]; // meaningful bits per component, texture precision size_t internal_size; // internal texel size (for blit compatibility) // For planar formats, this provides a description of each sub-plane. // // Note on planar formats: Planar formats are always opaque and typically // support only a limited subset of capabilities (or none at all). Access // should be done via sub-planes. (See `pl_tex.planes`) struct pl_fmt_plane planes[4]; int num_planes; // or 0 for non-planar textures // This controls the relationship between the data as seen by the host and // the way it's interpreted by the texture. The host representation is // always tightly packed (no padding bits in between each component). // // This representation assumes little endian ordering, i.e. components // being ordered from LSB to MSB in memory. Note that for oddly packed // formats like rgb10a2 or rgb565, this is inconsistent with the naming. // (That is to say, rgb565 has sample order {2, 1, 0} under this convention // - because rgb565 treats the R channel as the *most* significant bits) // // If `opaque` is true, then there's no meaningful correspondence between // the two, and all of the remaining fields in this section are unset. // // If `emulated` is true, then this format doesn't actually exist on the // GPU as an uploadable texture format - and any apparent support is being // emulated (typically using compute shaders in the upload path). bool opaque; bool emulated; size_t texel_size; // total size in bytes per texel size_t texel_align; // texel alignment requirements (bytes) int host_bits[4]; // number of meaningful bits in host memory int sample_order[4]; // sampled index for each component, e.g. // {2, 1, 0, 3} for BGRA textures // For sampleable formats, this bool indicates whether or not the format // is compatible with `textureGather()` bool gatherable; // If usable as a vertex or texel buffer format, this gives the GLSL type // corresponding to the data. (e.g. vec4) const char *glsl_type; // If usable as a storage image or texel storage buffer // (PL_FMT_CAP_STORABLE / PL_FMT_CAP_TEXEL_STORAGE), this gives the GLSL // texel format corresponding to the format (e.g. rgba16ui), if any. This // field may be NULL, in which case the format modifier may be left // unspecified. const char *glsl_format; // If available, this gives the fourcc associated with the host // representation. In particular, this is intended for use with // PL_HANDLE_DMA_BUF, where this field will match the DRM format from // <drm_fourcc.h>. May be 0, for formats without matching DRM fourcc. uint32_t fourcc; // If `fourcc` is set, this contains the list of supported drm format // modifiers for this format. const uint64_t *modifiers; int num_modifiers; }; // Returns whether or not a pl_fmt's components are ordered sequentially // in memory in the order RGBA. PL_API bool pl_fmt_is_ordered(pl_fmt fmt); // Returns whether or not a pl_fmt is sampled as a float (e.g.
UNORM) PL_API bool pl_fmt_is_float(pl_fmt fmt); // Returns whether or not a pl_fmt supports a given DRM modifier. PL_API bool pl_fmt_has_modifier(pl_fmt fmt, uint64_t modifier); // Helper function to find a format with a given number of components and // minimum effective precision per component. If `host_bits` is set, then the // format will always be non-opaque, unpadded, ordered and have exactly this // bit depth for each component. Finally, all `caps` must be supported. PL_API pl_fmt pl_find_fmt(pl_gpu gpu, enum pl_fmt_type type, int num_components, int min_depth, int host_bits, enum pl_fmt_caps caps); // Finds a vertex format for a given configuration. The resulting vertex will // have a component depth equivalent to the sizeof() the equivalent host type. // (e.g. PL_FMT_FLOAT will always have sizeof(float)) PL_API pl_fmt pl_find_vertex_fmt(pl_gpu gpu, enum pl_fmt_type type, int num_components); // Find a format based on its name. PL_API pl_fmt pl_find_named_fmt(pl_gpu gpu, const char *name); // Find a format based on its fourcc. PL_API pl_fmt pl_find_fourcc(pl_gpu gpu, uint32_t fourcc); // A generic 'timer query' object. These can be used to measure an // approximation of the GPU execution time of a given operation. Due to the // highly asynchronous nature of GPUs, the actual results of any individual // timer query may be delayed by quite a bit. As such, users should avoid // trying to pair any particular GPU command with any particular timer query // result, and only reuse `pl_timer` objects with identical operations. The // results of timer queries are guaranteed to be in-order, but individual // queries may be dropped, and some operations might not record timer results // at all. (For example, if the underlying hardware does not support timer // queries for a given operation type) // // Thread-safety: Unsafe typedef struct pl_timer_t *pl_timer; // Creates a new timer object. This may return NULL, for example if the // implementation does not support timers, but since passing NULL to // `pl_timer_destroy` and `pl_timer_query` is safe, users generally need not // concern themselves with handling this. PL_API pl_timer pl_timer_create(pl_gpu gpu); PL_API void pl_timer_destroy(pl_gpu gpu, pl_timer *); // Queries any results that have been measured since the last execution of // `pl_timer_query`. There may be more than one result, in which case the user // should simply call the function again to get the subsequent values. This // function returns a value of 0 in the event that there are no more // unprocessed results. // // The results are reported in nanoseconds, but the actual precision of the // timestamp queries may be significantly lower. // // Note: Results do not queue up indefinitely. Generally, the implementation // will only keep track of a small, fixed number of results internally. Make // sure to include this function as part of your main rendering loop to process // all of its results, or older results will be overwritten by newer ones. PL_API uint64_t pl_timer_query(pl_gpu gpu, pl_timer); enum pl_buf_mem_type { PL_BUF_MEM_AUTO = 0, // use whatever seems most appropriate PL_BUF_MEM_HOST, // try allocating from host memory (RAM) PL_BUF_MEM_DEVICE, // try allocating from device memory (VRAM) PL_BUF_MEM_TYPE_COUNT, // Note: This distinction only matters for discrete GPUs }; // Structure describing a buffer. 
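//
// A minimal creation sketch (illustrative only; assumes a valid `gpu` whose
// limits permit a uniform buffer of this size - see pl_buf_create and the
// parameter struct below for the full details):
//
//   pl_buf ubo = pl_buf_create(gpu, pl_buf_params(
//       .size          = 4096,
//       .uniform       = true,
//       .host_writable = true,
//   ));
//   if (!ubo)
//       return; // creation failed
//   pl_buf_destroy(gpu, &ubo);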
struct pl_buf_params { size_t size; // size in bytes (must be <= `pl_gpu_limits.max_buf_size`) bool host_writable; // contents may be updated via pl_buf_write() bool host_readable; // contents may be read back via pl_buf_read() bool host_mapped; // create a persistent, RW mapping (pl_buf.data) // May be used as PL_DESC_BUF_UNIFORM or PL_DESC_BUF_TEXEL_UNIFORM. // Requires `size <= pl_gpu_limits.max_ubo_size` bool uniform; // May be used as PL_DESC_BUF_STORAGE or PL_DESC_BUF_TEXEL_STORAGE. // Requires `size <= pl_gpu_limits.max_ssbo_size` bool storable; // May be used as the source of vertex data for `pl_pass_run`. bool drawable; // Provide a hint for the memory type you want to use when allocating // this buffer's memory. // // Note: Restrictions may apply depending on the usage flags. In // particular, allocating buffers with `uniform` or `storable` enabled from // non-device memory will almost surely fail. enum pl_buf_mem_type memory_type; // Setting this to a format with the `PL_FMT_CAP_TEXEL_*` capability allows // this buffer to be used as a `PL_DESC_BUF_TEXEL_*`, when `uniform` and // `storable` are respectively also enabled. pl_fmt format; // At most one of `export_handle` and `import_handle` can be set for a // buffer. // Setting this indicates that the memory backing this buffer should be // shared with external APIs. If so, this must be exactly *one* of // `pl_gpu.export_caps.buf`. enum pl_handle_type export_handle; // Setting this indicates that the memory backing this buffer will be // imported from an external API. If so, this must be exactly *one* of // `pl_gpu.import_caps.buf`. enum pl_handle_type import_handle; // If the shared memory is being imported, the import handle must be // specified here. Otherwise, this is ignored. struct pl_shared_mem shared_mem; // If non-NULL, the buffer will be created with these contents. Otherwise, // the initial data is undefined. Using this does *not* require setting // host_writable. const void *initial_data; // Arbitrary user data. libplacebo does not use this at all. void *user_data; // Arbitrary identifying tag. Used only for debugging purposes. pl_debug_tag debug_tag; }; #define pl_buf_params(...) (&(struct pl_buf_params) { \ .debug_tag = PL_DEBUG_TAG, \ __VA_ARGS__ \ }) // A generic buffer, which can be used for multiple purposes (texture transfer, // storage buffer, uniform buffer, etc.) // // Note on efficiency: A pl_buf does not necessarily represent a true "buffer" // object on the underlying graphics API. It may also refer to a sub-slice of // a larger buffer, depending on the implementation details of the GPU. The // bottom line is that users do not need to worry about the efficiency of using // many small pl_buf objects. Having many small pl_bufs, even lots of few-byte // vertex buffers, is designed to be completely fine. // // Thread-safety: Unsafe typedef const struct pl_buf_t { struct pl_buf_params params; uint8_t *data; // for persistently mapped buffers, points to the first byte // If `params.handle_type` is set, this structure references the shared // memory backing this buffer, via the requested handle type. // // While this buffer is not in an "exported" state, the contents of the // memory are undefined. (See: `pl_buf_export`) struct pl_shared_mem shared_mem; } *pl_buf; // Create a buffer. The type of buffer depends on the parameters. The buffer // parameters must adhere to the restrictions imposed by the pl_gpu_limits. // Returns NULL on failure.
// // For buffers with shared memory, the buffer is considered to be in an // "exported" state by default, and may be used directly by the external API // after being created (until the first libplacebo operation on the buffer). PL_API pl_buf pl_buf_create(pl_gpu gpu, const struct pl_buf_params *params); PL_API void pl_buf_destroy(pl_gpu gpu, pl_buf *buf); // This behaves like `pl_buf_create`, but if the buffer already exists and has // incompatible parameters, it will get destroyed first. A buffer is considered // "compatible" if it has the same buffer type and texel format, a size greater // than or equal to the requested size, and it has a superset of the features // the user requested. After this operation, the contents of the buffer are // undefined. // // Note: Due to its unpredictability, it's not allowed to use this with // `params->initial_data` being set. Similarly, it's not allowed on a buffer // with `params->export_handle`. since this may invalidate the corresponding // external API's handle. Conversely, it *is* allowed on a buffer with // `params->host_mapped`, and the corresponding `buf->data` pointer *may* // change as a result of doing so. // // Note: If the `user_data` alone changes, this does not trigger a buffer // recreation. In theory, this can be used to detect when the buffer ended // up being recreated. PL_API bool pl_buf_recreate(pl_gpu gpu, pl_buf *buf, const struct pl_buf_params *params); // Update the contents of a buffer, starting at a given offset (must be a // multiple of 4) and up to a given size, with the contents of *data. // // This function will block until the buffer is no longer in use. Use // `pl_buf_poll` to perform non-blocking queries of buffer availability. // // Note: This function can incur synchronization overhead, so it shouldn't be // used in tight loops. If you do need to loop (e.g. to perform a strided // write), consider using host-mapped buffers, or fixing the memory in RAM, // before calling this function. PL_API void pl_buf_write(pl_gpu gpu, pl_buf buf, size_t buf_offset, const void *data, size_t size); // Read back the contents of a buffer, starting at a given offset, storing the // data into *dest. Returns whether successful. // // This function will block until the buffer is no longer in use. Use // `pl_buf_poll` to perform non-blocking queries of buffer availability. PL_API bool pl_buf_read(pl_gpu gpu, pl_buf buf, size_t buf_offset, void *dest, size_t size); // Copy `size` bytes from one buffer to another, reading from and writing to // the respective offsets. PL_API void pl_buf_copy(pl_gpu gpu, pl_buf dst, size_t dst_offset, pl_buf src, size_t src_offset, size_t size); // Initiates a buffer export operation, allowing a buffer to be accessed by an // external API. This is only valid for buffers with `params.handle_type`. // Calling this twice in a row is a harmless no-op. Returns whether successful. // // There is no corresponding "buffer import" operation, the next libplacebo // operation that touches the buffer (e.g. pl_tex_upload, but also pl_buf_write // and pl_buf_read) will implicitly import the buffer back to libplacebo. Users // must ensure that all pending operations made by the external API are fully // completed before using it in libplacebo again. (Otherwise, the behaviour // is undefined) // // Please note that this function returning does not mean the memory is // immediately available as such. 
In general, it will mark a buffer as "in use" // in the same way any other buffer operation would, and it is the user's // responsibility to wait until `pl_buf_poll` returns false before accessing // the memory from the external API. // // In terms of the access performed by this operation, it is not considered a // "read" or "write" and therefore does not technically conflict with reads or // writes to the buffer performed by the host (via mapped memory - any use of // `pl_buf_read` or `pl_buf_write` would defeat the purpose of the export). // However, restrictions made by the external API may apply that prevent this. // // The recommended use pattern is something like this: // // while (loop) { // pl_buf buf = get_free_buffer(); // or block on pl_buf_poll // // write to the buffer using the external API // pl_tex_upload(gpu, /* ... buf ... */); // implicitly imports // pl_buf_export(gpu, buf); // } // // i.e. perform an external API operation, then use and immediately export the // buffer in libplacebo, and finally wait until `pl_buf_poll` is false before // re-using it in the external API. (Or get a new buffer in the meantime) PL_API bool pl_buf_export(pl_gpu gpu, pl_buf buf); // Returns whether or not a buffer is currently "in use". This can either be // because of a pending read operation, a pending write operation or a pending // buffer export operation. Any access to the buffer by external APIs or via // the host pointer (for host-mapped buffers) is forbidden while a buffer is // "in use". The only exception to this rule is multiple reads, for example // reading from a buffer with `pl_tex_upload` while simultaneously reading from // it using mapped memory. // // The `timeout`, specified in nanoseconds, indicates how long to block for // before returning. If set to 0, this function will never block, and only // returns the current status of the buffer. The actual precision of the // timeout may be significantly longer than one nanosecond, and has no upper // bound. This function does not provide hard latency guarantees. This function // may also return at any time, even if the buffer is still in use. If the user // wishes to block until the buffer is definitely no longer in use, the // recommended usage is: // // while (pl_buf_poll(gpu, buf, UINT64_MAX)) // ; // do nothing // // Note: libplacebo operations on buffers are always internally synchronized, // so this is only needed for host-mapped or externally exported buffers. // However, it may be used to do non-blocking queries before calling blocking // functions such as `pl_buf_read`. // // Note: If `pl_gpu_limits.thread_safe` is set, this function is implicitly // synchronized, meaning it can safely be called on a `pl_buf` that is in use // by another thread. PL_API bool pl_buf_poll(pl_gpu gpu, pl_buf buf, uint64_t timeout); enum pl_tex_sample_mode { PL_TEX_SAMPLE_NEAREST, // nearest neighbour sampling PL_TEX_SAMPLE_LINEAR, // linear filtering, requires PL_FMT_CAP_LINEAR PL_TEX_SAMPLE_MODE_COUNT, }; enum pl_tex_address_mode { PL_TEX_ADDRESS_CLAMP, // clamp the nearest edge texel PL_TEX_ADDRESS_REPEAT, // repeat (tile) the texture PL_TEX_ADDRESS_MIRROR, // repeat (mirror) the texture PL_TEX_ADDRESS_MODE_COUNT, }; // Structure describing a texture. struct pl_tex_params { int w, h, d; // physical dimension; unused dimensions must be 0 pl_fmt format; // The following bools describe what operations can be performed. The // corresponding pl_fmt capability must be set for every enabled // operation type. 
// // Note: For planar formats, it is also possible to set capabilities only // supported by sub-planes. In this case, the corresponding functionality // will be available for the sub-plane, but not the planar texture itself. bool sampleable; // usable as a PL_DESC_SAMPLED_TEX bool renderable; // usable as a render target (pl_pass_run) // (must only be used with 2D textures) bool storable; // usable as a storage image (PL_DESC_IMG_*) bool blit_src; // usable as a blit source bool blit_dst; // usable as a blit destination bool host_writable; // may be updated with pl_tex_upload() bool host_readable; // may be fetched with pl_tex_download() // Note: For `blit_src`, `blit_dst`, the texture must either be // 2-dimensional or `pl_gpu_limits.blittable_1d_3d` must be set. // At most one of `export_handle` and `import_handle` can be set for a // texture. // Setting this indicates that the memory backing this texture should be // shared with external APIs, If so, this must be exactly *one* of // `pl_gpu.export_caps.tex`. enum pl_handle_type export_handle; // Setting this indicates that the memory backing this texture will be // imported from an external API. If so, this must be exactly *one* of // `pl_gpu.import_caps.tex`. Mutually exclusive with `initial_data`. enum pl_handle_type import_handle; // If the shared memory is being imported, the import handle must be // specified here. Otherwise, this is ignored. struct pl_shared_mem shared_mem; // If non-NULL, the texture will be created with these contents (tightly // packed). Using this does *not* require setting host_writable. Otherwise, // the initial data is undefined. Mutually exclusive with `import_handle`. const void *initial_data; // Arbitrary user data. libplacebo does not use this at all. void *user_data; // Arbitrary identifying tag. Used only for debugging purposes. pl_debug_tag debug_tag; }; #define pl_tex_params(...) (&(struct pl_tex_params) { \ .debug_tag = PL_DEBUG_TAG, \ __VA_ARGS__ \ }) static inline int pl_tex_params_dimension(const struct pl_tex_params params) { return params.d ? 3 : params.h ? 2 : 1; } enum pl_sampler_type { PL_SAMPLER_NORMAL, // gsampler2D, gsampler3D etc. PL_SAMPLER_RECT, // gsampler2DRect PL_SAMPLER_EXTERNAL, // gsamplerExternalOES PL_SAMPLER_TYPE_COUNT, }; // Conflates the following typical GPU API concepts: // - texture itself // - sampler state // - staging buffers for texture upload // - framebuffer objects // - wrappers for swapchain framebuffers // - synchronization needed for upload/rendering/etc. // // Essentially a pl_tex can be anything ranging from a normal texture, a wrapped // external/real framebuffer, a framebuffer object + texture pair, a mapped // texture (via pl_hwdec), or other sorts of things that can be sampled from // and/or rendered to. // // Thread-safety: Unsafe typedef const struct pl_tex_t *pl_tex; struct pl_tex_t { struct pl_tex_params params; // If `params.format` is a planar format, this contains `pl_tex` handles // encapsulating individual texture planes. Conversely, if this is a // sub-plane of a planar texture, `parent` points to the planar texture. // // Note: Calling `pl_tex_destroy` on sub-planes is undefined behavior. pl_tex planes[4]; pl_tex parent; // If `params.export_handle` is set, this structure references the shared // memory backing this buffer, via the requested handle type. // // While this texture is not in an "exported" state, the contents of the // memory are undefined. 
(See: `pl_tex_export`) // // Note: Due to vulkan driver limitations, `shared_mem.drm_format_mod` will // currently always be set to DRM_FORMAT_MOD_INVALID. No guarantee can be // made about the cross-driver compatibility of textures exported this way. struct pl_shared_mem shared_mem; // If `params.sampleable` is true, this indicates the correct sampler type // to use when sampling from this texture. enum pl_sampler_type sampler_type; }; // Create a texture (with undefined contents). Returns NULL on failure. This is // assumed to be an expensive/rare operation, and may need to perform memory // allocation or framebuffer creation. PL_API pl_tex pl_tex_create(pl_gpu gpu, const struct pl_tex_params *params); PL_API void pl_tex_destroy(pl_gpu gpu, pl_tex *tex); // This works like `pl_tex_create`, but if the texture already exists and has // incompatible texture parameters, it will get destroyed first. A texture is // considered "compatible" if it has the same texture format and sample/address // mode and it supports a superset of the features the user requested. // // Even if the texture is not recreated, calling this function will still // invalidate the contents of the texture. (Note: Because of this, // `initial_data` may not be used with `pl_tex_recreate`. Doing so is an error) // // Note: If the `user_data` alone changes, this does not trigger a texture // recreation. In theory, this can be used to detect when the texture ended // up being recreated. PL_API bool pl_tex_recreate(pl_gpu gpu, pl_tex *tex, const struct pl_tex_params *params); // Invalidates the contents of a texture. After this, the contents are fully // undefined. PL_API void pl_tex_invalidate(pl_gpu gpu, pl_tex tex); union pl_clear_color { float f[4]; int32_t i[4]; uint32_t u[4]; }; // Clear the dst texture with the given color (rgba). This is functionally // identical to a blit operation, which means `dst->params.blit_dst` must be // set. PL_API void pl_tex_clear_ex(pl_gpu gpu, pl_tex dst, const union pl_clear_color color); // Wrapper for `pl_tex_clear_ex` which only works for floating point textures. PL_API void pl_tex_clear(pl_gpu gpu, pl_tex dst, const float color[4]); struct pl_tex_blit_params { // The texture to blit from. Must have `params.blit_src` enabled. pl_tex src; // The texture to blit to. Must have `params.blit_dst` enabled, and a // format that is loosely compatible with `src`. This essentially means // that they must have the same `internal_size`. Additionally, UINT // textures can only be blitted to other UINT textures, and SINT textures // can only be blitted to other SINT textures. pl_tex dst; // The region of the source texture to blit. Must be within the texture // bounds of `src`. May be flipped. (Optional) pl_rect3d src_rc; // The region of the destination texture to blit into. Must be within the // texture bounds of `dst`. May be flipped. Areas outside of `dst_rc` in // `dst` are preserved. (Optional) pl_rect3d dst_rc; // If `src_rc` and `dst_rc` have different sizes, the texture will be // scaled using the given texture sampling mode. enum pl_tex_sample_mode sample_mode; }; #define pl_tex_blit_params(...) (&(struct pl_tex_blit_params) { __VA_ARGS__ }) // Copy a sub-rectangle from one texture to another. PL_API void pl_tex_blit(pl_gpu gpu, const struct pl_tex_blit_params *params); // Structure describing a texture transfer operation. struct pl_tex_transfer_params { // Texture to transfer to/from. 
Depending on the type of the operation, // this must have params.host_writable (uploads) or params.host_readable // (downloads) set, respectively. pl_tex tex; // Note: Superfluous parameters are ignored, i.e. for a 1D texture, the y // and z fields of `rc`, as well as the corresponding pitches, are ignored. // In all other cases, the pitch must be large enough to contain the // corresponding dimension of `rc`, and the `rc` must be normalized and // fully contained within the image dimensions. Missing fields in the `rc` // are inferred from the image size. If unset, the pitch is inferred // from `rc` (that is, it's assumed that the data is tightly packed in the // buffer). Otherwise, `row_pitch` *must* be a multiple of // `tex->params.format->texel_align`, and `depth_pitch` must be a multiple // of `row_pitch`. pl_rect3d rc; // region of the texture to transfer size_t row_pitch; // the number of bytes separating image rows size_t depth_pitch; // the number of bytes separating image planes // An optional timer to report the approximate duration of the texture // transfer to. Note that this is only an approximation, since the actual // texture transfer may happen entirely in the background (in particular, // for implementations with asynchronous transfer capabilities). It's also // not guaranteed that all GPUs support this. pl_timer timer; // An optional callback to fire after the operation completes. If this is // specified, then the operation is performed asynchronously. Note that // transfers to/from buffers are always asynchronous, even without, this // field, so it's more useful for `ptr` transfers. (Though it can still be // helpful to avoid having to manually poll buffers all the time) // // When this is *not* specified, uploads from `ptr` are still asynchronous // but require a host memcpy, while downloads from `ptr` are blocking. As // such, it's recommended to always try using asynchronous texture // transfers wherever possible. // // Note: Requires `pl_gpu_limits.callbacks` // // Note: Callbacks are implicitly synchronized, meaning that callbacks are // guaranteed to never execute concurrently with other callbacks. However, // they may execute from any thread that the `pl_gpu` is used on. void (*callback)(void *priv); void *priv; // arbitrary user data // For the data source/target of a transfer operation, there are two valid // options: // // 1. Transferring to/from a buffer: (requires `pl_gpu_limits.buf_transfer`) pl_buf buf; // buffer to use size_t buf_offset; // offset of data within buffer, should be a // multiple of `tex->params.format->texel_size` // 2. Transferring to/from host memory directly: void *ptr; // address of data bool no_import; // always use memcpy, bypassing host ptr import // Note: The contents of the memory region / buffer must exactly match the // texture format; i.e. there is no explicit conversion between formats. }; #define pl_tex_transfer_params(...) (&(struct pl_tex_transfer_params) { __VA_ARGS__ }) // Upload data to a texture. Returns whether successful. PL_API bool pl_tex_upload(pl_gpu gpu, const struct pl_tex_transfer_params *params); // Download data from a texture. Returns whether successful. PL_API bool pl_tex_download(pl_gpu gpu, const struct pl_tex_transfer_params *params); // Returns whether or not a texture is currently "in use". This can either be // because of a pending read operation, a pending write operation or a pending // texture export operation. Note that this function's usefulness is extremely // limited under ordinary circumstances. 
In practically all cases, textures do // not need to be directly synchronized by the user, except when interfacing // with external libraries. This function should NOT, however, be used as a // crutch to avoid having to implement semaphore-based synchronization. Use // the API-specific functions such as `pl_vulkan_hold/release` for that. // // A good example of a use case in which this function is required is when // interoperating with external memory management that needs to know when an // imported texture is safe to free / reclaim internally, in which case // semaphores are insufficient because memory management is a host operation. // // The `timeout`, specified in nanoseconds, indicates how long to block for // before returning. If set to 0, this function will never block, and only // returns the current status of the texture. The actual precision of the // timeout may be significantly longer than one nanosecond, and has no upper // bound. This function does not provide hard latency guarantees. This function // may also return at any time, even if the texture is still in use. If the // user wishes to block until the texture is definitely no longer in use, the // recommended usage is: // // while (pl_tex_poll(gpu, tex, UINT64_MAX)) // ; // do nothing // // Note: If `pl_gpu_limits.thread_safe` is set, this function is implicitly // synchronized, meaning it can safely be called on a `pl_tex` that is in use // by another thread. PL_API bool pl_tex_poll(pl_gpu gpu, pl_tex tex, uint64_t timeout); // Data type of a shader input variable (e.g. uniform, or UBO member) enum pl_var_type { PL_VAR_INVALID = 0, PL_VAR_SINT, // C: int GLSL: int/ivec PL_VAR_UINT, // C: unsigned int GLSL: uint/uvec PL_VAR_FLOAT, // C: float GLSL: float/vec/mat PL_VAR_TYPE_COUNT }; // Returns the host size (in bytes) of a pl_var_type. PL_API size_t pl_var_type_size(enum pl_var_type type); // Represents a shader input variable (concrete data, e.g. vector, matrix) struct pl_var { const char *name; // name as used in the shader enum pl_var_type type; // The total number of values is given by dim_v * dim_m. For example, a // vec2 would have dim_v = 2 and dim_m = 1. A mat3x4 would have dim_v = 4 // and dim_m = 3. int dim_v; // vector dimension int dim_m; // matrix dimension (number of columns, see below) int dim_a; // array dimension }; // Helper functions for constructing the most common pl_vars, with names // corresponding to their respective GLSL built-in types.
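//
// For example (illustrative, matching the field semantics above), the vec3
// helper below is equivalent to spelling the struct out by hand:
//
//   struct pl_var tint = {
//       .name  = "tint",   // hypothetical variable name
//       .type  = PL_VAR_FLOAT,
//       .dim_v = 3,
//       .dim_m = 1,
//       .dim_a = 1,        // non-array
//   };
//
// i.e. the same as pl_var_vec3("tint").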
PL_API struct pl_var pl_var_float(const char *name); PL_API struct pl_var pl_var_vec2(const char *name); PL_API struct pl_var pl_var_vec3(const char *name); PL_API struct pl_var pl_var_vec4(const char *name); PL_API struct pl_var pl_var_mat2(const char *name); PL_API struct pl_var pl_var_mat2x3(const char *name); PL_API struct pl_var pl_var_mat2x4(const char *name); PL_API struct pl_var pl_var_mat3(const char *name); PL_API struct pl_var pl_var_mat3x4(const char *name); PL_API struct pl_var pl_var_mat4x2(const char *name); PL_API struct pl_var pl_var_mat4x3(const char *name); PL_API struct pl_var pl_var_mat4(const char *name); PL_API struct pl_var pl_var_int(const char *name); PL_API struct pl_var pl_var_ivec2(const char *name); PL_API struct pl_var pl_var_ivec3(const char *name); PL_API struct pl_var pl_var_ivec4(const char *name); PL_API struct pl_var pl_var_uint(const char *name); PL_API struct pl_var pl_var_uvec2(const char *name); PL_API struct pl_var pl_var_uvec3(const char *name); PL_API struct pl_var pl_var_uvec4(const char *name); struct pl_named_var { const char *glsl_name; struct pl_var var; }; // The same list as above, tagged by name and terminated with a {0} entry. PL_API extern const struct pl_named_var pl_var_glsl_types[]; // Efficient helper function for performing a lookup in the above array. // Returns NULL if the variable is not legal. Note that the array dimension is // ignored, since it's usually part of the variable name and not the type name. PL_API const char *pl_var_glsl_type_name(struct pl_var var); // Converts a pl_fmt to an "equivalent" pl_var. Equivalent in this sense means // that the pl_var's type will be the same as the vertex's sampled type (e.g. // PL_FMT_UNORM gets turned into PL_VAR_FLOAT). PL_API struct pl_var pl_var_from_fmt(pl_fmt fmt, const char *name); // Describes the memory layout of a variable, relative to some starting location // (typically the offset within a uniform/storage/pushconstant buffer) // // Note on matrices: All GPUs expect column major matrices, for both buffers and // input variables. Care needs to be taken to avoid trying to use e.g. a // pl_matrix3x3 (which is row major) directly as a pl_var_update.data! // // In terms of the host layout, a column-major matrix (e.g. matCxR) with C // columns and R rows is treated like an array vecR[C]. The `stride` here refers // to the separation between these array elements, i.e. the separation between // the individual columns. // // Visualization of a mat4x3: // // 0 1 2 3 <- columns // 0 [ (A) (D) (G) (J) ] // 1 [ (B) (E) (H) (K) ] // 2 [ (C) (F) (I) (L) ] // ^ rows // // Layout in GPU memory: (stride=16, size=60) // // [ A B C ] X <- column 0, offset +0 // [ D E F ] X <- column 1, offset +16 // [ G H I ] X <- column 2, offset +32 // [ J K L ] <- column 3, offset +48 // // Note the lack of padding on the last column in this example. // In general: size <= stride * dim_m // // C representation: (stride=12, size=48) // // { { A, B, C }, // { D, E, F }, // { G, H, I }, // { J, K, L } } // // Note on arrays: `stride` represents both the stride between elements of a // matrix, and the stride between elements of an array. That is, there is no // distinction between the columns of a matrix and the rows of an array. For // example, a mat2[10] and a vec2[20] share the same pl_var_layout - the stride // would be sizeof(vec2) and the size would be sizeof(vec2) * 2 * 10. // // For non-array/matrix types, `stride` is equal to `size`. 
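// To make the mat4x3 example above concrete, here is a hedged sketch of what
// the layout helpers declared below are expected to report for it (the
// specific numbers simply restate the diagram above, they are not an
// additional guarantee):
//
//   struct pl_var mat = pl_var_mat4x3("M");
//   struct pl_var_layout host   = pl_var_host_layout(0, &mat);
//   // host.stride == 12, host.size == 48    (tightly packed C layout)
//   struct pl_var_layout std140 = pl_std140_layout(0, &mat);
//   // std140.stride == 16, std140.size == 60  (no padding after last column)
//
// When laying out a uniform buffer incrementally, the `offset` passed to the
// next call would typically be `std140.offset + std140.size` of the previous
// variable.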
struct pl_var_layout { size_t offset; // the starting offset of the first byte size_t stride; // the delta between two elements of an array/matrix size_t size; // the total size of the input }; // Returns the host layout of an input variable as required for a // tightly-packed, byte-aligned C data type, given a starting offset. PL_API struct pl_var_layout pl_var_host_layout(size_t offset, const struct pl_var *var); // Returns the GLSL std140 layout of an input variable given a current buffer // offset, as required for a buffer descriptor of type PL_DESC_BUF_UNIFORM // // The normal way to use this function is when calculating the size and offset // requirements of a uniform buffer in an incremental fashion, to calculate the // new offset of the next variable in this buffer. PL_API struct pl_var_layout pl_std140_layout(size_t offset, const struct pl_var *var); // Returns the GLSL std430 layout of an input variable given a current buffer // offset, as required for a buffer descriptor of type PL_DESC_BUF_STORAGE, and // for push constants. PL_API struct pl_var_layout pl_std430_layout(size_t offset, const struct pl_var *var); // Convenience definitions / friendly names for these #define pl_buf_uniform_layout pl_std140_layout #define pl_buf_storage_layout pl_std430_layout #define pl_push_constant_layout pl_std430_layout // Like memcpy, but copies bytes from `src` to `dst` in a manner governed by // the stride and size of `dst_layout` as well as `src_layout`. Also takes // into account the respective `offset`. PL_API void memcpy_layout(void *dst, struct pl_var_layout dst_layout, const void *src, struct pl_var_layout src_layout); // Represents a compile-time constant. struct pl_constant { enum pl_var_type type; // constant data type uint32_t id; // GLSL `constant_id` size_t offset; // byte offset in `constant_data` }; // Represents a vertex attribute. struct pl_vertex_attrib { const char *name; // name as used in the shader pl_fmt fmt; // data format (must have PL_FMT_CAP_VERTEX) size_t offset; // byte offset into the vertex struct int location; // vertex location (as used in the shader) }; // Returns an abstract namespace index for a given descriptor type. This will // always be a value >= 0 and < PL_DESC_TYPE_COUNT. Implementations can use // this to figure out which descriptors may share the same value of `binding`. // Bindings must only be unique for all descriptors within the same namespace. PL_API int pl_desc_namespace(pl_gpu gpu, enum pl_desc_type type); // Access mode of a shader input descriptor. enum pl_desc_access { PL_DESC_ACCESS_READWRITE, PL_DESC_ACCESS_READONLY, PL_DESC_ACCESS_WRITEONLY, PL_DESC_ACCESS_COUNT, }; // Returns the GLSL syntax for a given access mode (e.g. "readonly"). PL_API const char *pl_desc_access_glsl_name(enum pl_desc_access mode); // Represents a shader descriptor (e.g. texture or buffer binding) struct pl_desc { const char *name; // name as used in the shader enum pl_desc_type type; // The binding of this descriptor, as used in the shader. All bindings // within a namespace must be unique. (see: pl_desc_namespace) int binding; // For storage images and storage buffers, this can be used to restrict // the type of access that may be performed on the descriptor. Ignored for // the other descriptor types (uniform buffers and sampled textures are // always read-only). 
enum pl_desc_access access; }; // Framebuffer blending mode (for raster passes) enum pl_blend_mode { PL_BLEND_ZERO, PL_BLEND_ONE, PL_BLEND_SRC_ALPHA, PL_BLEND_ONE_MINUS_SRC_ALPHA, PL_BLEND_MODE_COUNT, }; struct pl_blend_params { enum pl_blend_mode src_rgb; enum pl_blend_mode dst_rgb; enum pl_blend_mode src_alpha; enum pl_blend_mode dst_alpha; }; #define pl_blend_params(...) (&(struct pl_blend_params) { __VA_ARGS__ }) // Typical alpha compositing PL_API extern const struct pl_blend_params pl_alpha_overlay; enum pl_prim_type { PL_PRIM_TRIANGLE_LIST, PL_PRIM_TRIANGLE_STRIP, PL_PRIM_TYPE_COUNT, }; enum pl_index_format { PL_INDEX_UINT16 = 0, PL_INDEX_UINT32, PL_INDEX_FORMAT_COUNT, }; enum pl_pass_type { PL_PASS_INVALID = 0, PL_PASS_RASTER, // vertex+fragment shader PL_PASS_COMPUTE, // compute shader (requires `pl_gpu.glsl.compute`) PL_PASS_TYPE_COUNT, }; // Description of a rendering pass. It conflates the following: // - GLSL shader(s) and its list of inputs // - target parameters (for raster passes) struct pl_pass_params { enum pl_pass_type type; // Input variables. struct pl_var *variables; int num_variables; // Input descriptors. struct pl_desc *descriptors; int num_descriptors; // Compile-time specialization constants. struct pl_constant *constants; int num_constants; // Initial data for the specialization constants. Optional. If NULL, // specialization constants receive the values from the shader text. void *constant_data; // Push constant region. Must be a multiple of 4 <= limits.max_pushc_size size_t push_constants_size; // The shader text in GLSL. For PL_PASS_RASTER, this is interpreted // as a fragment shader. For PL_PASS_COMPUTE, this is interpreted as // a compute shader. const char *glsl_shader; // --- type==PL_PASS_RASTER only // Describes the interpretation and layout of the vertex data. enum pl_prim_type vertex_type; struct pl_vertex_attrib *vertex_attribs; int num_vertex_attribs; size_t vertex_stride; // must be a multiple of limits.align_vertex_stride // The vertex shader itself. const char *vertex_shader; // Target format. The format must support PL_FMT_CAP_RENDERABLE. The // resulting pass may only be used on textures that have a format with a // `pl_fmt.signature` compatible with this format. pl_fmt target_format; // Target blending mode. If this is NULL, blending is disabled. Otherwise, // the `target_format` must also support PL_FMT_CAP_BLENDABLE. const struct pl_blend_params *blend_params; // If false, the target's existing contents will be discarded before the // pass is run. (Semantically equivalent to calling pl_tex_invalidate // before every pl_pass_run, but slightly more efficient) // // Specifying `blend_params` requires `load_target` to be true. bool load_target; // --- Deprecated / removed fields. PL_DEPRECATED_IN(v6.322) const uint8_t *cached_program; // Non-functional PL_DEPRECATED_IN(v6.322) size_t cached_program_len; }; #define pl_pass_params(...) (&(struct pl_pass_params) { __VA_ARGS__ }) // Conflates the following typical GPU API concepts: // - various kinds of shaders // - rendering pipelines // - descriptor sets, uniforms, other bindings // - all synchronization necessary // - the current values of all inputs // // Thread-safety: Unsafe typedef const struct pl_pass_t { struct pl_pass_params params; } *pl_pass; // Compile a shader and create a render pass. This is a rare/expensive // operation and may take a significant amount of time, even if a cached // program is used. Returns NULL on failure.
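// As a rough, hedged sketch of how these parameters fit together (an
// editor's illustration, not a normative example): creating a minimal
// compute pass that writes to a single storage image might look roughly like
// the following. The shader source is only represented by a placeholder,
// since the exact GLSL boilerplate (version pragma, binding layout
// qualifiers) depends on the GLSL version reported by `pl_gpu.glsl`:
//
//   // complete GLSL compute shader matching the descriptor below (assumed)
//   extern const char *compute_src;
//
//   struct pl_desc img = {
//       .name    = "out_img",
//       .type    = PL_DESC_STORAGE_IMG,   // defined earlier in this header
//       .binding = 0,
//       .access  = PL_DESC_ACCESS_WRITEONLY,
//   };
//
//   pl_pass pass = pl_pass_create(gpu, pl_pass_params(
//       .type            = PL_PASS_COMPUTE,
//       .glsl_shader     = compute_src,
//       .descriptors     = &img,
//       .num_descriptors = 1,
//   ));
//   if (!pass)
//       abort(); // or handle the failure gracefully
//
// The pass would then be dispatched with `pl_pass_run`, using the
// `compute_groups` field described further below.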
PL_API pl_pass pl_pass_create(pl_gpu gpu, const struct pl_pass_params *params); PL_API void pl_pass_destroy(pl_gpu gpu, pl_pass *pass); struct pl_desc_binding { const void *object; // pl_* object with type corresponding to pl_desc_type // For PL_DESC_SAMPLED_TEX, this can be used to configure the sampler. enum pl_tex_address_mode address_mode; enum pl_tex_sample_mode sample_mode; }; struct pl_var_update { int index; // index into params.variables[] const void *data; // pointer to raw byte data corresponding to pl_var_host_layout() }; struct pl_pass_run_params { pl_pass pass; // If present, the shader will be re-specialized with the new constants // provided. This is a significantly cheaper operation than recompiling a // brand new shader, but should still be avoided if possible. // // Leaving it as NULL re-uses the existing specialization values. Ignored // if the shader has no specialization constants. Guaranteed to be a no-op // if the values have not changed since the last invocation. void *constant_data; // This list only contains descriptors/variables which have changed // since the previous invocation. All non-mentioned variables implicitly // preserve their state from the last invocation. struct pl_var_update *var_updates; int num_var_updates; // This list contains all descriptors used by this pass. It must // always be filled, even if the descriptors haven't changed. The order // must match that of pass->params.descriptors struct pl_desc_binding *desc_bindings; // The push constants for this invocation. This must always be set and // fully defined for every invocation if params.push_constants_size > 0. void *push_constants; // An optional timer to report the approximate runtime of this shader pass // invocation to. Note that this is only an approximation, since shaders // may overlap their execution times and contend for GPU time. pl_timer timer; // --- pass->params.type==PL_PASS_RASTER only // Target must be a 2D texture, `target->params.renderable` must be true, // and `target->params.format->signature` must match the signature provided // in `pass->params.target_format`. // // If the viewport or scissors are left blank, they are inferred from // target->params. // // WARNING: Rendering to a *target that is being read from by the same // shader is undefined behavior. In general, trying to bind the same // resource multiple times to the same shader is undefined behavior. pl_tex target; pl_rect2d viewport; // screen space viewport (must be normalized) pl_rect2d scissors; // target render scissors (must be normalized) // Number of vertices to render int vertex_count; // Vertex data may be provided in one of two forms: // // 1. Drawing from host memory directly const void *vertex_data; // 2. Drawing from a vertex buffer (requires `vertex_buf->params.drawable`) pl_buf vertex_buf; size_t buf_offset; // (Optional) Index data may be provided in the form given by `index_fmt`. // These will be used for instanced rendering. Similar to vertex data, this // can be provided in two forms: // 1. From host memory const void *index_data; enum pl_index_format index_fmt; // 2. From an index buffer (requires `index_buf->params.drawable`) pl_buf index_buf; size_t index_offset; // Note: Drawing from an index buffer requires vertex data to also be // present in buffer form, i.e. it's forbidden to mix `index_buf` with // `vertex_data` (though vice versa is allowed). // --- pass->params.type==PL_PASS_COMPUTE only // Number of work groups to dispatch per dimension (X/Y/Z). 
Must be <= the // corresponding index of limits.max_dispatch int compute_groups[3]; }; #define pl_pass_run_params(...) (&(struct pl_pass_run_params) { __VA_ARGS__ }) // Execute a render pass. PL_API void pl_pass_run(pl_gpu gpu, const struct pl_pass_run_params *params); // This is semantically a no-op, but it provides a hint that you want to flush // any partially queued up commands and begin execution. There is normally no // need to call this, because queued commands will always be implicitly flushed // whenever necessary to make forward progress on commands like `pl_buf_poll`, // or when submitting a frame to a swapchain for display. In fact, calling this // function can negatively impact performance, because some GPUs rely on being // able to re-order and modify queued commands in order to enable optimizations // retroactively. // // The only time this might be beneficial to call explicitly is if you're doing // lots of offline processing, i.e. you aren't rendering to a swapchain but to // textures that you download from again. In that case you should call this // function after each "work item" to ensure good parallelism between them. // // It's worth noting that this function may block if you're over-feeding the // GPU without waiting for existing results to finish. PL_API void pl_gpu_flush(pl_gpu gpu); // This is like `pl_gpu_flush` but also blocks until the GPU is fully idle // before returning. Using this in your rendering loop is seriously disadvised, // and almost never the right solution. The intended use case is for deinit // logic, where users may want to force the all pending GPU operations to // finish so they can clean up their state more easily. // // After this operation is called, it's guaranteed that all pending buffer // operations are complete - i.e. `pl_buf_poll` is guaranteed to return false. // It's also guaranteed that any outstanding timer query results are available. // // Note: If you only care about buffer operations, you can accomplish this more // easily by using `pl_buf_poll` with the timeout set to `UINT64_MAX`. But if // you have many buffers it may be more convenient to call this function // instead. The difference is that this function will also affect e.g. renders // to a `pl_swapchain`. PL_API void pl_gpu_finish(pl_gpu gpu); // Returns true if the GPU is considered to be in a "failed" state, which // during normal operation is typically the result of things like the device // being lost (due to e.g. power management). // // If this returns true, users *should* destroy and recreate the `pl_gpu`, // including all associated resources, via the appropriate mechanism. PL_API bool pl_gpu_is_failed(pl_gpu gpu); PL_API_END #endif // LIBPLACEBO_GPU_H_ libplacebo-v7.349.0/src/include/libplacebo/log.h000066400000000000000000000111061463457750100214330ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . 
*/ #ifndef LIBPLACEBO_LOG_H_ #define LIBPLACEBO_LOG_H_ #include #include PL_API_BEGIN // The log level associated with a given log message. enum pl_log_level { PL_LOG_NONE = 0, PL_LOG_FATAL, // results in total loss of function of a major component PL_LOG_ERR, // serious error; may result in degraded function PL_LOG_WARN, // warning; potentially bad, probably user-relevant PL_LOG_INFO, // informational message, also potentially harmless errors PL_LOG_DEBUG, // verbose debug message, informational PL_LOG_TRACE, // very noisy trace of activity, usually benign PL_LOG_ALL = PL_LOG_TRACE, }; struct pl_log_params { // Logging callback. All messages, informational or otherwise, will get // redirected to this callback. The logged messages do not include trailing // newlines. Optional. void (*log_cb)(void *log_priv, enum pl_log_level level, const char *msg); void *log_priv; // The current log level. Controls the level of message that will be // redirected to the log callback. Setting this to PL_LOG_ALL means all // messages will be forwarded, but doing so indiscriminately can result // in increased CPU usage as it may enable extra debug paths based on the // configured log level. enum pl_log_level log_level; }; #define pl_log_params(...) (&(struct pl_log_params) { __VA_ARGS__ }) PL_API extern const struct pl_log_params pl_log_default_params; // Thread-safety: Safe // // Note: In any context in which `pl_log` is used, users may also pass NULL // to disable logging. In other words, NULL is a valid `pl_log`. typedef const struct pl_log_t { struct pl_log_params params; } *pl_log; #define pl_log_glue1(x, y) x##y #define pl_log_glue2(x, y) pl_log_glue1(x, y) // Force a link error in the case of linking against an incompatible API // version. #define pl_log_create pl_log_glue2(pl_log_create_, PL_API_VER) // Creates a pl_log. `api_ver` is for historical reasons and ignored currently. // `params` defaults to `&pl_log_default_params` if left as NULL. // // Note: As a general rule, any `params` struct used as an argument to a // function need only live until the corresponding function returns. PL_API pl_log pl_log_create(int api_ver, const struct pl_log_params *params); // Destroy a `pl_log` object. // // Note: As a general rule, all `_destroy` functions take the pointer to the // object to free as their parameter. This pointer is overwritten by NULL // afterwards. Calling a _destroy function on &{NULL} is valid, but calling it // on NULL itself is invalid. PL_API void pl_log_destroy(pl_log *log); // Update the parameters of a `pl_log` without destroying it. This can be // used to change the log function, log context or log level retroactively. // `params` defaults to `&pl_log_default_params` if left as NULL. // // Returns the previous params, atomically. PL_API struct pl_log_params pl_log_update(pl_log log, const struct pl_log_params *params); // Like `pl_log_update` but only updates the log level, leaving the log // callback intact. // // Returns the previous log level, atomically. PL_API enum pl_log_level pl_log_level_update(pl_log log, enum pl_log_level level); // Two simple, stream-based loggers. You can use these as the log_cb. If you // also set log_priv to a FILE* (e.g. stdout or stderr) it will be printed // there; otherwise, it will be printed to stdout or stderr depending on the // log level. // // The version with colors will use ANSI escape sequences to indicate the log // level. The version without will use explicit prefixes.
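// For example (a minimal sketch, assuming <stdio.h> is included for `stderr`),
// a logger that prints all messages of severity PL_LOG_INFO or higher to
// stderr could be created as follows:
//
//   pl_log log = pl_log_create(PL_API_VER, pl_log_params(
//       .log_cb    = pl_log_simple,
//       .log_priv  = stderr,
//       .log_level = PL_LOG_INFO,
//   ));
//   // ... use `log` to create a pl_gpu, pl_renderer, etc. ...
//   pl_log_destroy(&log);   // sets `log` to NULL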
PL_API void pl_log_simple(void *stream, enum pl_log_level level, const char *msg); PL_API void pl_log_color(void *stream, enum pl_log_level level, const char *msg); // Backwards compatibility with older versions of libplacebo #define pl_context pl_log #define pl_context_params pl_log_params PL_API_END #endif // LIBPLACEBO_LOG_H_ libplacebo-v7.349.0/src/include/libplacebo/meson.build000066400000000000000000000002511463457750100226420ustar00rootroot00000000000000sources += configure_file( input: 'config.h.in', output: 'config.h', install_dir: get_option('includedir') / meson.project_name(), configuration: conf_public, ) libplacebo-v7.349.0/src/include/libplacebo/opengl.h000066400000000000000000000231711463457750100221430ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #ifndef LIBPLACEBO_OPENGL_H_ #define LIBPLACEBO_OPENGL_H_ #include #include #include PL_API_BEGIN // Note on thread safety: The thread safety of `pl_opengl` and any associated // GPU objects follows the same thread safety rules as the underlying OpenGL // context. In other words, they must only be called from the thread the OpenGL // context is current on. typedef const struct pl_opengl_t { pl_gpu gpu; // Detected GL version int major, minor; // List of GL/EGL extensions, provided for convenience const char * const *extensions; int num_extensions; } *pl_opengl; static inline bool pl_opengl_has_ext(pl_opengl gl, const char *ext) { for (int i = 0; i < gl->num_extensions; i++) if (!strcmp(ext, gl->extensions[i])) return true; return false; } typedef void (*pl_voidfunc_t)(void); struct pl_opengl_params { // Main gl*GetProcAddr function. This will be used to load all GL/EGL // functions. Optional - if unspecified, libplacebo will default to an // internal loading logic which should work on most platforms. pl_voidfunc_t (*get_proc_addr_ex)(void *proc_ctx, const char *procname); void *proc_ctx; // Simpler API for backwards compatibility / convenience. (This one // directly matches the signature of most gl*GetProcAddr library functions) pl_voidfunc_t (*get_proc_addr)(const char *procname); // Enable OpenGL debug report callbacks. May have little effect depending // on whether or not the GL context was initialized with appropriate // debugging enabled. bool debug; // Allow the use of (suspected) software rasterizers and renderers. These // can be useful for debugging purposes, but normally, their use is // undesirable when GPU-accelerated processing is expected. bool allow_software; // Restrict the maximum allowed GLSL version. (Mainly for testing) int max_glsl_version; // Optional. Required when importing/exporting dmabufs as textures. void *egl_display; void *egl_context; // Optional callbacks to bind/release the OpenGL context on the current // thread. 
If these are specified, then the resulting `pl_gpu` will have // `pl_gpu_limits.thread_safe` enabled, and may therefore be used from any // thread without first needing to bind the OpenGL context. // // If the user is re-using the same OpenGL context in non-libplacebo code, // then these callbacks should include whatever synchronization is // necessary to prevent simultaneous use between libplacebo and the user. bool (*make_current)(void *priv); void (*release_current)(void *priv); void *priv; }; // Default/recommended parameters #define pl_opengl_params(...) (&(struct pl_opengl_params) { __VA_ARGS__ }) PL_API extern const struct pl_opengl_params pl_opengl_default_params; // Creates a new OpenGL renderer based on the given parameters. This will // internally use whatever platform-defined mechanism (WGL, X11, EGL) is // appropriate for loading the OpenGL function calls, so the user doesn't need // to pass in a `getProcAddress` callback. If `params` is left as NULL, it // defaults to `&pl_opengl_default_params`. The context must be active when // calling this function, and must remain active whenever calling any // libplacebo function on the resulting `pl_opengl` or `pl_gpu`. // // Note that creating multiple `pl_opengl` instances from the same OpenGL // context is undefined behavior. PL_API pl_opengl pl_opengl_create(pl_log log, const struct pl_opengl_params *params); // All resources allocated from the `pl_gpu` contained by this `pl_opengl` must // be explicitly destroyed by the user before calling `pl_opengl_destroy`. PL_API void pl_opengl_destroy(pl_opengl *gl); // For a `pl_gpu` backed by `pl_opengl`, this function can be used to retrieve // the underlying `pl_opengl`. Returns NULL for any other type of `gpu`. PL_API pl_opengl pl_opengl_get(pl_gpu gpu); struct pl_opengl_framebuffer { // ID of the framebuffer, or 0 to use the context's default framebuffer. int id; // If true, then the framebuffer is assumed to be "flipped" relative to // normal GL semantics, i.e. set this to `true` if the first pixel is the // top left corner. bool flipped; }; struct pl_opengl_swapchain_params { // Set this to the platform-specific function to swap buffers, e.g. // glXSwapBuffers, eglSwapBuffers etc. This will be called internally by // `pl_swapchain_swap_buffers`. Required, unless you never call that // function. void (*swap_buffers)(void *priv); // Initial framebuffer description. This can be changed later on using // `pl_opengl_swapchain_update_fb`. struct pl_opengl_framebuffer framebuffer; // Attempt forcing a specific latency. If this is nonzero, then // `pl_swapchain_swap_buffers` will wait until fewer than N frames are "in // flight" before returning. Setting this to a high number generally // accomplished nothing, because the OpenGL driver typically limits the // number of buffers on its own. But setting it to a low number like 2 or // even 1 can reduce latency (at the cost of throughput). int max_swapchain_depth; // Arbitrary user pointer that gets passed to `swap_buffers` etc. void *priv; }; #define pl_opengl_swapchain_params(...) (&(struct pl_opengl_swapchain_params) { __VA_ARGS__ }) // Creates an instance of `pl_swapchain` tied to the active context. // Note: Due to OpenGL semantics, users *must* call `pl_swapchain_resize` // before attempting to use this swapchain, otherwise calls to // `pl_swapchain_start_frame` will fail. PL_API pl_swapchain pl_opengl_create_swapchain(pl_opengl gl, const struct pl_opengl_swapchain_params *params); // Update the framebuffer description. 
After calling this function, users // *must* call `pl_swapchain_resize` before attempting to use the swapchain // again, otherwise calls to `pl_swapchain_start_frame` will fail. PL_API void pl_opengl_swapchain_update_fb(pl_swapchain sw, const struct pl_opengl_framebuffer *fb); struct pl_opengl_wrap_params { // The GLuint texture object itself. Optional. If no texture is provided, // then only the opaque framebuffer `fbo` will be wrapped, leaving the // resulting `pl_tex` object with some operations (such as sampling) being // unsupported. unsigned int texture; // The GLuint associated framebuffer. Optional. If this is not specified, // then libplacebo will attempt creating a framebuffer from the provided // texture object (if possible). // // Note: As a special case, if neither a texture nor an FBO are provided, // this is equivalent to wrapping the OpenGL default framebuffer (id 0). unsigned int framebuffer; // The image's dimensions (unused dimensions must be 0) int width; int height; int depth; // Texture-specific fields: // // Note: These are only relevant if `texture` is provided. // The GLenum for the texture target to use, e.g. GL_TEXTURE_2D. Optional. // If this is left as 0, the target is inferred from the number of // dimensions. Users may want to set this to something specific like // GL_TEXTURE_EXTERNAL_OES depending on the nature of the texture. unsigned int target; // The texture's GLint sized internal format (e.g. GL_RGBA16F). Required. int iformat; }; #define pl_opengl_wrap_params(...) (&(struct pl_opengl_wrap_params) { __VA_ARGS__ }) // Wraps an external OpenGL object into a `pl_tex` abstraction. Due to the // internally synchronized nature of OpenGL, no explicit synchronization // is needed between libplacebo `pl_tex_` operations, and host accesses to // the texture. Wrapping the same OpenGL texture multiple times is permitted. // Note that this function transfers no ownership. // // This wrapper can be destroyed by simply calling `pl_tex_destroy` on it, // which will *not* destroy the user-provided OpenGL texture or framebuffer. // // This function may fail, in which case it returns NULL. PL_API pl_tex pl_opengl_wrap(pl_gpu gpu, const struct pl_opengl_wrap_params *params); // Analogous to `pl_opengl_wrap`, this function takes any `pl_tex` (including // ones created by `pl_tex_create`) and unwraps it to expose the underlying // OpenGL texture to the user. Note that this function transfers no ownership, // i.e. the texture object and framebuffer shall not be destroyed by the user. // // Returns the OpenGL texture. `out_target` and `out_iformat` will be updated // to hold the target type and internal format, respectively. (Optional) // // For renderable/blittable textures, `out_fbo` will be updated to the ID of // the framebuffer attached to this texture, or 0 if there is none. (Optional) PL_API unsigned int pl_opengl_unwrap(pl_gpu gpu, pl_tex tex, unsigned int *out_target, int *out_iformat, unsigned int *out_fbo); PL_API_END #endif // LIBPLACEBO_OPENGL_H_ libplacebo-v7.349.0/src/include/libplacebo/options.h000066400000000000000000000202561463457750100223530ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. 
* * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #ifndef LIBPLACEBO_OPTIONS_H_ #define LIBPLACEBO_OPTIONS_H_ #include PL_API_BEGIN // High-level heap-managed struct containing storage for all options implied by // pl_render_params, including a high-level interface for serializing, // deserializing and interfacing with them in a programmatic way. typedef const struct pl_opt_t *pl_opt; typedef struct pl_options_t { // Non-NULL `params.*_params` pointers must always point into this struct struct pl_render_params params; // Backing storage for all of the various rendering parameters. Whether // or not these params are active is determined by whether or not // `params.*_params` is set to this address or NULL. struct pl_deband_params deband_params; struct pl_sigmoid_params sigmoid_params; struct pl_color_adjustment color_adjustment; struct pl_peak_detect_params peak_detect_params; struct pl_color_map_params color_map_params; struct pl_dither_params dither_params; struct pl_icc_params icc_params PL_DEPRECATED_IN(v6.327); struct pl_cone_params cone_params; struct pl_blend_params blend_params; struct pl_deinterlace_params deinterlace_params; struct pl_distort_params distort_params; // Backing storage for "custom" scalers. `params.upscaler` etc. will // always be a pointer either to a built-in pl_filter_config, or one of // these structs. `name`, `description` and `allowed` will always be // valid for the respective type of filter config. struct pl_filter_config upscaler; struct pl_filter_config downscaler; struct pl_filter_config plane_upscaler; struct pl_filter_config plane_downscaler; struct pl_filter_config frame_mixer; } *pl_options; // Allocate a new set of render params, with internally backed storage for // all parameters. Initialized to an "empty" config (PL_RENDER_DEFAULTS), // equivalent to `&pl_render_fast_params`. To initialize the struct instead to // the recommended default parameters, use `pl_options_reset` with // `pl_render_default_params`. // // If `log` is provided, errors related to parsing etc. will be logged there. PL_API pl_options pl_options_alloc(pl_log log); PL_API void pl_options_free(pl_options *opts); // Resets all options to their default values from a given struct. If `preset` // is NULL, `opts` is instead reset back to the initial "empty" configuration, // with all options disabled, as if it was freshly allocated. // // Note: This function will also reset structs which were not included in // `preset`, such as any custom upscalers. PL_API void pl_options_reset(pl_options opts, const struct pl_render_params *preset); typedef const struct pl_opt_data_t { // Original options struct. pl_options opts; // Triggering option for this callback invocation. pl_opt opt; // The raw data associated with this option. Always some pointer into // `opts`. Note that only PL_OPT_BOOL, PL_OPT_INT and PL_OPT_FLOAT have // a fixed representation, for other fields its usefulness is dubious. const void *value; // The underlying data, as a formatted, locale-invariant string. Lifetime // is limited until the return of this callback. const char *text; } *pl_opt_data; // Query a single option from `opts` by key, or NULL if none was found. 
// The resulting pointer is only valid until the next pl_options_* call. PL_API pl_opt_data pl_options_get(pl_options opts, const char *key); // Update an option from a formatted value string (see `pl_opt_data.text`). // This can be used for all type of options, even non-string ones. In this case, // `value` will be parsed according to the option type. // // Returns whether successful. PL_API bool pl_options_set_str(pl_options opts, const char *key, const char *value); // Programmatically iterate over options set in a `pl_options`, running the // provided callback on each entry. PL_API void pl_options_iterate(pl_options opts, void (*cb)(void *priv, pl_opt_data data), void *priv); // Serialize a `pl_options` structs to a comma-separated key/value string. The // returned string has a lifetime valid until either the next call to // `pl_options_save`, or until the `pl_options` is freed. PL_API const char *pl_options_save(pl_options opts); // Parse a `pl_options` struct from a key/value string, in standard syntax // "key1=value1,key2=value2,...", and updates `opts` with the new values. // Valid separators include whitespace, commas (,) and (semi)colons (:;). // // Returns true if no errors occurred. PL_API bool pl_options_load(pl_options opts, const char *str); // Helpers for interfacing with `opts->params.hooks`. Note that using any of // these helpers will overwrite the array by an internally managed pointer, // so care must be taken when combining them with external management of // this memory. Negative indices are possible and are counted relative to the // end of the list. // // Note: These hooks are *not* included in pl_options_save() and related. PL_API void pl_options_add_hook(pl_options opts, const struct pl_hook *hook); PL_API void pl_options_insert_hook(pl_options opts, const struct pl_hook *hook, int idx); PL_API void pl_options_remove_hook_at(pl_options opts, int idx); // Underlying options system and list // // Note: By necessity, this option list does not cover every single field // present in `pl_render_params`. In particular, fields like `info_callback`, // `lut` and `hooks` cannot be configured through the options system, as doing // so would require interop with C code or I/O. (However, see // `pl_options_add_hook` and related) enum pl_option_type { // Accepts `yes/no`, `on/off`, `true/false` and variants PL_OPT_BOOL, // Parsed as human-readable locale-invariant (C) numbers, scientific // notation accepted for floats PL_OPT_INT, PL_OPT_FLOAT, // Parsed as a short string containing only alphanumerics and _-, // corresponding to some name/identifier. Catch-all bucket for several // other types of options, such as presets, struct pointers, and functions // // Note: These options do not correspond to actual strings in C, the // underlying type of option will determine the values of `size` and // corresponding interpretation of pointers. PL_OPT_STRING, PL_OPT_TYPE_COUNT, }; struct pl_opt_t { // Programmatic key uniquely identifying this option. const char *key; // Longer, human readable friendly name const char *name; // Data type of option, affects how it is parsed. This field is purely // informative for the user, the actual implementation may vary. enum pl_option_type type; // Minimum/maximum value ranges for numeric options (int / float) // If both are 0.0, these limits are disabled/ignored. float min, max; // If true, this option is considered deprecated and may be removed // in the future. 
bool deprecated; // If true, this option is considered a 'preset' (read-only), which can // be loaded but not saved. (The equivalent underlying options this preset // corresponds to will be saved instead) bool preset; // Internal implementation details (for parsing/saving), opaque to user const void *priv; }; // A list of options, terminated by {0} for convenience PL_API extern const struct pl_opt_t pl_option_list[]; PL_API extern const int pl_option_count; // excluding terminating {0} // Returns the `pl_option` associated with a given key, or NULL PL_API pl_opt pl_find_option(const char *key); PL_API_END #endif // LIBPLACEBO_OPTIONS_H_ libplacebo-v7.349.0/src/include/libplacebo/renderer.h000066400000000000000000001220731463457750100224660ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #ifndef LIBPLACEBO_RENDERER_H_ #define LIBPLACEBO_RENDERER_H_ #include #include #include #include #include #include #include #include #include #include #include #include #include PL_API_BEGIN // Thread-safety: Unsafe typedef struct pl_renderer_t *pl_renderer; // Enum values used in `pl_render_errors` as bit positions for error flags enum pl_render_error { PL_RENDER_ERR_NONE = 0, PL_RENDER_ERR_FBO = 1 << 0, PL_RENDER_ERR_SAMPLING = 1 << 1, PL_RENDER_ERR_DEBANDING = 1 << 2, PL_RENDER_ERR_BLENDING = 1 << 3, PL_RENDER_ERR_OVERLAY = 1 << 4, PL_RENDER_ERR_PEAK_DETECT = 1 << 5, PL_RENDER_ERR_FILM_GRAIN = 1 << 6, PL_RENDER_ERR_FRAME_MIXING = 1 << 7, PL_RENDER_ERR_DEINTERLACING = 1 << 8, PL_RENDER_ERR_ERROR_DIFFUSION = 1 << 9, PL_RENDER_ERR_HOOKS = 1 << 10, PL_RENDER_ERR_CONTRAST_RECOVERY = 1 << 11, }; // Struct describing current renderer state, including internal processing errors, // as well as a list of signatures of disabled hooks. struct pl_render_errors { enum pl_render_error errors; // List containing signatures of disabled hooks const uint64_t *disabled_hooks; int num_disabled_hooks; }; // Creates a new renderer object, which is backed by a GPU context. This is a // high-level object that takes care of the rendering chain as a whole, from // the source textures to the finished frame. PL_API pl_renderer pl_renderer_create(pl_log log, pl_gpu gpu); PL_API void pl_renderer_destroy(pl_renderer *rr); // Returns current renderer state, see pl_render_errors. PL_API struct pl_render_errors pl_renderer_get_errors(pl_renderer rr); // Clears the error state of the renderer. If `errors` is NULL, all render errors will // be cleared. Otherwise only selected errors/hooks will be cleared. // If `PL_RENDER_ERR_HOOKS` is set and `num_disabled_hooks` is 0, clear all hooks. // Otherwise only selected hooks will be cleared based on `disabled_hooks` array.
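// A hedged usage sketch (illustrative, not normative): after rendering, check
// whether the renderer had to internally disable anything, and clear the
// state so that new failures can be distinguished from old ones:
//
//   struct pl_render_errors err = pl_renderer_get_errors(rr);
//   if (err.errors != PL_RENDER_ERR_NONE) {
//       // inspect individual bits, e.g. `err.errors & PL_RENDER_ERR_SAMPLING`
//       pl_renderer_reset_errors(rr, NULL);  // NULL clears all errors
//   }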
PL_API void pl_renderer_reset_errors(pl_renderer rr, const struct pl_render_errors *errors); enum pl_lut_type { PL_LUT_UNKNOWN = 0, PL_LUT_NATIVE, // applied to raw image contents (after fixing bit depth) PL_LUT_NORMALIZED, // applied to normalized (HDR) RGB values PL_LUT_CONVERSION, // LUT fully replaces color conversion // Note: When using a PL_LUT_CONVERSION to replace the YUV->RGB conversion, // `pl_render_params.color_adjustment` is no longer applied. Similarly, // when using a PL_LUT_CONVERSION to replace the image->target color space // conversion, `pl_render_params.color_map_params` are ignored. // // Note: For LUTs attached to the output frame, PL_LUT_CONVERSION should // instead perform the inverse (RGB->native) conversion. // // Note: PL_LUT_UNKNOWN tries inferring the meaning of the LUT from the // LUT's tagged metadata, and otherwise falls back to PL_LUT_NATIVE. }; enum pl_clear_mode { PL_CLEAR_COLOR = 0, // set texture to a solid color PL_CLEAR_TILES, // set texture to a tiled pattern PL_CLEAR_SKIP, // skip the clearing pass (no-op) PL_CLEAR_MODE_COUNT, }; enum pl_render_stage { PL_RENDER_STAGE_FRAME, // full frame redraws, for fresh/uncached frames PL_RENDER_STAGE_BLEND, // the output blend pass (only for pl_render_image_mix) PL_RENDER_STAGE_COUNT, }; struct pl_render_info { const struct pl_dispatch_info *pass; // information about the shader enum pl_render_stage stage; // the associated render stage // This specifies the chronological index of this pass within the frame and // stage (starting at `index == 0`). int index; // For PL_RENDER_STAGE_BLEND, this specifies the number of frames // being blended (since that results in a different shader). int count; }; // Represents the options used for rendering. These affect the quality of // the result. struct pl_render_params { // Configures the algorithms used for upscaling and downscaling, // respectively. If left as NULL, then libplacebo will only use inexpensive // sampling (bilinear or nearest neighbour depending on the capabilities // of the hardware / texture). // // Note: Setting `downscaler` to NULL also implies `skip_anti_aliasing`, // since the built-in GPU sampling algorithms can't anti-alias. // // Note: If set to the same address as the built-in `pl_filter_bicubic`, // `pl_filter_nearest` etc.; libplacebo will also use the more efficient // direct sampling algorithm where possible without quality loss. const struct pl_filter_config *upscaler; const struct pl_filter_config *downscaler; // If set, this overrides the value of `upscaler`/`downscaling` for // subsampled (chroma) planes. These scalers are used whenever the size of // multiple different `pl_plane`s in a single `pl_frame` differ, requiring // adaptation when converting to/from RGB. Note that a value of NULL simply // means "no override". To force built-in scaling explicitly, set this to // `&pl_filter_bilinear`. const struct pl_filter_config *plane_upscaler; const struct pl_filter_config *plane_downscaler; // The anti-ringing strength to apply to filters. See the equivalent option // in `pl_sample_filter_params` for more information. float antiringing_strength; // Configures the algorithm used for frame mixing (when using // `pl_render_image_mix`). Ignored otherwise. As a special requirement, // this must be a filter config with `polar` set to false, since it's only // used for 1D mixing and thus only 1D filters are compatible. // // If set to NULL, frame mixing is disabled, in which case // `pl_render_image_mix` will use nearest-neighbour semantics. 
(Note that // this still goes through the redraw cache, unless you also enable // `skip_caching_single_frame`) const struct pl_filter_config *frame_mixer; // Configures the settings used to deband source textures. Leaving this as // NULL disables debanding. // // Note: The `deband_params.grain` setting is automatically adjusted to // prevent blowing up on HDR sources. The user need not account for this. const struct pl_deband_params *deband_params; // Configures the settings used to sigmoidize the image before upscaling. // This is not always used. If NULL, disables sigmoidization. const struct pl_sigmoid_params *sigmoid_params; // Configures the color adjustment parameters used to decode the color. // This can be used to apply additional artistic settings such as // desaturation, etc. If NULL, defaults to &pl_color_adjustment_neutral. const struct pl_color_adjustment *color_adjustment; // Configures the settings used to detect the peak of the source content, // for HDR sources. Has no effect on SDR content. If NULL, peak detection // is disabled. const struct pl_peak_detect_params *peak_detect_params; // Configures the settings used to tone map from HDR to SDR, or from higher // gamut to standard gamut content. If NULL, defaults to // `&pl_color_map_default_params`. const struct pl_color_map_params *color_map_params; // Configures the settings used to dither to the output depth. Leaving this // as NULL disables dithering. const struct pl_dither_params *dither_params; // Configures the error diffusion kernel to use for error diffusion // dithering. If set, this will be used instead of `dither_params` whenever // possible. Leaving this as NULL disables error diffusion. const struct pl_error_diffusion_kernel *error_diffusion; // Configures the settings used to simulate color blindness, if desired. // If NULL, this feature is disabled. const struct pl_cone_params *cone_params; // Configures output blending. When rendering to the final target, the // framebuffer contents will be blended using this blend mode. Requires // that the target format has PL_FMT_CAP_BLENDABLE. NULL disables blending. const struct pl_blend_params *blend_params; // Configures the settings used to deinterlace frames (see // `pl_frame.field`), if required.. If NULL, deinterlacing is "disabled", // meaning interlaced frames are rendered as weaved frames instead. // // Note: As a consequence of how `pl_frame` represents individual fields, // and especially when using the `pl_queue`, this will still result in // frames being redundantly rendered twice. As such, it's highly // recommended to, instead, fully disable deinterlacing by not marking // source frames as interlaced in the first place. const struct pl_deinterlace_params *deinterlace_params; // If set, applies an extra distortion matrix to the image, after // scaling and before presenting it to the screen. Can be used for e.g. // fractional rotation. // // Note: The distortion canvas will be set to the size of `target->crop`, // so this cannot effectively draw outside the specified target area, // nor change the aspect ratio of the image. const struct pl_distort_params *distort_params; // List of custom user shaders / hooks. // See for more information. const struct pl_hook * const *hooks; int num_hooks; // Color mapping LUT. If present, this will be applied as part of the // image being rendered, in normalized RGB space. // // Note: In this context, PL_LUT_NATIVE means "gamma light" and // PL_LUT_NORMALIZED means "linear light". 
For HDR signals, normalized LUTs // are scaled so 1.0 corresponds to the `pl_color_transfer_nominal_peak`. // // Note: A PL_LUT_CONVERSION fully replaces the color adaptation from // `image` to `target`, including any tone-mapping (if necessary) and ICC // profiles. It has the same representation as PL_LUT_NATIVE, so in this // case the input and output are (respectively) non-linear light RGB. const struct pl_custom_lut *lut; enum pl_lut_type lut_type; // Controls the image background. The default (PL_CLEAR_AUTO) is equivalent // to PL_CLEAR_NONE, which passes through the alpha channel unmodified. (In // the case of no alpha channel, this implicitly blends against black) enum pl_clear_mode background; // Controls how the remaining empty space in the target is filled up, when // the image does not span the entire framebuffer. The default is equivalent // to PL_CLEAR_COLOR, in which case empty space is automatically colored // according to `background_color`. enum pl_clear_mode border; // The color to use for PL_CLEAR_COLOR. // // Note: Despite the name, this also affects `border = PL_CLEAR_COLOR`. float background_color[3]; float background_transparency; // 0.0 for opaque, 1.0 for fully transparent // The color and size to use for PL_CLEAR_TILES float tile_colors[2][3]; int tile_size; // If set to a value above 0.0, the output will be rendered with rounded // corners, as if an alpha transparency mask had been applied. The value // indicates the relative fraction of the side length to round - a value // of 1.0 rounds the corners as much as possible. float corner_rounding; // --- Performance / quality trade-off options: // These should generally be left off where quality is desired, as they can // degrade the result quite noticeably; but may be useful for older or // slower hardware. Note that libplacebo will automatically disable // advanced features on hardware where they are unsupported, regardless of // these settings. So only enable them if you need a performance bump. // Disables anti-aliasing on downscaling. This will result in moiré // artifacts and nasty, jagged pixels when downscaling, except for some // very limited special cases (e.g. bilinear downsampling to exactly 0.5x). // // Significantly speeds up downscaling with high downscaling ratios. bool skip_anti_aliasing; // Normally, when the size of the `target` used with `pl_render_image_mix` // changes, or the render parameters are updated, the internal cache of // mixed frames must be discarded in order to re-render all required // frames. Setting this option to `true` will skip the cache invalidation // and instead re-use the existing frames (with bilinear scaling to the new // size if necessary), which comes at a quality loss shortly after a // resize, but should make it much more smooth. bool preserve_mixing_cache; // --- Performance tuning / debugging options // These may affect performance or may make debugging problems easier, // but shouldn't have any effect on the quality. // Normally, `pl_render_image_mix` will also push single frames through the // mixer cache, in order to speed up re-draws. Enabling this option // disables that logic, causing single frames to bypass the cache. (Though // it will still read from, if they happen to already be cached) bool skip_caching_single_frame; // Disables linearization / sigmoidization before scaling. This might be // useful when tracking down unexpected image artifacts or excessing // ringing, but it shouldn't normally be necessary. 
bool disable_linear_scaling; // Forces the use of the "general" scaling algorithms even when using the // special-cased built-in presets like `pl_filter_bicubic`. Basically, this // disables the more efficient implementations in favor of the slower, // general-purpose ones. bool disable_builtin_scalers; // Forces correction of subpixel offsets (using the configured `upscaler`). bool correct_subpixel_offsets; // Forces the use of dithering, even when rendering to 16-bit FBOs. This is // generally pretty pointless because most 16-bit FBOs have high enough // depth that rounding errors are below the human perception threshold, // but this can be used to test the dither code. bool force_dither; // Disables the gamma-correct dithering logic which normally applies when // dithering to low bit depths. No real use, outside of testing. bool disable_dither_gamma_correction; // Completely overrides the use of FBOs, as if there were no renderable // texture format available. This disables most features. bool disable_fbos; // Use only low-bit-depth FBOs (8 bits). Note that this also implies // disabling linear scaling and sigmoidization. bool force_low_bit_depth_fbos; // If this is true, all shaders will be generated as "dynamic" shaders, // with any compile-time constants being replaced by runtime-adjustable // values. This is generally a performance loss, but has the advantage of // being able to freely change parameters without triggering shader // recompilations. // // It's a good idea to enable while presenting configurable settings to the // user, but it should be set to false once those values are "dialed in". bool dynamic_constants; // This callback is invoked for every pass successfully executed in the // process of rendering a frame. Optional. // // Note: `info` is only valid until this function returns. void (*info_callback)(void *priv, const struct pl_render_info *info); void *info_priv; // --- Deprecated/removed fields PL_DEPRECATED_IN(v6.254) bool allow_delayed_peak_detect; // moved to pl_peak_detect_params PL_DEPRECATED_IN(v6.327) const struct pl_icc_params *icc_params; // use pl_frame.icc PL_DEPRECATED_IN(v6.328) bool ignore_icc_profiles; // non-functional, just set pl_frame.icc to NULL PL_DEPRECATED_IN(v6.335) int lut_entries; // hard-coded as 256 PL_DEPRECATED_IN(v6.335) float polar_cutoff; // hard-coded as 1e-3 PL_DEPRECATED_IN(v7.346) bool skip_target_clearing; // `border_background = PL_BACKGROUND_NONE` PL_DEPRECATED_IN(v7.346) bool blend_against_tiles; // `background = PL_BACKGROUND_TILES` }; // Bare minimum parameters, with no features enabled. This is the fastest // possible configuration, and should therefore be fine on any system. #define PL_RENDER_DEFAULTS \ .color_map_params = &pl_color_map_default_params, \ .color_adjustment = &pl_color_adjustment_neutral, \ .tile_colors = {{0.93, 0.93, 0.93}, \ {0.87, 0.87, 0.87}}, \ .tile_size = 32, #define pl_render_params(...) (&(struct pl_render_params) { PL_RENDER_DEFAULTS __VA_ARGS__ }) PL_API extern const struct pl_render_params pl_render_fast_params; // This contains the default/recommended options for reasonable image quality, // while also not being too terribly slow. All of the *_params structs are // defaulted to the corresponding *_default_params, except for deband_params, // which is disabled by default. // // This should be fine on most integrated GPUs, but if it's too slow, // consider using `pl_render_fast_params` instead. 
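// As a hedged illustration of the `pl_render_params(...)` helper defined
// above: it expands to a pointer to a struct initialized from
// PL_RENDER_DEFAULTS, so only the fields being overridden need to be spelled
// out. The specific overrides below are arbitrary, and the `pl_render_image`
// signature is assumed from its declaration elsewhere in this header:
//
//   bool ok = pl_render_image(rr, &image, &target, pl_render_params(
//       .upscaler      = &pl_filter_ewa_lanczossharp,
//       .deband_params = &pl_deband_default_params,
//   ));
//
// Alternatively, copy one of the presets (such as `pl_render_default_params`,
// declared below) into a mutable struct and adjust individual fields there.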
PL_API extern const struct pl_render_params pl_render_default_params; // This contains a higher quality preset for better image quality at the cost // of quite a bit of performance. In addition to the settings implied by // `pl_render_default_params`, it enables debanding, sets the upscaler to // `pl_filter_ewa_lanczossharp`, and uses pl_*_high_quality_params structs where // available. This should only really be used with a discrete GPU and where // maximum image quality is desired. PL_API extern const struct pl_render_params pl_render_high_quality_params; #define PL_MAX_PLANES 4 // High level description of a single slice of an image. This basically // represents a single 2D plane, with any number of components struct pl_plane { // The texture underlying this plane. The texture must be 2D, and must // have specific parameters set depending on what the plane is being used // for (see `pl_render_image`). pl_tex texture; // The preferred behaviour when sampling outside of this texture. Optional, // since the default (PL_TEX_ADDRESS_CLAMP) is very reasonable. enum pl_tex_address_mode address_mode; // Controls whether or not the `texture` will be considered flipped // vertically with respect to the overall image dimensions. It's generally // preferable to flip planes using this setting instead of the crop in // cases where the flipping is the result of e.g. negative plane strides or // flipped framebuffers (OpenGL). // // Note that any planar padding (due to e.g. size mismatch or misalignment // of subsampled planes) is always at the physical end of the texture // (highest y coordinate) - even if this bool is true. However, any // subsampling shift (`shift_y`) is applied with respect to the flipped // direction. This ensures the correct interpretation when e.g. vertically // flipping 4:2:0 sources by flipping all planes. bool flipped; // Describes the number and interpretation of the components in this plane. // This defines the mapping from component index to the canonical component // order (RGBA, YCbCrA or XYZA). It's worth pointing out that this is // completely separate from `texture->format.sample_order`. The latter is // essentially irrelevant/transparent for the API user, since it just // determines which order the texture data shows up as inside the GLSL // shader; whereas this field controls the actual meaning of the component. // // Example; if the user has a plane with just {Y} and a plane with just // {Cb Cr}, and a GPU that only supports bgra formats, you would still // specify the component mapping as {0} and {1 2} respectively, even though // the GPU is sampling the data in the order BGRA. Use -1 for "ignored" // components. int components; // number of relevant components int component_mapping[4]; // semantic index of each component // Controls the sample offset, relative to the "reference" dimensions. For // an example of what to set here, see `pl_chroma_location_offset`. Note // that this is given in unit of reference pixels. For a graphical example, // imagine you have a 2x2 image with a 1x1 (subsampled) plane. Without any // shift (0.0), the situation looks like this: // // X-------X X = reference pixel // | | P = plane pixel // | P | // | | // X-------X // // For 4:2:0 subsampling, this corresponds to PL_CHROMA_CENTER. 
If the // shift_x was instead set to -0.5, the `P` pixel would be offset to the // left by half the separation between the reference (`X` pixels), resulting // in the following: // // X-------X X = reference pixel // | | P = plane pixel // P | // | | // X-------X // // For 4:2:0 subsampling, this corresponds to PL_CHROMA_LEFT. // // Note: It's recommended to fill this using `pl_chroma_location_offset` on // the chroma planes. float shift_x, shift_y; }; enum pl_overlay_mode { PL_OVERLAY_NORMAL = 0, // treat the texture as a normal, full-color texture PL_OVERLAY_MONOCHROME, // treat the texture as a single-component alpha map PL_OVERLAY_MODE_COUNT, }; enum pl_overlay_coords { PL_OVERLAY_COORDS_AUTO = 0, // equal to SRC/DST_FRAME, respectively PL_OVERLAY_COORDS_SRC_FRAME, // relative to the raw src frame PL_OVERLAY_COORDS_SRC_CROP, // relative to the src frame crop PL_OVERLAY_COORDS_DST_FRAME, // relative to the raw dst frame PL_OVERLAY_COORDS_DST_CROP, // relative to the dst frame crop PL_OVERLAY_COORDS_COUNT, // Note on rotations: If there is an end-to-end rotation between `src` and // `dst`, then any overlays relative to SRC_FRAME or SRC_CROP will be // rotated alongside the image, while overlays relative to DST_FRAME or // DST_CROP will not. }; struct pl_overlay_part { pl_rect2df src; // source coordinate with respect to `pl_overlay.tex` pl_rect2df dst; // target coordinates with respect to `pl_overlay.coords` // If `mode` is PL_OVERLAY_MONOCHROME, then this specifies the color of // this overlay part. The color is multiplied into the sampled texture's // first channel. float color[4]; }; // A struct representing an image overlay (e.g. for subtitles or on-screen // status messages, controls, ...) struct pl_overlay { // The texture containing the backing data for overlay parts. Must have // `params.sampleable` set. pl_tex tex; // This controls the coloring mode of this overlay. enum pl_overlay_mode mode; // Controls which coordinates this overlay is addressed relative to. enum pl_overlay_coords coords; // This controls the colorspace information for this overlay. The contents // of the texture / the value of `color` are interpreted according to this. struct pl_color_repr repr; struct pl_color_space color; // The number of parts for this overlay. const struct pl_overlay_part *parts; int num_parts; }; // High-level description of a complete frame, including metadata and planes struct pl_frame { // Each frame is split up into some number of planes, each of which may // carry several components and be of any size / offset. int num_planes; struct pl_plane planes[PL_MAX_PLANES]; // For interlaced frames. If set, this `pl_frame` corresponds to a single // field of the underlying source textures. `first_field` indicates which // of these fields is ordered first in time. `prev` and `next` should point // to the previous/next frames in the file, or NULL if there are none. // // Note: Setting these fields on the render target has no meaning and will // be ignored. enum pl_field field; enum pl_field first_field; const struct pl_frame *prev, *next; // If set, will be called immediately before GPU access to this frame. This // function *may* be used to, for example, perform synchronization with // external APIs (e.g. `pl_vulkan_hold/release`). If your mapping requires // a memcpy of some sort (e.g. pl_tex_transfer), users *should* instead do // the memcpy up-front and avoid the use of these callbacks - because they // might be called multiple times on the same frame. 
    //
    // This function *may* arbitrarily mutate the `pl_frame`, but it *should*
    // ideally only update `planes` - in particular, color metadata and so
    // forth should be provided up-front as best as possible. Note that changes
    // here will not be reflected back to the structs provided in the original
    // `pl_render_*` call (e.g. via `pl_frame_mix`).
    //
    // Note: Unless dealing with interlaced frames, only one frame will ever be
    // acquired at a time per `pl_render_*` call. So users *can* safely use
    // this with, for example, hwdec mappers that can only map a single frame
    // at a time. When using this with, for example, `pl_render_image_mix`,
    // each frame to be blended is acquired and released in succession, before
    // moving on to the next frame. For interlaced frames, the previous and
    // next frames must also be acquired simultaneously.
    bool (*acquire)(pl_gpu gpu, struct pl_frame *frame);

    // If set, will be called after a plane is done being used by the GPU,
    // *including* after any errors (e.g. `acquire` returning false).
    void (*release)(pl_gpu gpu, struct pl_frame *frame);

    // Color representation / encoding / semantics of this frame.
    struct pl_color_repr repr;
    struct pl_color_space color;

    // Optional ICC profile associated with this frame.
    pl_icc_object icc;

    // Alternative to `icc`, this can be used in cases where allocating and
    // tracking a pl_icc_object externally may be inconvenient. The resulting
    // profile will be managed internally by the pl_renderer.
    struct pl_icc_profile profile;

    // Optional LUT associated with this frame.
    const struct pl_custom_lut *lut;
    enum pl_lut_type lut_type;

    // The logical crop / rectangle containing the valid information, relative
    // to the reference plane's dimensions (e.g. luma). Pixels outside of this
    // rectangle will ostensibly be ignored, but note that this is not a hard
    // guarantee. In particular, scaler filters may end up sampling outside of
    // this crop. This rect may be flipped, and may be partially or wholly
    // outside the bounds of the underlying textures. (Optional)
    //
    // Note that `pl_render_image` will map the input crop directly to the
    // output crop, stretching and scaling as needed. If you wish to preserve
    // the aspect ratio, use a dedicated function like pl_rect2df_aspect_copy.
    pl_rect2df crop;

    // Logical rotation of the image, with respect to the underlying planes.
    // For example, if this is PL_ROTATION_90, then the image will be rotated
    // to the right by 90° when mapping to `crop`. The actual position on-screen
    // is unaffected, so users should ensure that the (rotated) aspect ratio
    // matches the source. (Or use a helper like `pl_rect2df_aspect_set_rot`)
    //
    // Note: For `target` frames, this corresponds to a rotation of the
    // display, for `image` frames, this corresponds to a rotation of the
    // camera.
    //
    // So, as an example, target->rotation = PL_ROTATION_90 means the end user
    // has rotated the display to the right by 90° (meaning rendering will be
    // rotated 90° to the *left* to compensate), and image->rotation =
    // PL_ROTATION_90 means the video provider has rotated the camera to the
    // right by 90° (so rendering will be rotated 90° to the *right* to
    // compensate).
    pl_rotation rotation;

    // A list of additional overlays associated with this frame. Note that these
    // will be rendered directly onto intermediate/cache frames, so changing any
    // of these overlays may require flushing the renderer cache.
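    //
    // Illustrative sketch: a single overlay part blitted 1:1 into the target
    // frame might be attached roughly as follows, assuming a hypothetical
    // sampleable `subs_tex` texture and a `frame` struct prepared by the
    // caller (see `pl_overlay` above):
    //
    //     struct pl_overlay_part part = {
    //         .src = { 0, 0, subs_tex->params.w, subs_tex->params.h },
    //         .dst = { 0, 0, subs_tex->params.w, subs_tex->params.h },
    //     };
    //     struct pl_overlay osd = {
    //         .tex       = subs_tex,
    //         .mode      = PL_OVERLAY_NORMAL,
    //         .coords    = PL_OVERLAY_COORDS_DST_FRAME,
    //         .parts     = &part,
    //         .num_parts = 1,
    //     };
    //     frame.overlays     = &osd;
    //     frame.num_overlays = 1;
    //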
const struct pl_overlay *overlays; int num_overlays; // Note on subsampling and plane correspondence: All planes belonging to // the same frame will only be stretched by an integer multiple (or inverse // thereof) in order to match the reference dimensions of this image. For // example, suppose you have an 8x4 image. A valid plane scaling would be // 4x2 -> 8x4 or 4x4 -> 4x4, but not 6x4 -> 8x4. So if a 6x4 plane is // given, then it would be treated like a cropped 8x4 plane (since 1.0 is // the closest scaling ratio to the actual ratio of 1.3). // // For an explanation of why this makes sense, consider the relatively // common example of a subsampled, oddly sized (e.g. jpeg) image. In such // cases, for example a 35x23 image, the 4:2:0 subsampled chroma plane // would have to end up as 17.5x11.5, which gets rounded up to 18x12 by // implementations. So in this example, the 18x12 chroma plane would get // treated by libplacebo as an oversized chroma plane - i.e. the plane // would get sampled as if it was 17.5 pixels wide and 11.5 pixels large. // Associated film grain data (see ). // // Note: This is ignored for the `target` of `pl_render_image`, since // un-applying grain makes little sense. struct pl_film_grain_data film_grain; // Ignored by libplacebo. May be useful for users. void *user_data; }; // Helper function to infer the chroma location offset for each plane in a // frame. This is equivalent to calling `pl_chroma_location_offset` on all // subsampled planes' shift_x/shift_y variables. PL_API void pl_frame_set_chroma_location(struct pl_frame *frame, enum pl_chroma_location chroma_loc); // Fills in a `pl_frame` based on a swapchain frame's FBO and metadata. PL_API void pl_frame_from_swapchain(struct pl_frame *out_frame, const struct pl_swapchain_frame *frame); // Helper function to determine if a frame is logically cropped or not. In // particular, this is useful in determining whether or not an output frame // needs to be cleared before rendering or not. PL_API bool pl_frame_is_cropped(const struct pl_frame *frame); // Helper function to reset a frame to a given RGB color. If the frame's // color representation is something other than RGB, the clear color will // be adjusted accordingly. `clear_color` should be non-premultiplied. PL_API void pl_frame_clear_rgba(pl_gpu gpu, const struct pl_frame *frame, const float clear_color[4]); // Like `pl_frame_clear_rgba` but without an alpha channel. static inline void pl_frame_clear(pl_gpu gpu, const struct pl_frame *frame, const float clear_color[3]) { const float clear_color_rgba[4] = { clear_color[0], clear_color[1], clear_color[2], 1.0 }; pl_frame_clear_rgba(gpu, frame, clear_color_rgba); } // Helper function to clear a frame to a fully tiled background. PL_API void pl_frame_clear_tiles(pl_gpu gpu, const struct pl_frame *frame, const float tile_colors[2][3], int tile_size); // Helper functions to return the fixed/inferred pl_frame parameters used // for rendering internally. Mutates `image` and `target` in-place to hold // the modified values, which are what will actually be used for rendering. // // This currently includes: // - Defaulting all missing pl_color_space/repr parameters // - Coalescing all rotation to the target // - Rounding and clamping the target crop to pixel boundaries and adjusting the // image crop correspondingly // // Note: This is idempotent and does not generally alter the effects of a // subsequent `pl_render_image` on the same pl_frame pair. 
(But see the // following warning) // // Warning: This does *not* call pl_frame.acquire/release, and so the returned // metadata *may* be incorrect if the acquire callback mutates the pl_frame in // nontrivial ways, in particular the crop and color space fields. PL_API void pl_frames_infer(pl_renderer rr, struct pl_frame *image, struct pl_frame *target); // Render a single image to a target using the given parameters. This is // fully dynamic, i.e. the params can change at any time. libplacebo will // internally detect and flush whatever caches are invalidated as a result of // changing colorspace, size etc. // // Required plane capabilities: // - Planes in `image` must be `sampleable` // - Planes in `target` must be `renderable` // // Recommended plane capabilities: (Optional, but good for performance) // - Planes in `image` should have `sample_mode` PL_TEX_SAMPLE_LINEAR // - Planes in `target` should be `storable` // - Planes in `target` should have `blit_dst` // // Note on lifetime: Once this call returns, the passed structures may be // freely overwritten or discarded by the caller, even the referenced // `pl_tex` objects may be freely reused. // // Note: `image` may be NULL, in which case `target.overlays` will still be // rendered, but nothing else. PL_API bool pl_render_image(pl_renderer rr, const struct pl_frame *image, const struct pl_frame *target, const struct pl_render_params *params); // Flushes the internal state of this renderer. This is normally not needed, // even if the image parameters, colorspace or target configuration change, // since libplacebo will internally detect such circumstances and recreate // outdated resources automatically. Doing this explicitly *may* be useful to // purge some state related to things like HDR peak detection or frame mixing, // so calling it is a good idea if the content source is expected to change // dramatically (e.g. when switching to a different file). PL_API void pl_renderer_flush_cache(pl_renderer rr); // Mirrors `pl_get_detected_hdr_metadata`, giving you the current internal peak // detection HDR metadata (when peak detection is active). Returns false if no // information is available (e.g. not HDR source, peak detection disabled). PL_API bool pl_renderer_get_hdr_metadata(pl_renderer rr, struct pl_hdr_metadata *metadata); // Represents a mixture of input frames, distributed temporally. // // NOTE: Frames must be sorted by timestamp, i.e. `timestamps` must be // monotonically increasing. struct pl_frame_mix { // The number of frames in this mixture. The number of frames should be // sufficient to meet the needs of the configured frame mixer. See the // section below for more information. // // If the number of frames is 0, this call will be equivalent to // `pl_render_image` with `image == NULL`. int num_frames; // A list of the frames themselves. The frames can have different // colorspaces, configurations of planes, or even sizes. // // Note: This is a list of pointers, to avoid users having to copy // around `pl_frame` structs when re-organizing this array. const struct pl_frame **frames; // A list of unique signatures, one for each frame. These are used to // identify frames across calls to this function, so it's crucial that they // be both unique per-frame but also stable across invocations of // `pl_render_frame_mix`. const uint64_t *signatures; // A list of relative timestamps for each frame. These are relative to the // time of the vsync being drawn, i.e. 
this function will render the frame // that will be made visible at timestamp 0.0. The values are expected to // be normalized such that a separation of 1.0 corresponds to roughly one // nominal source frame duration. So a constant framerate video file will // always have timestamps like e.g. {-2.3, -1.3, -0.3, 0.7, 1.7, 2.7}, // using an example radius of 3. // // In cases where the framerate is variable (e.g. VFR video), the choice of // what to scale to use can be difficult to answer. A typical choice would // be either to use the canonical (container-tagged) framerate, or the // highest momentary framerate, as a reference. If all else fails, you // could also use the display's framerate. // // Note: This function assumes zero-order-hold semantics, i.e. the frame at // timestamp 0.7 is intended to remain visible until timestamp 1.7, when // the next frame replaces it. const float *timestamps; // The duration for which the vsync being drawn will be held, using the // same scale as `timestamps`. If the display has an unknown or variable // frame-rate (e.g. Adaptive Sync), then you're probably better off not // using this function and instead just painting the frames directly using // `pl_render_frame` at the correct PTS. // // As an example, if `vsync_duration` is 0.4, then it's assumed that the // vsync being painted is visible for the period [0.0, 0.4]. float vsync_duration; // Explanation of the frame mixing radius: The algorithm chosen in // `pl_render_params.frame_mixer` has a canonical radius equal to // `pl_filter_config.kernel->radius`. This means that the frame mixing // algorithm will (only) need to consult all of the frames that have a // distance within the interval [-radius, radius]. As such, the user should // include all such frames in `frames`, but may prune or omit frames that // lie outside it. // // The built-in frame mixing (`pl_render_params.frame_mixer == NULL`) has // no concept of radius, it just always needs access to the "current" and // "next" frames. }; // Helper function to calculate the base frame mixing radius. // // Note: When the source FPS exceeds the display FPS, this radius must be // increased by the corresponding ratio. static inline float pl_frame_mix_radius(const struct pl_render_params *params) { // For backwards compatibility, allow !frame_mixer->kernel if (!params->frame_mixer || !params->frame_mixer->kernel) return 0.0; return params->frame_mixer->kernel->radius; } // Find closest frame to current PTS by zero-order hold semantics, or NULL. PL_API const struct pl_frame *pl_frame_mix_current(const struct pl_frame_mix *mix); // Find closest frame to current PTS by nearest neighbour semantics, or NULL. PL_API const struct pl_frame *pl_frame_mix_nearest(const struct pl_frame_mix *mix); // Render a mixture of images to the target using the given parameters. This // functions much like a generalization of `pl_render_image`, for when the API // user has more control over the frame queue / vsync loop, and can provide a // few frames from the past and future + timestamp information. // // This allows libplacebo to perform rudimentary frame mixing / interpolation, // in order to eliminate judder artifacts typically associated with // source/display frame rate mismatch. PL_API bool pl_render_image_mix(pl_renderer rr, const struct pl_frame_mix *images, const struct pl_frame *target, const struct pl_render_params *params); // Analog of `pl_frame_infer` corresponding to `pl_render_image_mix`. 
This // function will *not* mutate the frames contained in `mix`, and instead // return an adjusted copy of the "reference" frame for that image mix in // `out_refimage`, or {0} if the mix is empty. PL_API void pl_frames_infer_mix(pl_renderer rr, const struct pl_frame_mix *mix, struct pl_frame *target, struct pl_frame *out_ref); // Backwards compatibility with old filters API, may be deprecated. // Redundant with pl_filter_configs and masking `allowed` for // PL_FILTER_SCALING and PL_FILTER_FRAME_MIXING respectively. // A list of recommended frame mixer presets, terminated by {0} PL_API extern const struct pl_filter_preset pl_frame_mixers[]; PL_API extern const int pl_num_frame_mixers; // excluding trailing {0} // A list of recommended scaler presets, terminated by {0}. This is almost // equivalent to `pl_filter_presets` with the exception of including extra // built-in filters that don't map to the `pl_filter` architecture. PL_API extern const struct pl_filter_preset pl_scale_filters[]; PL_API extern const int pl_num_scale_filters; // excluding trailing {0} // Deprecated in favor of `pl_cache_save/pl_cache_load` on the `pl_cache` // associated with the `pl_gpu` this renderer is using. PL_DEPRECATED_IN(v6.323) PL_API size_t pl_renderer_save(pl_renderer rr, uint8_t *out_cache); PL_DEPRECATED_IN(v6.323) PL_API void pl_renderer_load(pl_renderer rr, const uint8_t *cache); PL_API_END #endif // LIBPLACEBO_RENDERER_H_ libplacebo-v7.349.0/src/include/libplacebo/shaders.h000066400000000000000000000273511463457750100223140ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #ifndef LIBPLACEBO_SHADERS_H_ #define LIBPLACEBO_SHADERS_H_ // This function defines the "direct" interface to libplacebo's GLSL shaders, // suitable for use in contexts where the user controls GLSL shader compilation // but wishes to include functions generated by libplacebo as part of their // own rendering process. This API is normally not used for operation with // libplacebo's higher-level constructs such as `pl_dispatch` or `pl_renderer`. #include PL_API_BEGIN // Thread-safety: Unsafe typedef struct pl_shader_t *pl_shader; struct pl_shader_params { // The `id` represents an abstract identifier for the shader, to avoid // collisions with other shaders being used as part of the same larger, // overarching shader. This is relevant for users which want to combine // multiple `pl_shader` objects together, in which case all `pl_shader` // objects should have a unique `id`. uint8_t id; // If `gpu` is non-NULL, then this `gpu` will be used to create objects // such as textures and buffers, or check for required capabilities, for // operations which depend on either of those. This is fully optional, i.e. // these GLSL primitives are designed to be used without a dependency on // `gpu` wherever possible - however, some features may not work, and will // be disabled even if requested. 
pl_gpu gpu; // The `index` represents an abstract frame index, which shaders may use // internally to do things like temporal dithering or seeding PRNGs. If the // user does not care about temporal dithering/debanding, or wants // deterministic rendering, this may safely be left as 0. Otherwise, it // should be incremented by 1 on successive frames. uint8_t index; // If `glsl.version` is nonzero, then this structure will be used to // determine the effective GLSL mode and capabilities. If `gpu` is also // set, then this overrides `gpu->glsl`. struct pl_glsl_version glsl; // If this is true, all constants in the shader will be replaced by // dynamic variables. This is mainly useful to avoid recompilation for // shaders which expect to have their values change constantly. bool dynamic_constants; }; #define pl_shader_params(...) (&(struct pl_shader_params) { __VA_ARGS__ }) // Creates a new, blank, mutable pl_shader object. // // Note: Rather than allocating and destroying many shaders, users are // encouraged to reuse them (using `pl_shader_reset`) for efficiency. PL_API pl_shader pl_shader_alloc(pl_log log, const struct pl_shader_params *params); // Frees a pl_shader and all resources associated with it. PL_API void pl_shader_free(pl_shader *sh); // Resets a pl_shader to a blank slate, without releasing internal memory. // If you're going to be re-generating shaders often, this function will let // you skip the re-allocation overhead. PL_API void pl_shader_reset(pl_shader sh, const struct pl_shader_params *params); // Returns whether or not a shader is in a "failed" state. Trying to modify a // shader in illegal ways (e.g. signature mismatch) will result in the shader // being marked as "failed". Since most pl_shader_ operations have a void // return type, the user can use this function to figure out whether a specific // shader operation has failed or not. This function is somewhat redundant // since `pl_shader_finalize` will also return NULL in this case. PL_API bool pl_shader_is_failed(const pl_shader sh); // Returns whether or not a pl_shader needs to be run as a compute shader. This // will never be the case unless the `pl_glsl_version` this `pl_shader` was // created using has `compute` support enabled. PL_API bool pl_shader_is_compute(const pl_shader sh); // Returns whether or not the shader has any particular output size // requirements. Some shaders, in particular those that sample from other // textures, have specific output size requirements which need to be respected // by the caller. If this is false, then the shader is compatible with every // output size. If true, the size requirements are stored into *w and *h. PL_API bool pl_shader_output_size(const pl_shader sh, int *w, int *h); // Indicates the type of signature that is associated with a shader result. // Every shader result defines a function that may be called by the user, and // this enum indicates the type of value that this function takes and/or // returns. // // Which signature a shader ends up with depends on the type of operation being // performed by a shader fragment, as determined by the user's calls. See below // for more information. enum pl_shader_sig { PL_SHADER_SIG_NONE = 0, // no input / void output PL_SHADER_SIG_COLOR, // vec4 color (normalized so that 1.0 is the ref white) // The following are only valid as input signatures: PL_SHADER_SIG_SAMPLER, // (gsampler* src_tex, vecN tex_coord) pair, // specifics depend on how the shader was generated }; // Structure encapsulating information about a shader. 
This is internally
// refcounted, to allow moving it around without having to create deep copies.
typedef const struct pl_shader_info_t {
    // A copy of the parameters used to create the shader.
    struct pl_shader_params params;

    // A list of friendly names for the semantic operations being performed by
    // this shader, e.g. "color decoding" or "debanding".
    const char **steps;
    int num_steps;

    // As a convenience, this contains a pretty-printed version of the
    // above list, with entries tallied and separated by commas.
    const char *description;
} *pl_shader_info;

PL_API pl_shader_info pl_shader_info_ref(pl_shader_info info);
PL_API void pl_shader_info_deref(pl_shader_info *info);

// Represents a finalized shader fragment. This is not a complete shader, but a
// collection of raw shader text together with description of the input
// attributes, variables and vertices it expects to be available.
struct pl_shader_res {
    // Descriptive information about the shader. Note that this reference is
    // attached to the shader itself - the user does not need to manually ref
    // or deref `info` unless they wish to move it elsewhere.
    pl_shader_info info;

    // The shader text, as literal GLSL. This will always be a function
    // definition, such that the function with the indicated name and
    // signature may be called by the user.
    const char *glsl;
    const char *name;
    enum pl_shader_sig input;  // what the function expects
    enum pl_shader_sig output; // what the function returns

    // For compute shaders (pl_shader_is_compute), this indicates the requested
    // work group size. Otherwise, both fields are 0. The interpretation of
    // these work groups is that they're tiled across the output image.
    int compute_group_size[2];

    // If this pass is a compute shader, this field indicates the shared memory
    // size requirements for this shader pass.
    size_t compute_shmem;

    // A set of input vertex attributes needed by this shader fragment.
    const struct pl_shader_va *vertex_attribs;
    int num_vertex_attribs;

    // A set of input variables needed by this shader fragment.
    const struct pl_shader_var *variables;
    int num_variables;

    // A list of input descriptors needed by this shader fragment.
    const struct pl_shader_desc *descriptors;
    int num_descriptors;

    // A list of compile-time constants used by this shader fragment.
    const struct pl_shader_const *constants;
    int num_constants;

    // --- Deprecated fields (see `info`)
    PL_DEPRECATED_IN(v6.266) struct pl_shader_params params;
    PL_DEPRECATED_IN(v6.266) const char **steps;
    PL_DEPRECATED_IN(v6.266) int num_steps;
    PL_DEPRECATED_IN(v6.266) const char *description;
};

// Represents a vertex attribute. The four values will be bound to the four
// corner vertices respectively, in row-wise order starting from the top left:
// data[0] data[1]
// data[2] data[3]
struct pl_shader_va {
    struct pl_vertex_attrib attr; // VA type, excluding `offset` and `location`
    const void *data[4];
};

// Represents a bound shared variable / descriptor
struct pl_shader_var {
    struct pl_var var;  // the underlying variable description
    const void *data;   // the raw data (as per `pl_var_host_layout`)
    bool dynamic;       // if true, the value is expected to change frequently
};

struct pl_buffer_var {
    struct pl_var var;
    struct pl_var_layout layout;
};

typedef uint16_t pl_memory_qualifiers;
enum {
    PL_MEMORY_COHERENT = 1 << 0, // supports synchronization across shader invocations
    PL_MEMORY_VOLATILE = 1 << 1, // all writes are synchronized automatically

    // Note: All descriptors are also implicitly assumed to have the 'restrict'
    // memory qualifier.
There is currently no way to override this behavior. }; struct pl_shader_desc { struct pl_desc desc; // descriptor type, excluding `int binding` struct pl_desc_binding binding; // contents of the descriptor binding // For PL_DESC_BUF_UNIFORM/STORAGE, this specifies the layout of the // variables contained by a buffer. Ignored for the other descriptor types struct pl_buffer_var *buffer_vars; int num_buffer_vars; // For storage images and buffers, this specifies additional memory // qualifiers on the descriptor. It's highly recommended to always use // at least PL_MEMORY_RESTRICT. Ignored for other descriptor types. pl_memory_qualifiers memory; }; // Represents a compile-time constant. This can be lowered to a specialization // constant to support cheaper recompilations. struct pl_shader_const { enum pl_var_type type; const char *name; const void *data; // If true, this constant *must* be a compile-time constant, which // basically just overrides `pl_shader_params.dynamic_constants`. Useful // for constants which will serve as inputs to e.g. array sizes. bool compile_time; }; // Finalize a pl_shader. It is no longer mutable at this point, and any further // attempts to modify it result in an error. (Functions which take a `const // pl_shader` argument do not modify the shader and may be freely // called on an already-finalized shader) // // The returned pl_shader_res is bound to the lifetime of the pl_shader - and // will only remain valid until the pl_shader is freed or reset. This function // may be called multiple times, and will produce the same result each time. // // This function will return NULL if the shader is considered to be in a // "failed" state (see pl_shader_is_failed). PL_API const struct pl_shader_res *pl_shader_finalize(pl_shader sh); // Shader objects represent abstract resources that shaders need to manage in // order to ensure their operation. This could include shader storage buffers, // generated lookup textures, or other sorts of configured state. The body // of a shader object is fully opaque; but the user is in charge of cleaning up // after them and passing them to the right shader passes. // // Note: pl_shader_obj objects must be initialized to NULL by the caller. typedef struct pl_shader_obj_t *pl_shader_obj; PL_API void pl_shader_obj_destroy(pl_shader_obj *obj); PL_API_END #endif // LIBPLACEBO_SHADERS_H_ libplacebo-v7.349.0/src/include/libplacebo/shaders/000077500000000000000000000000001463457750100221335ustar00rootroot00000000000000libplacebo-v7.349.0/src/include/libplacebo/shaders/colorspace.h000066400000000000000000000427011463457750100244420ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #ifndef LIBPLACEBO_SHADERS_COLORSPACE_H_ #define LIBPLACEBO_SHADERS_COLORSPACE_H_ // Color space transformation shaders. These all input and output a color // value (PL_SHADER_SIG_COLOR). 
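//
// Illustrative sketch: these helpers are typically chained on a single
// `pl_shader` handle `sh`, e.g. decoding to RGB and then mapping between two
// color spaces, assuming hypothetical `image_repr`, `image_csp`, `target_csp`
// and `target_repr` values describing the input and output:
//
//     struct pl_color_repr repr = image_repr;
//     pl_shader_decode_color(sh, &repr, NULL);   // -> normalized, PC-range RGB
//     pl_shader_color_map_ex(sh, NULL, pl_color_map_args(
//         .src = image_csp,
//         .dst = target_csp,
//     ));
//     pl_shader_encode_color(sh, &target_repr);  // -> target representation
//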
#include #include #include #include // For backwards compatibility #include PL_API_BEGIN // Transform the input color, in its given representation, to ensure // compatibility with the indicated alpha mode. Mutates `repr` to reflect the // change. Note that this is a no-op if the input is PL_ALPHA_UNKNOWN. PL_API void pl_shader_set_alpha(pl_shader sh, struct pl_color_repr *repr, enum pl_alpha_mode mode); // Colorspace reshaping for PL_COLOR_SYSTEM_DOLBYVISION. Note that this is done // automatically by `pl_shader_decode_color` for PL_COLOR_SYSTEM_DOLBYVISION. PL_API void pl_shader_dovi_reshape(pl_shader sh, const struct pl_dovi_metadata *data); // Decode the color into normalized RGB, given a specified color_repr. This // also takes care of additional pre- and post-conversions requires for the // "special" color systems (XYZ, BT.2020-C, etc.). If `params` is left as NULL, // it defaults to &pl_color_adjustment_neutral. // // Note: This function always returns PC-range RGB with independent alpha. // It mutates the pl_color_repr to reflect the change. // // Note: For DCDM XYZ decoding output is linear PL_API void pl_shader_decode_color(pl_shader sh, struct pl_color_repr *repr, const struct pl_color_adjustment *params); // Encodes a color from normalized, PC-range, independent alpha RGB into a // given representation. That is, this performs the inverse operation of // `pl_shader_decode_color` (sans color adjustments). // // Note: For DCDM XYZ encoding input is expected to be linear PL_API void pl_shader_encode_color(pl_shader sh, const struct pl_color_repr *repr); // Linearize (expand) `vec4 color`, given a specified color space. Shader // equivalent of `pl_color_linearize`. PL_API void pl_shader_linearize(pl_shader sh, const struct pl_color_space *csp); // Delinearize (compress), given a color space as output. Shader equivalent // of `pl_color_delinearize`. PL_API void pl_shader_delinearize(pl_shader sh, const struct pl_color_space *csp); struct pl_sigmoid_params { // The center (bias) of the sigmoid curve. Must be between 0.0 and 1.0. // If left as NULL, defaults to 0.75 float center; // The slope (steepness) of the sigmoid curve. Must be between 1.0 and 20.0. // If left as NULL, defaults to 6.5. float slope; }; #define PL_SIGMOID_DEFAULTS \ .center = 0.75, \ .slope = 6.50, #define pl_sigmoid_params(...) (&(struct pl_sigmoid_params) { PL_SIGMOID_DEFAULTS __VA_ARGS__ }) PL_API extern const struct pl_sigmoid_params pl_sigmoid_default_params; // Applies a sigmoidal color transform to all channels. This helps avoid // ringing artifacts during upscaling by bringing the color information closer // to neutral and away from the extremes. If `params` is NULL, it defaults to // &pl_sigmoid_default_params. // // Warning: This function clamps the input to the interval [0,1]; and as such // it should *NOT* be used on already-decoded high-dynamic range content. PL_API void pl_shader_sigmoidize(pl_shader sh, const struct pl_sigmoid_params *params); // This performs the inverse operation to `pl_shader_sigmoidize`. PL_API void pl_shader_unsigmoidize(pl_shader sh, const struct pl_sigmoid_params *params); struct pl_peak_detect_params { // Smoothing coefficient for the detected values. This controls the time // parameter (tau) of an IIR low pass filter. In other words, it represent // the cutoff period (= 1 / cutoff frequency) in frames. Frequencies below // this length will be suppressed. This helps block out annoying // "sparkling" or "flickering" due to small variations in frame-to-frame // brightness. 
If left as 0.0, this smoothing is completely disabled. float smoothing_period; // In order to avoid reacting sluggishly on scene changes as a result of // the low-pass filter, we disable it when the difference between the // current frame brightness and the average frame brightness exceeds a // given threshold difference. But rather than a single hard cutoff, which // would lead to weird discontinuities on fades, we gradually disable it // over a small window of brightness ranges. These parameters control the // lower and upper bounds of this window, in units of 1% PQ. // // Setting either one of these to 0.0 disables this logic. float scene_threshold_low; float scene_threshold_high; // Which percentile of the input image brightness histogram to consider as // the true peak of the scene. If this is set to 100 (or 0), the brightest // pixel is measured. Otherwise, the top of the frequency distribution is // progressively cut off. Setting this too low will cause clipping of very // bright details, but can improve the dynamic brightness range of scenes // with very bright isolated highlights. // // A recommended value is 99.995%, which is very conservative and should // cause no major issues in typical content. float percentile; // Black cutoff strength. To prevent unnatural pixel shimmer and excessive // darkness in mostly black scenes, as well as avoid black bars from // affecting the content, (smoothly) cut off any value below this (PQ%) // threshold. Defaults to 1.0, or 1% PQ. // // Setting this to 0.0 (or a negative value) disables this functionality. float black_cutoff; // Allows the peak detection result to be delayed by up to a single frame, // which can sometimes improve thoughput, at the cost of introducing the // possibility of 1-frame flickers on transitions. Disabled by default. bool allow_delayed; // --- Deprecated / removed fields PL_DEPRECATED_IN(v6.313) float minimum_peak; }; #define PL_PEAK_DETECT_DEFAULTS \ .smoothing_period = 20.0f, \ .scene_threshold_low = 1.0f, \ .scene_threshold_high = 3.0f, \ .percentile = 100.0f, \ .black_cutoff = 1.0f, #define PL_PEAK_DETECT_HQ_DEFAULTS \ PL_PEAK_DETECT_DEFAULTS \ .percentile = 99.995f, #define pl_peak_detect_params(...) (&(struct pl_peak_detect_params) { PL_PEAK_DETECT_DEFAULTS __VA_ARGS__ }) PL_API extern const struct pl_peak_detect_params pl_peak_detect_default_params; PL_API extern const struct pl_peak_detect_params pl_peak_detect_high_quality_params; // This function can be used to measure the CLL and FALL of a video // source automatically, using a compute shader. The measured values are // smoothed automatically (depending on the parameters), so to keep track of // the measured results over time, a tone mapping shader state object is used // to hold the state. Returns false on failure initializing the tone mapping // object, or if compute shaders are not supported. // // It's important that the same shader object is used for successive frames // belonging to the same source. If the source changes (e.g. due to a file // change or seek), the user should reset it with `pl_reset_detected_peak` (or // destroy it and use a new state object). // // The parameter `csp` holds the representation of the color values that are // the input to this function. (They must already be in decoded RGB form, i.e. 
// alternate color representations are not supported) PL_API bool pl_shader_detect_peak(pl_shader sh, struct pl_color_space csp, pl_shader_obj *state, const struct pl_peak_detect_params *params); // After dispatching the above shader, this function can be used to retrieve // the detected dynamic HDR10+ metadata parameters. The other fields of // `metadata` are not written to. Returns whether or not any values were // written. If not, the values are left untouched, so this can be used to // safely update `pl_hdr_metadata` values in-place. This function may or may // not block, depending on the previous setting of `allow_delayed`. PL_API bool pl_get_detected_hdr_metadata(const pl_shader_obj state, struct pl_hdr_metadata *metadata); // Resets the peak detection state in a given tone mapping state object. This // is not equal to `pl_shader_obj_destroy`, because it does not destroy any // state used by `pl_shader_tone_map`. PL_API void pl_reset_detected_peak(pl_shader_obj state); // Feature map extraction (for pl_color_map_args.feature_map). The result // of this shader should be downscaled / low-passed to the indicated kernel // size before use. (This does not happen automatically) PL_API void pl_shader_extract_features(pl_shader sh, struct pl_color_space csp); // Deprecated and unused. Libplacebo now always performs a variant of the old // hybrid tone-mapping, mixing together the intensity (I) and per-channel (LMS) // results. enum pl_tone_map_mode { PL_TONE_MAP_AUTO PL_DEPRECATED_ENUM_IN(v6.269), PL_TONE_MAP_RGB PL_DEPRECATED_ENUM_IN(v6.269), PL_TONE_MAP_MAX PL_DEPRECATED_ENUM_IN(v6.269), PL_TONE_MAP_HYBRID PL_DEPRECATED_ENUM_IN(v6.269), PL_TONE_MAP_LUMA PL_DEPRECATED_ENUM_IN(v6.269), PL_TONE_MAP_MODE_COUNT, }; // Deprecated by enum pl_gamut_mode { PL_GAMUT_CLIP PL_DEPRECATED_ENUM_IN(v6.269), // pl_gamut_map_clip PL_GAMUT_WARN PL_DEPRECATED_ENUM_IN(v6.269), // pl_gamut_map_highlight PL_GAMUT_DARKEN PL_DEPRECATED_ENUM_IN(v6.269), // pl_gamut_map_darken PL_GAMUT_DESATURATE PL_DEPRECATED_ENUM_IN(v6.269), // pl_gamut_map_desaturate PL_GAMUT_MODE_COUNT, }; struct pl_color_map_params { // --- Gamut mapping options // Gamut mapping function to use to handle out-of-gamut colors, including // colors which are out-of-gamut as a consequence of tone mapping. const struct pl_gamut_map_function *gamut_mapping; // Gamut mapping constants, for expert tuning. Leave as default otherwise. struct pl_gamut_map_constants gamut_constants; // Gamut mapping 3DLUT size, for channels ICh. Defaults to {48, 32, 256} int lut3d_size[3]; // Use higher quality, but slower, tricubic interpolation for gamut mapping // 3DLUTs. May substantially improve the 3DLUT gamut mapping accuracy, in // particular at smaller 3DLUT sizes. Shouldn't have much effect at the // default size. bool lut3d_tricubic; // If true, allows the gamut mapping function to expand the gamut, in // cases where the target gamut exceeds that of the source. If false, // the source gamut will never be enlarged, even when using a gamut // mapping function capable of bidirectional mapping. bool gamut_expansion; // --- Tone mapping options // Tone mapping function to use to handle out-of-range colors. const struct pl_tone_map_function *tone_mapping_function; // Tone mapping constants, for expert tuning. Leave as default otherwise. struct pl_tone_map_constants tone_constants; // If true, and supported by the given tone mapping function, libplacebo // will perform inverse tone mapping to expand the dynamic range of a // signal. 
libplacebo is not liable for any HDR-induced eye damage. bool inverse_tone_mapping; // Data source to use when tone-mapping. Setting this to a specific // value allows overriding the default metadata preference logic. enum pl_hdr_metadata_type metadata; // Tone mapping LUT size. Defaults to 256. int lut_size; // HDR contrast recovery strength. If set to a value above 0.0, the source // image will be divided into high-frequency and low-frequency components, // and a portion of the high-frequency image is added back onto the // tone-mapped output. May cause excessive ringing artifacts for some HDR // sources, but can improve the subjective sharpness and detail left over // in the image after tone-mapping. float contrast_recovery; // Contrast recovery lowpass kernel size. Defaults to 3.5. Increasing // or decreasing this will affect the visual appearance substantially. float contrast_smoothness; // --- Debugging options // Force the use of a full tone-mapping LUT even for functions that have // faster pure GLSL replacements (e.g. clip, linear, saturation). bool force_tone_mapping_lut; // Visualize the tone-mapping LUT and gamut mapping 3DLUT, in IPT space. bool visualize_lut; // Controls where to draw the visualization, relative to the rendered // video (dimensions 0-1). Optional, defaults to the full picture. pl_rect2df visualize_rect; // Controls the rotation of the 3DLUT visualization. float visualize_hue; // useful range [-pi, pi] float visualize_theta; // useful range [0, pi/2] // Graphically highlight hard-clipped pixels during tone-mapping (i.e. // pixels that exceed the claimed source luminance range). bool show_clipping; // --- Deprecated fields PL_DEPRECATED_IN(v6.269) enum pl_tone_map_mode tone_mapping_mode; // removed PL_DEPRECATED_IN(v6.311) float tone_mapping_param; // see `tone_constants` PL_DEPRECATED_IN(v6.269) float tone_mapping_crosstalk; // now hard-coded as 0.04 PL_DEPRECATED_IN(v6.269) enum pl_rendering_intent intent; // see `gamut_mapping` PL_DEPRECATED_IN(v6.269) enum pl_gamut_mode gamut_mode; // see `gamut_mapping` PL_DEPRECATED_IN(v6.290) float hybrid_mix; // removed }; #define PL_COLOR_MAP_DEFAULTS \ .gamut_mapping = &pl_gamut_map_perceptual, \ .tone_mapping_function = &pl_tone_map_spline, \ .gamut_constants = { PL_GAMUT_MAP_CONSTANTS }, \ .tone_constants = { PL_TONE_MAP_CONSTANTS }, \ .metadata = PL_HDR_METADATA_ANY, \ .lut3d_size = {48, 32, 256}, \ .lut_size = 256, \ .visualize_rect = {0, 0, 1, 1}, \ .contrast_smoothness = 3.5f, #define PL_COLOR_MAP_HQ_DEFAULTS \ PL_COLOR_MAP_DEFAULTS \ .contrast_recovery = 0.30f, #define pl_color_map_params(...) (&(struct pl_color_map_params) { PL_COLOR_MAP_DEFAULTS __VA_ARGS__ }) PL_API extern const struct pl_color_map_params pl_color_map_default_params; PL_API extern const struct pl_color_map_params pl_color_map_high_quality_params; // Execution arguments for the `pl_shader_color_map_ex` call. Distinct from // `pl_color_map_params` because it is filled by internally-provided execution // metadata, instead of user-tunable aesthetic parameters. struct pl_color_map_args { // Input/output color space for the mapping. struct pl_color_space src; struct pl_color_space dst; // If true, the logic will assume the input has already been linearized by // the caller (e.g. as part of a previous linear light scaling operation). bool prelinearized; // Object to be used to store generated LUTs. 
Note that this is the same // state object used by `pl_shader_detect_peak`, and if that function has // been called on `state` prior to `pl_shader_color_map`, the detected // values will be used to guide the tone mapping algorithm. If this is not // provided, tone/gamut mapping are disabled. pl_shader_obj *state; // Low-resolution intensity feature map, as generated by // `pl_shader_extract_features`. Optional. No effect if // `params->contrast_recovery` is disabled. pl_tex feature_map; }; #define pl_color_map_args(...) (&(struct pl_color_map_args) { __VA_ARGS__ }) // Maps `vec4 color` from one color space to another color space according // to the parameters (described in greater depth above). If `params` is left // as NULL, it defaults to `&pl_color_map_default_params` PL_API void pl_shader_color_map_ex(pl_shader sh, const struct pl_color_map_params *params, const struct pl_color_map_args *args); // Backwards compatibility wrapper around `pl_shader_color_map_ex` PL_API void pl_shader_color_map(pl_shader sh, const struct pl_color_map_params *params, struct pl_color_space src, struct pl_color_space dst, pl_shader_obj *state, bool prelinearized); // Applies a set of cone distortion parameters to `vec4 color` in a given color // space. This can be used to simulate color blindness. See `pl_cone_params` // for more information. PL_API void pl_shader_cone_distort(pl_shader sh, struct pl_color_space csp, const struct pl_cone_params *params); PL_API_END #endif // LIBPLACEBO_SHADERS_COLORSPACE_H_ libplacebo-v7.349.0/src/include/libplacebo/shaders/custom.h000066400000000000000000000326031463457750100236220ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #ifndef LIBPLACEBO_SHADERS_CUSTOM_H_ #define LIBPLACEBO_SHADERS_CUSTOM_H_ #include // Functions for writing custom shaders and hooking them into the `pl_renderer` // pipeline, as well as compatibility functions for parsing shaders in mpv // format. #include #include #include PL_API_BEGIN // Parameters describing custom shader text to be embedded into a `pl_shader` // object. All of the strings are optional and can be left as NULL, but without // a `body` in particular, the shader will do nothing useful on its own. struct pl_custom_shader { // The prelude contains text such as extra #defines, #extension pragmas, // or other parts of the shader that must be placed at the very // beginning (before input layout declarations etc.) // // Note: #extension pragmas do not need to be emitted to enable support for // resource types already attached to the shader (e.g. SSBOs), compute // shaders, or GPU capabilities known to libplacebo (e.g. subgroups). const char *prelude; // The header contains text such as helper function definitions, extra // uniforms, shared memory variables or buffer descriptions. const char *header; // A friendly name for the shader. (Optional) const char *description; // The "primary" GLSL code. 
This will be effectively appended to the "main" // function. It lives in an environment given by the `input` signature, and // is expected to return results in a way given by the `output` signature. // // Note: In the case of PL_SHADER_SIG_COLOR, the output `vec4 color` is // allocated by `pl_shader_custom`, the user merely needs to assign to it. // // Note: For ease of development it can be useful to have the main logic // live inside a helper function defined as part of `header`, and specify // the `body` as a single line that simply calls the helper function. const char *body; enum pl_shader_sig input; enum pl_shader_sig output; // Extra descriptors, variables and vertex attributes to attach to the // resulting `pl_shader_res`. // // Note: The names inside these will possibly be replaced by fresh // identifiers internally, so users should avoid looking for exact string // matches for the given names inside the `pl_shader_res`. const struct pl_shader_desc *descriptors; int num_descriptors; const struct pl_shader_var *variables; int num_variables; const struct pl_shader_va *vertex_attribs; int num_vertex_attribs; const struct pl_shader_const *constants; int num_constants; // If true, this shader must be a compute shader. The desired workgroup // size and shared memory usage can be optionally specified, or 0 if no // specific work group size or shared memory size restrictions apply. // // See also: `pl_shader_res.compute_group_size` bool compute; size_t compute_shmem; int compute_group_size[2]; // Fixes the output size requirements of the shader to exact dimensions. // Optional, if left as 0, means the shader can be dispatched at any size. int output_w; int output_h; }; // Append custom shader code, including extra descriptors and variables, to an // existing `pl_shader` object. Returns whether successful. This function may // fail in the event that e.g. the custom shader requires compute shaders on // an unsupported GPU, or exceeds the GPU's shared memory capabilities. PL_API bool pl_shader_custom(pl_shader sh, const struct pl_custom_shader *params); // Which "rendering stages" are available for user shader hooking purposes. // Except where otherwise noted, all stages are "non-resizable", i.e. the // shaders already have specific output size requirements. enum pl_hook_stage { // Hook stages for the untouched planes, as made available by the source. // These are all resizable, i.e. there are no specific output stage // requirements. 
PL_HOOK_RGB_INPUT = 1 << 0, PL_HOOK_LUMA_INPUT = 1 << 1, PL_HOOK_CHROMA_INPUT = 1 << 2, PL_HOOK_ALPHA_INPUT = 1 << 3, PL_HOOK_XYZ_INPUT = 1 << 4, // Hook stages for the scaled/aligned planes PL_HOOK_CHROMA_SCALED = 1 << 5, PL_HOOK_ALPHA_SCALED = 1 << 6, PL_HOOK_NATIVE = 1 << 7, // Combined image in its native color space PL_HOOK_RGB = 1 << 8, // After conversion to RGB (resizable) PL_HOOK_LINEAR = 1 << 9, // After linearization but before scaling PL_HOOK_SIGMOID = 1 << 10, // After sigmoidization PL_HOOK_PRE_KERNEL = 1 << 11, // Immediately before the main scaler kernel PL_HOOK_POST_KERNEL = 1 << 12, // Immediately after the main scaler kernel PL_HOOK_SCALED = 1 << 13, // After scaling, before color management PL_HOOK_PRE_OUTPUT = 1 << 14, // After color management, before blending/rotation PL_HOOK_OUTPUT = 1 << 15, // After blending/rotation, before dithering }; // Returns true if a given hook stage is resizable static inline bool pl_hook_stage_resizable(enum pl_hook_stage stage) { switch (stage) { case PL_HOOK_RGB_INPUT: case PL_HOOK_LUMA_INPUT: case PL_HOOK_CHROMA_INPUT: case PL_HOOK_ALPHA_INPUT: case PL_HOOK_XYZ_INPUT: case PL_HOOK_NATIVE: case PL_HOOK_RGB: return true; case PL_HOOK_CHROMA_SCALED: case PL_HOOK_ALPHA_SCALED: case PL_HOOK_LINEAR: case PL_HOOK_SIGMOID: case PL_HOOK_PRE_KERNEL: case PL_HOOK_POST_KERNEL: case PL_HOOK_SCALED: case PL_HOOK_PRE_OUTPUT: case PL_HOOK_OUTPUT: return false; } abort(); } // The different forms of communicating image data between the renderer and // the hooks enum pl_hook_sig { PL_HOOK_SIG_NONE, // No data is passed, no data is received/returned PL_HOOK_SIG_COLOR, // `vec4 color` already pre-sampled in a `pl_shader` PL_HOOK_SIG_TEX, // `pl_tex` containing the image data PL_HOOK_SIG_COUNT, }; struct pl_hook_params { // GPU objects associated with the `pl_renderer`, which the user may // use for their own purposes. pl_gpu gpu; pl_dispatch dispatch; // Helper function to fetch a new temporary texture, using renderer-backed // storage. This is guaranteed to have sane image usage requirements and a // 16-bit or floating point format. The user does not need to free/destroy // this texture in any way. May return NULL. pl_tex (*get_tex)(void *priv, int width, int height); void *priv; // Which stage triggered the hook to run. enum pl_hook_stage stage; // For `PL_HOOK_SIG_COLOR`, this contains the existing shader object with // the color already pre-sampled into `vec4 color`. The user may modify // this as much as they want, as long as they don't dispatch/finalize/reset // it. // // Note that this shader might have specific output size requirements, // depending on the exact shader stage hooked by the user, and may already // be a compute shader. pl_shader sh; // For `PL_HOOK_SIG_TEX`, this contains the texture that the user should // sample from. // // Note: This texture object is owned by the renderer, and users must not // modify its contents. It will not be touched for the duration of a frame, // but the contents are lost in between frames. pl_tex tex; // The effective current rectangle of the image we're rendering in this // shader, i.e. the effective rect of the content we're interested in, // as a crop of either `sh` or `tex` (depending on the signature). // // Note: This is still set even for `PL_HOOK_SIG_NONE`! pl_rect2df rect; // The current effective colorspace and representation, of either the // pre-sampled color (in `sh`), or the contents of `tex`, respectively. // // Note: This is still set even for `PL_HOOK_SIG_NONE`! 
struct pl_color_repr repr; struct pl_color_space color; int components; // The representation and colorspace of the original image, for reference. const struct pl_color_repr *orig_repr; const struct pl_color_space *orig_color; // The (cropped) source and destination rectangles of the overall // rendering. These are functionallty equivalent to `image.crop` and // `target.crop`, respectively, but `src_rect` in particular may change as // a result of previous hooks being executed. (e.g. prescalers) pl_rect2df src_rect; pl_rect2d dst_rect; }; struct pl_hook_res { // If true, the hook is assumed to have "failed" or errored in some way, // and all other fields are ignored. bool failed; // What type of output this hook is returning. // Note: If this is `PL_HOOK_SIG_NONE`, all other fields are ignored. enum pl_hook_sig output; // For `PL_HOOK_SIG_COLOR`, this *must* be set to a valid `pl_shader` // object containing the sampled color value (i.e. with an output signature // of `PL_SHADER_SIG_COLOR`), and *should* be allocated from the given // `pl_dispatch` object. Ignored otherwise. pl_shader sh; // For `PL_HOOK_SIG_TEX`, this *must* contain the texture object containing // the result of rendering the hook. This *should* be a texture allocated // using the given `get_tex` callback, to ensure the format and texture // usage flags are compatible with what the renderer expects. pl_tex tex; // For shaders that return some sort of output, this contains the // new/altered versions of the existing "current texture" metadata. struct pl_color_repr repr; struct pl_color_space color; int components; // This contains the new effective rect of the contents. This may be // different from the original `rect` for resizable passes. Ignored for // non-resizable passes. pl_rect2df rect; }; enum pl_hook_par_mode { PL_HOOK_PAR_VARIABLE, // normal shader variable PL_HOOK_PAR_DYNAMIC, // dynamic shader variable, e.g. per-frame changing PL_HOOK_PAR_CONSTANT, // fixed at compile time (e.g. for array sizes), // must be scalar (non-vector/matrix) PL_HOOK_PAR_DEFINE, // defined in the preprocessor, must be `int` PL_HOOK_PAR_MODE_COUNT, }; typedef union pl_var_data { int i; unsigned u; float f; } pl_var_data; struct pl_hook_par { // Name as used in the shader. const char *name; // Type of this shader parameter, and how it's manifested in the shader. enum pl_var_type type; enum pl_hook_par_mode mode; // Human-readable explanation of this parameter. (Optional) const char *description; // Mutable data pointer to current value of variable. pl_var_data *data; // Default/initial value, and lower/upper bounds. pl_var_data initial; pl_var_data minimum; pl_var_data maximum; // Human-readable names for the variants of an integer option. This array // can be indexed directly by integer values, ranging from `minimum.i` to // `maximum.i`. May be NULL, in which case options are unnamed. const char * const *names; }; // Struct describing a hook. // // Note: Users may freely create their own instances of this struct, there is // nothing particularly special about `pl_mpv_user_shader_parse`. struct pl_hook { enum pl_hook_stage stages; // Which stages to hook on enum pl_hook_sig input; // Which input signature this hook expects void *priv; // Arbitrary user context // Custom tunable shader parameters exported by this hook. These may be // updated at any time by the user, to influence the behavior of the hook. // Contents are arbitrary and subject to the method of hook construction. 
const struct pl_hook_par *parameters; int num_parameters; // Called at the beginning of passes, to reset/initialize the hook. (Optional) void (*reset)(void *priv); // The hook function itself. Called by the renderer at any of the indicated // hook stages. See `pl_hook_res` for more info on the return values. struct pl_hook_res (*hook)(void *priv, const struct pl_hook_params *params); // Unique signature identifying this hook, used to disable misbehaving hooks. // All hooks with the same signature will be disabled, should they fail to // execute during run-time. uint64_t signature; }; // Compatibility layer with `mpv` user shaders. See the mpv man page for more // information on the format. Will return `NULL` if the shader fails parsing. // // The resulting `pl_hook` objects should be destroyed with the corresponding // destructor when no longer needed. PL_API const struct pl_hook * pl_mpv_user_shader_parse(pl_gpu gpu, const char *shader_text, size_t shader_len); PL_API void pl_mpv_user_shader_destroy(const struct pl_hook **hook); PL_API_END #endif // LIBPLACEBO_SHADERS_CUSTOM_H_ libplacebo-v7.349.0/src/include/libplacebo/shaders/deinterlacing.h000066400000000000000000000120671463457750100251220ustar00rootroot00000000000000 /* * This file is part of libplacebo, which is normally licensed under the terms * of the LGPL v2.1+. However, this file (film_grain.h) is also available under * the terms of the more permissive MIT license: * * Copyright (c) 2018-2019 Niklas Haas * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #ifndef LIBPLACEBO_SHADERS_DEINTERLACING_H_ #define LIBPLACEBO_SHADERS_DEINTERLACING_H_ #include PL_API_BEGIN enum pl_field { PL_FIELD_NONE = 0, // no deinterlacing PL_FIELD_EVEN, // "top" fields, with even y coordinates PL_FIELD_ODD, // "bottom" fields, with odd y coordinates // Convenience aliases PL_FIELD_TOP = PL_FIELD_EVEN, PL_FIELD_BOTTOM = PL_FIELD_ODD, }; static inline enum pl_field pl_field_other(enum pl_field field) { switch (field) { case PL_FIELD_EVEN: return PL_FIELD_ODD; case PL_FIELD_ODD: return PL_FIELD_EVEN; default: return field; } } struct pl_field_pair { // Top texture. If only this is specified, it's assumed to contain both // fields in an interleaved fashion (MBAFF). // // Note: Support for separate fields (PAFF), is currently pending, so this // is the only way to provide interlaced frames at the moment. pl_tex top; }; #define pl_field_pair(...) 
((struct pl_field_pair) { __VA_ARGS__ }) struct pl_deinterlace_source { // Previous, current and next source (interlaced) frames. `prev` and `next` // may be NULL, but `cur` is required. If present, they must all have the // exact same texture dimensions. // // Note: `prev` and `next` are only required for PL_DEINTERLACE_YADIF. struct pl_field_pair prev, cur, next; // The parity of the current field to output. This field will be unmodified // from `cur`, with the corresponding other field interpolated. // // If this is `PL_FIELD_NONE`, no deinterlacing is performed, and the // texture is merely sampled as-is. enum pl_field field; // The parity of the first frame in a stream. Set this the field that is // (conceptually) ordered first in time. // // If this is `PL_FIELD_NONE`, it will instead default to `PL_FIELD_TOP`. enum pl_field first_field; // Components to deinterlace. Components not specified will be ignored. // Optional, if left as 0, all components will be deinterlaced. uint8_t component_mask; }; #define pl_deinterlace_source(...) (&(struct pl_deinterlace_source) { __VA_ARGS__ }) enum pl_deinterlace_algorithm { // No-op deinterlacing, just sample the weaved frame un-touched. PL_DEINTERLACE_WEAVE = 0, // Naive bob deinterlacing. Doubles the field lines vertically. PL_DEINTERLACE_BOB, // "Yet another deinterlacing filter". Deinterlacer with temporal and // spatial information. Based on FFmpeg's Yadif filter algorithm, but // adapted slightly for the GPU. PL_DEINTERLACE_YADIF, PL_DEINTERLACE_ALGORITHM_COUNT, }; // Returns whether or not an algorithm requires `prev`/`next` refs to be set. static inline bool pl_deinterlace_needs_refs(enum pl_deinterlace_algorithm algo) { return algo == PL_DEINTERLACE_YADIF; } struct pl_deinterlace_params { // Algorithm to use. The recommended default is PL_DEINTERLACE_YADIF, which // provides a good trade-off of quality and speed. enum pl_deinterlace_algorithm algo; // Skip the spatial interlacing check. (PL_DEINTERLACE_YADIF only) bool skip_spatial_check; }; #define PL_DEINTERLACE_DEFAULTS \ .algo = PL_DEINTERLACE_YADIF, #define pl_deinterlace_params(...) (&(struct pl_deinterlace_params) { PL_DEINTERLACE_DEFAULTS __VA_ARGS__ }) PL_API extern const struct pl_deinterlace_params pl_deinterlace_default_params; // Deinterlaces a set of interleaved source frames and outputs the result into // `vec4 color`. If `params` is left as NULL, it defaults to // `&pl_deinterlace_default_params`. PL_API void pl_shader_deinterlace(pl_shader sh, const struct pl_deinterlace_source *src, const struct pl_deinterlace_params *params); PL_API_END #endif // LIBPLACEBO_SHADERS_DEINTERLACING_H_ libplacebo-v7.349.0/src/include/libplacebo/shaders/dithering.h000066400000000000000000000140641463457750100242660ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . 
*/ #ifndef LIBPLACEBO_SHADERS_DITHERING_H_ #define LIBPLACEBO_SHADERS_DITHERING_H_ // Dithering shaders #include #include #include PL_API_BEGIN enum pl_dither_method { // Dither with blue noise. Very high quality, but requires the use of a // LUT. Warning: Computing a blue noise texture with a large size can be // very slow, however this only needs to be performed once. Even so, using // this with a `lut_size` greater than 6 is generally ill-advised. This is // the preferred/default dither method. PL_DITHER_BLUE_NOISE, // Dither with an ordered (bayer) dither matrix, using a LUT. Low quality, // and since this also uses a LUT, there's generally no advantage to picking // this instead of `PL_DITHER_BLUE_NOISE`. It's mainly there for testing. PL_DITHER_ORDERED_LUT, // The same as `PL_DITHER_ORDERED_LUT`, but uses fixed function math instead // of a LUT. This is faster, but only supports a fixed dither matrix size // of 16x16 (equal to a `lut_size` of 4). PL_DITHER_ORDERED_FIXED, // Dither with white noise. This does not require a LUT and is fairly cheap // to compute. Unlike the other modes it doesn't show any repeating // patterns either spatially or temporally, but the downside is that this // is visually fairly jarring due to the presence of low frequencies in the // noise spectrum. PL_DITHER_WHITE_NOISE, PL_DITHER_METHOD_COUNT, }; struct pl_dither_params { // The source of the dither noise to use. enum pl_dither_method method; // For the dither methods which require the use of a LUT, this controls // the size of the LUT (base 2). If left as NULL, this defaults to 6, which // is equivalent to a 64x64 dither matrix. Must not be larger than 8. int lut_size; // Enables temporal dithering. This reduces the persistence of dithering // artifacts by perturbing the dithering matrix per frame. // Warning: This can cause nasty aliasing artifacts on some LCD screens. bool temporal; // Gamma function to use for dither gamma correction. This will only have // an effect when dithering to low bit depths (<= 4). enum pl_color_transfer transfer; }; #define PL_DITHER_DEFAULTS \ .method = PL_DITHER_BLUE_NOISE, \ .lut_size = 6, \ /* temporal dithering commonly flickers on LCDs */ \ .temporal = false, #define pl_dither_params(...) (&(struct pl_dither_params) { PL_DITHER_DEFAULTS __VA_ARGS__ }) PL_API extern const struct pl_dither_params pl_dither_default_params; // Dither the colors to a lower depth, given in bits. This can be used on input // colors of any precision. Basically, this rounds the colors to only linear // multiples of the stated bit depth. The average intensity of the result // will not change (i.e., the dither noise is balanced in both directions). // If `params` is NULL, it defaults to &pl_dither_default_params. // // For the dither methods which require the use of a LUT, `dither_state` must // be set to a valid pointer. To avoid thrashing the resource, users should // avoid trying to re-use the same LUT for different dither configurations. If // passed as NULL, libplacebo will automatically fall back to dither algorithms // that don't require the use of a LUT. // // Warning: This dithering algorithm is not gamma-invariant; so using it for // very low bit depths (below 4 or so) will noticeably increase the brightness // of the resulting image. When doing low bit depth dithering for aesthetic // purposes, it's recommended that the user explicitly (de)linearize the colors // before and after this algorithm. 
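//
// A minimal usage sketch (illustrative only; `sh` is assumed to be a valid
// pl_shader, and the dither state object is owned by the caller), dithering
// down to 8 bits with the default parameters:
//
//     pl_shader_obj dither_state = NULL;
//     pl_shader_dither(sh, 8, &dither_state, NULL);
//     // ... once the shader(s) using it are no longer needed:
//     pl_shader_obj_destroy(&dither_state);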
PL_API void pl_shader_dither(pl_shader sh, int new_depth, pl_shader_obj *dither_state, const struct pl_dither_params *params); struct pl_error_diffusion_params { // Both the input and output texture must be provided up-front, with the // same size. The output texture must be storable, and the input texture // must be sampleable. pl_tex input_tex; pl_tex output_tex; // Depth to dither to. Required. int new_depth; // Error diffusion kernel to use. Optional. If unspecified, defaults to // `&pl_error_diffusion_sierra_lite`. const struct pl_error_diffusion_kernel *kernel; }; #define pl_error_diffusion_params(...) (&(struct pl_error_diffusion_params) { __VA_ARGS__ }) // Computes the shared memory requirements for a given error diffusion kernel. // This can be used to test up-front whether or not error diffusion would be // supported or not, before having to initialize textures. PL_API size_t pl_error_diffusion_shmem_req(const struct pl_error_diffusion_kernel *kernel, int height); // Apply an error diffusion dithering kernel. This is a much more expensive and // heavy dithering method, and is not generally recommended for realtime usage // where performance is critical. // // Requires compute shader support. Returns false if dithering fail e.g. as a // result of shader memory limits being exceeded. The resulting shader must be // dispatched with a work group count of exactly 1. PL_API bool pl_shader_error_diffusion(pl_shader sh, const struct pl_error_diffusion_params *params); PL_API_END #endif // LIBPLACEBO_SHADERS_DITHERING_H_ libplacebo-v7.349.0/src/include/libplacebo/shaders/film_grain.h000066400000000000000000000122541463457750100244170ustar00rootroot00000000000000/* * This file is part of libplacebo, which is normally licensed under the terms * of the LGPL v2.1+. However, this file (film_grain.h) is also available under * the terms of the more permissive MIT license: * * Copyright (c) 2018-2019 Niklas Haas * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #ifndef LIBPLACEBO_SHADERS_FILM_GRAIN_H_ #define LIBPLACEBO_SHADERS_FILM_GRAIN_H_ // Film grain synthesis shaders for AV1 / H.274. #include #include #include #include PL_API_BEGIN enum pl_film_grain_type { PL_FILM_GRAIN_NONE = 0, PL_FILM_GRAIN_AV1, PL_FILM_GRAIN_H274, PL_FILM_GRAIN_COUNT, }; // AV1 film grain parameters. For the exact meaning of these, see the AV1 // specification (section 6.8.20). 
struct pl_av1_grain_data { int num_points_y; uint8_t points_y[14][2]; // [n][0] = value, [n][1] = scaling bool chroma_scaling_from_luma; int num_points_uv[2]; // should be {0} for grayscale images uint8_t points_uv[2][10][2]; // like points_y for points_uv[0, 1] = u, v int scaling_shift; int ar_coeff_lag; int8_t ar_coeffs_y[24]; int8_t ar_coeffs_uv[2][25]; int ar_coeff_shift; int grain_scale_shift; int8_t uv_mult[2]; int8_t uv_mult_luma[2]; int16_t uv_offset[2]; // 9-bit value, range [-256, 255] bool overlap; }; // H.274 film grain parameters. For the exact meaning of these, see the H.274 // specification (section 8.5). struct pl_h274_grain_data { int model_id; int blending_mode_id; int log2_scale_factor; bool component_model_present[3]; uint16_t num_intensity_intervals[3]; uint8_t num_model_values[3]; const uint8_t *intensity_interval_lower_bound[3]; const uint8_t *intensity_interval_upper_bound[3]; const int16_t (*comp_model_value[3])[6]; }; // Tagged union for film grain data struct pl_film_grain_data { enum pl_film_grain_type type; // film grain type uint64_t seed; // shared seed value union { // Warning: These values are not sanity-checked at all, Invalid grain // data results in undefined behavior! struct pl_av1_grain_data av1; struct pl_h274_grain_data h274; } params; }; // Options for the `pl_shader_film_grain` call. struct pl_film_grain_params { // Required for all film grain types: struct pl_film_grain_data data; // film grain data pl_tex tex; // texture to sample from struct pl_color_repr *repr; // underlying color representation (see notes) int components; int component_mapping[4]; // same as `struct pl_plane` // Notes for `repr`: // - repr->bits affects the rounding for grain generation // - repr->levels affects whether or not we clip to full range or not // - repr->sys affects the interpretation of channels // - *repr gets normalized by this shader, which is why it's a pointer // Required for PL_FILM_GRAIN_AV1 only: pl_tex luma_tex; // "luma" texture (see notes) int luma_comp; // index of luma in `luma_tex` // Notes for `luma_tex`: // - `luma_tex` must be specified if the `tex` does not itself contain the // "luma-like" component. For XYZ systems, the Y channel is the luma // component. For RGB systems, the G channel is. }; #define pl_film_grain_params(...) (&(struct pl_film_grain_params) { __VA_ARGS__ }) // Test if film grain needs to be applied. This is a helper function that users // can use to decide whether or not `pl_shader_film_grain` needs to be called, // based on the given grain metadata. PL_API bool pl_needs_film_grain(const struct pl_film_grain_params *params); // Sample from a texture while applying film grain at the same time. // `grain_state` must be unique for every plane configuration, as it may // contain plane-dependent state. // // Returns false on any error, or if film grain generation is not supported // due to GLSL limitations. PL_API bool pl_shader_film_grain(pl_shader sh, pl_shader_obj *grain_state, const struct pl_film_grain_params *params); PL_API_END #endif // LIBPLACEBO_SHADERS_FILM_GRAIN_H_ libplacebo-v7.349.0/src/include/libplacebo/shaders/icc.h000066400000000000000000000136531463457750100230520ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. 
* * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #ifndef LIBPLACEBO_SHADERS_ICC_H_ #define LIBPLACEBO_SHADERS_ICC_H_ // Functions for generating and applying ICC-derived (3D)LUTs #include #include PL_API_BEGIN struct pl_icc_params { // The rendering intent to use, for profiles with multiple intents. A // recommended value is PL_INTENT_RELATIVE_COLORIMETRIC for color-accurate // video reproduction, or PL_INTENT_PERCEPTUAL for profiles containing // meaningful perceptual mapping tables for some more suitable color space // like BT.709. // // If this is set to the special value PL_INTENT_AUTO, will use the // preferred intent provided by the profile header. enum pl_rendering_intent intent; // The size of the 3DLUT to generate. If left as NULL, these individually // default to values appropriate for the profile. (Based on internal // precision heuristics) // // Note: Setting this manually is strongly discouraged, as it can result // in excessively high 3DLUT sizes where a much smaller LUT would have // sufficed. int size_r, size_g, size_b; // This field can be used to override the detected brightness level of the // ICC profile. If you set this to the special value 0 (or a negative // number), libplacebo will attempt reading the brightness value from the // ICC profile's tagging (if available), falling back to PL_COLOR_SDR_WHITE // if unavailable. float max_luma; // Force black point compensation. May help avoid crushed or raised black // points on "improper" profiles containing e.g. colorimetric tables that // do not round-trip. Should not be required on well-behaved profiles, // or when using PL_INTENT_PERCEPTUAL, but YMMV. bool force_bpc; // If provided, this pl_cache instance will be used, instead of the // GPU-internal cache, to cache the generated 3DLUTs. Note that these can // get large, especially for large values of size_{r,g,b}, so the user may // wish to split this cache off from the main shader cache. (Optional) pl_cache cache; // Deprecated legacy caching API. Replaced by `cache`. PL_DEPRECATED_IN(v6.321) void *cache_priv; PL_DEPRECATED_IN(v6.321) void (*cache_save)(void *priv, uint64_t sig, const uint8_t *cache, size_t size); PL_DEPRECATED_IN(v6.321) bool (*cache_load)(void *priv, uint64_t sig, uint8_t *cache, size_t size); }; #define PL_ICC_DEFAULTS \ .intent = PL_INTENT_RELATIVE_COLORIMETRIC, \ .max_luma = PL_COLOR_SDR_WHITE, #define pl_icc_params(...) (&(struct pl_icc_params) { PL_ICC_DEFAULTS __VA_ARGS__ }) PL_API extern const struct pl_icc_params pl_icc_default_params; // This object represents a "parsed" ICC profile. typedef const struct pl_icc_object_t { // Provided params, with the `intent` and `size` fields set (as described) struct pl_icc_params params; // Signature of the corresponding ICC profile. uint64_t signature; // Detected color space (or UNKNOWN for profiles which don't contain an // exact match), with HDR metedata set to the detected gamut and // white/black value ranges. struct pl_color_space csp; // Best estimate of profile gamma. This only serves as a rough guideline. float gamma; // Smallest containing primary set, always set. 
enum pl_color_primaries containing_primaries; } *pl_icc_object; // Attempts opening/parsing the contents of an ICC profile. The resulting // object is memory managed and may outlive the original profile - access // to the underlying profile is no longer needed once this returns. PL_API pl_icc_object pl_icc_open(pl_log log, const struct pl_icc_profile *profile, const struct pl_icc_params *params); PL_API void pl_icc_close(pl_icc_object *icc); // Update an existing pl_icc_object, which may be NULL, replacing it by the // new profile and parameters (if incompatible). // // Returns success. `obj` is set to the created profile, or NULL on error. // // Note: If `profile->signature` matches `(*obj)->signature`, or if `profile` is // NULL, then the existing profile is directly reused, with only the effective // parameters changing. In this case, `profile->data` is also *not* read from, // and may safely be NULL. PL_API bool pl_icc_update(pl_log log, pl_icc_object *obj, const struct pl_icc_profile *profile, const struct pl_icc_params *params); // Decode the input from the colorspace determined by the attached ICC profile // to linear light RGB (in the profile's containing primary set). `lut` must be // set to a shader object that will store the GPU resources associated with the // generated LUT. The resulting color space will be written to `out_csp`. PL_API void pl_icc_decode(pl_shader sh, pl_icc_object profile, pl_shader_obj *lut, struct pl_color_space *out_csp); // Encode the input from linear light RGB (in the profile's containing primary // set) into the colorspace determined by the attached ICC profile. `lut` must // be set to a shader object that will store the GPU resources associated with // the generated LUT. PL_API void pl_icc_encode(pl_shader sh, pl_icc_object profile, pl_shader_obj *lut); PL_API_END #endif // LIBPLACEBO_SHADERS_ICC_H_ libplacebo-v7.349.0/src/include/libplacebo/shaders/lut.h000066400000000000000000000061641463457750100231170ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #ifndef LIBPLACEBO_SHADERS_LUT_H_ #define LIBPLACEBO_SHADERS_LUT_H_ // Shaders for loading and applying arbitrary custom 1D/3DLUTs #include #include PL_API_BEGIN // Struct defining custom LUTs // // Note: Users may freely create their own instances of this struct, there is // nothing particularly special about `pl_lut_parse_cube`. struct pl_custom_lut { // Some unique signature identifying this LUT, needed to detect state // changes (for cache invalidation). This should ideally be a hash of the // file contents. (Which is what `pl_lut_parse_*` will set it to.) uint64_t signature; // Size of each dimension, in the order R, G, B. For 1D LUTs, only the R // dimension should be specified (the others left as 0). int size[3]; // Raw LUT data itself, in properly scaled floating point format. 
For 3D // LUTs, the innermost dimension is the first dimension (R), and the // outermost dimension is the last dimension (B). Individual color samples // are in the order R, G, B. const float *data; // Extra input/output shaper matrices. Ignored if equal to {0}. This is // mostly useful for 1D LUTs, since 3D LUTs can bake the shaper matrix into // the LUT itself - but it can still help optimize LUT precision. pl_matrix3x3 shaper_in, shaper_out; // Nominal metadata for the input/output of a LUT. Left as {0} if unknown. // Note: This is purely informative, `pl_shader_custom_lut` ignores it. struct pl_color_repr repr_in, repr_out; struct pl_color_space color_in, color_out; }; // Parse a 3DLUT in .cube format. Returns NULL if the file fails parsing. PL_API struct pl_custom_lut *pl_lut_parse_cube(pl_log log, const char *str, size_t str_len); // Frees a LUT created by `pl_lut_parse_*`. PL_API void pl_lut_free(struct pl_custom_lut **lut); // Apply a `pl_custom_lut`. The user is responsible for ensuring colors going // into the LUT are in the expected format as informed by the LUT metadata. // // `lut_state` must be a pointer to a NULL-initialized shader state object that // will be used to encapsulate any required GPU state. // // Note: `lut` does not have to be allocated by `pl_lut_parse_*`. It can be a // struct filled out by the user. PL_API void pl_shader_custom_lut(pl_shader sh, const struct pl_custom_lut *lut, pl_shader_obj *lut_state); PL_API_END #endif // LIBPLACEBO_SHADERS_LUT_H_ libplacebo-v7.349.0/src/include/libplacebo/shaders/sampling.h000066400000000000000000000267161463457750100241320ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #ifndef LIBPLACEBO_SHADERS_SAMPLING_H_ #define LIBPLACEBO_SHADERS_SAMPLING_H_ // Sampling operations. These shaders perform some form of sampling operation // from a given pl_tex. In order to use these, the pl_shader *must* have been // created using the same `gpu` as the originating `pl_tex`. Otherwise, this // is undefined behavior. They require nothing (PL_SHADER_SIG_NONE) and return // a color (PL_SHADER_SIG_COLOR). #include #include #include PL_API_BEGIN // Common parameters for sampling operations struct pl_sample_src { // There are two mutually exclusive ways of providing the source to sample // from: // // 1. Provide the texture and sampled region directly. This generates // a shader with input signature `PL_SHADER_SIG_NONE`, which binds the // texture as a descriptor (and the coordinates as a vertex attribute) pl_tex tex; // texture to sample pl_rect2df rect; // sub-rect to sample from (optional) enum pl_tex_address_mode address_mode; // preferred texture address mode // 2. Have the shader take it as an argument. Doing this requires // specifying the missing metadata of the texture backing the sampler, so // that the shader generation can generate the correct code. 
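    //
    // As an illustration of option 1. above (a sketch; `tex`, `out_w` and
    // `out_h` are assumed to come from the caller), it is usually enough to
    // fill in the texture and the desired output size:
    //
    //     struct pl_sample_src src = {
    //         .tex   = tex,
    //         .new_w = out_w,
    //         .new_h = out_h,
    //     };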
int tex_w, tex_h; // dimensions of the actual texture enum pl_fmt_type format; // format of the sampler being accepted enum pl_sampler_type sampler; // type of the sampler being accepted enum pl_tex_sample_mode mode; // sample mode of the sampler being accepted float sampled_w, sampled_h; // dimensions of the sampled region (optional) // Common metadata for both sampler input types: int components; // number of components to sample (optional) uint8_t component_mask; // bitmask of components to sample (optional) int new_w, new_h; // dimensions of the resulting output (optional) float scale; // factor to multiply into sampled signal (optional) // Note: `component_mask` and `components` are mutually exclusive, the // former is preferred if both are specified. }; #define pl_sample_src(...) (&(struct pl_sample_src) { __VA_ARGS__ }) struct pl_deband_params { // The number of debanding steps to perform per sample. Each step reduces a // bit more banding, but takes time to compute. Note that the strength of // each step falls off very quickly, so high numbers (>4) are practically // useless. Defaults to 1. int iterations; // The debanding filter's cut-off threshold. Higher numbers increase the // debanding strength dramatically, but progressively diminish image // details. Defaults to 3.0. float threshold; // The debanding filter's initial radius. The radius increases linearly // for each iteration. A higher radius will find more gradients, but a // lower radius will smooth more aggressively. Defaults to 16.0. float radius; // Add some extra noise to the image. This significantly helps cover up // remaining quantization artifacts. Higher numbers add more noise. // Note: When debanding HDR sources, even a small amount of grain can // result in a very big change to the brightness level. It's recommended to // either scale this value down or disable it entirely for HDR. // // Defaults to 4.0, which is very mild. float grain; // 'Neutral' grain value for each channel being debanded (sorted in order // from low to high index). Grain application will be modulated to avoid // disturbing colors close to this value. Set this to a value corresponding // to black in the relevant colorspace. float grain_neutral[3]; }; #define PL_DEBAND_DEFAULTS \ .iterations = 1, \ .threshold = 3.0, \ .radius = 16.0, \ .grain = 4.0, #define pl_deband_params(...) (&(struct pl_deband_params) {PL_DEBAND_DEFAULTS __VA_ARGS__ }) PL_API extern const struct pl_deband_params pl_deband_default_params; // Debands a given texture and returns the sampled color in `vec4 color`. If // `params` is left as NULL, it defaults to &pl_deband_default_params. // // Note: This can also be used as a pure grain function, by setting the number // of iterations to 0. PL_API void pl_shader_deband(pl_shader sh, const struct pl_sample_src *src, const struct pl_deband_params *params); // Performs direct / native texture sampling, using whatever texture filter is // available (linear for linearly sampleable sources, nearest otherwise). // // Note: This is generally very low quality and should be avoided if possible, // for both upscaling and downscaling. PL_API bool pl_shader_sample_direct(pl_shader sh, const struct pl_sample_src *src); // Performs hardware-accelerated nearest neighbour sampling. This is similar to // `pl_shader_sample_direct`, but forces nearest neighbour interpolation. PL_API bool pl_shader_sample_nearest(pl_shader sh, const struct pl_sample_src *src); // Performs hardware-accelerated bilinear sampling. 
This is similar to // `pl_shader_sample_direct`, but forces bilinear interpolation. PL_API bool pl_shader_sample_bilinear(pl_shader sh, const struct pl_sample_src *src); // Optimized versions of specific, strictly positive scaler kernels that take // adantage of linear texture sampling to reduce the number of fetches needed // by a factor of four. This family of functions performs radius-2 scaling // with only four texture fetches, which is far more efficient than using // the generalized 1D scaling method. Only works well for upscaling. PL_API bool pl_shader_sample_bicubic(pl_shader sh, const struct pl_sample_src *src); PL_API bool pl_shader_sample_hermite(pl_shader sh, const struct pl_sample_src *src); PL_API bool pl_shader_sample_gaussian(pl_shader sh, const struct pl_sample_src *src); // A sampler that is similar to nearest neighbour sampling, but tries to // preserve pixel aspect ratios. This is mathematically equivalent to taking an // idealized image with square pixels, sampling it at an infinite resolution, // and then downscaling that to the desired resolution. (Hence it being called // "oversample"). Good for pixel art. // // The threshold provides a cutoff threshold below which the contribution of // pixels should be ignored, trading some amount of aspect ratio distortion for // a slightly crisper image. A value of `threshold == 0.5` makes this filter // equivalent to regular nearest neighbour sampling. PL_API bool pl_shader_sample_oversample(pl_shader sh, const struct pl_sample_src *src, float threshold); struct pl_sample_filter_params { // The filter to use for sampling. struct pl_filter_config filter; // Antiringing strength. A value of 0.0 disables antiringing, and a value // of 1.0 enables full-strength antiringing. Defaults to 0.0 if // unspecified. // // Note: Ignored if `filter.antiring` is already set to something nonzero. float antiring; // Disable the use of compute shaders (e.g. if rendering to non-storable tex) bool no_compute; // Disable the use of filter widening / anti-aliasing (for downscaling) bool no_widening; // This shader object is used to store the LUT, and will be recreated // if necessary. To avoid thrashing the resource, users should avoid trying // to re-use the same LUT for different filter configurations or scaling // ratios. Must be set to a valid pointer, and the target NULL-initialized. pl_shader_obj *lut; // Deprecated / removed fields PL_DEPRECATED_IN(v6.335) int lut_entries; // hard-coded as 256 PL_DEPRECATED_IN(v6.335) float cutoff; // hard-coded as 1e-3 }; #define pl_sample_filter_params(...) (&(struct pl_sample_filter_params) { __VA_ARGS__ }) // Performs polar sampling. This internally chooses between an optimized compute // shader, and various fragment shaders, depending on the supported GLSL version // and GPU features. Returns whether or not it was successful. // // Note: `params->filter.polar` must be true to use this function. PL_API bool pl_shader_sample_polar(pl_shader sh, const struct pl_sample_src *src, const struct pl_sample_filter_params *params); // Performs orthogonal (1D) sampling. Using this twice in a row (once vertical // and once horizontal) effectively performs a 2D upscale. This is lower // quality than polar sampling, but significantly faster, and therefore the // recommended default. Returns whether or not it was successful. // // `src` must represent a scaling operation that only scales in one direction, // i.e. either only X or only Y. The other direction must be left unscaled. 
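//
// For example (a sketch; `sh` and `src` are assumed to describe a purely
// vertical scale, and `lut` is a caller-owned, NULL-initialized
// pl_shader_obj):
//
//     struct pl_sample_filter_params fparams = {
//         .filter = pl_filter_lanczos,
//         .lut    = &lut,
//     };
//     if (!pl_shader_sample_ortho2(sh, &src, &fparams)) {
//         // e.g. fall back to a simpler sampler
//         pl_shader_sample_bilinear(sh, &src);
//     }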
// // Note: Due to internal limitations, this may currently only be used on 2D // textures - even though the basic principle would work for 1D and 3D textures // as well. PL_API bool pl_shader_sample_ortho2(pl_shader sh, const struct pl_sample_src *src, const struct pl_sample_filter_params *params); struct pl_distort_params { // An arbitrary 2x2 affine transformation to apply to the input image. // For simplicity, the input image is explicitly centered and scaled such // that the longer dimension is in [-1,1], before applying this. pl_transform2x2 transform; // If true, the texture is placed inside the center of the canvas without // scaling. If false, it is effectively stretched to the canvas size. bool unscaled; // If true, the transformation is automatically scaled down and shifted to // ensure that the resulting image fits inside the output canvas. bool constrain; // If true, use bicubic interpolation rather than faster bilinear // interpolation. Higher quality but slower. bool bicubic; // Specifies the texture address mode to use when sampling out of bounds. enum pl_tex_address_mode address_mode; // If set, all out-of-bounds accesses will instead be treated as // transparent, according to the given alpha mode. (Which should match the // alpha mode of the texture) // // Note: `address_mode` has no effect when this is specified. enum pl_alpha_mode alpha_mode; }; #define PL_DISTORT_DEFAULTS \ .transform.mat.m = {{ 1, 0 }, {0, 1}}, #define pl_distort_params(...) (&(struct pl_distort_params) {PL_DISTORT_DEFAULTS __VA_ARGS__ }) PL_API extern const struct pl_distort_params pl_distort_default_params; // Distorts the input image using a given set of transformation parameters. // `out_w` and `out_h` determine the size of the effective canvas inside which // the distorted result may be rendered. Areas outside of this canvas will // be implicitly cut off. PL_API void pl_shader_distort(pl_shader sh, pl_tex tex, int out_w, int out_h, const struct pl_distort_params *params); PL_API_END #endif // LIBPLACEBO_SHADERS_SAMPLING_H_ libplacebo-v7.349.0/src/include/libplacebo/swapchain.h000066400000000000000000000176041463457750100226400ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #ifndef LIBPLACEBO_SWAPCHAIN_H_ #define LIBPLACEBO_SWAPCHAIN_H_ #include #include #include PL_API_BEGIN // This abstraction represents a low-level interface to visible surfaces // exposed by a graphics API (and accompanying GPU instance), allowing users to // directly present frames to the screen (or window, typically). This is a // sister API to gpu.h and follows the same convention w.r.t undefined behavior. // // Thread-safety: Safe typedef const struct pl_swapchain_t { pl_log log; pl_gpu gpu; } *pl_swapchain; // Destroys this swapchain. May be used at any time, and may block until the // completion of all outstanding rendering commands. 
The swapchain and any // resources retrieved from it must not be used afterwards. PL_API void pl_swapchain_destroy(pl_swapchain *sw); // Returns the approximate current swapchain latency in vsyncs, or 0 if // unknown. A latency of 1 means that `submit_frame` followed by `swap_buffers` // will block until the just-submitted frame has finished rendering. Typical // values are 2 or 3, which enable better pipelining by allowing the GPU to be // processing one or two frames at the same time as the user is preparing the // next for submission. PL_API int pl_swapchain_latency(pl_swapchain sw); // Update/query the swapchain size. This function performs both roles: it tries // setting the swapchain size to the values requested by the user, and returns // in the same variables what width/height the swapchain was actually set to - // which may be (substantially) different from the values requested by the // user. A value of 0 means "unknown/none" (in which case, libplacebo won't try // updating the size - it will simply return the current state of the // swapchain). It's also possible for libplacebo to return values of 0, such as // in the case that the swapchain doesn't exist yet. // // Returns false on significant errors (e.g. dead surface). This function can // effectively be used to probe if creating a swapchain works. PL_API bool pl_swapchain_resize(pl_swapchain sw, int *width, int *height); // Backwards compatibility #define pl_swapchain_colors pl_color_space // Inform the swapchain about the input color space. This API deliberately // provides no feedback, because the swapchain can internally decide what to do // with this information, including ignoring it entirely, or applying it // asynchronously. Users must still base their rendering on the value of // `pl_swapchain_frame.color_space`. // // Note: Calling this function a second time completely overrides any // previously specified hint. So calling this on {0} or NULL resets the // swapchain back to its initial/preferred colorspace. // // Note: If `csp->transfer` is a HDR transfer curve but HDR metadata is left // unspecified, the HDR metadata defaults to `pl_hdr_metadata_hdr10`. // Conversely, if the HDR metadata is non-empty but `csp->transfer` is left as // PL_COLOR_TRC_UNKNOWN, then it instead defaults to PL_COLOR_TRC_PQ. PL_API void pl_swapchain_colorspace_hint(pl_swapchain sw, const struct pl_color_space *csp); // The struct used to hold the results of `pl_swapchain_start_frame` struct pl_swapchain_frame { // A texture representing the framebuffer users should use for rendering. // It's guaranteed that `fbo->params.renderable` and `fbo->params.blit_dst` // will be true, but no other guarantees are made - not even that // `fbo->params.format` is a real format. pl_tex fbo; // If true, the user should assume that this framebuffer will be flipped // as a result of presenting it on-screen. If false, nothing special needs // to be done - but if true, users should flip the coordinate system of // the `pl_pass` that is rendering to this framebuffer. // // Note: Normally, libplacebo follows the convention that (0,0) represents // the top left of the image/screen. So when flipped is true, this means // (0,0) on this framebuffer gets displayed as the bottom left of the image. bool flipped; // Indicates the color representation this framebuffer will be interpreted // as by the host system / compositor / display, including the bit depth // and alpha handling (where available). 
struct pl_color_repr color_repr; struct pl_color_space color_space; }; // Retrieve a new frame from the swapchain. Returns whether successful. It's // worth noting that this function can fail sporadically for benign reasons, // for example the window being invisible or inaccessible. This function may // block until an image is available, which may be the case if the GPU is // rendering frames significantly faster than the display can output them. It // may also be non-blocking, so users shouldn't rely on this call alone in // order to meter rendering speed. (Specifics depend on the underlying graphics // API) PL_API bool pl_swapchain_start_frame(pl_swapchain sw, struct pl_swapchain_frame *out_frame); // Submits the previously started frame. Non-blocking. This must be issued in // lockstep with pl_swapchain_start_frame - there is no way to start multiple // frames and submit them out-of-order. The frames submitted this way will // generally be made visible in a first-in first-out fashion, although // specifics depend on the mechanism used to create the pl_swapchain. (See the // platform-specific APIs for more info). // // Returns whether successful. This should normally never fail, unless the // GPU/surface has been lost or some other critical error has occurred. The // "started" frame is consumed even in the event of failure. // // Note that `start_frame` and `submit_frame` form a lock pair, i.e. trying to // call e.g. `pl_swapchain_resize` from another thread will block until // `pl_swapchain_submit_frame` is finished. PL_API bool pl_swapchain_submit_frame(pl_swapchain sw); // Performs a "buffer swap", or some generalization of the concept. In layman's // terms, this blocks until the execution of the Nth previously submitted frame // has been "made complete" in some sense. (The N derives from the swapchain's // built-in latency. See `pl_swapchain_latency` for more information). // // Users should include this call in their rendering loops in order to make // sure they aren't submitting rendering commands faster than the GPU can // process them, which would potentially lead to a queue overrun or exhaust // memory. // // An example loop might look like this: // // while (rendering) { // struct pl_swapchain_frame frame; // bool ok = pl_swapchain_start_frame(swapchain, &frame); // if (!ok) { // /* wait some time, or decide to stop rendering */ // continue; // } // // /* do some rendering with frame.fbo */ // // ok = pl_swapchain_submit_frame(swapchain); // if (!ok) // break; // // pl_swapchain_swap_buffers(swapchain); // } // // The duration this function blocks for, if at all, may be very inconsistent // and should not be used as an authoritative source of vsync timing // information without sufficient smoothing/filtering (and if so, the time that // `start_frame` blocked for should also be included). PL_API void pl_swapchain_swap_buffers(pl_swapchain sw); PL_API_END #endif // LIBPLACEBO_SWAPCHAIN_H_ libplacebo-v7.349.0/src/include/libplacebo/tone_mapping.h000066400000000000000000000272021463457750100233360ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. 
* * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #ifndef LIBPLACEBO_TONE_MAPPING_H_ #define LIBPLACEBO_TONE_MAPPING_H_ #include #include #include #include PL_API_BEGIN struct pl_tone_map_params; struct pl_tone_map_function { const char *name; // Identifier const char *description; // Friendly / longer name // This controls the type of values input/output to/from `map` enum pl_hdr_scaling scaling; // The tone-mapping function itself. Iterates over all values in `lut`, and // adapts them as needed. // // Note that the `params` struct fed into this function is guaranteed to // satisfy `params->input_scaling == params->output_scaling == scaling`, // and also obeys `params->input_max >= params->output_max`. void (*map)(float *lut, const struct pl_tone_map_params *params); // Inverse tone mapping function. Optional. If absent, this tone mapping // curve only works in the forwards direction. // // For this function, `params->input_max <= params->output_max`. void (*map_inverse)(float *lut, const struct pl_tone_map_params *params); // Private data. Unused by libplacebo, but may be accessed by `map`. void *priv; // --- Deprecated fields PL_DEPRECATED_IN(v6.311) const char *param_desc; PL_DEPRECATED_IN(v6.311) float param_min; PL_DEPRECATED_IN(v6.311) float param_def; PL_DEPRECATED_IN(v6.311) float param_max; }; struct pl_tone_map_constants { // Configures the knee point, as a ratio between the source average and // target average (in PQ space). An adaptation of 1.0 always adapts the // source scene average brightness to the (scaled) target average, // while a value of 0.0 never modifies scene brightness. [0,1] // // Affects all methods that use the ST2094 knee point determination // (currently ST2094-40, ST2094-10 and spline) float knee_adaptation; // Configures the knee point minimum and maximum, respectively, as // a percentage of the PQ luminance range. Provides a hard limit on the // knee point chosen by `knee_adaptation`. float knee_minimum; // (0, 0.5) float knee_maximum; // (0.5, 1.0) // Default knee point to use in the absence of source scene average // metadata. Normally, this is ignored in favor of picking the knee // point as the (relative) source scene average brightness level. float knee_default; // [knee_minimum, knee_maximum] // Knee point offset (for BT.2390 only). Note that a value of 0.5 is // the spec-defined default behavior, which differs from the libplacebo // default of 1.0. [0.5, 2] float knee_offset; // For the single-pivot polynomial (spline) function, this controls the // coefficients used to tune the slope of the curve. This tuning is designed // to make the slope closer to 1.0 when the difference in peaks is low, // and closer to linear when the difference between peaks is high. float slope_tuning; // [0,10] float slope_offset; // [0,1] // Contrast setting for the spline function. Higher values make the curve // steeper (closer to `clip`), preserving midtones at the cost of losing // shadow/highlight details, while lower values make the curve shallowed // (closer to `linear`), preserving highlights at the cost of losing midtone // contrast. Values above 1.0 are possible, resulting in an output with more // contrast than the input. 
float spline_contrast; // [0,1.5] // For the reinhard function, this specifies the local contrast coefficient // at the display peak. Essentially, a value of 0.5 implies that the // reference white will be about half as bright as when clipping. (0,1) float reinhard_contrast; // For legacy functions (mobius, gamma) which operate on linear light, this // directly sets the corresponding knee point. (0,1) float linear_knee; // For linear methods (linear, linearlight), this controls the linear // exposure/gain applied to the image. (0,10] float exposure; }; #define PL_TONE_MAP_CONSTANTS \ .knee_adaptation = 0.4f, \ .knee_minimum = 0.1f, \ .knee_maximum = 0.8f, \ .knee_default = 0.4f, \ .knee_offset = 1.0f, \ .slope_tuning = 1.5f, \ .slope_offset = 0.2f, \ .spline_contrast = 0.5f, \ .reinhard_contrast = 0.5f, \ .linear_knee = 0.3f, \ .exposure = 1.0f, struct pl_tone_map_params { // If `function` is NULL, defaults to `pl_tone_map_clip`. const struct pl_tone_map_function *function; // Common constants, should be initialized to PL_TONE_MAP_CONSTANTS if // not intending to override them further. struct pl_tone_map_constants constants; // The desired input/output scaling of the tone map. If this differs from // `function->scaling`, any required conversion will be performed. // // Note that to maximize LUT efficiency, it's *highly* recommended to use // either PL_HDR_PQ or PL_HDR_SQRT as the input scaling, except when // using `pl_tone_map_sample`. enum pl_hdr_scaling input_scaling; enum pl_hdr_scaling output_scaling; // The size of the resulting LUT. (For `pl_tone_map_generate` only) size_t lut_size; // The characteristics of the input, in `input_scaling` units. float input_min; float input_max; float input_avg; // or 0 if unknown // The desired characteristics of the output, in `output_scaling` units. float output_min; float output_max; // The input HDR metadata. Only used by a select few tone-mapping // functions, currently only SMPTE ST2094. (Optional) struct pl_hdr_metadata hdr; // --- Deprecated fields PL_DEPRECATED_IN(v6.311) float param; // see `constants` }; #define pl_tone_map_params(...) (&(struct pl_tone_map_params) { __VA_ARGS__ }); // Note: Only does pointer equality testing on `function` PL_API bool pl_tone_map_params_equal(const struct pl_tone_map_params *a, const struct pl_tone_map_params *b); // Clamps/defaults the parameters, including input/output maximum. PL_API void pl_tone_map_params_infer(struct pl_tone_map_params *params); // Returns true if the given tone mapping configuration effectively represents // a no-op configuration. Tone mapping can be skipped in this case (although // strictly speaking, the LUT would still clip illegal input values) PL_API bool pl_tone_map_params_noop(const struct pl_tone_map_params *params); // Generate a tone-mapping LUT for a given configuration. This will always // span the entire input range, as given by `input_min` and `input_max`. PL_API void pl_tone_map_generate(float *out, const struct pl_tone_map_params *params); // Samples a tone mapping function at a single position. Note that this is less // efficient than `pl_tone_map_generate` for generating multiple values. // // Ignores `params->lut_size`. PL_API float pl_tone_map_sample(float x, const struct pl_tone_map_params *params); // Performs no tone-mapping, just clips out-of-range colors. Retains perfect // color accuracy for in-range colors but completely destroys out-of-range // information. Does not perform any black point adaptation. 
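//
// As a usage sketch for `pl_tone_map_generate` above (all concrete values are
// illustrative only): generating a 256-entry LUT that maps a 1000 nit PQ
// source onto a 203 nit target with the spline curve:
//
//     float lut[256];
//     struct pl_tone_map_params params = {
//         .function       = &pl_tone_map_spline,
//         .constants      = { PL_TONE_MAP_CONSTANTS },
//         .input_scaling  = PL_HDR_PQ,
//         .output_scaling = PL_HDR_PQ,
//         .lut_size       = 256,
//         .input_min      = pl_hdr_rescale(PL_HDR_NITS, PL_HDR_PQ, 0.005f),
//         .input_max      = pl_hdr_rescale(PL_HDR_NITS, PL_HDR_PQ, 1000.0f),
//         .output_min     = pl_hdr_rescale(PL_HDR_NITS, PL_HDR_PQ, 0.005f),
//         .output_max     = pl_hdr_rescale(PL_HDR_NITS, PL_HDR_PQ, 203.0f),
//     };
//     pl_tone_map_params_infer(&params);
//     pl_tone_map_generate(lut, &params);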
PL_API extern const struct pl_tone_map_function pl_tone_map_clip; // EETF from SMPTE ST 2094-40 Annex B, which uses the provided OOTF based on // Bezier curves to perform tone-mapping. The OOTF used is adjusted based on // the ratio between the targeted and actual display peak luminances. In the // absence of HDR10+ metadata, falls back to a simple constant bezier curve. PL_API extern const struct pl_tone_map_function pl_tone_map_st2094_40; // EETF from SMPTE ST 2094-10 Annex B.2, which takes into account the input // signal average luminance in addition to the maximum/minimum. // // Note: This does *not* currently include the subjective gain/offset/gamma // controls defined in Annex B.3. (Open an issue with a valid sample file if // you want such parameters to be respected.) PL_API extern const struct pl_tone_map_function pl_tone_map_st2094_10; // EETF from the ITU-R Report BT.2390, a hermite spline roll-off with linear // segment. PL_API extern const struct pl_tone_map_function pl_tone_map_bt2390; // EETF from ITU-R Report BT.2446, method A. Can be used for both forward // and inverse tone mapping. PL_API extern const struct pl_tone_map_function pl_tone_map_bt2446a; // Simple spline consisting of two polynomials, joined by a single pivot point, // which is tuned based on the source scene average brightness (taking into // account dynamic metadata if available). This function can be used // for both forward and inverse tone mapping. PL_API extern const struct pl_tone_map_function pl_tone_map_spline; // Very simple non-linear curve. Named after Erik Reinhard. PL_API extern const struct pl_tone_map_function pl_tone_map_reinhard; // Generalization of the reinhard tone mapping algorithm to support an // additional linear slope near black. The name is derived from its function // shape (ax+b)/(cx+d), which is known as a Möbius transformation. PL_API extern const struct pl_tone_map_function pl_tone_map_mobius; // Piece-wise, filmic tone-mapping algorithm developed by John Hable for use in // Uncharted 2, inspired by a similar tone-mapping algorithm used by Kodak. // Popularized by its use in video games with HDR rendering. Preserves both // dark and bright details very well, but comes with the drawback of changing // the average brightness quite significantly. This is sort of similar to // pl_tone_map_reinhard with `reinhard_contrast=0.24`. PL_API extern const struct pl_tone_map_function pl_tone_map_hable; // Fits a gamma (power) function to transfer between the source and target // color spaces, effectively resulting in a perceptual hard-knee joining two // roughly linear sections. This preserves details at all scales, but can result // in an image with a muted or dull appearance. PL_API extern const struct pl_tone_map_function pl_tone_map_gamma; // Linearly stretches the input range to the output range, in PQ space. This // will preserve all details accurately, but results in a significantly // different average brightness. Can be used for inverse tone-mapping in // addition to regular tone-mapping. PL_API extern const struct pl_tone_map_function pl_tone_map_linear; // Like `pl_tone_map_linear`, but in linear light (instead of PQ). Works well // for small range adjustments but may cause severe darkening when // downconverting from e.g. 10k nits to SDR. 
PL_API extern const struct pl_tone_map_function pl_tone_map_linear_light; // A list of built-in tone mapping functions, terminated by NULL PL_API extern const struct pl_tone_map_function * const pl_tone_map_functions[]; PL_API extern const int pl_num_tone_map_functions; // excluding trailing NULL // Find the tone mapping function with the given name, or NULL on failure. PL_API const struct pl_tone_map_function *pl_find_tone_map_function(const char *name); // Deprecated alias, do not use #define pl_tone_map_auto pl_tone_map_spline PL_API_END #endif // LIBPLACEBO_TONE_MAPPING_H_ libplacebo-v7.349.0/src/include/libplacebo/utils/000077500000000000000000000000001463457750100216425ustar00rootroot00000000000000libplacebo-v7.349.0/src/include/libplacebo/utils/dav1d.h000066400000000000000000000140761463457750100230220ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #ifndef LIBPLACEBO_DAV1D_H_ #define LIBPLACEBO_DAV1D_H_ #include #include #include #if defined(__cplusplus) && !defined(PL_DAV1D_IMPLEMENTATION) # define PL_DAV1D_API # define PL_DAV1D_IMPLEMENTATION 0 # warning Remember to include this file with a PL_DAV1D_IMPLEMENTATION set to 1 in \ C translation unit to provide implementation. Suppress this warning by \ defining PL_DAV1D_IMPLEMENTATION to 0 in C++ files. #elif !defined(PL_DAV1D_IMPLEMENTATION) # define PL_DAV1D_API static inline # define PL_DAV1D_IMPLEMENTATION 1 #else # define PL_DAV1D_API #endif PL_API_BEGIN // Fill in the details of a `pl_frame` from a Dav1dPicture. This function will // explicitly clear `out_frame`, setting all extra fields to 0. After this // function returns, the only missing data is information related to the plane // texture itself (`planes[N].texture`). // // Note: This will include all possible metadata, including HDR metadata and // AV1 film grain data. Users should explicitly clear this out if undesired. PL_DAV1D_API void pl_frame_from_dav1dpicture(struct pl_frame *out_frame, const Dav1dPicture *picture); // Helper function to generate a `pl_color_space` struct from a Dav1dPicture. // Useful to update the swapchain colorspace mode dynamically (e.g. for HDR). PL_DAV1D_API void pl_swapchain_colors_from_dav1dpicture(struct pl_color_space *out_colors, const Dav1dPicture *picture); struct pl_dav1d_upload_params { // The picture to upload. Not modified unless `asynchronous` is true. Dav1dPicture *picture; // If true, film grain present in `picture` will be exported to the // `pl_frame` as well. This should be set to false unless the user has // disabled `Dav1dSettings.apply_grain`. bool film_grain; // If true, libplacebo will probe for the allocation metadata set by // `pl_allocate_dav1dpicture`, and directly import the attached buffers // (saving a memcpy in some cases). Has no effect if the Dav1dPicture was // not allocated using `pl_allocate_dav1dpicture`. 
// // Note: When this is the case, `asynchronous` has no further effect - // uploads from attached buffers are already asynchronous. bool gpu_allocated; // If true, `picture` will be asynchronously uploaded and unref'd // internally by libplacebo, and the struct passed by the user cleared to // {0}. This is needed to avoid `memcpy` in some cases, so setting it to // true is highly recommended wherever possible. // // Note: If `pl_upload_dav1dpicture` returns false, `picture` does not get // unref'd. bool asynchronous; }; #define pl_dav1d_upload_params(...) (&(struct pl_dav1d_upload_params) { __VA_ARGS__ }) // Very high level helper function to take a `Dav1dPicture` and upload it to // the GPU. Similar in spirit to `pl_upload_plane`, and the same notes apply. // `tex` must be an array of 3 pointers of type `pl_tex`, each // either pointing to a valid texture, or NULL. Returns whether successful. PL_DAV1D_API bool pl_upload_dav1dpicture(pl_gpu gpu, struct pl_frame *out_frame, pl_tex tex[3], const struct pl_dav1d_upload_params *params); // Allocate a Dav1dPicture from persistently mapped buffers. This can be more // efficient than regular Dav1dPictures, especially when using the synchronous // `pl_upload_dav1dpicture`, or on platforms that don't support importing // PL_HANDLE_HOST_PTR as buffers. Returns 0 or a negative DAV1D_ERR value. // // Note: These may only be used directly as a Dav1dPicAllocator if the `gpu` // passed as the value of `cookie` is `pl_gpu.limits.thread_safe`. Otherwise, // the user must manually synchronize this to ensure it runs on the correct // thread. PL_DAV1D_API int pl_allocate_dav1dpicture(Dav1dPicture *picture, void *gpu); PL_DAV1D_API void pl_release_dav1dpicture(Dav1dPicture *picture, void *gpu); // Mapping functions for the various Dav1dColor* enums. Note that these are not // quite 1:1, and even for values that exist in both, the semantics sometimes // differ. Some special cases (e.g. ICtCp, or XYZ) are handled differently in // libplacebo and libdav1d, respectively. PL_DAV1D_API enum pl_color_system pl_system_from_dav1d(enum Dav1dMatrixCoefficients mc); PL_DAV1D_API enum Dav1dMatrixCoefficients pl_system_to_dav1d(enum pl_color_system sys); PL_DAV1D_API enum pl_color_levels pl_levels_from_dav1d(int color_range); PL_DAV1D_API int pl_levels_to_dav1d(enum pl_color_levels levels); PL_DAV1D_API enum pl_color_primaries pl_primaries_from_dav1d(enum Dav1dColorPrimaries prim); PL_DAV1D_API enum Dav1dColorPrimaries pl_primaries_to_dav1d(enum pl_color_primaries prim); PL_DAV1D_API enum pl_color_transfer pl_transfer_from_dav1d(enum Dav1dTransferCharacteristics trc); PL_DAV1D_API enum Dav1dTransferCharacteristics pl_transfer_to_dav1d(enum pl_color_transfer trc); PL_DAV1D_API enum pl_chroma_location pl_chroma_from_dav1d(enum Dav1dChromaSamplePosition loc); PL_DAV1D_API enum Dav1dChromaSamplePosition pl_chroma_to_dav1d(enum pl_chroma_location loc); // Actual implementation, included as part of this header to avoid having // a compile-time dependency on libdav1d. #if PL_DAV1D_IMPLEMENTATION # include #endif PL_API_END #endif // LIBPLACEBO_DAV1D_H_ libplacebo-v7.349.0/src/include/libplacebo/utils/dav1d_internal.h000066400000000000000000000563431463457750100247210ustar00rootroot00000000000000/* * This file is part of libplacebo. 
* * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #ifndef LIBPLACEBO_DAV1D_H_ #error This header should be included as part of #elif defined(__cplusplus) #error This header cannot be included from C++ define PL_DAV1D_IMPLEMENTATION appropriately #else #include #include #include PL_DAV1D_API enum pl_color_system pl_system_from_dav1d(enum Dav1dMatrixCoefficients mc) { switch (mc) { case DAV1D_MC_IDENTITY: return PL_COLOR_SYSTEM_RGB; // or XYZ (unlikely) case DAV1D_MC_BT709: return PL_COLOR_SYSTEM_BT_709; case DAV1D_MC_UNKNOWN: return PL_COLOR_SYSTEM_UNKNOWN; case DAV1D_MC_FCC: return PL_COLOR_SYSTEM_UNKNOWN; // missing case DAV1D_MC_BT470BG: return PL_COLOR_SYSTEM_BT_601; case DAV1D_MC_BT601: return PL_COLOR_SYSTEM_BT_601; case DAV1D_MC_SMPTE240: return PL_COLOR_SYSTEM_SMPTE_240M; case DAV1D_MC_SMPTE_YCGCO: return PL_COLOR_SYSTEM_YCGCO; case DAV1D_MC_BT2020_NCL: return PL_COLOR_SYSTEM_BT_2020_NC; case DAV1D_MC_BT2020_CL: return PL_COLOR_SYSTEM_BT_2020_C; case DAV1D_MC_SMPTE2085: return PL_COLOR_SYSTEM_UNKNOWN; // missing case DAV1D_MC_CHROMAT_NCL: return PL_COLOR_SYSTEM_UNKNOWN; // missing case DAV1D_MC_CHROMAT_CL: return PL_COLOR_SYSTEM_UNKNOWN; // missing // Note: this colorspace is confused between PQ and HLG, which dav1d // requires inferring from other sources, but libplacebo makes // explicit. Default to PQ as it's the more common scenario. case DAV1D_MC_ICTCP: return PL_COLOR_SYSTEM_BT_2100_PQ; case DAV1D_MC_RESERVED: abort(); } return PL_COLOR_SYSTEM_UNKNOWN; } PL_DAV1D_API enum Dav1dMatrixCoefficients pl_system_to_dav1d(enum pl_color_system sys) { switch (sys) { case PL_COLOR_SYSTEM_UNKNOWN: return DAV1D_MC_UNKNOWN; case PL_COLOR_SYSTEM_BT_601: return DAV1D_MC_BT601; case PL_COLOR_SYSTEM_BT_709: return DAV1D_MC_BT709; case PL_COLOR_SYSTEM_SMPTE_240M: return DAV1D_MC_SMPTE240; case PL_COLOR_SYSTEM_BT_2020_NC: return DAV1D_MC_BT2020_NCL; case PL_COLOR_SYSTEM_BT_2020_C: return DAV1D_MC_BT2020_CL; case PL_COLOR_SYSTEM_BT_2100_PQ: return DAV1D_MC_ICTCP; case PL_COLOR_SYSTEM_BT_2100_HLG: return DAV1D_MC_ICTCP; case PL_COLOR_SYSTEM_DOLBYVISION: return DAV1D_MC_UNKNOWN; // missing case PL_COLOR_SYSTEM_YCGCO: return DAV1D_MC_SMPTE_YCGCO; case PL_COLOR_SYSTEM_RGB: return DAV1D_MC_IDENTITY; case PL_COLOR_SYSTEM_XYZ: return DAV1D_MC_IDENTITY; case PL_COLOR_SYSTEM_COUNT: abort(); } return DAV1D_MC_UNKNOWN; } PL_DAV1D_API enum pl_color_levels pl_levels_from_dav1d(int color_range) { return color_range ? 
PL_COLOR_LEVELS_FULL : PL_COLOR_LEVELS_LIMITED; } PL_DAV1D_API int pl_levels_to_dav1d(enum pl_color_levels levels) { return levels == PL_COLOR_LEVELS_FULL; } PL_DAV1D_API enum pl_color_primaries pl_primaries_from_dav1d(enum Dav1dColorPrimaries prim) { switch (prim) { case DAV1D_COLOR_PRI_BT709: return PL_COLOR_PRIM_BT_709; case DAV1D_COLOR_PRI_UNKNOWN: return PL_COLOR_PRIM_UNKNOWN; case DAV1D_COLOR_PRI_RESERVED: return PL_COLOR_PRIM_UNKNOWN; case DAV1D_COLOR_PRI_BT470M: return PL_COLOR_PRIM_BT_470M; case DAV1D_COLOR_PRI_BT470BG: return PL_COLOR_PRIM_BT_601_625; case DAV1D_COLOR_PRI_BT601: return PL_COLOR_PRIM_BT_601_525; case DAV1D_COLOR_PRI_SMPTE240: return PL_COLOR_PRIM_BT_601_525; case DAV1D_COLOR_PRI_FILM: return PL_COLOR_PRIM_FILM_C; case DAV1D_COLOR_PRI_BT2020: return PL_COLOR_PRIM_BT_2020; case DAV1D_COLOR_PRI_XYZ: return PL_COLOR_PRIM_UNKNOWN; case DAV1D_COLOR_PRI_SMPTE431: return PL_COLOR_PRIM_DCI_P3; case DAV1D_COLOR_PRI_SMPTE432: return PL_COLOR_PRIM_DISPLAY_P3; case DAV1D_COLOR_PRI_EBU3213: return PL_COLOR_PRIM_EBU_3213; } return PL_COLOR_PRIM_UNKNOWN; } PL_DAV1D_API enum Dav1dColorPrimaries pl_primaries_to_dav1d(enum pl_color_primaries prim) { switch (prim) { case PL_COLOR_PRIM_UNKNOWN: return DAV1D_COLOR_PRI_UNKNOWN; case PL_COLOR_PRIM_BT_601_525: return DAV1D_COLOR_PRI_BT601; case PL_COLOR_PRIM_BT_601_625: return DAV1D_COLOR_PRI_BT470BG; case PL_COLOR_PRIM_BT_709: return DAV1D_COLOR_PRI_BT709; case PL_COLOR_PRIM_BT_470M: return DAV1D_COLOR_PRI_BT470M; case PL_COLOR_PRIM_EBU_3213: return DAV1D_COLOR_PRI_EBU3213; case PL_COLOR_PRIM_BT_2020: return DAV1D_COLOR_PRI_BT2020; case PL_COLOR_PRIM_APPLE: return DAV1D_COLOR_PRI_UNKNOWN; // missing case PL_COLOR_PRIM_ADOBE: return DAV1D_COLOR_PRI_UNKNOWN; // missing case PL_COLOR_PRIM_PRO_PHOTO: return DAV1D_COLOR_PRI_UNKNOWN; // missing case PL_COLOR_PRIM_CIE_1931: return DAV1D_COLOR_PRI_UNKNOWN; // missing case PL_COLOR_PRIM_DCI_P3: return DAV1D_COLOR_PRI_SMPTE431; case PL_COLOR_PRIM_DISPLAY_P3: return DAV1D_COLOR_PRI_SMPTE432; case PL_COLOR_PRIM_V_GAMUT: return DAV1D_COLOR_PRI_UNKNOWN; // missing case PL_COLOR_PRIM_S_GAMUT: return DAV1D_COLOR_PRI_UNKNOWN; // missing case PL_COLOR_PRIM_FILM_C: return DAV1D_COLOR_PRI_FILM; case PL_COLOR_PRIM_ACES_AP0: return DAV1D_COLOR_PRI_UNKNOWN; // missing case PL_COLOR_PRIM_ACES_AP1: return DAV1D_COLOR_PRI_UNKNOWN; // missing case PL_COLOR_PRIM_COUNT: abort(); } return DAV1D_COLOR_PRI_UNKNOWN; } PL_DAV1D_API enum pl_color_transfer pl_transfer_from_dav1d(enum Dav1dTransferCharacteristics trc) { switch (trc) { case DAV1D_TRC_BT709: return PL_COLOR_TRC_BT_1886; // EOTF != OETF case DAV1D_TRC_UNKNOWN: return PL_COLOR_TRC_UNKNOWN; case DAV1D_TRC_BT470M: return PL_COLOR_TRC_GAMMA22; case DAV1D_TRC_BT470BG: return PL_COLOR_TRC_GAMMA28; case DAV1D_TRC_BT601: return PL_COLOR_TRC_BT_1886; // EOTF != OETF case DAV1D_TRC_SMPTE240: return PL_COLOR_TRC_BT_1886; // EOTF != OETF case DAV1D_TRC_LINEAR: return PL_COLOR_TRC_LINEAR; case DAV1D_TRC_LOG100: return PL_COLOR_TRC_UNKNOWN; // missing case DAV1D_TRC_LOG100_SQRT10: return PL_COLOR_TRC_UNKNOWN; // missing case DAV1D_TRC_IEC61966: return PL_COLOR_TRC_BT_1886; // EOTF != OETF case DAV1D_TRC_BT1361: return PL_COLOR_TRC_BT_1886; // ETOF != OETF case DAV1D_TRC_SRGB: return PL_COLOR_TRC_SRGB; case DAV1D_TRC_BT2020_10BIT: return PL_COLOR_TRC_BT_1886; // EOTF != OETF case DAV1D_TRC_BT2020_12BIT: return PL_COLOR_TRC_BT_1886; // EOTF != OETF case DAV1D_TRC_SMPTE2084: return PL_COLOR_TRC_PQ; case DAV1D_TRC_SMPTE428: return PL_COLOR_TRC_ST428; case DAV1D_TRC_HLG: return 
PL_COLOR_TRC_HLG; case DAV1D_TRC_RESERVED: abort(); } return PL_COLOR_TRC_UNKNOWN; } PL_DAV1D_API enum Dav1dTransferCharacteristics pl_transfer_to_dav1d(enum pl_color_transfer trc) { switch (trc) { case PL_COLOR_TRC_UNKNOWN: return DAV1D_TRC_UNKNOWN; case PL_COLOR_TRC_BT_1886: return DAV1D_TRC_BT709; // EOTF != OETF case PL_COLOR_TRC_SRGB: return DAV1D_TRC_SRGB; case PL_COLOR_TRC_LINEAR: return DAV1D_TRC_LINEAR; case PL_COLOR_TRC_GAMMA18: return DAV1D_TRC_UNKNOWN; // missing case PL_COLOR_TRC_GAMMA20: return DAV1D_TRC_UNKNOWN; // missing case PL_COLOR_TRC_GAMMA22: return DAV1D_TRC_BT470M; case PL_COLOR_TRC_GAMMA24: return DAV1D_TRC_UNKNOWN; // missing case PL_COLOR_TRC_GAMMA26: return DAV1D_TRC_UNKNOWN; // missing case PL_COLOR_TRC_GAMMA28: return DAV1D_TRC_BT470BG; case PL_COLOR_TRC_ST428: return DAV1D_TRC_SMPTE428; case PL_COLOR_TRC_PRO_PHOTO: return DAV1D_TRC_UNKNOWN; // missing case PL_COLOR_TRC_PQ: return DAV1D_TRC_SMPTE2084; case PL_COLOR_TRC_HLG: return DAV1D_TRC_HLG; case PL_COLOR_TRC_V_LOG: return DAV1D_TRC_UNKNOWN; // missing case PL_COLOR_TRC_S_LOG1: return DAV1D_TRC_UNKNOWN; // missing case PL_COLOR_TRC_S_LOG2: return DAV1D_TRC_UNKNOWN; // missing case PL_COLOR_TRC_COUNT: abort(); } return DAV1D_TRC_UNKNOWN; } PL_DAV1D_API enum pl_chroma_location pl_chroma_from_dav1d(enum Dav1dChromaSamplePosition loc) { switch (loc) { case DAV1D_CHR_UNKNOWN: return PL_CHROMA_UNKNOWN; case DAV1D_CHR_VERTICAL: return PL_CHROMA_LEFT; case DAV1D_CHR_COLOCATED: return PL_CHROMA_TOP_LEFT; } return PL_CHROMA_UNKNOWN; } PL_DAV1D_API enum Dav1dChromaSamplePosition pl_chroma_to_dav1d(enum pl_chroma_location loc) { switch (loc) { case PL_CHROMA_UNKNOWN: return DAV1D_CHR_UNKNOWN; case PL_CHROMA_LEFT: return DAV1D_CHR_VERTICAL; case PL_CHROMA_CENTER: return DAV1D_CHR_UNKNOWN; // missing case PL_CHROMA_TOP_LEFT: return DAV1D_CHR_COLOCATED; case PL_CHROMA_TOP_CENTER: return DAV1D_CHR_UNKNOWN; // missing case PL_CHROMA_BOTTOM_LEFT: return DAV1D_CHR_UNKNOWN; // missing case PL_CHROMA_BOTTOM_CENTER: return DAV1D_CHR_UNKNOWN; // missing case PL_CHROMA_COUNT: abort(); } return DAV1D_CHR_UNKNOWN; } static inline float pl_fixed24_8(uint32_t n) { return (float) n / (1 << 8); } static inline float pl_fixed18_14(uint32_t n) { return (float) n / (1 << 14); } static inline float pl_fixed0_16(uint16_t n) { return (float) n / (1 << 16); } // Align to a power of 2 #define PL_ALIGN2(x, align) (((x) + (align) - 1) & ~((align) - 1)) PL_DAV1D_API void pl_frame_from_dav1dpicture(struct pl_frame *out, const Dav1dPicture *picture) { const Dav1dSequenceHeader *seq_hdr = picture->seq_hdr; int num_planes; switch (picture->p.layout) { case DAV1D_PIXEL_LAYOUT_I400: num_planes = 1; break; case DAV1D_PIXEL_LAYOUT_I420: case DAV1D_PIXEL_LAYOUT_I422: case DAV1D_PIXEL_LAYOUT_I444: num_planes = 3; break; default: abort(); } *out = (struct pl_frame) { .num_planes = num_planes, .planes = { // Components are always in order, which makes things easy { .components = 1, .component_mapping = {0}, }, { .components = 1, .component_mapping = {1}, }, { .components = 1, .component_mapping = {2}, }, }, .crop = { 0, 0, picture->p.w, picture->p.h, }, .color = { .primaries = pl_primaries_from_dav1d(seq_hdr->pri), .transfer = pl_transfer_from_dav1d(seq_hdr->trc), }, .repr = { .sys = pl_system_from_dav1d(seq_hdr->mtrx), .levels = pl_levels_from_dav1d(seq_hdr->color_range), .bits = { .sample_depth = PL_ALIGN2(picture->p.bpc, 8), .color_depth = picture->p.bpc, }, }, }; if (seq_hdr->mtrx == DAV1D_MC_ICTCP && seq_hdr->trc == DAV1D_TRC_HLG) { // dav1d makes no 
distinction between PQ and HLG ICtCp, so we need // to manually fix it in the case that we have HLG ICtCp data. out->repr.sys = PL_COLOR_SYSTEM_BT_2100_HLG; } else if (seq_hdr->mtrx == DAV1D_MC_IDENTITY && seq_hdr->pri == DAV1D_COLOR_PRI_XYZ) { // dav1d handles this as a special case, but doesn't provide an // explicit flag for it either, so we have to resort to this ugly hack, // even though CIE 1931 RGB *is* a valid thing in principle! out->repr.sys= PL_COLOR_SYSTEM_XYZ; } else if (!out->repr.sys) { // PL_COLOR_SYSTEM_UNKNOWN maps to RGB, so hard-code this one out->repr.sys = pl_color_system_guess_ycbcr(picture->p.w, picture->p.h); } const Dav1dContentLightLevel *cll = picture->content_light; if (cll) { out->color.hdr.max_cll = cll->max_content_light_level; out->color.hdr.max_fall = cll->max_frame_average_light_level; } // This overrides the CLL values above, if both are present const Dav1dMasteringDisplay *md = picture->mastering_display; if (md) { out->color.hdr.max_luma = pl_fixed24_8(md->max_luminance); out->color.hdr.min_luma = pl_fixed18_14(md->min_luminance); out->color.hdr.prim = (struct pl_raw_primaries) { .red.x = pl_fixed0_16(md->primaries[0][0]), .red.y = pl_fixed0_16(md->primaries[0][1]), .green.x = pl_fixed0_16(md->primaries[1][0]), .green.y = pl_fixed0_16(md->primaries[1][1]), .blue.x = pl_fixed0_16(md->primaries[2][0]), .blue.y = pl_fixed0_16(md->primaries[2][1]), .white.x = pl_fixed0_16(md->white_point[0]), .white.y = pl_fixed0_16(md->white_point[1]), }; } if (picture->frame_hdr->film_grain.present) { const Dav1dFilmGrainData *fg = &picture->frame_hdr->film_grain.data; out->film_grain = (struct pl_film_grain_data) { .type = PL_FILM_GRAIN_AV1, .seed = fg->seed, .params.av1 = { .num_points_y = fg->num_y_points, .chroma_scaling_from_luma = fg->chroma_scaling_from_luma, .num_points_uv = { fg->num_uv_points[0], fg->num_uv_points[1] }, .scaling_shift = fg->scaling_shift, .ar_coeff_lag = fg->ar_coeff_lag, .ar_coeff_shift = (int) fg->ar_coeff_shift, .grain_scale_shift = fg->grain_scale_shift, .uv_mult = { fg->uv_mult[0], fg->uv_mult[1] }, .uv_mult_luma = { fg->uv_luma_mult[0], fg->uv_luma_mult[1] }, .uv_offset = { fg->uv_offset[0], fg->uv_offset[1] }, .overlap = fg->overlap_flag, }, }; struct pl_av1_grain_data *av1 = &out->film_grain.params.av1; memcpy(av1->points_y, fg->y_points, sizeof(av1->points_y)); memcpy(av1->points_uv, fg->uv_points, sizeof(av1->points_uv)); memcpy(av1->ar_coeffs_y, fg->ar_coeffs_y, sizeof(av1->ar_coeffs_y)); memcpy(av1->ar_coeffs_uv[0], fg->ar_coeffs_uv[0], sizeof(av1->ar_coeffs_uv[0])); memcpy(av1->ar_coeffs_uv[1], fg->ar_coeffs_uv[1], sizeof(av1->ar_coeffs_uv[1])); } switch (picture->p.layout) { case DAV1D_PIXEL_LAYOUT_I400: case DAV1D_PIXEL_LAYOUT_I444: break; case DAV1D_PIXEL_LAYOUT_I420: case DAV1D_PIXEL_LAYOUT_I422: // Only set the chroma location for definitely subsampled images pl_frame_set_chroma_location(out, pl_chroma_from_dav1d(seq_hdr->chr)); break; } } PL_DAV1D_API void pl_swapchain_colors_from_dav1dpicture(struct pl_swapchain_colors *out_colors, const Dav1dPicture *picture) { struct pl_frame frame; pl_frame_from_dav1dpicture(&frame, picture); *out_colors = (struct pl_swapchain_colors) { .primaries = frame.color.primaries, .transfer = frame.color.transfer, }; const Dav1dContentLightLevel *cll = picture->content_light; if (cll) { out_colors->hdr.max_cll = cll->max_content_light_level; out_colors->hdr.max_fall = cll->max_frame_average_light_level; } const Dav1dMasteringDisplay *md = picture->mastering_display; if (md) { 
out_colors->hdr.min_luma = pl_fixed18_14(md->min_luminance); out_colors->hdr.max_luma = pl_fixed24_8(md->max_luminance); out_colors->hdr.prim.red.x = pl_fixed0_16(md->primaries[0][0]); out_colors->hdr.prim.red.y = pl_fixed0_16(md->primaries[0][1]); out_colors->hdr.prim.green.x = pl_fixed0_16(md->primaries[1][0]); out_colors->hdr.prim.green.y = pl_fixed0_16(md->primaries[1][1]); out_colors->hdr.prim.blue.x = pl_fixed0_16(md->primaries[2][0]); out_colors->hdr.prim.blue.y = pl_fixed0_16(md->primaries[2][1]); out_colors->hdr.prim.white.x = pl_fixed0_16(md->white_point[0]); out_colors->hdr.prim.white.y = pl_fixed0_16(md->white_point[1]); } } #define PL_MAGIC0 0x2c2a1269 #define PL_MAGIC1 0xc6d02577 struct pl_dav1dalloc { uint32_t magic[2]; pl_gpu gpu; pl_buf buf; }; struct pl_dav1dref { Dav1dPicture pic; uint8_t count; }; static void pl_dav1dpicture_unref(void *priv) { struct pl_dav1dref *ref = priv; if (--ref->count == 0) { dav1d_picture_unref(&ref->pic); free(ref); } } PL_DAV1D_API bool pl_upload_dav1dpicture(pl_gpu gpu, struct pl_frame *out, pl_tex tex[3], const struct pl_dav1d_upload_params *params) { Dav1dPicture *pic = params->picture; pl_frame_from_dav1dpicture(out, pic); if (!params->film_grain) out->film_grain.type = PL_FILM_GRAIN_NONE; const int bytes = (pic->p.bpc + 7) / 8; // rounded up int sub_x = 0, sub_y = 0; switch (pic->p.layout) { case DAV1D_PIXEL_LAYOUT_I400: case DAV1D_PIXEL_LAYOUT_I444: break; case DAV1D_PIXEL_LAYOUT_I420: sub_x = sub_y = 1; break; case DAV1D_PIXEL_LAYOUT_I422: sub_x = 1; break; } struct pl_plane_data data[3] = { { // Y plane .type = PL_FMT_UNORM, .width = pic->p.w, .height = pic->p.h, .pixel_stride = bytes, .component_size = {bytes * 8}, .component_map = {0}, }, { // U plane .type = PL_FMT_UNORM, .width = pic->p.w >> sub_x, .height = pic->p.h >> sub_y, .pixel_stride = bytes, .component_size = {bytes * 8}, .component_map = {1}, }, { // V plane .type = PL_FMT_UNORM, .width = pic->p.w >> sub_x, .height = pic->p.h >> sub_y, .pixel_stride = bytes, .component_size = {bytes * 8}, .component_map = {2}, }, }; pl_buf buf = NULL; struct pl_dav1dalloc *alloc = params->gpu_allocated ? pic->allocator_data : NULL; struct pl_dav1dref *ref = NULL; if (alloc && alloc->magic[0] == PL_MAGIC0 && alloc->magic[1] == PL_MAGIC1) { // Re-use pre-allocated buffers directly assert(alloc->gpu == gpu); buf = alloc->buf; } else if (params->asynchronous && gpu->limits.callbacks) { ref = malloc(sizeof(*ref)); if (!ref) return false; memcpy(&ref->pic, pic, sizeof(Dav1dPicture)); ref->count = out->num_planes; } for (int p = 0; p < out->num_planes; p++) { ptrdiff_t stride = p > 0 ? 
pic->stride[1] : pic->stride[0]; if (stride < 0) { data[p].pixels = (uint8_t *) pic->data[p] + stride * (data[p].height - 1); data[p].row_stride = -stride; out->planes[p].flipped = true; } else { data[p].pixels = pic->data[p]; data[p].row_stride = stride; } if (buf) { data[p].buf = buf; data[p].buf_offset = (uintptr_t) data[p].pixels - (uintptr_t) buf->data; data[p].pixels = NULL; } else if (ref) { data[p].priv = ref; data[p].callback = pl_dav1dpicture_unref; } if (!pl_upload_plane(gpu, &out->planes[p], &tex[p], &data[p])) { free(ref); return false; } } if (params->asynchronous) { if (ref) { *pic = (Dav1dPicture) {0}; } else { dav1d_picture_unref(pic); } } return true; } PL_DAV1D_API int pl_allocate_dav1dpicture(Dav1dPicture *p, void *cookie) { pl_gpu gpu = cookie; if (!gpu->limits.max_mapped_size || !gpu->limits.host_cached || !gpu->limits.buf_transfer) { return DAV1D_ERR(ENOTSUP); } // Copied from dav1d_default_picture_alloc const int hbd = p->p.bpc > 8; const int aligned_w = PL_ALIGN2(p->p.w, 128); const int aligned_h = PL_ALIGN2(p->p.h, 128); const int has_chroma = p->p.layout != DAV1D_PIXEL_LAYOUT_I400; const int ss_ver = p->p.layout == DAV1D_PIXEL_LAYOUT_I420; const int ss_hor = p->p.layout != DAV1D_PIXEL_LAYOUT_I444; p->stride[0] = aligned_w << hbd; p->stride[1] = has_chroma ? (aligned_w >> ss_hor) << hbd : 0; // Align strides up to multiples of the GPU performance hints p->stride[0] = PL_ALIGN2(p->stride[0], gpu->limits.align_tex_xfer_pitch); p->stride[1] = PL_ALIGN2(p->stride[1], gpu->limits.align_tex_xfer_pitch); // Aligning offsets to 4 also implicitly aligns to the texel alignment (1 or 2) size_t off_align = PL_ALIGN2(gpu->limits.align_tex_xfer_offset, 4); const size_t y_sz = PL_ALIGN2(p->stride[0] * aligned_h, off_align); const size_t uv_sz = PL_ALIGN2(p->stride[1] * (aligned_h >> ss_ver), off_align); // The extra DAV1D_PICTURE_ALIGNMENTs are to brute force plane alignment, // even in the case that the driver gives us insane alignments const size_t pic_size = y_sz + 2 * uv_sz; const size_t total_size = pic_size + DAV1D_PICTURE_ALIGNMENT * 4; // Validate size limitations if (total_size > gpu->limits.max_mapped_size) return DAV1D_ERR(ENOMEM); pl_buf buf = pl_buf_create(gpu, pl_buf_params( .size = total_size, .host_mapped = true, .memory_type = PL_BUF_MEM_HOST, )); if (!buf) return DAV1D_ERR(ENOMEM); struct pl_dav1dalloc *alloc = malloc(sizeof(struct pl_dav1dalloc)); if (!alloc) { pl_buf_destroy(gpu, &buf); return DAV1D_ERR(ENOMEM); } *alloc = (struct pl_dav1dalloc) { .magic = { PL_MAGIC0, PL_MAGIC1 }, .gpu = gpu, .buf = buf, }; assert(buf->data); uintptr_t base = (uintptr_t) buf->data, data[3]; data[0] = PL_ALIGN2(base, DAV1D_PICTURE_ALIGNMENT); data[1] = PL_ALIGN2(data[0] + y_sz, DAV1D_PICTURE_ALIGNMENT); data[2] = PL_ALIGN2(data[1] + uv_sz, DAV1D_PICTURE_ALIGNMENT); p->allocator_data = alloc; p->data[0] = (void *) data[0]; p->data[1] = (void *) data[1]; p->data[2] = (void *) data[2]; return 0; } PL_DAV1D_API void pl_release_dav1dpicture(Dav1dPicture *p, void *cookie) { struct pl_dav1dalloc *alloc = p->allocator_data; if (!alloc) return; assert(alloc->magic[0] == PL_MAGIC0); assert(alloc->magic[1] == PL_MAGIC1); assert(alloc->gpu == cookie); pl_buf_destroy(alloc->gpu, &alloc->buf); free(alloc); p->data[0] = p->data[1] = p->data[2] = p->allocator_data = NULL; } #undef PL_ALIGN2 #undef PL_MAGIC0 #undef PL_MAGIC1 #endif // LIBPLACEBO_DAV1D_H_ 
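/*
 * Illustrative usage sketch (not part of the API): one way to wire the
 * allocator callbacks above into dav1d and upload decoded pictures. Assumes a
 * previously created `pl_gpu gpu` whose `limits.thread_safe` is set (required
 * for direct use as a Dav1dPicAllocator, see above); `settings`, `pic`,
 * `frame` and `tex` are hypothetical application state. Kept inside `#if 0`
 * since it is an example only.
 */
#if 0
static void example_setup_allocator(Dav1dSettings *settings, pl_gpu gpu)
{
    // Route dav1d's picture allocations through persistently mapped buffers
    settings->allocator = (Dav1dPicAllocator) {
        .cookie                   = (void *) gpu,
        .alloc_picture_callback   = pl_allocate_dav1dpicture,
        .release_picture_callback = pl_release_dav1dpicture,
    };
}

static bool example_upload(pl_gpu gpu, Dav1dPicture *pic,
                           struct pl_frame *frame, pl_tex tex[3])
{
    // Uploads the picture; on success the plane textures are (re)created in
    // `tex` and referenced by `frame`, and `pic` is cleared/unref'd internally
    return pl_upload_dav1dpicture(gpu, frame, tex, pl_dav1d_upload_params(
        .picture       = pic,
        .gpu_allocated = true, // allocated via pl_allocate_dav1dpicture above
        .asynchronous  = true,
    ));
}
#endif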
libplacebo-v7.349.0/src/include/libplacebo/utils/dolbyvision.h000066400000000000000000000022721463457750100243570ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #ifndef LIBPLACEBO_DOLBYVISION_H_ #define LIBPLACEBO_DOLBYVISION_H_ #include PL_API_BEGIN // Parses the Dolby Vision RPU, and sets the `pl_hdr_metadata` dynamic // brightness metadata fields accordingly. // // Note: requires `PL_HAVE_LIBDOVI` to be defined, no-op otherwise. PL_API void pl_hdr_metadata_from_dovi_rpu(struct pl_hdr_metadata *out, const uint8_t *buf, size_t size); PL_API_END #endif // LIBPLACEBO_DOLBYVISION_H_ libplacebo-v7.349.0/src/include/libplacebo/utils/frame_queue.h000066400000000000000000000265621463457750100243240ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #ifndef LIBPLACEBO_FRAME_QUEUE_H #define LIBPLACEBO_FRAME_QUEUE_H #include #include PL_API_BEGIN // An abstraction layer for automatically turning a conceptual stream of // (frame, pts) pairs, as emitted by a decoder or filter graph, into a // `pl_frame_mix` suitable for `pl_render_image_mix`. // // This API ensures that minimal work is performed (e.g. only mapping frames // that are actually required), while also satisfying the requirements // of any configured frame mixer. // // Thread-safety: Safe typedef struct pl_queue_t *pl_queue; enum pl_queue_status { PL_QUEUE_OK, // success PL_QUEUE_EOF, // no more frames are available PL_QUEUE_MORE, // more frames needed, but not (yet) available PL_QUEUE_ERR = -1, // some unknown error occurred while retrieving frames }; struct pl_source_frame { // The frame's presentation timestamp, in seconds relative to the first // frame. These must be monotonically increasing for subsequent frames. // To implement a discontinuous jump, users must explicitly reset the // frame queue with `pl_queue_reset` and restart from PTS 0.0. double pts; // The frame's duration. This is not needed in normal scenarios, as the // FPS can be inferred from the `pts` values themselves. Providing it // only helps initialize the value for initial frames, which can smooth // out the interpolation weights. Its use is also highly recommended // when displaying interlaced frames. 
(Optional) float duration; // If set to something other than PL_FIELD_NONE, this source frame is // marked as interlaced. It will be split up into two separate frames // internally, and exported to the resulting `pl_frame_mix` as a pair of // fields, referencing the corresponding previous and next frames. The // first field will have the same PTS as `pts`, and the second field will // be inserted at the timestamp `pts + duration/2`. // // Note: As a result of FPS estimates being unreliable around streams with // mixed FPS (or when mixing interlaced and progressive frames), it's // highly recommended to always specify a valid `duration` for interlaced // frames. enum pl_field first_field; // Abstract frame data itself. To allow mapping frames only when they're // actually needed, frames use a lazy representation. The provided // callbacks will be invoked to interface with it. void *frame_data; // This will be called to map the frame to the GPU, only if needed. // // `tex` is a pointer to an array of 4 texture objects (or NULL), which // *may* serve as backing storage for the texture being mapped. These are // intended to be recreated by `map`, e.g. using `pl_tex_recreate` or // `pl_upload_plane` as appropriate. They will be managed internally by // `pl_queue` and destroyed at some unspecified future point in time. // // Note: If `map` fails, it will not be retried, nor will `discard` be run. // The user should clean up state in this case. bool (*map)(pl_gpu gpu, pl_tex *tex, const struct pl_source_frame *src, struct pl_frame *out_frame); // If present, this will be called on frames that are done being used by // `pl_queue`. This may be useful to e.g. unmap textures backed by external // APIs such as hardware decoders. (Optional) void (*unmap)(pl_gpu gpu, struct pl_frame *frame, const struct pl_source_frame *src); // This function will be called for frames that are deemed unnecessary // (e.g. never became visible) and should instead be cleanly freed. // (Optional) void (*discard)(const struct pl_source_frame *src); }; // Create a new, empty frame queue. // // It's highly recommended to fully render a single frame with `pts == 0.0`, // and flush the GPU pipeline with `pl_gpu_finish`, prior to starting the timed // playback loop. PL_API pl_queue pl_queue_create(pl_gpu gpu); PL_API void pl_queue_destroy(pl_queue *queue); // Explicitly clear the queue. This is essentially equivalent to destroying // and recreating the queue, but preserves any internal memory allocations. // // Note: Calling `pl_queue_reset` may block, if another thread is currently // blocked on a different `pl_queue_*` call. PL_API void pl_queue_reset(pl_queue queue); // Explicitly push a frame. This is an alternative way to feed the frame queue // with incoming frames, the other method being the asynchronous callback // specified as `pl_queue_params.get_frame`. Both methods may be used // simultaneously, although providing `get_frame` is recommended since it // avoids the risk of the queue underrunning. // // When no more frames are available, call this function with `frame == NULL` // to indicate EOF and begin draining the frame queue. PL_API void pl_queue_push(pl_queue queue, const struct pl_source_frame *frame); // Variant of `pl_queue_push` that blocks while the queue is judged // (internally) to be "too full". This is useful for asynchronous decoder loops // in order to prevent the queue from exhausting available RAM if frames are // decoded significantly faster than they're displayed. 
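//
// For example (illustrative sketch only; `decode_next`, `my_map` and
// `my_discard` are hypothetical user-provided functions), a decoder thread
// could feed the queue like this:
//
//     struct pl_source_frame src;
//     while (decode_next(&src)) {          // fills pts, duration, frame_data
//         src.map     = my_map;
//         src.discard = my_discard;
//         // wait up to 100 ms (timeout is in nanoseconds) while too full
//         while (!pl_queue_push_block(queue, 100000000, &src)) {
//             // timed out; check for shutdown, then retry
//         }
//     }
//     pl_queue_push(queue, NULL);          // signal EOF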
// // The given `timeout` parameter specifies how long to wait before giving up, // in nanoseconds. Returns false if this timeout was reached. PL_API bool pl_queue_push_block(pl_queue queue, uint64_t timeout, const struct pl_source_frame *frame); struct pl_queue_params { // The PTS of the frame that will be rendered. This should be set to the // timestamp (in seconds) of the next vsync, relative to the initial frame. // // These must be monotonically increasing. To implement a discontinuous // jump, users must explicitly reset the frame queue with `pl_queue_reset` // and restart from PTS 0.0. double pts; // The radius of the configured mixer. This should be set to the value // as returned by `pl_frame_mix_radius`. float radius; // The estimated duration of a vsync, in seconds. This will only be used as // a hint, the true value will be estimated by comparing `pts` timestamps // between calls to `pl_queue_update`. (Optional) float vsync_duration; // If the difference between `pts` and the closest frame is smaller than // this delta (in seconds), the mismatch will be assumed as drift/jitter // and dynamically subtracted from all future pl_queue_update calls, until // the queue is either reset or the PTS jumps by a large amount. (Optional) float drift_compensation; // If the difference between the (estimated) vsync duration and the // (measured) frame duration is smaller than this threshold, silently // disable interpolation and switch to ZOH semantics instead. // // For example, a value of 0.01 allows the FPS to differ by up to 1% // without being interpolated. Note that this will result in a continuous // phase drift unless also compensated for by the user, which will // eventually resulted in a dropped or duplicated frame. (Though this can // be preferable to seeing that same phase drift result in a temporally // smeared image) float interpolation_threshold; // Specifies how long `pl_queue_update` will wait for frames to become // available, in nanoseconds, before giving up and returning with // QUEUE_MORE. // // If `get_frame` is provided, this value is ignored by `pl_queue` and // should instead be interpreted by the provided callback. uint64_t timeout; // This callback will be used to pull new frames from the decoder. It may // block if needed. The user is responsible for setting appropriate time // limits and/or returning and interpreting QUEUE_MORE as sensible. // // Providing this callback is entirely optional. Users can instead choose // to manually feed the frame queue with new frames using `pl_queue_push`. enum pl_queue_status (*get_frame)(struct pl_source_frame *out_frame, const struct pl_queue_params *params); void *priv; }; #define PL_QUEUE_DEFAULTS \ .drift_compensation = 1e-3, #define pl_queue_params(...) (&(struct pl_queue_params) { PL_QUEUE_DEFAULTS __VA_ARGS__ }) // Advance the frame queue's internal state to the target timestamp. Any frames // which are no longer needed (i.e. too far in the past) are automatically // unmapped and evicted. Any future frames which are needed to fill the queue // must either have been pushed in advance, or will be requested using the // provided `get_frame` callback. If you call this on `out_mix == NULL`, the // queue state will advance, but no frames will be mapped. // // This function may return with PL_QUEUE_MORE, in which case the user may wish // to ensure more frames are available and then re-run this function with the // same parameters. 
In this case, `out_mix` is still written to, but it may be // incomplete (or even contain no frames at all). Additionally, when the source // contains interlaced frames (see `pl_source_frame.first_field`), this // function may return with PL_QUEUE_MORE if a frame is missing references to // a future frame. // // The resulting mix of frames in `out_mix` will represent the neighbourhood of // the target timestamp, and can be passed to `pl_render_image_mix` as-is. // // Note: `out_mix` will only remain valid until the next call to // `pl_queue_update` or `pl_queue_reset`. PL_API enum pl_queue_status pl_queue_update(pl_queue queue, struct pl_frame_mix *out_mix, const struct pl_queue_params *params); // Returns a pl_queue's internal estimates for FPS and VPS (vsyncs per second). // Returns 0.0 if no estimate is available. PL_API float pl_queue_estimate_fps(pl_queue queue); PL_API float pl_queue_estimate_vps(pl_queue queue); // Returns the number of frames currently contained in a pl_queue. PL_API int pl_queue_num_frames(pl_queue queue); // Returns the current PTS offset factor, as determined by the PTS drift // compensation algorithm. This value is added onto all incoming values of // pl_queue_params.pts. PL_API double pl_queue_pts_offset(pl_queue queue); // Inspect the contents of the Nth queued frame. Returns false if `idx` is // out of range. // // Warning: No guarantee is made to ensure validity of `out->frame_data` // after this call. In particular, pl_queue_* calls made from another thread // may call `discard()` on the frame in question. The user bears responsibility // to avoid accessing `out->frame_data` in a multi-threaded scenario unless // an external guarantee can be made that the frame won't be dequeued until // it is done being used by the user. PL_API bool pl_queue_peek(pl_queue queue, int idx, struct pl_source_frame *out); PL_API_END #endif // LIBPLACEBO_FRAME_QUEUE_H libplacebo-v7.349.0/src/include/libplacebo/utils/libav.h000066400000000000000000000335521463457750100231200ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #ifndef LIBPLACEBO_LIBAV_H_ #define LIBPLACEBO_LIBAV_H_ #include #include #include #include #if defined(__cplusplus) && !defined(PL_LIBAV_IMPLEMENTATION) # define PL_LIBAV_API # define PL_LIBAV_IMPLEMENTATION 0 # warning Remember to include this file with a PL_LIBAV_IMPLEMENTATION set to 1 in \ C translation unit to provide implementation. Suppress this warning by \ defining PL_LIBAV_IMPLEMENTATION to 0 in C++ files. 
#elif !defined(PL_LIBAV_IMPLEMENTATION) # define PL_LIBAV_API static inline # define PL_LIBAV_IMPLEMENTATION 1 #else # define PL_LIBAV_API #endif PL_API_BEGIN #include #include #include #include #if LIBAVUTIL_VERSION_INT >= AV_VERSION_INT(57, 16, 100) && defined(PL_HAVE_DOVI) # define PL_HAVE_LAV_DOLBY_VISION # include #endif #if LIBAVUTIL_VERSION_INT >= AV_VERSION_INT(56, 61, 100) # define PL_HAVE_LAV_FILM_GRAIN # include #endif #if LIBAVUTIL_VERSION_INT >= AV_VERSION_INT(56, 25, 100) # define PL_HAVE_LAV_HDR # include # include #endif //------------------------------------------------------------------------ // Important note: For support for AVVkFrame, which depends on , // users *SHOULD* include manually before this header. //------------------------------------------------------------------------ // Fill in the details of a `pl_frame` from an AVFrame. This function will // explicitly clear `out_frame`, setting all extra fields to 0. After this // function returns, the only missing data is information related to the plane // texture itself (`planes[N].texture`), as well as any overlays (e.g. // subtitles). // // Note: If the AVFrame contains an embedded ICC profile or H.274 film grain // metadata, the resulting `out_image->profile` will reference this pointer, // meaning that in general, the `pl_frame` is only guaranteed to be valid as // long as the AVFrame is not freed. // // Note: This will ignore Dolby Vision metadata by default (to avoid leaking // memory), either switch to pl_map_avframe_ex or do it manually using // pl_map_dovi_metadata. PL_LIBAV_API void pl_frame_from_avframe(struct pl_frame *out_frame, const AVFrame *frame); // Deprecated aliases for backwards compatibility #define pl_image_from_avframe pl_frame_from_avframe #define pl_target_from_avframe pl_frame_from_avframe // Copy extra metadata from an AVStream to a pl_frame. This should be called // after `pl_frame_from_avframe` or `pl_map_avframe` (respectively), and sets // metadata associated with stream-level side data. This is needed because // FFmpeg rather annoyingly does not propagate stream-level metadata to frames. PL_LIBAV_API void pl_frame_copy_stream_props(struct pl_frame *out_frame, const AVStream *stream); #ifdef PL_HAVE_LAV_HDR struct pl_av_hdr_metadata { // All fields are optional and may be passed as `NULL`. const AVMasteringDisplayMetadata *mdm; const AVContentLightMetadata *clm; const AVDynamicHDRPlus *dhp; }; // Helper function to update a `pl_hdr_metadata` struct from HDR10/HDR10+ // metadata in the FFmpeg format. Unspecified/invalid elements will be left // uninitialized in `out`. PL_LIBAV_API void pl_map_hdr_metadata(struct pl_hdr_metadata *out, const struct pl_av_hdr_metadata *metadata); #endif #ifdef PL_HAVE_LAV_DOLBY_VISION // Helper function to map Dolby Vision metadata from the FFmpeg format. PL_LIBAV_API void pl_map_dovi_metadata(struct pl_dovi_metadata *out, const AVDOVIMetadata *metadata); // Helper function to map Dolby Vision metadata from the FFmpeg format // to `pl_dovi_metadata`, and adds it to the `pl_frame`. // The `pl_frame` colorspace fields and HDR struct are also updated with // values from the `AVDOVIMetadata`. // // Note: The `pl_dovi_metadata` must be allocated externally. // Also, currently the metadata is only used if the `AVDOVIRpuDataHeader` // `disable_residual_flag` field is not zero and can be checked before allocating. 
PL_DEPRECATED_IN(v7.343) PL_LIBAV_API void pl_frame_map_avdovi_metadata( struct pl_frame *out_frame, struct pl_dovi_metadata *dovi, const AVDOVIMetadata *metadata); // Helper function to map Dolby Vision metadata from the FFmpeg format // to `pl_dovi_metadata`, and adds it to the `pl_color_repr`. // The `pl_color_space` fields and HDR struct are also updated with // values from the `AVDOVIMetadata`. // // Note: The `pl_dovi_metadata` must be allocated externally. // Also, currently the metadata is only used if the `AVDOVIRpuDataHeader` // `disable_residual_flag` field is not zero and can be checked before allocating. PL_LIBAV_API void pl_map_avdovi_metadata(struct pl_color_space *color, struct pl_color_repr *repr, struct pl_dovi_metadata *dovi, const AVDOVIMetadata *metadata); #endif // Helper function to test if a pixfmt would be supported by the GPU. // Essentially, this can be used to check if `pl_map_avframe` would work for a // given AVPixelFormat, without actually uploading or allocating anything. PL_LIBAV_API bool pl_test_pixfmt(pl_gpu gpu, enum AVPixelFormat pixfmt); // Variant of `pl_test_pixfmt` that also tests for the given capabilities // being present. Note that in the presence of hardware accelerated frames, // this cannot be tested without frame-specific information (i.e. swformat), // but in practice this should be a non-issue as GPU-native hwformats will // probably be fully supported. PL_LIBAV_API bool pl_test_pixfmt_caps(pl_gpu gpu, enum AVPixelFormat pixfmt, enum pl_fmt_caps caps); // Like `pl_frame_from_avframe`, but the texture pointers are also initialized // to ensure they have the correct size and format to match the AVframe. // Similar in spirit to `pl_recreate_plane`, and the same notes apply. `tex` // must be an array of 4 pointers of type `pl_tex`, each either // pointing to a valid texture, or NULL. Returns whether successful. PL_LIBAV_API bool pl_frame_recreate_from_avframe(pl_gpu gpu, struct pl_frame *out_frame, pl_tex tex[4], const AVFrame *frame); struct pl_avframe_params { // The AVFrame to map. Required. const AVFrame *frame; // Backing textures for frame data. Required for all non-hwdec formats. // This must point to an array of four valid textures (or NULL entries). // // Note: Not cleaned up by `pl_unmap_avframe`. The intent is for users to // re-use this texture array for subsequent frames, to avoid texture // creation/destruction overhead. pl_tex *tex; // Also map Dolby Vision metadata (if supported). Note that this also // overrides the colorimetry metadata (forces BT.2020+PQ). bool map_dovi; }; #define PL_AVFRAME_DEFAULTS \ .map_dovi = true, #define pl_avframe_params(...) (&(struct pl_avframe_params) { PL_AVFRAME_DEFAULTS __VA_ARGS__ }) // Very high level helper function to take an `AVFrame` and map it to the GPU. // The resulting `pl_frame` remains valid until `pl_unmap_avframe` is called, // which must be called at some point to clean up state. The `AVFrame` is // automatically ref'd and unref'd if needed. Returns whether successful. // // Note: `out_frame->user_data` points to a privately managed opaque struct // and must not be touched by the user. PL_LIBAV_API bool pl_map_avframe_ex(pl_gpu gpu, struct pl_frame *out_frame, const struct pl_avframe_params *params); PL_LIBAV_API void pl_unmap_avframe(pl_gpu gpu, struct pl_frame *frame); // Backwards compatibility with previous versions of this API. 
PL_LIBAV_API bool pl_map_avframe(pl_gpu gpu, struct pl_frame *out_frame, pl_tex tex[4], const AVFrame *avframe); // Return the AVFrame* that a pl_frame was mapped from (via pl_map_avframe_ex) // Note: This reference is attached to the `pl_frame` and will get freed by // pl_unmap_avframe. PL_LIBAV_API AVFrame *pl_get_mapped_avframe(const struct pl_frame *frame); // Download the texture contents of a `pl_frame` back to a corresponding // AVFrame. Blocks until completion. // // Note: This function performs minimal verification, so incorrect usage will // likely result in broken frames. Use `pl_frame_recreate_from_avframe` to // ensure matching formats. PL_LIBAV_API bool pl_download_avframe(pl_gpu gpu, const struct pl_frame *frame, AVFrame *out_frame); // Helper functions to update the colorimetry data in an AVFrame based on // the values specified in the given color space / color repr / profile. // // Note: These functions can and will allocate AVFrame side data if needed, // in particular to encode HDR metadata in `space.hdr`. PL_LIBAV_API void pl_avframe_set_color(AVFrame *frame, struct pl_color_space space); PL_LIBAV_API void pl_avframe_set_repr(AVFrame *frame, struct pl_color_repr repr); PL_LIBAV_API void pl_avframe_set_profile(AVFrame *frame, struct pl_icc_profile profile); // Map an AVPixelFormat to an array of pl_plane_data structs. The array must // have at least `av_pix_fmt_count_planes(fmt)` elements, but never more than // 4. This function leaves `width`, `height` and `row_stride`, as well as the // data pointers, uninitialized. // // If `bits` is non-NULL, this function will attempt aligning the resulting // `pl_plane_data` struct for optimal compatibility, placing the resulting // `pl_bit_depth` metadata into `bits`. // // Returns the number of plane structs written to, or 0 on error. // // Note: This function is usually clumsier to use than the higher-level // functions above, but it might have some fringe use cases, for example if // the user wants to replace the data buffers by `pl_buf` references in the // `pl_plane_data` before uploading it to the GPU. PL_LIBAV_API int pl_plane_data_from_pixfmt(struct pl_plane_data data[4], struct pl_bit_encoding *bits, enum AVPixelFormat pix_fmt); // Callback for AVCodecContext.get_buffer2 that allocates memory from // persistently mapped buffers. This can be more efficient than regular // system memory, especially on platforms that don't support importing // PL_HANDLE_HOST_PTR as buffers. // // Note: `avctx->opaque` must be a pointer that *points* to the GPU instance. // That is, it should have type `pl_gpu *`. PL_LIBAV_API int pl_get_buffer2(AVCodecContext *avctx, AVFrame *pic, int flags); // Mapping functions for the various libavutil enums. Note that these are not // quite 1:1, and even for values that exist in both, the semantics sometimes // differ. Some special cases (e.g. ICtCp, or XYZ) are handled differently in // libplacebo and libavutil, respectively. // // Because of this, it's generally recommended to avoid these and instead use // helpers like `pl_frame_from_avframe`, which contain extra logic to patch // through all of the special cases. 
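//
// For example (illustrative sketch; `avframe` is assumed to be a decoded
// AVFrame), translating individual enums directly looks like this:
//
//     struct pl_color_repr repr = {
//         .sys    = pl_system_from_av(avframe->colorspace),
//         .levels = pl_levels_from_av(avframe->color_range),
//     };
//
// whereas the higher-level helpers (`pl_frame_from_avframe`,
// `pl_map_avframe_ex`) fill in the same fields, including the special cases
// mentioned above, automatically.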
PL_LIBAV_API enum pl_color_system pl_system_from_av(enum AVColorSpace spc); PL_LIBAV_API enum AVColorSpace pl_system_to_av(enum pl_color_system sys); PL_LIBAV_API enum pl_color_levels pl_levels_from_av(enum AVColorRange range); PL_LIBAV_API enum AVColorRange pl_levels_to_av(enum pl_color_levels levels); PL_LIBAV_API enum pl_color_primaries pl_primaries_from_av(enum AVColorPrimaries prim); PL_LIBAV_API enum AVColorPrimaries pl_primaries_to_av(enum pl_color_primaries prim); PL_LIBAV_API enum pl_color_transfer pl_transfer_from_av(enum AVColorTransferCharacteristic trc); PL_LIBAV_API enum AVColorTransferCharacteristic pl_transfer_to_av(enum pl_color_transfer trc); PL_LIBAV_API enum pl_chroma_location pl_chroma_from_av(enum AVChromaLocation loc); PL_LIBAV_API enum AVChromaLocation pl_chroma_to_av(enum pl_chroma_location loc); // Helper function to generate a `pl_color_space` struct from an AVFrame. PL_LIBAV_API void pl_color_space_from_avframe(struct pl_color_space *out_csp, const AVFrame *frame); // Helper function to pick the right `pl_field` value for an AVFrame. PL_LIBAV_API enum pl_field pl_field_from_avframe(const AVFrame *frame); #ifdef PL_HAVE_LAV_FILM_GRAIN // Fill in film grain parameters from an AVFilmGrainParams. // // Note: The resulting struct will only remain valid as long as the // `AVFilmGrainParams` remains valid. PL_LIBAV_API void pl_film_grain_from_av(struct pl_film_grain_data *out_data, const AVFilmGrainParams *fgp); #endif // Deprecated alias for backwards compatibility #define pl_swapchain_colors_from_avframe pl_color_space_from_avframe // Actual implementation, included as part of this header to avoid having // a compile-time dependency on libavutil. #if PL_LIBAV_IMPLEMENTATION # include #endif PL_API_END #endif // LIBPLACEBO_LIBAV_H_ libplacebo-v7.349.0/src/include/libplacebo/utils/libav_internal.h000066400000000000000000001545611463457750100250200ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . 
*/ #ifndef LIBPLACEBO_LIBAV_H_ #error This header should be included as part of #elif defined(__cplusplus) #error This header cannot be included from C++ define PL_LIBAV_IMPLEMENTATION appropriately #else #include #include #include #include #include #include #include #include // Try importing dynamically if it wasn't already #if !defined(VK_API_VERSION_1_2) && defined(__has_include) # if __has_include() # include # endif #endif #if LIBAVUTIL_VERSION_INT >= AV_VERSION_INT(57, 8, 100) && \ defined(PL_HAVE_VULKAN) && defined(VK_API_VERSION_1_2) # define PL_HAVE_LAV_VULKAN # include # include # if LIBAVUTIL_VERSION_INT >= AV_VERSION_INT(58, 11, 100) # define PL_HAVE_LAV_VULKAN_V2 # endif #endif PL_LIBAV_API enum pl_color_system pl_system_from_av(enum AVColorSpace spc) { switch (spc) { case AVCOL_SPC_RGB: return PL_COLOR_SYSTEM_RGB; case AVCOL_SPC_BT709: return PL_COLOR_SYSTEM_BT_709; case AVCOL_SPC_UNSPECIFIED: return PL_COLOR_SYSTEM_UNKNOWN; case AVCOL_SPC_RESERVED: return PL_COLOR_SYSTEM_UNKNOWN; case AVCOL_SPC_FCC: return PL_COLOR_SYSTEM_UNKNOWN; // missing case AVCOL_SPC_BT470BG: return PL_COLOR_SYSTEM_BT_601; case AVCOL_SPC_SMPTE170M: return PL_COLOR_SYSTEM_BT_601; case AVCOL_SPC_SMPTE240M: return PL_COLOR_SYSTEM_SMPTE_240M; case AVCOL_SPC_YCGCO: return PL_COLOR_SYSTEM_YCGCO; case AVCOL_SPC_BT2020_NCL: return PL_COLOR_SYSTEM_BT_2020_NC; case AVCOL_SPC_BT2020_CL: return PL_COLOR_SYSTEM_BT_2020_C; case AVCOL_SPC_SMPTE2085: return PL_COLOR_SYSTEM_UNKNOWN; // missing case AVCOL_SPC_CHROMA_DERIVED_NCL: return PL_COLOR_SYSTEM_UNKNOWN; // missing case AVCOL_SPC_CHROMA_DERIVED_CL: return PL_COLOR_SYSTEM_UNKNOWN; // missing // Note: this colorspace is confused between PQ and HLG, which libav* // requires inferring from other sources, but libplacebo makes explicit. // Default to PQ as it's the more common scenario. 
case AVCOL_SPC_ICTCP: return PL_COLOR_SYSTEM_BT_2100_PQ; case AVCOL_SPC_NB: return PL_COLOR_SYSTEM_COUNT; } return PL_COLOR_SYSTEM_UNKNOWN; } PL_LIBAV_API enum AVColorSpace pl_system_to_av(enum pl_color_system sys) { switch (sys) { case PL_COLOR_SYSTEM_UNKNOWN: return AVCOL_SPC_UNSPECIFIED; case PL_COLOR_SYSTEM_BT_601: return AVCOL_SPC_SMPTE170M; case PL_COLOR_SYSTEM_BT_709: return AVCOL_SPC_BT709; case PL_COLOR_SYSTEM_SMPTE_240M: return AVCOL_SPC_SMPTE240M; case PL_COLOR_SYSTEM_BT_2020_NC: return AVCOL_SPC_BT2020_NCL; case PL_COLOR_SYSTEM_BT_2020_C: return AVCOL_SPC_BT2020_CL; case PL_COLOR_SYSTEM_BT_2100_PQ: return AVCOL_SPC_ICTCP; case PL_COLOR_SYSTEM_BT_2100_HLG: return AVCOL_SPC_ICTCP; case PL_COLOR_SYSTEM_DOLBYVISION: return AVCOL_SPC_UNSPECIFIED; // missing case PL_COLOR_SYSTEM_YCGCO: return AVCOL_SPC_YCGCO; case PL_COLOR_SYSTEM_RGB: return AVCOL_SPC_RGB; case PL_COLOR_SYSTEM_XYZ: return AVCOL_SPC_UNSPECIFIED; // handled differently case PL_COLOR_SYSTEM_COUNT: return AVCOL_SPC_NB; } return AVCOL_SPC_UNSPECIFIED; } PL_LIBAV_API enum pl_color_levels pl_levels_from_av(enum AVColorRange range) { switch (range) { case AVCOL_RANGE_UNSPECIFIED: return PL_COLOR_LEVELS_UNKNOWN; case AVCOL_RANGE_MPEG: return PL_COLOR_LEVELS_LIMITED; case AVCOL_RANGE_JPEG: return PL_COLOR_LEVELS_FULL; case AVCOL_RANGE_NB: return PL_COLOR_LEVELS_COUNT; } return PL_COLOR_LEVELS_UNKNOWN; } PL_LIBAV_API enum AVColorRange pl_levels_to_av(enum pl_color_levels levels) { switch (levels) { case PL_COLOR_LEVELS_UNKNOWN: return AVCOL_RANGE_UNSPECIFIED; case PL_COLOR_LEVELS_LIMITED: return AVCOL_RANGE_MPEG; case PL_COLOR_LEVELS_FULL: return AVCOL_RANGE_JPEG; case PL_COLOR_LEVELS_COUNT: return AVCOL_RANGE_NB; } return AVCOL_RANGE_UNSPECIFIED; } PL_LIBAV_API enum pl_color_primaries pl_primaries_from_av(enum AVColorPrimaries prim) { switch (prim) { case AVCOL_PRI_RESERVED0: return PL_COLOR_PRIM_UNKNOWN; case AVCOL_PRI_BT709: return PL_COLOR_PRIM_BT_709; case AVCOL_PRI_UNSPECIFIED: return PL_COLOR_PRIM_UNKNOWN; case AVCOL_PRI_RESERVED: return PL_COLOR_PRIM_UNKNOWN; case AVCOL_PRI_BT470M: return PL_COLOR_PRIM_BT_470M; case AVCOL_PRI_BT470BG: return PL_COLOR_PRIM_BT_601_625; case AVCOL_PRI_SMPTE170M: return PL_COLOR_PRIM_BT_601_525; case AVCOL_PRI_SMPTE240M: return PL_COLOR_PRIM_BT_601_525; case AVCOL_PRI_FILM: return PL_COLOR_PRIM_FILM_C; case AVCOL_PRI_BT2020: return PL_COLOR_PRIM_BT_2020; case AVCOL_PRI_SMPTE428: return PL_COLOR_PRIM_CIE_1931; case AVCOL_PRI_SMPTE431: return PL_COLOR_PRIM_DCI_P3; case AVCOL_PRI_SMPTE432: return PL_COLOR_PRIM_DISPLAY_P3; case AVCOL_PRI_JEDEC_P22: return PL_COLOR_PRIM_EBU_3213; case AVCOL_PRI_NB: return PL_COLOR_PRIM_COUNT; } return PL_COLOR_PRIM_UNKNOWN; } PL_LIBAV_API enum AVColorPrimaries pl_primaries_to_av(enum pl_color_primaries prim) { switch (prim) { case PL_COLOR_PRIM_UNKNOWN: return AVCOL_PRI_UNSPECIFIED; case PL_COLOR_PRIM_BT_601_525: return AVCOL_PRI_SMPTE170M; case PL_COLOR_PRIM_BT_601_625: return AVCOL_PRI_BT470BG; case PL_COLOR_PRIM_BT_709: return AVCOL_PRI_BT709; case PL_COLOR_PRIM_BT_470M: return AVCOL_PRI_BT470M; case PL_COLOR_PRIM_EBU_3213: return AVCOL_PRI_JEDEC_P22; case PL_COLOR_PRIM_BT_2020: return AVCOL_PRI_BT2020; case PL_COLOR_PRIM_APPLE: return AVCOL_PRI_UNSPECIFIED; // missing case PL_COLOR_PRIM_ADOBE: return AVCOL_PRI_UNSPECIFIED; // missing case PL_COLOR_PRIM_PRO_PHOTO: return AVCOL_PRI_UNSPECIFIED; // missing case PL_COLOR_PRIM_CIE_1931: return AVCOL_PRI_SMPTE428; case PL_COLOR_PRIM_DCI_P3: return AVCOL_PRI_SMPTE431; case PL_COLOR_PRIM_DISPLAY_P3: return 
AVCOL_PRI_SMPTE432; case PL_COLOR_PRIM_V_GAMUT: return AVCOL_PRI_UNSPECIFIED; // missing case PL_COLOR_PRIM_S_GAMUT: return AVCOL_PRI_UNSPECIFIED; // missing case PL_COLOR_PRIM_FILM_C: return AVCOL_PRI_FILM; case PL_COLOR_PRIM_ACES_AP0: return AVCOL_PRI_UNSPECIFIED; // missing case PL_COLOR_PRIM_ACES_AP1: return AVCOL_PRI_UNSPECIFIED; // missing case PL_COLOR_PRIM_COUNT: return AVCOL_PRI_NB; } return AVCOL_PRI_UNSPECIFIED; } PL_LIBAV_API enum pl_color_transfer pl_transfer_from_av(enum AVColorTransferCharacteristic trc) { switch (trc) { case AVCOL_TRC_RESERVED0: return PL_COLOR_TRC_UNKNOWN; case AVCOL_TRC_BT709: return PL_COLOR_TRC_BT_1886; // EOTF != OETF case AVCOL_TRC_UNSPECIFIED: return PL_COLOR_TRC_UNKNOWN; case AVCOL_TRC_RESERVED: return PL_COLOR_TRC_UNKNOWN; case AVCOL_TRC_GAMMA22: return PL_COLOR_TRC_GAMMA22; case AVCOL_TRC_GAMMA28: return PL_COLOR_TRC_GAMMA28; case AVCOL_TRC_SMPTE170M: return PL_COLOR_TRC_BT_1886; // EOTF != OETF case AVCOL_TRC_SMPTE240M: return PL_COLOR_TRC_BT_1886; // EOTF != OETF case AVCOL_TRC_LINEAR: return PL_COLOR_TRC_LINEAR; case AVCOL_TRC_LOG: return PL_COLOR_TRC_UNKNOWN; // missing case AVCOL_TRC_LOG_SQRT: return PL_COLOR_TRC_UNKNOWN; // missing case AVCOL_TRC_IEC61966_2_4: return PL_COLOR_TRC_BT_1886; // EOTF != OETF case AVCOL_TRC_BT1361_ECG: return PL_COLOR_TRC_BT_1886; // ETOF != OETF case AVCOL_TRC_IEC61966_2_1: return PL_COLOR_TRC_SRGB; case AVCOL_TRC_BT2020_10: return PL_COLOR_TRC_BT_1886; // EOTF != OETF case AVCOL_TRC_BT2020_12: return PL_COLOR_TRC_BT_1886; // EOTF != OETF case AVCOL_TRC_SMPTE2084: return PL_COLOR_TRC_PQ; case AVCOL_TRC_SMPTE428: return PL_COLOR_TRC_ST428; case AVCOL_TRC_ARIB_STD_B67: return PL_COLOR_TRC_HLG; case AVCOL_TRC_NB: return PL_COLOR_TRC_COUNT; } return PL_COLOR_TRC_UNKNOWN; } PL_LIBAV_API enum AVColorTransferCharacteristic pl_transfer_to_av(enum pl_color_transfer trc) { switch (trc) { case PL_COLOR_TRC_UNKNOWN: return AVCOL_TRC_UNSPECIFIED; case PL_COLOR_TRC_BT_1886: return AVCOL_TRC_BT709; // EOTF != OETF case PL_COLOR_TRC_SRGB: return AVCOL_TRC_IEC61966_2_1; case PL_COLOR_TRC_LINEAR: return AVCOL_TRC_LINEAR; case PL_COLOR_TRC_GAMMA18: return AVCOL_TRC_UNSPECIFIED; // missing case PL_COLOR_TRC_GAMMA20: return AVCOL_TRC_UNSPECIFIED; // missing case PL_COLOR_TRC_GAMMA22: return AVCOL_TRC_GAMMA22; case PL_COLOR_TRC_GAMMA24: return AVCOL_TRC_UNSPECIFIED; // missing case PL_COLOR_TRC_GAMMA26: return AVCOL_TRC_UNSPECIFIED; // missing case PL_COLOR_TRC_GAMMA28: return AVCOL_TRC_GAMMA28; case PL_COLOR_TRC_ST428: return AVCOL_TRC_SMPTE428; case PL_COLOR_TRC_PRO_PHOTO: return AVCOL_TRC_UNSPECIFIED; // missing case PL_COLOR_TRC_PQ: return AVCOL_TRC_SMPTE2084; case PL_COLOR_TRC_HLG: return AVCOL_TRC_ARIB_STD_B67; case PL_COLOR_TRC_V_LOG: return AVCOL_TRC_UNSPECIFIED; // missing case PL_COLOR_TRC_S_LOG1: return AVCOL_TRC_UNSPECIFIED; // missing case PL_COLOR_TRC_S_LOG2: return AVCOL_TRC_UNSPECIFIED; // missing case PL_COLOR_TRC_COUNT: return AVCOL_TRC_NB; } return AVCOL_TRC_UNSPECIFIED; } PL_LIBAV_API enum pl_chroma_location pl_chroma_from_av(enum AVChromaLocation loc) { switch (loc) { case AVCHROMA_LOC_UNSPECIFIED: return PL_CHROMA_UNKNOWN; case AVCHROMA_LOC_LEFT: return PL_CHROMA_LEFT; case AVCHROMA_LOC_CENTER: return PL_CHROMA_CENTER; case AVCHROMA_LOC_TOPLEFT: return PL_CHROMA_TOP_LEFT; case AVCHROMA_LOC_TOP: return PL_CHROMA_TOP_CENTER; case AVCHROMA_LOC_BOTTOMLEFT: return PL_CHROMA_BOTTOM_LEFT; case AVCHROMA_LOC_BOTTOM: return PL_CHROMA_BOTTOM_CENTER; case AVCHROMA_LOC_NB: return PL_CHROMA_COUNT; } return PL_CHROMA_UNKNOWN; 
} PL_LIBAV_API enum AVChromaLocation pl_chroma_to_av(enum pl_chroma_location loc) { switch (loc) { case PL_CHROMA_UNKNOWN: return AVCHROMA_LOC_UNSPECIFIED; case PL_CHROMA_LEFT: return AVCHROMA_LOC_LEFT; case PL_CHROMA_CENTER: return AVCHROMA_LOC_CENTER; case PL_CHROMA_TOP_LEFT: return AVCHROMA_LOC_TOPLEFT; case PL_CHROMA_TOP_CENTER: return AVCHROMA_LOC_TOP; case PL_CHROMA_BOTTOM_LEFT: return AVCHROMA_LOC_BOTTOMLEFT; case PL_CHROMA_BOTTOM_CENTER: return AVCHROMA_LOC_BOTTOM; case PL_CHROMA_COUNT: return AVCHROMA_LOC_NB; } return AVCHROMA_LOC_UNSPECIFIED; } #ifdef PL_HAVE_LAV_HDR PL_LIBAV_API void pl_map_hdr_metadata(struct pl_hdr_metadata *out, const struct pl_av_hdr_metadata *data) { if (data->mdm) { if (data->mdm->has_luminance) { out->max_luma = av_q2d(data->mdm->max_luminance); out->min_luma = av_q2d(data->mdm->min_luminance); if (out->max_luma < 5.0 || out->min_luma >= out->max_luma) out->max_luma = out->min_luma = 0; /* sanity */ } if (data->mdm->has_primaries) { out->prim = (struct pl_raw_primaries) { .red.x = av_q2d(data->mdm->display_primaries[0][0]), .red.y = av_q2d(data->mdm->display_primaries[0][1]), .green.x = av_q2d(data->mdm->display_primaries[1][0]), .green.y = av_q2d(data->mdm->display_primaries[1][1]), .blue.x = av_q2d(data->mdm->display_primaries[2][0]), .blue.y = av_q2d(data->mdm->display_primaries[2][1]), .white.x = av_q2d(data->mdm->white_point[0]), .white.y = av_q2d(data->mdm->white_point[1]), }; } } if (data->clm) { out->max_cll = data->clm->MaxCLL; out->max_fall = data->clm->MaxFALL; } if (data->dhp && data->dhp->application_version < 2) { float hist_max = 0; const AVHDRPlusColorTransformParams *pars = &data->dhp->params[0]; assert(data->dhp->num_windows > 0); out->scene_max[0] = 10000 * av_q2d(pars->maxscl[0]); out->scene_max[1] = 10000 * av_q2d(pars->maxscl[1]); out->scene_max[2] = 10000 * av_q2d(pars->maxscl[2]); out->scene_avg = 10000 * av_q2d(pars->average_maxrgb); // Calculate largest value from histogram to use as fallback for clips // with missing MaxSCL information. Note that this may end up picking // the "reserved" value at the 5% percentile, which in practice appears // to track the brightest pixel in the scene. for (int i = 0; i < pars->num_distribution_maxrgb_percentiles; i++) { float hist_val = av_q2d(pars->distribution_maxrgb[i].percentile); if (hist_val > hist_max) hist_max = hist_val; } hist_max *= 10000; if (!out->scene_max[0]) out->scene_max[0] = hist_max; if (!out->scene_max[1]) out->scene_max[1] = hist_max; if (!out->scene_max[2]) out->scene_max[2] = hist_max; if (pars->tone_mapping_flag == 1) { out->ootf.target_luma = av_q2d(data->dhp->targeted_system_display_maximum_luminance); out->ootf.knee_x = av_q2d(pars->knee_point_x); out->ootf.knee_y = av_q2d(pars->knee_point_y); assert(pars->num_bezier_curve_anchors < 16); for (int i = 0; i < pars->num_bezier_curve_anchors; i++) out->ootf.anchors[i] = av_q2d(pars->bezier_curve_anchors[i]); out->ootf.num_anchors = pars->num_bezier_curve_anchors; } } } #endif // PL_HAVE_LAV_HDR static inline void *pl_get_side_data_raw(const AVFrame *frame, enum AVFrameSideDataType type) { const AVFrameSideData *sd = av_frame_get_side_data(frame, type); return sd ? 
(void *) sd->data : NULL; } PL_LIBAV_API void pl_color_space_from_avframe(struct pl_color_space *out_csp, const AVFrame *frame) { *out_csp = (struct pl_color_space) { .primaries = pl_primaries_from_av(frame->color_primaries), .transfer = pl_transfer_from_av(frame->color_trc), }; #ifdef PL_HAVE_LAV_HDR pl_map_hdr_metadata(&out_csp->hdr, &(struct pl_av_hdr_metadata) { .mdm = pl_get_side_data_raw(frame, AV_FRAME_DATA_MASTERING_DISPLAY_METADATA), .clm = pl_get_side_data_raw(frame, AV_FRAME_DATA_CONTENT_LIGHT_LEVEL), .dhp = pl_get_side_data_raw(frame, AV_FRAME_DATA_DYNAMIC_HDR_PLUS), }); #endif } PL_LIBAV_API enum pl_field pl_field_from_avframe(const AVFrame *frame) { #if LIBAVUTIL_VERSION_INT >= AV_VERSION_INT(58, 7, 100) if (!frame || !(frame->flags & AV_FRAME_FLAG_INTERLACED)) return PL_FIELD_NONE; return (frame->flags & AV_FRAME_FLAG_TOP_FIELD_FIRST) ? PL_FIELD_TOP : PL_FIELD_BOTTOM; #else if (!frame || !frame->interlaced_frame) return PL_FIELD_NONE; return frame->top_field_first ? PL_FIELD_TOP : PL_FIELD_BOTTOM; #endif } #ifdef PL_HAVE_LAV_FILM_GRAIN PL_LIBAV_API void pl_film_grain_from_av(struct pl_film_grain_data *out_data, const AVFilmGrainParams *fgp) { out_data->seed = fgp->seed; switch (fgp->type) { case AV_FILM_GRAIN_PARAMS_NONE: break; case AV_FILM_GRAIN_PARAMS_AV1: { const AVFilmGrainAOMParams *src = &fgp->codec.aom; struct pl_av1_grain_data *dst = &out_data->params.av1; out_data->type = PL_FILM_GRAIN_AV1; *dst = (struct pl_av1_grain_data) { .num_points_y = src->num_y_points, .chroma_scaling_from_luma = src->chroma_scaling_from_luma, .num_points_uv = { src->num_uv_points[0], src->num_uv_points[1] }, .scaling_shift = src->scaling_shift, .ar_coeff_lag = src->ar_coeff_lag, .ar_coeff_shift = src->ar_coeff_shift, .grain_scale_shift = src->grain_scale_shift, .uv_mult = { src->uv_mult[0], src->uv_mult[1] }, .uv_mult_luma = { src->uv_mult_luma[0], src->uv_mult_luma[1] }, .uv_offset = { src->uv_offset[0], src->uv_offset[1] }, .overlap = src->overlap_flag, }; assert(sizeof(dst->ar_coeffs_uv) == sizeof(src->ar_coeffs_uv)); memcpy(dst->points_y, src->y_points, sizeof(dst->points_y)); memcpy(dst->points_uv, src->uv_points, sizeof(dst->points_uv)); memcpy(dst->ar_coeffs_y, src->ar_coeffs_y, sizeof(dst->ar_coeffs_y)); memcpy(dst->ar_coeffs_uv, src->ar_coeffs_uv, sizeof(dst->ar_coeffs_uv)); break; } #if LIBAVUTIL_VERSION_INT >= AV_VERSION_INT(57, 2, 100) case AV_FILM_GRAIN_PARAMS_H274: { const AVFilmGrainH274Params *src = &fgp->codec.h274; struct pl_h274_grain_data *dst = &out_data->params.h274; out_data->type = PL_FILM_GRAIN_H274; *dst = (struct pl_h274_grain_data) { .model_id = src->model_id, .blending_mode_id = src->blending_mode_id, .log2_scale_factor = src->log2_scale_factor, .component_model_present = { src->component_model_present[0], src->component_model_present[1], src->component_model_present[2], }, .intensity_interval_lower_bound = { src->intensity_interval_lower_bound[0], src->intensity_interval_lower_bound[1], src->intensity_interval_lower_bound[2], }, .intensity_interval_upper_bound = { src->intensity_interval_upper_bound[0], src->intensity_interval_upper_bound[1], src->intensity_interval_upper_bound[2], }, .comp_model_value = { src->comp_model_value[0], src->comp_model_value[1], src->comp_model_value[2], }, }; memcpy(dst->num_intensity_intervals, src->num_intensity_intervals, sizeof(dst->num_intensity_intervals)); memcpy(dst->num_model_values, src->num_model_values, sizeof(dst->num_model_values)); break; } #endif } } #endif // PL_HAVE_LAV_FILM_GRAIN static inline int 
pl_plane_data_num_comps(const struct pl_plane_data *data) { for (int i = 0; i < 4; i++) { if (data->component_size[i] == 0) return i; } return 4; } PL_LIBAV_API int pl_plane_data_from_pixfmt(struct pl_plane_data out_data[4], struct pl_bit_encoding *out_bits, enum AVPixelFormat pix_fmt) { const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt); int planes = av_pix_fmt_count_planes(pix_fmt); struct pl_plane_data aligned_data[4]; struct pl_bit_encoding bits; bool first; if (!desc || planes < 0) // e.g. AV_PIX_FMT_NONE return 0; if (desc->flags & AV_PIX_FMT_FLAG_BITSTREAM) { // Bitstream formats will most likely never be supported return 0; } if (desc->flags & AV_PIX_FMT_FLAG_PAL) { // Palette formats are (currently) not supported return 0; } if (desc->flags & AV_PIX_FMT_FLAG_BAYER) { // Bayer format don't have valid `desc->offset` values, so we can't // use `pl_plane_data_from_mask` on them. return 0; } if (desc->nb_components == 0 || desc->nb_components > 4) { // Bogus components, possibly fake/virtual/hwaccel format? return 0; } if (planes > 4) return 0; // This shouldn't ever happen // Fill in the details for each plane for (int p = 0; p < planes; p++) { struct pl_plane_data *data = &out_data[p]; int size[4] = {0}; int shift[4] = {0}; data->swapped = desc->flags & AV_PIX_FMT_FLAG_BE; data->type = (desc->flags & AV_PIX_FMT_FLAG_FLOAT) ? PL_FMT_FLOAT : PL_FMT_UNORM; data->pixel_stride = 0; for (int c = 0; c < desc->nb_components; c++) { const AVComponentDescriptor *comp = &desc->comp[c]; if (comp->plane != p) continue; if (data->swapped && comp->shift) { // We cannot naively handle packed big endian formats because // swapping the words also swaps the component order, so just // exit out as a stupid safety measure return 0; } size[c] = comp->depth; shift[c] = comp->shift + comp->offset * 8; if (data->pixel_stride && (int) data->pixel_stride != comp->step) { // Pixel format contains components with different pixel stride // (e.g. 
packed YUYV), this is currently not supported return 0; } data->pixel_stride = comp->step; } pl_plane_data_from_comps(data, size, shift); } if (!out_bits) return planes; // Attempt aligning all of the planes for optimum compatibility first = true; for (int p = 0; p < planes; p++) { aligned_data[p] = out_data[p]; // Planes with only an alpha component should be ignored if (pl_plane_data_num_comps(&aligned_data[p]) == 1 && aligned_data[p].component_map[0] == PL_CHANNEL_A) { continue; } if (!pl_plane_data_align(&aligned_data[p], &bits)) goto misaligned; if (first) { *out_bits = bits; first = false; } else { if (!pl_bit_encoding_equal(&bits, out_bits)) goto misaligned; } } // Overwrite the planes by their aligned versions for (int p = 0; p < planes; p++) out_data[p] = aligned_data[p]; return planes; misaligned: *out_bits = (struct pl_bit_encoding) {0}; return planes; } PL_LIBAV_API bool pl_test_pixfmt_caps(pl_gpu gpu, enum AVPixelFormat pixfmt, enum pl_fmt_caps caps) { struct pl_bit_encoding bits; struct pl_plane_data data[4]; pl_fmt fmt; int planes; switch (pixfmt) { case AV_PIX_FMT_DRM_PRIME: case AV_PIX_FMT_VAAPI: return gpu->import_caps.tex & PL_HANDLE_DMA_BUF; #ifdef PL_HAVE_LAV_VULKAN case AV_PIX_FMT_VULKAN: return pl_vulkan_get(gpu); #endif default: break; } planes = pl_plane_data_from_pixfmt(data, &bits, pixfmt); if (!planes) return false; for (int i = 0; i < planes; i++) { data[i].row_stride = 0; fmt = pl_plane_find_fmt(gpu, NULL, &data[i]); if (!fmt || (fmt->caps & caps) != caps) return false; } return true; } PL_LIBAV_API bool pl_test_pixfmt(pl_gpu gpu, enum AVPixelFormat pixfmt) { return pl_test_pixfmt_caps(gpu, pixfmt, 0); } PL_LIBAV_API void pl_avframe_set_color(AVFrame *frame, struct pl_color_space csp) { const AVFrameSideData *sd; (void) sd; frame->color_primaries = pl_primaries_to_av(csp.primaries); frame->color_trc = pl_transfer_to_av(csp.transfer); #ifdef PL_HAVE_LAV_HDR if (csp.hdr.max_cll) { sd = av_frame_get_side_data(frame, AV_FRAME_DATA_CONTENT_LIGHT_LEVEL); if (!sd) { sd = av_frame_new_side_data(frame, AV_FRAME_DATA_CONTENT_LIGHT_LEVEL, sizeof(AVContentLightMetadata)); } if (sd) { AVContentLightMetadata *clm = (AVContentLightMetadata *) sd->data; *clm = (AVContentLightMetadata) { .MaxCLL = csp.hdr.max_cll, .MaxFALL = csp.hdr.max_fall, }; } } if (csp.hdr.max_luma || csp.hdr.prim.red.x) { sd = av_frame_get_side_data(frame, AV_FRAME_DATA_MASTERING_DISPLAY_METADATA); if (!sd) { sd = av_frame_new_side_data(frame, AV_FRAME_DATA_MASTERING_DISPLAY_METADATA, sizeof(AVMasteringDisplayMetadata)); } if (sd) { AVMasteringDisplayMetadata *mdm = (AVMasteringDisplayMetadata *) sd->data; *mdm = (AVMasteringDisplayMetadata) { .max_luminance = av_d2q(csp.hdr.max_luma, 1000000), .min_luminance = av_d2q(csp.hdr.min_luma, 1000000), .has_luminance = !!csp.hdr.max_luma, .display_primaries = { { av_d2q(csp.hdr.prim.red.x, 1000000), av_d2q(csp.hdr.prim.red.y, 1000000), }, { av_d2q(csp.hdr.prim.green.x, 1000000), av_d2q(csp.hdr.prim.green.y, 1000000), }, { av_d2q(csp.hdr.prim.blue.x, 1000000), av_d2q(csp.hdr.prim.blue.y, 1000000), } }, .white_point = { av_d2q(csp.hdr.prim.white.x, 1000000), av_d2q(csp.hdr.prim.white.y, 1000000), }, .has_primaries = !!csp.hdr.prim.red.x, }; } } #endif // PL_HAVE_LAV_HDR } PL_LIBAV_API void pl_avframe_set_repr(AVFrame *frame, struct pl_color_repr repr) { frame->colorspace = pl_system_to_av(repr.sys); frame->color_range = pl_levels_to_av(repr.levels); // No real way to map repr.bits, the image format already has to match } PL_LIBAV_API void 
pl_avframe_set_profile(AVFrame *frame, struct pl_icc_profile profile) { const AVFrameSideData *sd; av_frame_remove_side_data(frame, AV_FRAME_DATA_ICC_PROFILE); if (!profile.len) return; sd = av_frame_new_side_data(frame, AV_FRAME_DATA_ICC_PROFILE, profile.len); memcpy(sd->data, profile.data, profile.len); } PL_LIBAV_API void pl_frame_from_avframe(struct pl_frame *out, const AVFrame *frame) { const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format); int planes = av_pix_fmt_count_planes(frame->format); const AVFrameSideData *sd; assert(desc); if (desc->flags & AV_PIX_FMT_FLAG_HWACCEL) { const AVHWFramesContext *hwfc = (AVHWFramesContext *) frame->hw_frames_ctx->data; desc = av_pix_fmt_desc_get(hwfc->sw_format); planes = av_pix_fmt_count_planes(hwfc->sw_format); } // This should never fail, and there's nothing really useful we can do in // this failure case anyway, since this is a `void` function. assert(planes <= 4); *out = (struct pl_frame) { .num_planes = planes, .crop = { .x0 = frame->crop_left, .y0 = frame->crop_top, .x1 = frame->width - frame->crop_right, .y1 = frame->height - frame->crop_bottom, }, .repr = { .sys = pl_system_from_av(frame->colorspace), .levels = pl_levels_from_av(frame->color_range), .alpha = (desc->flags & AV_PIX_FMT_FLAG_ALPHA) ? PL_ALPHA_INDEPENDENT : PL_ALPHA_NONE, // For sake of simplicity, just use the first component's depth as // the authoritative color depth for the whole image. Usually, this // will be overwritten by more specific information when using e.g. // `pl_map_avframe`, but for the sake of e.g. users wishing to map // hwaccel frames manually, this is a good default. .bits.color_depth = desc->comp[0].depth, }, }; pl_color_space_from_avframe(&out->color, frame); if (frame->colorspace == AVCOL_SPC_ICTCP && frame->color_trc == AVCOL_TRC_ARIB_STD_B67) { // libav* makes no distinction between PQ and HLG ICtCp, so we need // to manually fix it in the case that we have HLG ICtCp data. out->repr.sys = PL_COLOR_SYSTEM_BT_2100_HLG; } else if (strncmp(desc->name, "xyz", 3) == 0) { // libav* handles this as a special case, but doesn't provide an // explicit flag for it either, so we have to resort to this ugly // hack... out->repr.sys = PL_COLOR_SYSTEM_XYZ; } else if (desc->flags & AV_PIX_FMT_FLAG_RGB) { out->repr.sys = PL_COLOR_SYSTEM_RGB; out->repr.levels = PL_COLOR_LEVELS_FULL; // libav* ignores levels for RGB } else if (!pl_color_system_is_ycbcr_like(out->repr.sys)) { // libav* likes leaving this as UNKNOWN (or even RGB) for YCbCr frames, // which confuses libplacebo since we infer UNKNOWN as RGB. To get // around this, explicitly infer a suitable colorspace. 
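        // As a rough illustration of this inference: SD-sized frames (e.g.
        // 720x576) are typically guessed as BT.601, while HD-sized frames
        // (e.g. 1920x1080) are guessed as BT.709.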
out->repr.sys = pl_color_system_guess_ycbcr(frame->width, frame->height); } if ((sd = av_frame_get_side_data(frame, AV_FRAME_DATA_ICC_PROFILE))) { out->profile = (struct pl_icc_profile) { .data = sd->data, .len = sd->size, }; // Needed to ensure profile uniqueness pl_icc_profile_compute_signature(&out->profile); } if ((sd = av_frame_get_side_data(frame, AV_FRAME_DATA_DISPLAYMATRIX))) { double rot = av_display_rotation_get((const int32_t *) sd->data); out->rotation = pl_rotation_normalize(4.5 - rot / 90.0); } #ifdef PL_HAVE_LAV_FILM_GRAIN if ((sd = av_frame_get_side_data(frame, AV_FRAME_DATA_FILM_GRAIN_PARAMS))) pl_film_grain_from_av(&out->film_grain, (AVFilmGrainParams *) sd->data); #endif // HAVE_LAV_FILM_GRAIN for (int p = 0; p < out->num_planes; p++) { struct pl_plane *plane = &out->planes[p]; // Fill in the component mapping array for (int c = 0; c < desc->nb_components; c++) { if (desc->comp[c].plane == p) plane->component_mapping[plane->components++] = c; } // Clear the superfluous components for (int c = plane->components; c < 4; c++) plane->component_mapping[c] = PL_CHANNEL_NONE; } // Only set the chroma location for definitely subsampled images, makes no // sense otherwise if (desc->log2_chroma_w || desc->log2_chroma_h) { enum pl_chroma_location loc = pl_chroma_from_av(frame->chroma_location); pl_frame_set_chroma_location(out, loc); } } #if LIBAVFORMAT_VERSION_INT >= AV_VERSION_INT(60, 15, 100) PL_LIBAV_API const uint8_t *pl_av_stream_get_side_data(const AVStream *st, enum AVPacketSideDataType type) { const AVPacketSideData *sd; sd = av_packet_side_data_get(st->codecpar->coded_side_data, st->codecpar->nb_coded_side_data, type); return sd ? sd->data : NULL; } #else # define pl_av_stream_get_side_data(st, type) av_stream_get_side_data(st, type, NULL) #endif PL_LIBAV_API void pl_frame_copy_stream_props(struct pl_frame *out, const AVStream *stream) { const uint8_t *sd; if ((sd = pl_av_stream_get_side_data(stream, AV_PKT_DATA_DISPLAYMATRIX))) { double rot = av_display_rotation_get((const int32_t *) sd); out->rotation = pl_rotation_normalize(4.5 - rot / 90.0); } #ifdef PL_HAVE_LAV_HDR pl_map_hdr_metadata(&out->color.hdr, &(struct pl_av_hdr_metadata) { .mdm = (void *) pl_av_stream_get_side_data(stream, AV_PKT_DATA_MASTERING_DISPLAY_METADATA), .clm = (void *) pl_av_stream_get_side_data(stream, AV_PKT_DATA_CONTENT_LIGHT_LEVEL), # if LIBAVCODEC_VERSION_INT >= AV_VERSION_INT(59, 2, 100) .dhp = (void *) pl_av_stream_get_side_data(stream, AV_PKT_DATA_DYNAMIC_HDR10_PLUS), # endif }); #endif } #undef pl_av_stream_get_side_data #ifdef PL_HAVE_LAV_DOLBY_VISION PL_LIBAV_API void pl_map_dovi_metadata(struct pl_dovi_metadata *out, const AVDOVIMetadata *data) { const AVDOVIRpuDataHeader *header; const AVDOVIDataMapping *mapping; const AVDOVIColorMetadata *color; if (!data) return; header = av_dovi_get_header(data); mapping = av_dovi_get_mapping(data); color = av_dovi_get_color(data); for (int i = 0; i < 3; i++) out->nonlinear_offset[i] = av_q2d(color->ycc_to_rgb_offset[i]); for (int i = 0; i < 9; i++) { float *nonlinear = &out->nonlinear.m[0][0]; float *linear = &out->linear.m[0][0]; nonlinear[i] = av_q2d(color->ycc_to_rgb_matrix[i]); linear[i] = av_q2d(color->rgb_to_lms_matrix[i]); } for (int c = 0; c < 3; c++) { const AVDOVIReshapingCurve *csrc = &mapping->curves[c]; struct pl_reshape_data *cdst = &out->comp[c]; cdst->num_pivots = csrc->num_pivots; for (int i = 0; i < csrc->num_pivots; i++) { const float scale = 1.0f / ((1 << header->bl_bit_depth) - 1); cdst->pivots[i] = scale * csrc->pivots[i]; } 
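        // Worked example of the normalization above: with a 10-bit base layer
        // (bl_bit_depth == 10), scale is 1/1023, so a raw pivot value of 512
        // maps to 512/1023, i.e. about 0.5005.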
for (int i = 0; i < csrc->num_pivots - 1; i++) { const float scale = 1.0f / (1 << header->coef_log2_denom); cdst->method[i] = csrc->mapping_idc[i]; switch (csrc->mapping_idc[i]) { case AV_DOVI_MAPPING_POLYNOMIAL: for (int k = 0; k < 3; k++) { cdst->poly_coeffs[i][k] = (k <= csrc->poly_order[i]) ? scale * csrc->poly_coef[i][k] : 0.0f; } break; case AV_DOVI_MAPPING_MMR: cdst->mmr_order[i] = csrc->mmr_order[i]; cdst->mmr_constant[i] = scale * csrc->mmr_constant[i]; for (int j = 0; j < csrc->mmr_order[i]; j++) { for (int k = 0; k < 7; k++) cdst->mmr_coeffs[i][j][k] = scale * csrc->mmr_coef[i][j][k]; } break; } } } } PL_LIBAV_API void pl_map_avdovi_metadata(struct pl_color_space *color, struct pl_color_repr *repr, struct pl_dovi_metadata *dovi, const AVDOVIMetadata *metadata) { const AVDOVIRpuDataHeader *header; const AVDOVIColorMetadata *dovi_color; #if LIBAVUTIL_VERSION_INT >= AV_VERSION_INT(59, 12, 100) const AVDOVIDmData *dovi_ext; #endif if (!color || !repr || !dovi) return; header = av_dovi_get_header(metadata); dovi_color = av_dovi_get_color(metadata); if (header->disable_residual_flag) { pl_map_dovi_metadata(dovi, metadata); repr->dovi = dovi; repr->sys = PL_COLOR_SYSTEM_DOLBYVISION; color->primaries = PL_COLOR_PRIM_BT_2020; color->transfer = PL_COLOR_TRC_PQ; color->hdr.min_luma = pl_hdr_rescale(PL_HDR_PQ, PL_HDR_NITS, dovi_color->source_min_pq / 4095.0f); color->hdr.max_luma = pl_hdr_rescale(PL_HDR_PQ, PL_HDR_NITS, dovi_color->source_max_pq / 4095.0f); #if LIBAVUTIL_VERSION_INT >= AV_VERSION_INT(59, 12, 100) if ((dovi_ext = av_dovi_find_level(metadata, 1))) { color->hdr.max_pq_y = dovi_ext->l1.max_pq / 4095.0f; color->hdr.avg_pq_y = dovi_ext->l1.avg_pq / 4095.0f; } #endif } } PL_LIBAV_API void pl_frame_map_avdovi_metadata(struct pl_frame *out_frame, struct pl_dovi_metadata *dovi, const AVDOVIMetadata *metadata) { if (!out_frame) return; pl_map_avdovi_metadata(&out_frame->color, &out_frame->repr, dovi, metadata); } #endif // PL_HAVE_LAV_DOLBY_VISION PL_LIBAV_API bool pl_frame_recreate_from_avframe(pl_gpu gpu, struct pl_frame *out, pl_tex tex[4], const AVFrame *frame) { const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format); struct pl_plane_data data[4] = {0}; int planes; pl_frame_from_avframe(out, frame); planes = pl_plane_data_from_pixfmt(data, &out->repr.bits, frame->format); if (!planes) return false; for (int p = 0; p < planes; p++) { bool is_chroma = p == 1 || p == 2; // matches lavu logic data[p].width = AV_CEIL_RSHIFT(frame->width, is_chroma ? desc->log2_chroma_w : 0); data[p].height = AV_CEIL_RSHIFT(frame->height, is_chroma ? 
desc->log2_chroma_h : 0); if (!pl_recreate_plane(gpu, &out->planes[p], &tex[p], &data[p])) return false; } return true; } static void pl_avframe_free_cb(void *priv) { AVFrame *frame = priv; av_frame_free(&frame); } #define PL_MAGIC0 0xfb5b3b8b #define PL_MAGIC1 0xee659f6d struct pl_avalloc { uint32_t magic[2]; pl_gpu gpu; pl_buf buf; }; // Attached to `pl_frame.user_data` for mapped AVFrames struct pl_avframe_priv { AVFrame *avframe; struct pl_dovi_metadata dovi; // backing storage for per-frame dovi metadata pl_tex planar; // for planar vulkan textures }; static void pl_fix_hwframe_sample_depth(struct pl_frame *out) { pl_fmt fmt = out->planes[0].texture->params.format; struct pl_bit_encoding *bits = &out->repr.bits; bits->sample_depth = fmt->component_depth[0]; } static bool pl_map_avframe_drm(pl_gpu gpu, struct pl_frame *out, const AVFrame *frame) { const AVHWFramesContext *hwfc = (AVHWFramesContext *) frame->hw_frames_ctx->data; const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(hwfc->sw_format); const AVDRMFrameDescriptor *drm = (AVDRMFrameDescriptor *) frame->data[0]; assert(frame->format == AV_PIX_FMT_DRM_PRIME); if (!(gpu->import_caps.tex & PL_HANDLE_DMA_BUF)) return false; assert(drm->nb_layers >= out->num_planes); for (int n = 0; n < out->num_planes; n++) { const AVDRMLayerDescriptor *layer = &drm->layers[n]; const AVDRMPlaneDescriptor *plane = &layer->planes[0]; const AVDRMObjectDescriptor *object = &drm->objects[plane->object_index]; pl_fmt fmt = pl_find_fourcc(gpu, layer->format); bool is_chroma = n == 1 || n == 2; if (!fmt || !pl_fmt_has_modifier(fmt, object->format_modifier)) return false; assert(layer->nb_planes == 1); // we only support planar formats assert(plane->pitch >= 0); // definitely requires special handling out->planes[n].texture = pl_tex_create(gpu, pl_tex_params( .w = AV_CEIL_RSHIFT(frame->width, is_chroma ? desc->log2_chroma_w : 0), .h = AV_CEIL_RSHIFT(frame->height, is_chroma ? 
desc->log2_chroma_h : 0), .format = fmt, .sampleable = true, .blit_src = fmt->caps & PL_FMT_CAP_BLITTABLE, .import_handle = PL_HANDLE_DMA_BUF, .shared_mem = { .handle.fd = object->fd, .size = object->size, .offset = plane->offset, .drm_format_mod = object->format_modifier, .stride_w = plane->pitch, }, )); if (!out->planes[n].texture) return false; } pl_fix_hwframe_sample_depth(out); switch (hwfc->sw_format) { case AV_PIX_FMT_P010: out->repr.bits.bit_shift = 6; break; default: break; } return true; } // Derive a DMABUF from any other hwaccel format, and map that instead static bool pl_map_avframe_derived(pl_gpu gpu, struct pl_frame *out, const AVFrame *frame) { const int flags = AV_HWFRAME_MAP_READ | AV_HWFRAME_MAP_DIRECT; struct pl_avframe_priv *priv = out->user_data; AVFrame *derived = av_frame_alloc(); derived->width = frame->width; derived->height = frame->height; derived->format = AV_PIX_FMT_DRM_PRIME; derived->hw_frames_ctx = av_buffer_ref(frame->hw_frames_ctx); if (av_hwframe_map(derived, frame, flags) < 0) goto error; if (av_frame_copy_props(derived, frame) < 0) goto error; if (!pl_map_avframe_drm(gpu, out, derived)) goto error; av_frame_free(&priv->avframe); priv->avframe = derived; return true; error: av_frame_free(&derived); return false; } #ifdef PL_HAVE_LAV_VULKAN static bool pl_acquire_avframe(pl_gpu gpu, struct pl_frame *frame) { const struct pl_avframe_priv *priv = frame->user_data; AVHWFramesContext *hwfc = (void *) priv->avframe->hw_frames_ctx->data; AVVulkanFramesContext *vkfc = hwfc->hwctx; AVVkFrame *vkf = (AVVkFrame *) priv->avframe->data[0]; #ifdef PL_HAVE_LAV_VULKAN_V2 vkfc->lock_frame(hwfc, vkf); #else (void) vkfc; #endif for (int n = 0; n < frame->num_planes; n++) { pl_vulkan_release_ex(gpu, pl_vulkan_release_params( .tex = priv->planar ? priv->planar : frame->planes[n].texture, .layout = vkf->layout[n], .qf = VK_QUEUE_FAMILY_IGNORED, .semaphore = { .sem = vkf->sem[n], .value = vkf->sem_value[n], }, )); if (priv->planar) break; } return true; } static void pl_release_avframe(pl_gpu gpu, struct pl_frame *frame) { const struct pl_avframe_priv *priv = frame->user_data; AVHWFramesContext *hwfc = (void *) priv->avframe->hw_frames_ctx->data; AVVulkanFramesContext *vkfc = hwfc->hwctx; AVVkFrame *vkf = (AVVkFrame *) priv->avframe->data[0]; for (int n = 0; n < frame->num_planes; n++) { int ok = pl_vulkan_hold_ex(gpu, pl_vulkan_hold_params( .tex = priv->planar ? 
priv->planar : frame->planes[n].texture, .out_layout = &vkf->layout[n], .qf = VK_QUEUE_FAMILY_IGNORED, .semaphore = { .sem = vkf->sem[n], .value = vkf->sem_value[n] + 1, }, )); vkf->access[n] = 0; vkf->sem_value[n] += !!ok; if (priv->planar) break; } #ifdef PL_HAVE_LAV_VULKAN_V2 vkfc->unlock_frame(hwfc, vkf); #else (void) vkfc; #endif } static bool pl_map_avframe_vulkan(pl_gpu gpu, struct pl_frame *out, const AVFrame *frame) { const AVHWFramesContext *hwfc = (AVHWFramesContext *) frame->hw_frames_ctx->data; const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(hwfc->sw_format); const AVVulkanFramesContext *vkfc = hwfc->hwctx; AVVkFrame *vkf = (AVVkFrame *) frame->data[0]; struct pl_avframe_priv *priv = out->user_data; pl_vulkan vk = pl_vulkan_get(gpu); #ifdef PL_HAVE_LAV_VULKAN_V2 const VkFormat *vk_fmt = vkfc->format; #else const VkFormat *vk_fmt = av_vkfmt_from_pixfmt(hwfc->sw_format); #endif assert(frame->format == AV_PIX_FMT_VULKAN); priv->planar = NULL; if (!vk) return false; for (int n = 0; n < out->num_planes; n++) { struct pl_plane *plane = &out->planes[n]; bool chroma = n == 1 || n == 2; int num_subplanes; assert(vk_fmt[n]); plane->texture = pl_vulkan_wrap(gpu, pl_vulkan_wrap_params( .image = vkf->img[n], .width = AV_CEIL_RSHIFT(hwfc->width, chroma ? desc->log2_chroma_w : 0), .height = AV_CEIL_RSHIFT(hwfc->height, chroma ? desc->log2_chroma_h : 0), .format = vk_fmt[n], .usage = vkfc->usage, )); if (!plane->texture) return false; num_subplanes = plane->texture->params.format->num_planes; if (num_subplanes) { assert(num_subplanes == out->num_planes); priv->planar = plane->texture; for (int i = 0; i < num_subplanes; i++) out->planes[i].texture = priv->planar->planes[i]; break; } } out->acquire = pl_acquire_avframe; out->release = pl_release_avframe; pl_fix_hwframe_sample_depth(out); return true; } static void pl_unmap_avframe_vulkan(pl_gpu gpu, struct pl_frame *frame) { struct pl_avframe_priv *priv = frame->user_data; if (priv->planar) { pl_tex_destroy(gpu, &priv->planar); for (int n = 0; n < frame->num_planes; n++) frame->planes[n].texture = NULL; } } #endif PL_LIBAV_API bool pl_map_avframe_ex(pl_gpu gpu, struct pl_frame *out, const struct pl_avframe_params *params) { const AVFrame *frame = params->frame; const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format); struct pl_plane_data data[4] = {0}; pl_tex *tex = params->tex; int planes; struct pl_avframe_priv *priv = malloc(sizeof(*priv)); if (!priv) goto error; pl_frame_from_avframe(out, frame); priv->avframe = av_frame_clone(frame); out->user_data = priv; #ifdef PL_HAVE_LAV_DOLBY_VISION if (params->map_dovi) { AVFrameSideData *sd = av_frame_get_side_data(frame, AV_FRAME_DATA_DOVI_METADATA); if (sd) { const AVDOVIMetadata *metadata = (AVDOVIMetadata *) sd->data; const AVDOVIRpuDataHeader *header = av_dovi_get_header(metadata); // Only automatically map DoVi RPUs that don't require an EL if (header->disable_residual_flag) pl_map_avdovi_metadata(&out->color, &out->repr, &priv->dovi, metadata); } #ifdef PL_HAVE_LIBDOVI sd = av_frame_get_side_data(frame, AV_FRAME_DATA_DOVI_RPU_BUFFER); if (sd) pl_hdr_metadata_from_dovi_rpu(&out->color.hdr, sd->buf->data, sd->buf->size); #endif // PL_HAVE_LIBDOVI } #endif // PL_HAVE_LAV_DOLBY_VISION switch (frame->format) { case AV_PIX_FMT_DRM_PRIME: if (!pl_map_avframe_drm(gpu, out, frame)) goto error; return true; case AV_PIX_FMT_VAAPI: if (!pl_map_avframe_derived(gpu, out, frame)) goto error; return true; #ifdef PL_HAVE_LAV_VULKAN case AV_PIX_FMT_VULKAN: if (!pl_map_avframe_vulkan(gpu, out, 
frame)) goto error; return true; #endif default: break; } // Backing textures are required from this point onwards if (!tex) goto error; planes = pl_plane_data_from_pixfmt(data, &out->repr.bits, frame->format); if (!planes) goto error; for (int p = 0; p < planes; p++) { AVBufferRef *buf = av_frame_get_plane_buffer((AVFrame *) frame, p); struct pl_avalloc *alloc = buf ? av_buffer_get_opaque(buf) : NULL; bool is_chroma = p == 1 || p == 2; // matches lavu logic data[p].width = AV_CEIL_RSHIFT(frame->width, is_chroma ? desc->log2_chroma_w : 0); data[p].height = AV_CEIL_RSHIFT(frame->height, is_chroma ? desc->log2_chroma_h : 0); if (frame->linesize[p] < 0) { data[p].pixels = frame->data[p] + frame->linesize[p] * (data[p].height - 1); data[p].row_stride = -frame->linesize[p]; out->planes[p].flipped = true; } else { data[p].pixels = frame->data[p]; data[p].row_stride = frame->linesize[p]; } // Probe for frames allocated by pl_get_buffer2 if (alloc && alloc->magic[0] == PL_MAGIC0 && alloc->magic[1] == PL_MAGIC1) { data[p].buf = alloc->buf; data[p].buf_offset = (uintptr_t) data[p].pixels - (uintptr_t) alloc->buf->data; data[p].pixels = NULL; } else if (gpu->limits.callbacks) { // Use asynchronous upload if possible data[p].callback = pl_avframe_free_cb; data[p].priv = av_frame_clone(frame); } if (!pl_upload_plane(gpu, &out->planes[p], &tex[p], &data[p])) { av_frame_free((AVFrame **) &data[p].priv); goto error; } out->planes[p].texture = tex[p]; } return true; error: pl_unmap_avframe(gpu, out); return false; } // Backwards compatibility with previous versions of this API. PL_LIBAV_API bool pl_map_avframe(pl_gpu gpu, struct pl_frame *out_frame, pl_tex tex[4], const AVFrame *avframe) { return pl_map_avframe_ex(gpu, out_frame, &(struct pl_avframe_params) { .frame = avframe, .tex = tex, }); } PL_LIBAV_API void pl_unmap_avframe(pl_gpu gpu, struct pl_frame *frame) { struct pl_avframe_priv *priv = frame->user_data; const AVPixFmtDescriptor *desc; if (!priv) goto done; #ifdef PL_HAVE_LAV_VULKAN if (priv->avframe->format == AV_PIX_FMT_VULKAN) pl_unmap_avframe_vulkan(gpu, frame); #endif desc = av_pix_fmt_desc_get(priv->avframe->format); if (desc->flags & AV_PIX_FMT_FLAG_HWACCEL) { for (int i = 0; i < 4; i++) pl_tex_destroy(gpu, &frame->planes[i].texture); } av_frame_free(&priv->avframe); free(priv); done: memset(frame, 0, sizeof(*frame)); // sanity } PL_LIBAV_API AVFrame *pl_get_mapped_avframe(const struct pl_frame *frame) { struct pl_avframe_priv *priv = frame->user_data; return priv->avframe; } static void pl_done_cb(void *priv) { bool *status = priv; *status = true; } PL_LIBAV_API bool pl_download_avframe(pl_gpu gpu, const struct pl_frame *frame, AVFrame *out_frame) { bool done[4] = {0}; if (frame->num_planes != av_pix_fmt_count_planes(out_frame->format)) return false; for (int p = 0; p < frame->num_planes; p++) { bool ok = pl_tex_download(gpu, pl_tex_transfer_params( .tex = frame->planes[p].texture, .row_pitch = out_frame->linesize[p], .ptr = out_frame->data[p], // Use synchronous transfer for the last plane .callback = (p+1) < frame->num_planes ? pl_done_cb : NULL, .priv = &done[p], )); if (!ok) return false; } for (int p = 0; p < frame->num_planes - 1; p++) { while (!done[p]) pl_tex_poll(gpu, frame->planes[p].texture, UINT64_MAX); } return true; } #define PL_DIV_UP(x, y) (((x) + (y) - 1) / (y)) #define PL_ALIGN(x, align) ((align) ? PL_DIV_UP(x, align) * (align) : (x)) #define PL_MAX(x, y) ((x) > (y) ? 
(x) : (y)) #define PL_LCM(x, y) ((x) * ((y) / av_gcd(x, y))) static inline void pl_avalloc_free(void *opaque, uint8_t *data) { struct pl_avalloc *alloc = opaque; assert(alloc->magic[0] == PL_MAGIC0); assert(alloc->magic[1] == PL_MAGIC1); assert(alloc->buf->data == data); pl_buf_destroy(alloc->gpu, &alloc->buf); free(alloc); } PL_LIBAV_API int pl_get_buffer2(AVCodecContext *avctx, AVFrame *pic, int flags) { int alignment[AV_NUM_DATA_POINTERS]; int width = pic->width; int height = pic->height; size_t planesize[4]; int ret = 0; pl_gpu *pgpu = avctx->opaque; pl_gpu gpu = pgpu ? *pgpu : NULL; struct pl_plane_data data[4]; struct pl_avalloc *alloc; const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pic->format); int planes = pl_plane_data_from_pixfmt(data, NULL, pic->format); // Sanitize frame structs memset(pic->data, 0, sizeof(pic->data)); memset(pic->linesize, 0, sizeof(pic->linesize)); memset(pic->buf, 0, sizeof(pic->buf)); pic->extended_data = pic->data; pic->extended_buf = NULL; if (!(avctx->codec->capabilities & AV_CODEC_CAP_DR1) || !planes) goto fallback; if (!gpu || !gpu->limits.thread_safe || !gpu->limits.max_mapped_size || !gpu->limits.host_cached) { goto fallback; } avcodec_align_dimensions2(avctx, &width, &height, alignment); if ((ret = av_image_fill_linesizes(pic->linesize, pic->format, width))) return ret; for (int p = 0; p < planes; p++) { alignment[p] = PL_LCM(alignment[p], gpu->limits.align_tex_xfer_pitch); alignment[p] = PL_LCM(alignment[p], gpu->limits.align_tex_xfer_offset); alignment[p] = PL_LCM(alignment[p], data[p].pixel_stride); pic->linesize[p] = PL_ALIGN(pic->linesize[p], alignment[p]); } #if LIBAVUTIL_VERSION_INT >= AV_VERSION_INT(56, 56, 100) ret = av_image_fill_plane_sizes(planesize, pic->format, height, (ptrdiff_t[4]) { pic->linesize[0], pic->linesize[1], pic->linesize[2], pic->linesize[3], }); if (ret < 0) return ret; #else uint8_t *ptrs[4], * const base = (uint8_t *) 0x10000; ret = av_image_fill_pointers(ptrs, pic->format, height, base, pic->linesize); if (ret < 0) return ret; for (int p = 0; p < 4; p++) planesize[p] = (uintptr_t) ptrs[p] - (uintptr_t) base; #endif for (int p = 0; p < planes; p++) { const size_t buf_size = planesize[p] + alignment[p]; if (buf_size > gpu->limits.max_mapped_size) { av_frame_unref(pic); goto fallback; } alloc = malloc(sizeof(*alloc)); if (!alloc) { av_frame_unref(pic); return AVERROR(ENOMEM); } *alloc = (struct pl_avalloc) { .magic = { PL_MAGIC0, PL_MAGIC1 }, .gpu = gpu, .buf = pl_buf_create(gpu, pl_buf_params( .size = buf_size, .memory_type = PL_BUF_MEM_HOST, .host_mapped = true, .storable = desc->flags & AV_PIX_FMT_FLAG_BE, )), }; if (!alloc->buf) { free(alloc); av_frame_unref(pic); return AVERROR(ENOMEM); } pic->data[p] = (uint8_t *) PL_ALIGN((uintptr_t) alloc->buf->data, alignment[p]); pic->buf[p] = av_buffer_create(alloc->buf->data, buf_size, pl_avalloc_free, alloc, 0); if (!pic->buf[p]) { pl_buf_destroy(gpu, &alloc->buf); free(alloc); av_frame_unref(pic); return AVERROR(ENOMEM); } } return 0; fallback: return avcodec_default_get_buffer2(avctx, pic, flags); } #undef PL_MAGIC0 #undef PL_MAGIC1 #undef PL_ALIGN #undef PL_MAX #endif // LIBPLACEBO_LIBAV_H_ libplacebo-v7.349.0/src/include/libplacebo/utils/upload.h000066400000000000000000000171561463457750100233110ustar00rootroot00000000000000/* * This file is part of libplacebo. 
* * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #ifndef LIBPLACEBO_UPLOAD_H_ #define LIBPLACEBO_UPLOAD_H_ #include #include #include PL_API_BEGIN // This file contains a utility function to assist in uploading data from host // memory to a texture. In particular, the texture will be suitable for use as // a `pl_plane`. // Description of the host representation of an image plane struct pl_plane_data { enum pl_fmt_type type; // meaning of the data (must not be UINT or SINT) int width, height; // dimensions of the plane int component_size[4]; // size in bits of each coordinate int component_pad[4]; // ignored bits preceding each component int component_map[4]; // semantic meaning of each component (pixel order) size_t pixel_stride; // offset in bytes between pixels (required) size_t row_stride; // offset in bytes between rows (optional) bool swapped; // pixel data is endian-swapped (non-native) // Similar to `pl_tex_transfer_params`, you can either upload from a raw // pointer address, or a buffer + offset. Again, the use of these two // mechanisms is mutually exclusive. // // 1. Uploading from host memory const void *pixels; // the actual data underlying this plane // 2. Uploading from a buffer (requires `pl_gpu_limits.buf_transfer`) pl_buf buf; // the buffer to use size_t buf_offset; // offset of data within buffer, must be a // multiple of `pixel_stride` as well as of 4 // Similar to `pl_tex_transfer_params.callback`, this allows turning the // upload of a plane into an asynchronous upload. The same notes apply. void (*callback)(void *priv); void *priv; // Note: When using this together with `pl_frame`, there is some amount of // overlap between `component_pad` and `pl_color_repr.bits`. Some key // differences between the two: // // - the bits from `component_pad` are ignored; whereas the superfluous bits // in a `pl_color_repr` must be 0. // - the `component_pad` exists to align the component size and placement // with the capabilities of GPUs; the `pl_color_repr` exists to control // the semantics of the color samples on a finer granularity. // - the `pl_color_repr` applies to the color sample as a whole, and // therefore applies to all planes; the `component_pad` can be different // for each plane. // - `component_pad` interacts with float textures by moving the actual // float in memory. `pl_color_repr` interacts with float data as if // the float was converted from an integer under full range semantics. // // To help establish the motivating difference, a typical example of a use // case would be yuv420p10. Since 10-bit GPU texture support is limited, // and working with non-byte-aligned pixels is awkward in general, the // convention is to represent yuv420p10 as 16-bit samples with either the // high or low bits set to 0. 
In this scenario, the `component_size` of the // `pl_plane_data` and `pl_bit_encoding.sample_depth` would be 16, while // the `pl_bit_encoding.color_depth` would be 10 (and additionally, the // `pl_bit_encoding.bit_shift` would be either 0 or 6, depending on // whether the low or the high bits are used). // // On the contrary, something like a packed, 8-bit XBGR format (where the // X bits are ignored and may contain garbage) would set `component_pad[0]` // to 8, and the component_size[0:2] (respectively) to 8 as well. // // As a general rule of thumb, for maximum compatibility, you should try // and align component_size/component_pad to multiples of 8 and explicitly // clear any remaining superfluous bits (+ use `pl_color_repr.bits` to // ensure they're decoded correctly). You should also try to align the // `pixel_stride` to a power of two. }; // Fills in the `component_size`, `component_pad` and `component_map` fields // based on the supplied mask for each component (in semantic order, i.e. // RGBA). Each element of `mask` must have a contiguous range of set bits. PL_API void pl_plane_data_from_mask(struct pl_plane_data *data, uint64_t mask[4]); // Fills in the `component_size`, `component_pad` and `component_map` fields // based on the supplied sizes (in bits) and shift of each component (in // semantic order). // // Similar to `pl_plane_data_from_mask` but not limited to 64-bit pixels. PL_API void pl_plane_data_from_comps(struct pl_plane_data *data, int size[4], int shift[4]); // Helper function to take a `pl_plane_data` struct and try and improve its // alignment to make it more likely to correspond to a real `pl_fmt`. It does // this by attempting to round each component up to the nearest byte boundary. // This relies on the assumption (true in practice) that superfluous bits of // byte-misaligned formats are explicitly set to 0. // // The resulting shift must be consistent across all components, in which case // it's returned in `out_bits`. If no alignment was possible, `out_bits` is set // to {0}, and this function returns false. PL_API bool pl_plane_data_align(struct pl_plane_data *data, struct pl_bit_encoding *out_bits); // Helper function to find a suitable `pl_fmt` based on a pl_plane_data's // requirements. This is called internally by `pl_upload_plane`, but it's // exposed to users both as a convenience and so they may pre-emptively check // if a format would be supported without actually having to attempt the upload. PL_API pl_fmt pl_plane_find_fmt(pl_gpu gpu, int out_map[4], const struct pl_plane_data *data); // Upload an image plane to a texture, and output the resulting `pl_plane` // struct to `out_plane` (optional). `tex` must be a valid pointer to a texture // (or NULL), which will be destroyed and reinitialized if it does not already // exist or is incompatible. Returns whether successful. // // The resulting texture is guaranteed to be `sampleable`, and it will also try // and maximize compatibility with the other `pl_renderer` requirements // (blittable, linear filterable, etc.). // // Note: `out_plane->shift_x/y` and `out_plane->flipped` are left // uninitialized, and should be set explicitly by the user. PL_API bool pl_upload_plane(pl_gpu gpu, struct pl_plane *out_plane, pl_tex *tex, const struct pl_plane_data *data); // Like `pl_upload_plane`, but only creates an uninitialized texture object // rather than actually performing an upload. This can be useful to, for // example, prepare textures to be used as the target of rendering. 
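
// A minimal usage sketch for `pl_plane_data` + `pl_upload_plane`, assuming an
// initialized `pl_gpu` and a tightly packed, 8-bit single-channel image in
// host memory. The helper name and parameters are illustrative only, not part
// of this header.
static inline bool example_upload_gray8(pl_gpu gpu, struct pl_plane *out_plane,
                                        pl_tex *tex, const uint8_t *pixels,
                                        int w, int h)
{
    struct pl_plane_data data = {
        .type           = PL_FMT_UNORM,
        .width          = w,
        .height         = h,
        .component_size = {8},      // a single 8-bit component
        .component_map  = {0},      // maps to the first (R) channel
        .pixel_stride   = 1,        // tightly packed, no padding between pixels
        .pixels         = pixels,   // upload directly from host memory
    };

    // `pl_upload_plane` searches for a compatible GPU format (see
    // `pl_plane_find_fmt`) and (re)creates `*tex` as necessary. Remember that
    // `out_plane->shift_x/y` and `out_plane->flipped` are left for the caller
    // to fill in.
    return pl_upload_plane(gpu, out_plane, tex, &data);
}
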
// // The resulting texture is guaranteed to be `renderable`, and it will also try // to maximize compatibility with the other `pl_renderer` requirements // (blittable, storable, etc.). PL_API bool pl_recreate_plane(pl_gpu gpu, struct pl_plane *out_plane, pl_tex *tex, const struct pl_plane_data *data); PL_API_END #endif // LIBPLACEBO_UPLOAD_H_ libplacebo-v7.349.0/src/include/libplacebo/vulkan.h000066400000000000000000000662761463457750100221740ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #ifndef LIBPLACEBO_VULKAN_H_ #define LIBPLACEBO_VULKAN_H_ #include #include #include PL_API_BEGIN #define PL_VK_MIN_VERSION VK_API_VERSION_1_2 // Structure representing a VkInstance. Using this is not required. typedef const struct pl_vk_inst_t { VkInstance instance; // The Vulkan API version supported by this VkInstance. uint32_t api_version; // The associated vkGetInstanceProcAddr pointer. PFN_vkGetInstanceProcAddr get_proc_addr; // The instance extensions that were successfully enabled, including // extensions enabled by libplacebo internally. May contain duplicates. const char * const *extensions; int num_extensions; // The instance layers that were successfully enabled, including // layers enabled by libplacebo internally. May contain duplicates. const char * const *layers; int num_layers; } *pl_vk_inst; struct pl_vk_inst_params { // If set, enable the debugging and validation layers. These should // generally be lightweight and relatively harmless to enable. bool debug; // If set, also enable GPU-assisted verification and best practices // layers. (Note: May cause substantial slowdown and/or result in lots of // false positive spam) bool debug_extra; // If nonzero, restricts the Vulkan API version to be at most this. This // is only really useful for explicitly testing backwards compatibility. uint32_t max_api_version; // Pointer to a user-provided `vkGetInstanceProcAddr`. If this is NULL, // libplacebo will use the directly linked version (if available). PFN_vkGetInstanceProcAddr get_proc_addr; // Enables extra instance extensions. Instance creation will fail if these // extensions are not all supported. The user may use this to enable e.g. // windowing system integration. const char * const *extensions; int num_extensions; // Enables extra optional instance extensions. These are opportunistically // enabled if supported by the device, but otherwise skipped. const char * const *opt_extensions; int num_opt_extensions; // Enables extra layers. Instance creation will fail if these layers are // not all supported. // // NOTE: Layers needed for required/optional extensions are automatically // enabled. The user does not specifically need to enable layers related // to extension support. const char * const *layers; int num_layers; // Enables extra optional layers. These are opportunistically enabled if // supported by the platform, but otherwise skipped. 
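    // (For example, a tracing layer such as "VK_LAYER_LUNARG_api_dump" could
    // be listed here, since it may or may not be installed on the end user's
    // system.)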
const char * const *opt_layers; int num_opt_layers; }; #define pl_vk_inst_params(...) (&(struct pl_vk_inst_params) { __VA_ARGS__ }) PL_API extern const struct pl_vk_inst_params pl_vk_inst_default_params; // Helper function to simplify instance creation. The user could also bypass // these helpers and do it manually, but this function is provided as a // convenience. It also sets up a debug callback which forwards all vulkan // messages to the `pl_log` callback. PL_API pl_vk_inst pl_vk_inst_create(pl_log log, const struct pl_vk_inst_params *params); PL_API void pl_vk_inst_destroy(pl_vk_inst *inst); struct pl_vulkan_queue { uint32_t index; // Queue family index uint32_t count; // Queue family count }; // Structure representing the actual vulkan device and associated GPU instance typedef const struct pl_vulkan_t *pl_vulkan; struct pl_vulkan_t { pl_gpu gpu; // The vulkan objects in use. The user may use this for their own purposes, // but please note that the lifetime is tied to the lifetime of the // pl_vulkan object, and must not be destroyed by the user. Note that the // created vulkan device may have any number of queues and queue family // assignments; so using it for queue submission commands is ill-advised. VkInstance instance; VkPhysicalDevice phys_device; VkDevice device; // The associated vkGetInstanceProcAddr pointer. PFN_vkGetInstanceProcAddr get_proc_addr; // The Vulkan API version supported by this VkPhysicalDevice. uint32_t api_version; // The device extensions that were successfully enabled, including // extensions enabled by libplacebo internally. May contain duplicates. const char * const *extensions; int num_extensions; // The device features that were enabled at device creation time. // // Note: Whenever a feature flag is ambiguious between several alternative // locations, for completeness' sake, we include both. const VkPhysicalDeviceFeatures2 *features; // The explicit queue families we are using to provide a given capability. struct pl_vulkan_queue queue_graphics; // provides VK_QUEUE_GRAPHICS_BIT struct pl_vulkan_queue queue_compute; // provides VK_QUEUE_COMPUTE_BIT struct pl_vulkan_queue queue_transfer; // provides VK_QUEUE_TRANSFER_BIT // Functions for locking a queue. These must be used to lock VkQueues for // submission or other related operations when sharing the VkDevice between // multiple threads, Using this on queue families or indices not contained // in `queues` is undefined behavior. void (*lock_queue)(pl_vulkan vk, uint32_t qf, uint32_t qidx); void (*unlock_queue)(pl_vulkan vk, uint32_t qf, uint32_t qidx); // --- Deprecated fields // These are the same active queue families and their queue counts in list // form. This list does not contain duplicates, nor any extra queues // enabled at device creation time. Deprecated in favor of querying // `vkGetPhysicalDeviceQueueFamilyProperties` directly. PL_DEPRECATED_IN(v6.271) const struct pl_vulkan_queue *queues; PL_DEPRECATED_IN(v6.271) int num_queues; }; struct pl_vulkan_params { // The vulkan instance. Optional, if NULL then libplacebo will internally // create a VkInstance with the settings from `instance_params`. // // Note: The VkInstance provided by the user *MUST* be created with a // VkApplicationInfo.apiVersion of PL_VK_MIN_VERSION or higher. VkInstance instance; // Pointer to `vkGetInstanceProcAddr`. If this is NULL, libplacebo will // use the directly linked version (if available). // // Note: This overwrites the same value from `instance_params`. 
PFN_vkGetInstanceProcAddr get_proc_addr; // Configures the settings used for creating an internal vulkan instance. // May be NULL. Ignored if `instance` is set. const struct pl_vk_inst_params *instance_params; // When choosing the device, rule out all devices that don't support // presenting to this surface. When creating a device, enable all extensions // needed to ensure we can present to this surface. Optional. Only legal // when specifying an existing VkInstance to use. VkSurfaceKHR surface; // --- Physical device selection options // The vulkan physical device. May be set by the caller to indicate the // physical device to use. Otherwise, libplacebo will pick the "best" // available GPU, based on the advertised device type. (i.e., it will // prefer discrete GPUs over integrated GPUs). Only legal when specifying // an existing VkInstance to use. VkPhysicalDevice device; // When choosing the device, only choose a device with this exact name. // This overrides `allow_software`. No effect if `device` is set. Note: A // list of devices and their names are logged at level PL_LOG_INFO. const char *device_name; // When choosing the device, only choose a device with this exact UUID. // This overrides `allow_software` and `device_name`. No effect if `device` // is set. uint8_t device_uuid[16]; // When choosing the device, controls whether or not to also allow software // GPUs. No effect if `device` or `device_name` are set. bool allow_software; // --- Logical device creation options // Controls whether or not to allow asynchronous transfers, using transfer // queue families, if supported by the device. This can be significantly // faster and more power efficient, and also allows streaming uploads in // parallel with rendering commands. Enabled by default. bool async_transfer; // Controls whether or not to allow asynchronous compute, using dedicated // compute queue families, if supported by the device. On some devices, // these can allow the GPU to schedule compute shaders in parallel with // fragment shaders. Enabled by default. bool async_compute; // Limits the number of queues to use. If left as 0, libplacebo will use as // many queues as the device supports. Multiple queues can result in // improved efficiency when submitting multiple commands that can entirely // or partially execute in parallel. Defaults to 1, since using more queues // can actually decrease performance. // // Note: libplacebo will always *create* logical devices with all available // queues for a given QF enabled, regardless of this setting. int queue_count; // Bitmask of extra queue families to enable. If set, then *all* queue // families matching *any* of these flags will be enabled at device // creation time. Setting this to VK_QUEUE_FLAG_BITS_MAX_ENUM effectively // enables all queue families supported by the device. VkQueueFlags extra_queues; // Enables extra device extensions. Device creation will fail if these // extensions are not all supported. The user may use this to enable e.g. // interop extensions. const char * const *extensions; int num_extensions; // Enables extra optional device extensions. These are opportunistically // enabled if supported by the device, but otherwise skipped. const char * const *opt_extensions; int num_opt_extensions; // Optional extra features to enable at device creation time. These are // opportunistically enabled if supported by the physical device, but // otherwise kept disabled. 
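    // (For instance, this could point at a VkPhysicalDeviceFeatures2 with
    // `.features.samplerAnisotropy = VK_TRUE`; if the physical device does
    // not support the feature, it is simply left disabled.)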
const VkPhysicalDeviceFeatures2 *features; // --- Misc/debugging options // Restrict specific features to e.g. work around driver bugs, or simply // for testing purposes int max_glsl_version; // limit the maximum GLSL version uint32_t max_api_version; // limit the maximum vulkan API version }; // Default/recommended parameters. Should generally be safe and efficient. #define PL_VULKAN_DEFAULTS \ .async_transfer = true, \ .async_compute = true, \ /* enabling multiple queues often decreases perf */ \ .queue_count = 1, #define pl_vulkan_params(...) (&(struct pl_vulkan_params) { PL_VULKAN_DEFAULTS __VA_ARGS__ }) PL_API extern const struct pl_vulkan_params pl_vulkan_default_params; // Creates a new vulkan device based on the given parameters and initializes // a new GPU. If `params` is left as NULL, it defaults to // &pl_vulkan_default_params. // // Thread-safety: Safe PL_API pl_vulkan pl_vulkan_create(pl_log log, const struct pl_vulkan_params *params); // Destroys the vulkan device and all associated objects, except for the // VkInstance provided by the user. // // Note that all resources allocated from this vulkan object (e.g. via the // `vk->ra` or using `pl_vulkan_create_swapchain`) *must* be explicitly // destroyed by the user before calling this. // // Also note that this function will block until all in-flight GPU commands are // finished processing. You can avoid this by manually calling `pl_gpu_finish` // before `pl_vulkan_destroy`. PL_API void pl_vulkan_destroy(pl_vulkan *vk); // For a `pl_gpu` backed by `pl_vulkan`, this function can be used to retrieve // the underlying `pl_vulkan`. Returns NULL for any other type of `gpu`. PL_API pl_vulkan pl_vulkan_get(pl_gpu gpu); struct pl_vulkan_device_params { // The instance to use. Required! // // Note: The VkInstance provided by the user *must* be created with a // VkApplicationInfo.apiVersion of PL_VK_MIN_VERSION or higher. VkInstance instance; // Mirrored from `pl_vulkan_params`. All of these fields are optional. PFN_vkGetInstanceProcAddr get_proc_addr; VkSurfaceKHR surface; const char *device_name; uint8_t device_uuid[16]; bool allow_software; }; #define pl_vulkan_device_params(...) (&(struct pl_vulkan_device_params) { __VA_ARGS__ }) // Helper function to choose the best VkPhysicalDevice, given a VkInstance. // This uses the same logic as `pl_vulkan_create` uses internally. If no // matching device was found, this returns VK_NULL_HANDLE. PL_API VkPhysicalDevice pl_vulkan_choose_device(pl_log log, const struct pl_vulkan_device_params *params); struct pl_vulkan_swapchain_params { // The surface to use for rendering. Required, the user is in charge of // creating this. Must belong to the same VkInstance as `vk->instance`. VkSurfaceKHR surface; // The preferred presentation mode. See the vulkan documentation for more // information about these. If the device/surface combination does not // support this mode, libplacebo will fall back to VK_PRESENT_MODE_FIFO_KHR. // // Warning: Leaving this zero-initialized is the same as having specified // VK_PRESENT_MODE_IMMEDIATE_KHR, which is probably not what the user // wants! VkPresentModeKHR present_mode; // Allow up to N in-flight frames. This essentially controls how many // rendering commands may be queued up at the same time. See the // documentation for `pl_swapchain_get_latency` for more information. For // vulkan specifically, we are only able to wait until the GPU has finished // rendering a frame - we are unable to wait until the display has actually // finished displaying it. 
So this only provides a rough guideline. // Optional, defaults to 3. int swapchain_depth; // This suppresses automatic recreation of the swapchain when any call // returns VK_SUBOPTIMAL_KHR. Normally, libplacebo will recreate the // swapchain internally on the next `pl_swapchain_start_frame`. If enabled, // clients are assumed to take care of swapchain recreations themselves, by // calling `pl_swapchain_resize` as appropriate. libplacebo will tolerate // the "suboptimal" status indefinitely. bool allow_suboptimal; // Disable high-bit (10 or more) SDR formats. May help work around buggy // drivers which don't dither properly when outputting high bit depth // SDR backbuffers to 8-bit screens. bool disable_10bit_sdr; }; #define pl_vulkan_swapchain_params(...) (&(struct pl_vulkan_swapchain_params) { __VA_ARGS__ }) // Creates a new vulkan swapchain based on an existing VkSurfaceKHR. Using this // function requires that the vulkan device was created with the // VK_KHR_swapchain extension. The easiest way of accomplishing this is to set // the `pl_vulkan_params.surface` explicitly at creation time. PL_API pl_swapchain pl_vulkan_create_swapchain(pl_vulkan vk, const struct pl_vulkan_swapchain_params *params); // This will return true if the vulkan swapchain is internally detected // as being suboptimal (VK_SUBOPTIMAL_KHR). This might be of use to clients // who have `params->allow_suboptimal` enabled. PL_API bool pl_vulkan_swapchain_suboptimal(pl_swapchain sw); // Vulkan interop API, for sharing a single VkDevice (and associated vulkan // resources) directly with the API user. The use of this API is a bit sketchy // and requires careful communication of Vulkan API state. struct pl_vulkan_import_params { // The vulkan instance. Required. // // Note: The VkInstance provided by the user *must* be created with a // VkApplicationInfo.apiVersion of PL_VK_MIN_VERSION or higher. VkInstance instance; // Pointer to `vkGetInstanceProcAddr`. If this is NULL, libplacebo will // use the directly linked version (if available). PFN_vkGetInstanceProcAddr get_proc_addr; // The physical device selected by the user. Required. VkPhysicalDevice phys_device; // The logical device created by the user. Required. VkDevice device; // --- Logical device parameters // List of all device-level extensions that were enabled. (Instance-level // extensions need not be re-specified here, since it's guaranteed that any // instance-level extensions that device-level extensions depend on were // enabled at the instance level) const char * const *extensions; int num_extensions; // Enabled queue families. At least `queue_graphics` is required. // // It's okay for multiple queue families to be specified with the same // index, e.g. in the event that a dedicated compute queue also happens to // be the dedicated transfer queue. // // It's also okay to leave the queue struct as {0} in the event that no // dedicated queue exists for a given operation type. libplacebo will // automatically fall back to using e.g. the graphics queue instead. struct pl_vulkan_queue queue_graphics; // must support VK_QUEUE_GRAPHICS_BIT struct pl_vulkan_queue queue_compute; // must support VK_QUEUE_COMPUTE_BIT struct pl_vulkan_queue queue_transfer; // must support VK_QUEUE_TRANSFER_BIT // Enabled VkPhysicalDeviceFeatures. The device *must* be created with // all of the features in `pl_vulkan_required_features` enabled. const VkPhysicalDeviceFeatures2 *features; // Functions for locking a queue. 
If set, these will be used instead of // libplacebo's internal functions for `pl_vulkan.(un)lock_queue`. void (*lock_queue)(void *ctx, uint32_t qf, uint32_t qidx); void (*unlock_queue)(void *ctx, uint32_t qf, uint32_t qidx); void *queue_ctx; // --- Misc/debugging options // Restrict specific features to e.g. work around driver bugs, or simply // for testing purposes. See `pl_vulkan_params` for a description of these. int max_glsl_version; uint32_t max_api_version; }; #define pl_vulkan_import_params(...) (&(struct pl_vulkan_import_params) { __VA_ARGS__ }) // For purely informative reasons, this contains a list of extensions and // device features that libplacebo *can* make use of. These are all strictly // optional, but provide a hint to the API user as to what might be worth // enabling at device creation time. // // Note: This also includes physical device features provided by extensions. // They are all provided using extension-specific features structs, rather // than the more general purpose VkPhysicalDeviceVulkan11Features etc. PL_API extern const char * const pl_vulkan_recommended_extensions[]; PL_API extern const int pl_vulkan_num_recommended_extensions; PL_API extern const VkPhysicalDeviceFeatures2 pl_vulkan_recommended_features; // A list of device features that are required by libplacebo. These // *must* be provided by imported Vulkan devices. // // Note: `pl_vulkan_recommended_features` does not include this list. PL_API extern const VkPhysicalDeviceFeatures2 pl_vulkan_required_features; // Import an existing VkDevice instead of creating a new one, and wrap it into // a `pl_vulkan` abstraction. It's safe to `pl_vulkan_destroy` this, which will // destroy application state related to libplacebo but leave the underlying // VkDevice intact. PL_API pl_vulkan pl_vulkan_import(pl_log log, const struct pl_vulkan_import_params *params); struct pl_vulkan_wrap_params { // The image itself. It *must* be usable concurrently by all of the queue // family indices listed in `pl_vulkan->queues`. Note that this requires // the use of VK_SHARING_MODE_CONCURRENT if `pl_vulkan->num_queues` is // greater than 1. If this is difficult to achieve for the user, then // `async_transfer` / `async_compute` should be turned off, which // guarantees the use of only one queue family. VkImage image; // Which aspect of `image` to wrap. Only useful for wrapping individual // sub-planes of planar images. If left as 0, it defaults to the entire // image (i.e. the union of VK_IMAGE_ASPECT_PLANE_N_BIT for planar formats, // and VK_IMAGE_ASPECT_COLOR_BIT otherwise). VkImageAspectFlags aspect; // The image's dimensions (unused dimensions must be 0) int width; int height; int depth; // The image's format. libplacebo will try to map this to an equivalent // pl_fmt. If no compatible pl_fmt is found, wrapping will fail. VkFormat format; // The usage flags the image was created with. libplacebo will set the // pl_tex capabilities to include whatever it can, as determined by the set // of enabled usage flags. VkImageUsageFlags usage; // See `pl_tex_params` void *user_data; pl_debug_tag debug_tag; }; #define pl_vulkan_wrap_params(...) (&(struct pl_vulkan_wrap_params) { \ .debug_tag = PL_DEBUG_TAG, \ __VA_ARGS__ \ }) // Wraps an external VkImage into a pl_tex abstraction. By default, the image // is considered "held" by the user and must be released before calling any // pl_tex_* API calls on it (see `pl_vulkan_release_ex`). 
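//
// As a purely illustrative sketch (not part of the API), the expected call
// pattern looks roughly like this - `vkimg`, `width` and `height` are
// assumed to come from the caller's own Vulkan code:
//
//   pl_tex tex = pl_vulkan_wrap(gpu, pl_vulkan_wrap_params(
//       .image  = vkimg,
//       .width  = width,
//       .height = height,
//       .format = VK_FORMAT_R8G8B8A8_UNORM,
//       .usage  = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT,
//   ));
//   // Release it to libplacebo before any pl_tex_* usage (no ownership
//   // transfer or semaphore needed in this simple case):
//   pl_vulkan_release_ex(gpu, pl_vulkan_release_params(
//       .tex    = tex,
//       .layout = VK_IMAGE_LAYOUT_UNDEFINED,
//       .qf     = VK_QUEUE_FAMILY_IGNORED,
//   ));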
// // This wrapper can be destroyed by simply calling `pl_tex_destroy` on it, // which will not destroy the underlying VkImage. If a pl_tex wrapper is // destroyed while an image is not currently being held by the user, that // image is left in an undefined state. // // Wrapping the same VkImage multiple times is undefined behavior, as is trying // to wrap an image belonging to a different VkDevice than the one in use by // `gpu`. // // This function may fail, in which case it returns NULL. PL_API pl_tex pl_vulkan_wrap(pl_gpu gpu, const struct pl_vulkan_wrap_params *params); // Analogous to `pl_vulkan_wrap`, this function takes any `pl_tex` (including // ones created by `pl_tex_create`) and unwraps it to expose the underlying // VkImage to the user. Unlike `pl_vulkan_wrap`, this `pl_tex` is *not* // considered held after calling this function - the user must explicitly // `pl_vulkan_hold_ex` before accessing the VkImage. // // `out_format` and `out_flags` will be updated to hold the VkImage's // format and usage flags. (Optional) PL_API VkImage pl_vulkan_unwrap(pl_gpu gpu, pl_tex tex, VkFormat *out_format, VkImageUsageFlags *out_flags); // Represents a vulkan semaphore/value pair (for compatibility with timeline // semaphores). When using normal, binary semaphores, `value` may be ignored. typedef struct pl_vulkan_sem { VkSemaphore sem; uint64_t value; } pl_vulkan_sem; struct pl_vulkan_hold_params { // The Vulkan image to hold. It will be marked as held. Attempting to // perform any pl_tex_* operation (except pl_tex_destroy) on a held image // is undefined behavior. pl_tex tex; // The layout to transition the image to when holding. Alternatively, a // pointer to receive the current image layout. If `out_layout` is // provided, `layout` is ignored. VkImageLayout layout; VkImageLayout *out_layout; // The queue family index to transition the image to. This can be used with // VK_QUEUE_FAMILY_EXTERNAL to transition the image to an external API. As // a special case, if set to VK_QUEUE_FAMILY_IGNORED, libplacebo will not // transition the image, even if this image was not set up for concurrent // usage. Ignored for concurrent images. uint32_t qf; // The semaphore to fire when the image is available for use. (Required) pl_vulkan_sem semaphore; }; #define pl_vulkan_hold_params(...) (&(struct pl_vulkan_hold_params) { __VA_ARGS__ }) // "Hold" a shared image, transferring control over the image to the user. // Returns whether successful. PL_API bool pl_vulkan_hold_ex(pl_gpu gpu, const struct pl_vulkan_hold_params *params); struct pl_vulkan_release_params { // The image to be released. It must be marked as "held". Performing any // operation on the VkImage underlying this `pl_tex` while it is not being // held by the user is undefined behavior. pl_tex tex; // The current layout of the image at the point in time when `semaphore` // fires, or if no semaphore is specified, at the time of call. VkImageLayout layout; // The queue family index to transition the image to. This can be used with // VK_QUEUE_FAMILY_EXTERNAL to transition the image from an external API. As // a special case, if set to VK_QUEUE_FAMILY_IGNORED, libplacebo will not // transition the image, even if this image was not set up for concurrent // usage. Ignored for concurrent images. uint32_t qf; // The semaphore to wait on before libplacebo will actually use or modify // the image.
(Optional) // // Note: the lifetime of `semaphore` is indeterminate, and destroying it // while the texture is still depending on that semaphore is undefined // behavior. // // Technically, the only way to be sure that it's safe to free is to use // `pl_gpu_finish()` or similar (e.g. `pl_vulkan_destroy` or // `vkDeviceWaitIdle`) after another operation involving `tex` has been // emitted (or the texture has been destroyed). // // // Warning: If `tex` is a planar image (`pl_fmt.num_planes > 0`), and // `semaphore` is specified, it *must* be a timeline semaphore! Failure to // respect this will result in undefined behavior. This warning does not // apply to individual planes (as exposed by `pl_tex.planes`). pl_vulkan_sem semaphore; }; #define pl_vulkan_release_params(...) (&(struct pl_vulkan_release_params) { __VA_ARGS__ }) // "Release" a shared image, transferring control to libplacebo. PL_API void pl_vulkan_release_ex(pl_gpu gpu, const struct pl_vulkan_release_params *params); struct pl_vulkan_sem_params { // The type of semaphore to create. VkSemaphoreType type; // For VK_SEMAPHORE_TYPE_TIMELINE, sets the initial timeline value. uint64_t initial_value; // If set, exports this VkSemaphore to the handle given in `out_handle`. // The user takes over ownership, and should manually close it before // destroying this VkSemaphore (via `pl_vulkan_sem_destroy`). enum pl_handle_type export_handle; union pl_handle *out_handle; // Optional debug tag to identify this semaphore. pl_debug_tag debug_tag; }; #define pl_vulkan_sem_params(...) (&(struct pl_vulkan_sem_params) { \ .debug_tag = PL_DEBUG_TAG, \ __VA_ARGS__ \ }) // Helper functions to create and destroy vulkan semaphores. Returns // VK_NULL_HANDLE on failure. PL_API VkSemaphore pl_vulkan_sem_create(pl_gpu gpu, const struct pl_vulkan_sem_params *params); PL_API void pl_vulkan_sem_destroy(pl_gpu gpu, VkSemaphore *semaphore); PL_API_END #endif // LIBPLACEBO_VULKAN_H_ libplacebo-v7.349.0/src/log.c000066400000000000000000000356171463457750100157240ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . 
*/ #include #include #include "common.h" #include "log.h" #include "pl_thread.h" struct priv { pl_mutex lock; enum pl_log_level log_level_cap; pl_str logbuffer; }; pl_log pl_log_create(int api_ver, const struct pl_log_params *params) { (void) api_ver; struct pl_log_t *log = pl_zalloc_obj(NULL, log, struct priv); struct priv *p = PL_PRIV(log); log->params = *PL_DEF(params, &pl_log_default_params); pl_mutex_init(&p->lock); pl_info(log, "Initialized libplacebo %s (API v%d)", PL_VERSION, PL_API_VER); return log; } const struct pl_log_params pl_log_default_params = {0}; void pl_log_destroy(pl_log *plog) { pl_log log = *plog; if (!log) return; struct priv *p = PL_PRIV(log); pl_mutex_destroy(&p->lock); pl_free((void *) log); *plog = NULL; } struct pl_log_params pl_log_update(pl_log ptr, const struct pl_log_params *params) { struct pl_log_t *log = (struct pl_log_t *) ptr; if (!log) return pl_log_default_params; struct priv *p = PL_PRIV(log); pl_mutex_lock(&p->lock); struct pl_log_params prev_params = log->params; log->params = *PL_DEF(params, &pl_log_default_params); pl_mutex_unlock(&p->lock); return prev_params; } enum pl_log_level pl_log_level_update(pl_log ptr, enum pl_log_level level) { struct pl_log_t *log = (struct pl_log_t *) ptr; if (!log) return PL_LOG_NONE; struct priv *p = PL_PRIV(log); pl_mutex_lock(&p->lock); enum pl_log_level prev_level = log->params.log_level; log->params.log_level = level; pl_mutex_unlock(&p->lock); return prev_level; } void pl_log_level_cap(pl_log log, enum pl_log_level cap) { if (!log) return; struct priv *p = PL_PRIV(log); pl_mutex_lock(&p->lock); p->log_level_cap = cap; pl_mutex_unlock(&p->lock); } static FILE *default_stream(void *stream, enum pl_log_level level) { return PL_DEF(stream, level <= PL_LOG_WARN ? stderr : stdout); } void pl_log_simple(void *stream, enum pl_log_level level, const char *msg) { static const char *prefix[] = { [PL_LOG_FATAL] = "fatal", [PL_LOG_ERR] = "error", [PL_LOG_WARN] = "warn", [PL_LOG_INFO] = "info", [PL_LOG_DEBUG] = "debug", [PL_LOG_TRACE] = "trace", }; FILE *h = default_stream(stream, level); fprintf(h, "%5s: %s\n", prefix[level], msg); if (level <= PL_LOG_WARN) fflush(h); } void pl_log_color(void *stream, enum pl_log_level level, const char *msg) { static const char *color[] = { [PL_LOG_FATAL] = "31;1", // bright red [PL_LOG_ERR] = "31", // red [PL_LOG_WARN] = "33", // yellow/orange [PL_LOG_INFO] = "32", // green [PL_LOG_DEBUG] = "34", // blue [PL_LOG_TRACE] = "30;1", // bright black }; FILE *h = default_stream(stream, level); fprintf(h, "\033[%sm%s\033[0m\n", color[level], msg); if (level <= PL_LOG_WARN) fflush(h); } static void pl_msg_va(pl_log log, enum pl_log_level lev, const char *fmt, va_list va) { // Test log message without taking the lock, to avoid thrashing the // lock for thousands of trace messages unless those are actually // enabled. This may be a false negative, in which case log messages may // be lost as a result. But this shouldn't be a big deal, since any // situation leading to lost log messages would itself be a race condition. if (!pl_msg_test(log, lev)) return; // Re-test the log message level with held lock to avoid false positives, // which would be a considerably bigger deal than false negatives struct priv *p = PL_PRIV(log); pl_mutex_lock(&p->lock); // Apply this cap before re-testing the log level, to avoid giving users // messages that should have been dropped by the log level. 
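    // (Log levels are ordered with more severe = numerically lower, so taking
    // the maximum *demotes* the message: e.g. under a PL_LOG_INFO cap, a
    // PL_LOG_ERR message is reported as PL_LOG_INFO at most, and dropped
    // entirely if the configured log level is lower than that.)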
lev = PL_MAX(lev, p->log_level_cap); if (!pl_msg_test(log, lev)) goto done; p->logbuffer.len = 0; pl_str_append_vasprintf((void *) log, &p->logbuffer, fmt, va); log->params.log_cb(log->params.log_priv, lev, (char *) p->logbuffer.buf); done: pl_mutex_unlock(&p->lock); } void pl_msg(pl_log log, enum pl_log_level lev, const char *fmt, ...) { va_list va; va_start(va, fmt); pl_msg_va(log, lev, fmt, va); va_end(va); } void pl_msg_source(pl_log log, enum pl_log_level lev, const char *src) { if (!pl_msg_test(log, lev) || !src) return; int line = 1; while (*src) { const char *end = strchr(src, '\n'); if (!end) { pl_msg(log, lev, "[%3d] %s", line, src); break; } pl_msg(log, lev, "[%3d] %.*s", line, (int)(end - src), src); src = end + 1; line++; } } #ifdef PL_HAVE_DBGHELP #include #include #include // https://github.com/llvm/llvm-project/blob/f03cd763384bbb67ddfa12957859ed58841d4b34/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace.h#L85-L106 static inline uintptr_t get_prev_inst_pc(uintptr_t pc) { #if defined(__arm__) // T32 (Thumb) branch instructions might be 16 or 32 bit long, // so we return (pc-2) in that case in order to be safe. // For A32 mode we return (pc-4) because all instructions are 32 bit long. return (pc - 3) & (~1); #elif defined(__x86_64__) || defined(__i386__) return pc - 1; #else return pc - 4; #endif } static DWORD64 get_preferred_base(const char *module) { DWORD64 image_base = 0; HANDLE file_mapping = NULL; HANDLE file_view = NULL; HANDLE file = CreateFile(module, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, 0); if (file == INVALID_HANDLE_VALUE) goto done; file_mapping = CreateFileMapping(file, NULL, PAGE_READONLY, 0, 0, NULL); if (file_mapping == NULL) goto done; file_view = MapViewOfFile(file_mapping, FILE_MAP_READ, 0, 0, 0); if (file_view == NULL) goto done; PIMAGE_DOS_HEADER dos_header = (PIMAGE_DOS_HEADER) file_view; if (dos_header->e_magic != IMAGE_DOS_SIGNATURE) goto done; PIMAGE_NT_HEADERS pe_header = (PIMAGE_NT_HEADERS) ((char *) file_view + dos_header->e_lfanew); if (pe_header->Signature != IMAGE_NT_SIGNATURE) goto done; if (pe_header->FileHeader.SizeOfOptionalHeader != sizeof(pe_header->OptionalHeader)) goto done; image_base = pe_header->OptionalHeader.ImageBase; done: if (file_view) UnmapViewOfFile(file_view); if (file_mapping) CloseHandle(file_mapping); if (file != INVALID_HANDLE_VALUE) CloseHandle(file); return image_base; } void pl_log_stack_trace(pl_log log, enum pl_log_level lev) { if (!pl_msg_test(log, lev)) return; void *tmp = pl_tmp(NULL); PL_ARRAY(void *) frames = {0}; size_t capacity = 16; do { capacity *= 2; PL_ARRAY_RESIZE(tmp, frames, capacity); // Skip first frame, we don't care about this function frames.num = CaptureStackBackTrace(1, capacity, frames.elem, NULL); } while (capacity == frames.num); if (!frames.num) { pl_free(tmp); return; } // Load dbghelp on demand. While it is available on all Windows versions, // no need to keep it loaded all the time as stack trace printing function, // in theory should be used repetitively rarely. 
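    // Note: every dbghelp entry point used below is resolved dynamically via
    // GetProcAddress; if any symbol is missing, `use_dbghelp` drops to false
    // and the backtrace loop falls back to printing raw addresses and module
    // offsets only, without symbol or source line information.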
HANDLE process = GetCurrentProcess(); HMODULE dbghelp = LoadLibrary("dbghelp.dll"); DWORD options; SYMBOL_INFO *symbol = NULL; BOOL use_dbghelp = !!dbghelp; #define DBGHELP_SYM(sym) \ __typeof__(&sym) p##sym = (__typeof__(&sym))(void *) GetProcAddress(dbghelp, #sym); \ use_dbghelp &= !!p##sym DBGHELP_SYM(SymCleanup); DBGHELP_SYM(SymFromAddr); DBGHELP_SYM(SymGetLineFromAddr64); DBGHELP_SYM(SymGetModuleInfo64); DBGHELP_SYM(SymGetOptions); DBGHELP_SYM(SymGetSearchPathW); DBGHELP_SYM(SymInitialize); DBGHELP_SYM(SymSetOptions); DBGHELP_SYM(SymSetSearchPathW); #undef DBGHELP_SYM struct priv *p = PL_PRIV(log); PL_ARRAY(wchar_t) base_search = { .num = 1024 }; if (use_dbghelp) { // DbgHelp is not thread-safe. Note that on Windows mutex is recursive, // so no need to unlock before calling pl_msg. pl_mutex_lock(&p->lock); options = pSymGetOptions(); pSymSetOptions(SYMOPT_UNDNAME | SYMOPT_DEFERRED_LOADS | SYMOPT_LOAD_LINES | SYMOPT_FAVOR_COMPRESSED); use_dbghelp &= pSymInitialize(process, NULL, TRUE); if (use_dbghelp) { symbol = pl_alloc(tmp, sizeof(SYMBOL_INFO) + 512); symbol->SizeOfStruct = sizeof(SYMBOL_INFO); symbol->MaxNameLen = 512; PL_ARRAY_RESIZE(tmp, base_search, base_search.num); BOOL ret = pSymGetSearchPathW(process, base_search.elem, base_search.num); base_search.num = ret ? wcslen(base_search.elem) : 0; PL_ARRAY_APPEND(tmp, base_search, L'\0'); } else { pSymSetOptions(options); pl_mutex_unlock(&p->lock); } } pl_msg(log, lev, " Backtrace:"); for (int n = 0; n < frames.num; n++) { uintptr_t pc = get_prev_inst_pc((uintptr_t) frames.elem[n]); pl_str out = {0}; pl_str_append_asprintf(tmp, &out, " #%-2d 0x%"PRIxPTR, n, pc); MEMORY_BASIC_INFORMATION meminfo = {0}; char module_path[MAX_PATH] = {0}; if (VirtualQuery((LPCVOID) pc, &meminfo, sizeof(meminfo))) { DWORD sz = GetModuleFileNameA(meminfo.AllocationBase, module_path, sizeof(module_path)); if (sz == sizeof(module_path)) pl_msg(log, PL_LOG_ERR, "module path truncated"); if (use_dbghelp) { // According to documentation it should search in "The directory // that contains the corresponding module.", but it doesn't appear // to work, so manually set the path to module path. // https://learn.microsoft.com/windows/win32/debug/symbol-paths PL_ARRAY(wchar_t) mod_search = { .num = MAX_PATH }; PL_ARRAY_RESIZE(tmp, mod_search, mod_search.num); sz = GetModuleFileNameW(meminfo.AllocationBase, mod_search.elem, mod_search.num); if (sz > 0 && sz != MAX_PATH && // TODO: Replace with PathCchRemoveFileSpec once mingw-w64 // >= 8.0.1 is commonly available, at the time of writing // there are a few high profile Linux distributions that ship // 8.0.0. PathRemoveFileSpecW(mod_search.elem)) { mod_search.num = wcslen(mod_search.elem); PL_ARRAY_APPEND(tmp, mod_search, L';'); PL_ARRAY_CONCAT(tmp, mod_search, base_search); pSymSetSearchPathW(process, mod_search.elem); } } } DWORD64 sym_displacement; if (use_dbghelp && pSymFromAddr(process, pc, &sym_displacement, symbol)) pl_str_append_asprintf(tmp, &out, " in %s+0x%llx", symbol->Name, sym_displacement); DWORD line_displacement; IMAGEHLP_LINE64 line = {sizeof(line)}; if (use_dbghelp && pSymGetLineFromAddr64(process, pc, &line_displacement, &line)) { pl_str_append_asprintf(tmp, &out, " %s:%lu+0x%lx", line.FileName, line.LineNumber, line_displacement); goto done; } // LLVM tools by convention use absolute addresses with "prefered" base // image offset. We need to read this offset from binary, because due to // ASLR we are not loaded at this base. While Windows tools like WinDbg // expect relative offset to image base. 
So to be able to easily use it // with both worlds, print both values. DWORD64 module_base = get_preferred_base(module_path); pl_str_append_asprintf(tmp, &out, " (%s+0x%"PRIxPTR") (0x%llx)", module_path, pc - (uintptr_t) meminfo.AllocationBase, module_base + (pc - (uintptr_t) meminfo.AllocationBase)); done: pl_msg(log, lev, "%s", out.buf); } if (use_dbghelp) { pSymSetOptions(options); pSymCleanup(process); pl_mutex_unlock(&p->lock); } // Unload dbghelp. Maybe it is better to keep it loaded? if (dbghelp) FreeLibrary(dbghelp); pl_free(tmp); } #elif defined(PL_HAVE_UNWIND) #define UNW_LOCAL_ONLY #include #include void pl_log_stack_trace(pl_log log, enum pl_log_level lev) { if (!pl_msg_test(log, lev)) return; unw_cursor_t cursor; unw_context_t uc; unw_word_t ip, off; unw_getcontext(&uc); unw_init_local(&cursor, &uc); int depth = 0; pl_msg(log, lev, " Backtrace:"); while (unw_step(&cursor) > 0) { char symbol[256] = ""; Dl_info info = { .dli_fname = "", }; unw_get_reg(&cursor, UNW_REG_IP, &ip); unw_get_proc_name(&cursor, symbol, sizeof(symbol), &off); dladdr((void *) (uintptr_t) ip, &info); pl_msg(log, lev, " #%-2d 0x%016" PRIxPTR " in %s+0x%" PRIxPTR" at %s+0x%" PRIxPTR, depth++, ip, symbol, off, info.dli_fname, ip - (uintptr_t) info.dli_fbase); } } #elif defined(PL_HAVE_EXECINFO) #include void pl_log_stack_trace(pl_log log, enum pl_log_level lev) { if (!pl_msg_test(log, lev)) return; PL_ARRAY(void *) buf = {0}; size_t buf_avail = 16; do { buf_avail *= 2; PL_ARRAY_RESIZE(NULL, buf, buf_avail); buf.num = backtrace(buf.elem, buf_avail); } while (buf.num == buf_avail); pl_msg(log, lev, " Backtrace:"); char **strings = backtrace_symbols(buf.elem, buf.num); for (int i = 1; i < buf.num; i++) pl_msg(log, lev, " #%-2d %s", i - 1, strings[i]); free(strings); pl_free(buf.elem); } #else void pl_log_stack_trace(pl_log log, enum pl_log_level lev) { } #endif libplacebo-v7.349.0/src/log.h000066400000000000000000000064501463457750100157220ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #pragma once #include #include "common.h" #include // Internal logging-related functions // Warning: Not entirely thread-safe. Exercise caution when using. May result // in either false positives or false negatives. Make sure to re-run this // function while `lock` is held, to ensure no race conditions on the check. static inline bool pl_msg_test(pl_log log, enum pl_log_level lev) { return log && log->params.log_cb && log->params.log_level >= lev; } void pl_msg(pl_log log, enum pl_log_level lev, const char *fmt, ...) PL_PRINTF(3, 4); // Convenience macros #define pl_fatal(log, ...) pl_msg(log, PL_LOG_FATAL, __VA_ARGS__) #define pl_err(log, ...) pl_msg(log, PL_LOG_ERR, __VA_ARGS__) #define pl_warn(log, ...) pl_msg(log, PL_LOG_WARN, __VA_ARGS__) #define pl_info(log, ...) pl_msg(log, PL_LOG_INFO, __VA_ARGS__) #define pl_debug(log, ...) pl_msg(log, PL_LOG_DEBUG, __VA_ARGS__) #define pl_trace(log, ...) 
pl_msg(log, PL_LOG_TRACE, __VA_ARGS__) #define PL_MSG(obj, lev, ...) pl_msg((obj)->log, lev, __VA_ARGS__) #define PL_FATAL(obj, ...) PL_MSG(obj, PL_LOG_FATAL, __VA_ARGS__) #define PL_ERR(obj, ...) PL_MSG(obj, PL_LOG_ERR, __VA_ARGS__) #define PL_WARN(obj, ...) PL_MSG(obj, PL_LOG_WARN, __VA_ARGS__) #define PL_INFO(obj, ...) PL_MSG(obj, PL_LOG_INFO, __VA_ARGS__) #define PL_DEBUG(obj, ...) PL_MSG(obj, PL_LOG_DEBUG, __VA_ARGS__) #define PL_TRACE(obj, ...) PL_MSG(obj, PL_LOG_TRACE, __VA_ARGS__) // Log something with line numbers included void pl_msg_source(pl_log log, enum pl_log_level lev, const char *src); // Temporarily cap the log level to a certain verbosity. This is intended for // things like probing formats, attempting to create buffers that may fail, and // other types of operations in which we want to suppress errors. Call with // PL_LOG_NONE to disable this cap. // // Warning: This is generally not thread-safe, and only provided as a temporary // hack until a better solution can be thought of. void pl_log_level_cap(pl_log log, enum pl_log_level cap); // CPU execution time reporting helper static inline void pl_log_cpu_time(pl_log log, pl_clock_t start, pl_clock_t stop, const char *operation) { double ms = pl_clock_diff(stop, start) * 1e3; enum pl_log_level lev = PL_LOG_DEBUG; if (ms > 10) lev = PL_LOG_INFO; if (ms > 1000) lev = PL_LOG_WARN; pl_msg(log, lev, "Spent %.3f ms %s%s", ms, operation, ms > 100 ? " (slow!)" : ""); } // Log stack trace PL_NOINLINE void pl_log_stack_trace(pl_log log, enum pl_log_level lev); libplacebo-v7.349.0/src/meson.build000066400000000000000000000226171463457750100171350ustar00rootroot00000000000000### Common dependencies unwind = dependency('libunwind', required: get_option('unwind')) libexecinfo = cc.find_library('execinfo', required: false) has_execinfo = cc.has_function('backtrace_symbols', dependencies: libexecinfo, prefix: '#include ') dbghelp = cc.check_header('dbghelp.h', prefix: '#include ') conf_internal.set('PL_HAVE_DBGHELP', dbghelp) conf_internal.set('PL_HAVE_UNWIND', unwind.found()) conf_internal.set('PL_HAVE_EXECINFO', has_execinfo) if dbghelp build_deps += cc.find_library('shlwapi', required: true) elif unwind.found() build_deps += [unwind, cc.find_library('dl', required : false)] elif has_execinfo build_deps += libexecinfo endif link_args = [] link_depends = [] # Looks like meson in certain configuration returns ' ' instead of empty string mingw32 = cc.get_define('__MINGW32__').strip() if host_machine.system() == 'windows' and mingw32 != '' and host_machine.cpu() in ['aarch64', 'arm', 'x86_64'] # MinGW-w64 math functions are significantly slower than the UCRT ones. # In particular powf is over 7 times slower than UCRT counterpart. # MinGW-w64 explicitly excludes some math functions from their ucrtbase def # file and replaces with own versions. To workaround the issue, generate the # import library and link it with UCRT versions of math functions. dlltool = find_program('llvm-dlltool', 'dlltool') ucrt_math = custom_target('ucrt_math.lib', output : ['ucrt_math.lib'], input : 'ucrt_math.def', command : [dlltool, '-d', '@INPUT@', '-l', '@OUTPUT@']) link_args += ucrt_math.full_path() link_depends += ucrt_math # MinGW-w64 inlines functions like powf, rewriting them to pow. We want to use # the powf specialization from UCRT, so disable inlining. 
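  # (The -D__CRT__NO_INLINE define below only stops the MinGW-w64 headers from
  # inlining these math functions; the actual UCRT implementations come from
  # the ucrt_math.lib import library generated above.)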
add_project_arguments(['-D__CRT__NO_INLINE'], language: ['c', 'cpp']) endif # Work around missing atomics on some (obscure) platforms atomic_test = ''' #include #include int main(void) { _Atomic uint32_t x32; atomic_init(&x32, 0); }''' if not cc.links(atomic_test) build_deps += cc.find_library('atomic') endif ### Common source files headers = [ 'cache.h', 'colorspace.h', 'common.h', 'd3d11.h', 'dispatch.h', 'dither.h', 'dummy.h', 'filters.h', 'gamut_mapping.h', 'gpu.h', 'log.h', 'opengl.h', 'options.h', 'renderer.h', 'shaders/colorspace.h', 'shaders/custom.h', 'shaders/deinterlacing.h', 'shaders/dithering.h', 'shaders/film_grain.h', 'shaders/icc.h', 'shaders/lut.h', 'shaders/sampling.h', 'shaders.h', 'swapchain.h', 'tone_mapping.h', 'utils/dav1d.h', 'utils/dav1d_internal.h', 'utils/dolbyvision.h', 'utils/frame_queue.h', 'utils/libav.h', 'utils/libav_internal.h', 'utils/upload.h', 'vulkan.h', ] sources = [ 'cache.c', 'colorspace.c', 'common.c', 'convert.cc', 'dither.c', 'dispatch.c', 'dummy.c', 'filters.c', 'format.c', 'gamut_mapping.c', 'glsl/spirv.c', 'gpu.c', 'gpu/utils.c', 'log.c', 'options.c', 'pl_alloc.c', 'pl_string.c', 'swapchain.c', 'tone_mapping.c', 'utils/dolbyvision.c', 'utils/frame_queue.c', 'utils/upload.c', ] # Source files that may use GLSL pragmas, we need to use custom_target # to the proper environment and dependency information for these foreach f : ['renderer.c', 'shaders.c'] sources += custom_target(f, command: glsl_preproc, depend_files: glsl_deps, env: python_env, input: f, output: f, ) endforeach # More .c files defined here, we can't put them in this file because of meson # preventing the use of / in custom_target output filenames subdir('shaders') tests = [ 'cache.c', 'colorspace.c', 'common.c', 'dither.c', 'dummy.c', 'lut.c', 'filters.c', 'options.c', 'string.c', 'tone_mapping.c', 'utils.c', ] fuzzers = [ 'lut.c', 'options.c', 'shaders.c', 'user_shaders.c', ] components = configuration_data() ### Optional dependencies / components subdir('glsl') subdir('d3d11') subdir('opengl') subdir('vulkan') lcms = dependency('lcms2', version: '>=2.9', required: get_option('lcms')) components.set('lcms', lcms.found()) if lcms.found() build_deps += lcms tests += 'icc.c' endif # Check to see if libplacebo built this way is sane if not (components.get('vulkan') or components.get('opengl') or components.get('d3d11')) warning('Building without any graphics API. libplacebo built this way still ' + 'has some limited use (e.g. generating GLSL shaders), but most of ' + 'its functionality will be missing or impaired!') endif has_spirv = components.get('shaderc') or components.get('glslang') needs_spirv = components.get('vulkan') or components.get('d3d11') if needs_spirv and not has_spirv warning('Building without any GLSL compiler (shaderc, glslang), but with ' + 'APIs required that require one (vulkan, d3d11). 
This build is very ' + 'likely to be very limited in functionality!') endif dovi = get_option('dovi') components.set('dovi', dovi.allowed()) libdovi = dependency('dovi', version: '>=1.6.7', required: get_option('libdovi').require(dovi.allowed())) components.set('libdovi', libdovi.found()) if libdovi.found() build_deps += libdovi endif inc_dirs = [ vulkan_headers_inc ] xxhash = dependency('libxxhash', required: get_option('xxhash')) components.set('xxhash', xxhash.found()) if xxhash.found() if xxhash.type_name() == 'internal' build_deps += xxhash else inc_dirs += xxhash.get_variable('includedir') endif endif # Generate configuration files defs = '' pc_vars = [] foreach comp : components.keys() found = components.get(comp) varname = comp.underscorify().to_upper() summary(comp, found, section: 'Optional features', bool_yn: true) defs += (found ? '#define PL_HAVE_@0@ 1\n' : '#undef PL_HAVE_@0@\n').format(varname) pc_vars += 'pl_has_@0@=@1@'.format(varname.to_lower(), found ? 1 : 0) endforeach conf_public.set('extra_defs', defs) subdir('./include/libplacebo') # generate config.h in the right location sources += configure_file( output: 'config_internal.h', configuration: conf_internal ) version_h = custom_target( command: [ python, join_paths(meson.current_source_dir(), 'version.py'), '@INPUT@', '@OUTPUT@', '@CURRENT_SOURCE_DIR@', version_pretty ], input: 'version.h.in', output: 'version.h', build_by_default: true, build_always_stale: true, ) sources += version_h if host_machine.system() == 'windows' windows = import('windows') sources += windows.compile_resources(libplacebo_rc, depends: version_h, include_directories: meson.project_source_root()/'win32') endif if fs.is_dir('../3rdparty/fast_float/include') inc_dirs += include_directories('../3rdparty/fast_float/include') endif ### Main library build process inc = include_directories('./include') lib = library('placebo', sources, c_args: ['-DPL_EXPORT'], install: true, dependencies: build_deps + glad_dep, soversion: apiver, include_directories: [ inc, inc_dirs ], link_args: link_args, link_depends: link_depends, gnu_symbol_visibility: 'hidden', name_prefix: 'lib' ) libplacebo = declare_dependency( include_directories: inc, compile_args: get_option('default_library') == 'static' ? ['-DPL_STATIC'] : [], link_with: lib, variables: pc_vars, ) ### Install process proj_name = meson.project_name() foreach h : headers parts = h.split('/') path = proj_name foreach p : parts if p != parts[-1] path = path / p endif endforeach install_headers('include' / proj_name / h, subdir: path) endforeach extra_cflags = [] if get_option('default_library') == 'static' extra_cflags = ['-DPL_STATIC'] elif get_option('default_library') == 'both' # meson doesn't support Cflags.private, insert it forcefully... extra_cflags = ['\nCflags.private:', '-DPL_STATIC'] endif pkg = import('pkgconfig') pkg.generate( name: proj_name, description: 'Reusable library for GPU-accelerated video/image rendering', libraries: lib, version: version, variables: pc_vars, extra_cflags: extra_cflags, ) ### Testing tdep_static = declare_dependency( dependencies: build_deps, include_directories: [ inc, include_directories('.') ], compile_args: '-DPL_STATIC' # TODO: Define objects here once Meson 1.1.0 is ok to use # objects: lib.extract_all_objects(recursive: false) ) tdep_shared = declare_dependency( include_directories: [ inc, include_directories('.') ], compile_args: get_option('default_library') == 'static' ? 
['-DPL_STATIC'] : [], link_with: lib, ) if get_option('tests') subdir('tests') endif if get_option('bench') if not components.get('vk-proc-addr') error('Compiling the benchmark suite requires vulkan support!') endif bench = executable('bench', 'tests/bench.c', dependencies: [tdep_shared, vulkan_headers], link_args: link_args, link_depends: link_depends, include_directories: vulkan_headers_inc, ) test('benchmark', bench, is_parallel: false, timeout: 600) endif if get_option('fuzz') foreach f : fuzzers executable('fuzz.' + f, 'tests/fuzz/' + f, objects: lib.extract_all_objects(recursive: false), dependencies: tdep_static, link_args: link_args, link_depends: link_depends, ) endforeach endif pl_thread = declare_dependency( include_directories: include_directories('.'), dependencies: threads, ) pl_clock = declare_dependency( include_directories: include_directories('.'), ) libplacebo-v7.349.0/src/opengl/000077500000000000000000000000001463457750100162475ustar00rootroot00000000000000libplacebo-v7.349.0/src/opengl/common.h000066400000000000000000000027761463457750100177240ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #pragma once #include "../common.h" #include "../log.h" #include "../gpu.h" #include "pl_thread.h" #include // Collision with llvm-mingw #undef MemoryBarrier #define GLAD_GL #define GLAD_GLES2 #include #include typedef GladGLContext gl_funcs; // PL_PRIV(pl_opengl) struct gl_ctx { pl_log log; struct pl_opengl_params params; bool is_debug; bool is_debug_egl; bool is_gles; // For context locking pl_mutex lock; int count; // Dispatch table gl_funcs func; }; struct gl_cb { void (*callback)(void *priv); void *priv; GLsync sync; }; struct fbo_format { pl_fmt fmt; const struct gl_format *glfmt; }; // For locking/unlocking bool gl_make_current(pl_opengl gl); void gl_release_current(pl_opengl gl); libplacebo-v7.349.0/src/opengl/context.c000066400000000000000000000250521463457750100201030ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . 
*/ #include #include "common.h" #include "utils.h" #include "gpu.h" const struct pl_opengl_params pl_opengl_default_params = {0}; static void GLAPIENTRY debug_cb(GLenum source, GLenum type, GLuint id, GLenum severity, GLsizei length, const GLchar *message, const void *userParam) { pl_log log = (void *) userParam; enum pl_log_level level = PL_LOG_ERR; switch (severity) { case GL_DEBUG_SEVERITY_NOTIFICATION:level = PL_LOG_DEBUG; break; case GL_DEBUG_SEVERITY_LOW: level = PL_LOG_INFO; break; case GL_DEBUG_SEVERITY_MEDIUM: level = PL_LOG_WARN; break; case GL_DEBUG_SEVERITY_HIGH: level = PL_LOG_ERR; break; } pl_msg(log, level, "GL: %s", message); if (level <= PL_LOG_ERR) pl_log_stack_trace(log, level); } static void GLAPIENTRY debug_cb_egl(EGLenum error, const char *command, EGLint messageType, EGLLabelKHR threadLabel, EGLLabelKHR objectLabel, const char *message) { pl_log log = threadLabel; enum pl_log_level level = PL_LOG_ERR; switch (messageType) { case EGL_DEBUG_MSG_CRITICAL_KHR: level = PL_LOG_FATAL; break; case EGL_DEBUG_MSG_ERROR_KHR: level = PL_LOG_ERR; break; case EGL_DEBUG_MSG_WARN_KHR: level = PL_LOG_WARN; break; case EGL_DEBUG_MSG_INFO_KHR: level = PL_LOG_DEBUG; break; } pl_msg(log, level, "EGL: %s: %s %s", command, egl_err_str(error), message); if (level <= PL_LOG_ERR) pl_log_stack_trace(log, level); } // Guards access to the (thread-unsafe) glad global EGL state static pl_static_mutex glad_egl_mutex = PL_STATIC_MUTEX_INITIALIZER; void pl_opengl_destroy(pl_opengl *ptr) { pl_opengl pl_gl = *ptr; if (!pl_gl) return; struct gl_ctx *p = PL_PRIV(pl_gl); gl_funcs *gl = &p->func; if (!gl_make_current(pl_gl)) { PL_WARN(p, "Failed uninitializing OpenGL context, leaking resources!"); return; } if (p->is_debug) gl->DebugMessageCallback(NULL, NULL); if (p->is_debug_egl) eglDebugMessageControlKHR(NULL, NULL); pl_gpu_destroy(pl_gl->gpu); #ifdef PL_HAVE_GL_PROC_ADDR if (p->is_gles) { gladLoaderUnloadGLES2Context(gl); } else { gladLoaderUnloadGLContext(gl); } bool used_loader = !p->params.get_proc_addr && !p->params.get_proc_addr_ex; if (p->params.egl_display && used_loader) { pl_static_mutex_lock(&glad_egl_mutex); gladLoaderUnloadEGL(); pl_static_mutex_unlock(&glad_egl_mutex); } #endif gl_release_current(pl_gl); pl_mutex_destroy(&p->lock); pl_free_ptr((void **) ptr); } typedef PL_ARRAY(const char *) ext_arr_t; static void add_exts_str(void *alloc, ext_arr_t *arr, const char *extstr) { pl_str rest = pl_str_strip(pl_str0(pl_strdup0(alloc, pl_str0(extstr)))); while (rest.len) { pl_str ext = pl_str_split_char(rest, ' ', &rest); ext.buf[ext.len] = '\0'; // re-use separator for terminator PL_ARRAY_APPEND(alloc, *arr, (char *) ext.buf); } } pl_opengl pl_opengl_create(pl_log log, const struct pl_opengl_params *params) { params = PL_DEF(params, &pl_opengl_default_params); struct pl_opengl_t *pl_gl = pl_zalloc_obj(NULL, pl_gl, struct gl_ctx); struct gl_ctx *p = PL_PRIV(pl_gl); gl_funcs *gl = &p->func; p->params = *params; p->log = log; pl_mutex_init_type(&p->lock, PL_MUTEX_RECURSIVE); if (!gl_make_current(pl_gl)) { pl_free(pl_gl); return NULL; } bool ok; if (params->get_proc_addr_ex) { ok = gladLoadGLContextUserPtr(gl, params->get_proc_addr_ex, params->proc_ctx); } else if (params->get_proc_addr) { ok = gladLoadGLContext(gl, params->get_proc_addr); } else { #ifdef PL_HAVE_GL_PROC_ADDR ok = gladLoaderLoadGLContext(gl); #else PL_FATAL(p, "No `glGetProcAddress` function provided, and libplacebo " "built without its built-in OpenGL loader!"); goto error; #endif } if (!ok) { PL_INFO(p, "Failed loading core GL, 
retrying as GLES..."); } else if (gl_is_gles(pl_gl)) { PL_INFO(p, "GL context seems to be OpenGL ES, reloading as GLES..."); ok = false; } if (!ok) { memset(gl, 0, sizeof(*gl)); if (params->get_proc_addr_ex) { ok = gladLoadGLES2ContextUserPtr(gl, params->get_proc_addr_ex, params->proc_ctx); } else if (params->get_proc_addr) { ok = gladLoadGLES2Context(gl, params->get_proc_addr); } else { #ifdef PL_HAVE_GL_PROC_ADDR ok = gladLoaderLoadGLES2Context(gl); #else pl_unreachable(); #endif } p->is_gles = ok; } if (!ok) { PL_FATAL(p, "Failed to initialize OpenGL context - make sure a valid " "OpenGL context is bound to the current thread!"); goto error; } const char *version = (const char *) gl->GetString(GL_VERSION); if (version) { const char *ver = version; while (!isdigit(*ver) && *ver != '\0') ver++; if (sscanf(ver, "%d.%d", &pl_gl->major, &pl_gl->minor) != 2) { PL_FATAL(p, "Invalid GL_VERSION string: %s\n", version); goto error; } } if (!pl_gl->major) { PL_FATAL(p, "No OpenGL version detected - make sure an OpenGL context " "is bound to the current thread!"); goto error; } static const int gl_ver_req = 3; if (pl_gl->major < gl_ver_req) { PL_FATAL(p, "OpenGL version too old (%d < %d), please use a newer " "OpenGL implementation or downgrade libplacebo!", pl_gl->major, gl_ver_req); goto error; } PL_INFO(p, "Detected OpenGL version strings:"); PL_INFO(p, " GL_VERSION: %s", version); PL_INFO(p, " GL_VENDOR: %s", (char *) gl->GetString(GL_VENDOR)); PL_INFO(p, " GL_RENDERER: %s", (char *) gl->GetString(GL_RENDERER)); ext_arr_t exts = {0}; if (pl_gl->major >= 3) { gl->GetIntegerv(GL_NUM_EXTENSIONS, &exts.num); PL_ARRAY_RESIZE(pl_gl, exts, exts.num); for (int i = 0; i < exts.num; i++) exts.elem[i] = (const char *) gl->GetStringi(GL_EXTENSIONS, i); } else { add_exts_str(pl_gl, &exts, (const char *) gl->GetString(GL_EXTENSIONS)); } if (pl_msg_test(log, PL_LOG_DEBUG)) { PL_DEBUG(p, " GL_EXTENSIONS:"); for (int i = 0; i < exts.num; i++) PL_DEBUG(p, " %s", exts.elem[i]); } if (params->egl_display) { pl_static_mutex_lock(&glad_egl_mutex); if (params->get_proc_addr_ex) { ok = gladLoadEGLUserPtr(params->egl_display, params->get_proc_addr_ex, params->proc_ctx); } else if (params->get_proc_addr) { ok = gladLoadEGL(params->egl_display, params->get_proc_addr); } else { #ifdef PL_HAVE_GL_PROC_ADDR ok = gladLoaderLoadEGL(params->egl_display); #else pl_unreachable(); #endif } pl_static_mutex_unlock(&glad_egl_mutex); if (!ok) { PL_FATAL(p, "Failed loading EGL functions - double check EGLDisplay?"); goto error; } int start = exts.num; add_exts_str(pl_gl, &exts, eglQueryString(params->egl_display, EGL_EXTENSIONS)); if (exts.num > start) { PL_DEBUG(p, " EGL_EXTENSIONS:"); for (int i = start; i < exts.num; i++) PL_DEBUG(p, " %s", exts.elem[i]); } } pl_gl->extensions = exts.elem; pl_gl->num_extensions = exts.num; if (!params->allow_software && gl_is_software(pl_gl)) { PL_FATAL(p, "OpenGL context is suspected to be a software rasterizer, " "but `allow_software` is false."); goto error; } if (params->debug) { if (pl_opengl_has_ext(pl_gl, "GL_KHR_debug")) { gl->DebugMessageCallback(debug_cb, log); gl->Enable(GL_DEBUG_OUTPUT); p->is_debug = true; } else { PL_WARN(p, "OpenGL debugging requested, but GL_KHR_debug is not " "available... ignoring!"); } if (params->egl_display && pl_opengl_has_ext(pl_gl, "EGL_KHR_debug")) { static const EGLAttrib attribs[] = { // Enable everything under the sun, because the `pl_ctx` log // level may change at runtime. 
EGL_DEBUG_MSG_CRITICAL_KHR, EGL_TRUE, EGL_DEBUG_MSG_ERROR_KHR, EGL_TRUE, EGL_DEBUG_MSG_WARN_KHR, EGL_TRUE, EGL_DEBUG_MSG_INFO_KHR, EGL_TRUE, EGL_NONE, }; eglDebugMessageControlKHR(debug_cb_egl, attribs); eglLabelObjectKHR(NULL, EGL_OBJECT_THREAD_KHR, NULL, (void *) log); p->is_debug_egl = true; } } pl_gl->gpu = pl_gpu_create_gl(log, pl_gl, params); if (!pl_gl->gpu) goto error; gl_release_current(pl_gl); return pl_gl; error: PL_FATAL(p, "Failed initializing opengl context!"); gl_release_current(pl_gl); pl_opengl_destroy((pl_opengl *) &pl_gl); return NULL; } bool gl_make_current(pl_opengl pl_gl) { struct gl_ctx *p = PL_PRIV(pl_gl); pl_mutex_lock(&p->lock); if (!p->count && p->params.make_current) { if (!p->params.make_current(p->params.priv)) { PL_ERR(p, "Failed making OpenGL context current on calling thread!"); pl_mutex_unlock(&p->lock); return false; } } p->count++; return true; } void gl_release_current(pl_opengl pl_gl) { struct gl_ctx *p = PL_PRIV(pl_gl); p->count--; if (!p->count && p->params.release_current) p->params.release_current(p->params.priv); pl_mutex_unlock(&p->lock); } libplacebo-v7.349.0/src/opengl/formats.c000066400000000000000000000450401463457750100200710ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . 
*/ #include "gpu.h" #include "common.h" #include "formats.h" #include "utils.h" #ifdef PL_HAVE_UNIX static bool supported_fourcc(struct pl_gl *p, EGLint fourcc) { for (int i = 0; i < p->egl_formats.num; ++i) if (fourcc == p->egl_formats.elem[i]) return true; return false; } #endif #define FMT(_name, bits, ftype, _caps) \ (struct pl_fmt_t) { \ .name = _name, \ .type = PL_FMT_##ftype, \ .caps = (enum pl_fmt_caps) (_caps), \ .sample_order = {0, 1, 2, 3}, \ .component_depth = {bits, bits, bits, bits}, \ } // Convenience to make the names simpler enum { // Type aliases U8 = GL_UNSIGNED_BYTE, U16 = GL_UNSIGNED_SHORT, U32 = GL_UNSIGNED_INT, I8 = GL_BYTE, I16 = GL_SHORT, I32 = GL_INT, FLT = GL_FLOAT, HALF = GL_HALF_FLOAT, // Component aliases R = GL_RED, RG = GL_RG, RGB = GL_RGB, RGBA = GL_RGBA, BGRA = GL_BGRA, RI = GL_RED_INTEGER, RGI = GL_RG_INTEGER, RGBI = GL_RGB_INTEGER, RGBAI = GL_RGBA_INTEGER, // Capability aliases S = PL_FMT_CAP_SAMPLEABLE, L = PL_FMT_CAP_LINEAR, F = PL_FMT_CAP_RENDERABLE | PL_FMT_CAP_BLITTABLE, // FBO support V = PL_FMT_CAP_VERTEX, }; // Basic 8-bit formats const struct gl_format formats_norm8[] = { {GL_R8, R, U8, FMT("r8", 8, UNORM, S|L|F|V)}, {GL_RG8, RG, U8, FMT("rg8", 8, UNORM, S|L|F|V)}, {GL_RGB8, RGB, U8, FMT("rgb8", 8, UNORM, S|L|F|V)}, {GL_RGBA8, RGBA, U8, FMT("rgba8", 8, UNORM, S|L|F|V)}, }; // Signed variants /* TODO: these are broken in mesa const struct gl_format formats_snorm8[] = { {GL_R8_SNORM, R, I8, FMT("r8s", 8, SNORM, S|L|F|V)}, {GL_RG8_SNORM, RG, I8, FMT("rg8s", 8, SNORM, S|L|F|V)}, {GL_RGB8_SNORM, RGB, I8, FMT("rgb8s", 8, SNORM, S|L|F|V)}, {GL_RGBA8_SNORM, RGBA, I8, FMT("rgba8s", 8, SNORM, S|L|F|V)}, }; */ // BGRA 8-bit const struct gl_format formats_bgra8[] = { {GL_RGBA8, BGRA, U8, { .name = "bgra8", .type = PL_FMT_UNORM, .caps = S|L|F|V, .sample_order = {2, 1, 0, 3}, .component_depth = {8, 8, 8, 8}, }}, }; // Basic 16-bit formats, excluding rgb16 (special cased below) const struct gl_format formats_norm16[] = { {GL_R16, R, U16, FMT("r16", 16, UNORM, S|L|F|V)}, {GL_RG16, RG, U16, FMT("rg16", 16, UNORM, S|L|F|V)}, {GL_RGBA16, RGBA, U16, FMT("rgba16", 16, UNORM, S|L|F|V)}, }; // Renderable version of rgb16 const struct gl_format formats_rgb16_fbo[] = { {GL_RGB16, RGB, U16, FMT("rgb16", 16, UNORM, S|L|F|V)}, }; // Non-renderable version of rgb16 const struct gl_format formats_rgb16_fallback[] = { {GL_RGB16, RGB, U16, FMT("rgb16", 16, UNORM, S|L|V)}, }; // Signed 16-bit variants /* TODO: these are broken in mesa and nvidia const struct gl_format formats_snorm16[] = { {GL_R16_SNORM, R, I16, FMT("r16s", 16, SNORM, S|L|F|V)}, {GL_RG16_SNORM, RG, I16, FMT("rg16s", 16, SNORM, S|L|F|V)}, {GL_RGB16_SNORM, RGB, I16, FMT("rgb16s", 16, SNORM, S|L|F|V)}, {GL_RGBA16_SNORM, RGBA, I16, FMT("rgba16s", 16, SNORM, S|L|F|V)}, }; */ // 32-bit floating point texture formats const struct gl_format formats_float32[] = { {GL_R32F, R, FLT, FMT("r32f", 32, FLOAT, S|L|F|V)}, {GL_RG32F, RG, FLT, FMT("rg32f", 32, FLOAT, S|L|F|V)}, {GL_RGB32F, RGB, FLT, FMT("rgb32f", 32, FLOAT, S|L|F|V)}, {GL_RGBA32F, RGBA, FLT, FMT("rgba32f", 32, FLOAT, S|L|F|V)}, }; // 16-bit floating point texture formats const struct gl_format formats_float16[] = { {GL_R16F, R, FLT, FMT("r16f", 16, FLOAT, S|L|F)}, {GL_RG16F, RG, FLT, FMT("rg16f", 16, FLOAT, S|L|F)}, {GL_RGB16F, RGB, FLT, FMT("rgb16f", 16, FLOAT, S|L|F)}, {GL_RGBA16F, RGBA, FLT, FMT("rgba16f", 16, FLOAT, S|L|F)}, }; // 16-bit half float texture formats const struct gl_format formats_half16[] = { {GL_R16F, R, HALF, FMT("r16hf", 16, FLOAT, S|L|F)}, 
{GL_RG16F, RG, HALF, FMT("rg16hf", 16, FLOAT, S|L|F)}, {GL_RGB16F, RGB, HALF, FMT("rgb16hf", 16, FLOAT, S|L|F)}, {GL_RGBA16F, RGBA, HALF, FMT("rgba16hf",16, FLOAT, S|L|F)}, }; // Renderable 16-bit float formats (excluding rgb16f) const struct gl_format formats_float16_fbo[] = { {GL_R16F, R, HALF, FMT("r16hf", 16, FLOAT, S|L|F)}, {GL_RG16F, RG, HALF, FMT("rg16hf", 16, FLOAT, S|L|F)}, {GL_RGB16F, RGB, HALF, FMT("rgb16hf", 16, FLOAT, S|L)}, {GL_RGBA16F, RGBA, HALF, FMT("rgba16hf",16, FLOAT, S|L|F)}, {GL_R16F, R, FLT, FMT("r16f", 16, FLOAT, S|L|F)}, {GL_RG16F, RG, FLT, FMT("rg16f", 16, FLOAT, S|L|F)}, {GL_RGB16F, RGB, FLT, FMT("rgb16f", 16, FLOAT, S|L)}, {GL_RGBA16F, RGBA, FLT, FMT("rgba16f", 16, FLOAT, S|L|F)}, }; // Non-renderable 16-bit float formats const struct gl_format formats_float16_fallback[] = { {GL_R16F, R, HALF, FMT("r16hf", 16, FLOAT, S|L)}, {GL_RG16F, RG, HALF, FMT("rg16hf", 16, FLOAT, S|L)}, {GL_RGB16F, RGB, HALF, FMT("rgb16hf", 16, FLOAT, S|L)}, {GL_RGBA16F, RGBA, HALF, FMT("rgba16hf",16, FLOAT, S|L)}, {GL_R16F, R, FLT, FMT("r16f", 16, FLOAT, S|L)}, {GL_RG16F, RG, FLT, FMT("rg16f", 16, FLOAT, S|L)}, {GL_RGB16F, RGB, FLT, FMT("rgb16f", 16, FLOAT, S|L)}, {GL_RGBA16F, RGBA, FLT, FMT("rgba16f", 16, FLOAT, S|L)}, }; // (Unsigned) integer formats const struct gl_format formats_uint[] = { {GL_R8UI, RI, U8, FMT("r8u", 8, UINT, S|F|V)}, {GL_RG8UI, RGI, U8, FMT("rg8u", 8, UINT, S|F|V)}, {GL_RGB8UI, RGBI, U8, FMT("rgb8u", 8, UINT, S|V)}, {GL_RGBA8UI, RGBAI, U8, FMT("rgba8u", 8, UINT, S|F|V)}, {GL_R16UI, RI, U16, FMT("r16u", 16, UINT, S|F|V)}, {GL_RG16UI, RGI, U16, FMT("rg16u", 16, UINT, S|F|V)}, {GL_RGB16UI, RGBI, U16, FMT("rgb16u", 16, UINT, S|V)}, {GL_RGBA16UI, RGBAI, U16, FMT("rgba16u", 16, UINT, S|F|V)}, }; /* TODO {GL_R32UI, RI, U32, FMT("r32u", 32, UINT)}, {GL_RG32UI, RGI, U32, FMT("rg32u", 32, UINT)}, {GL_RGB32UI, RGBI, U32, FMT("rgb32u", 32, UINT)}, {GL_RGBA32UI, RGBAI, U32, FMT("rgba32u", 32, UINT)}, {GL_R8I, RI, I8, FMT("r8i", 8, SINT)}, {GL_RG8I, RGI, I8, FMT("rg8i", 8, SINT)}, {GL_RGB8I, RGBI, I8, FMT("rgb8i", 8, SINT)}, {GL_RGBA8I, RGBAI, I8, FMT("rgba8i", 8, SINT)}, {GL_R16I, RI, I16, FMT("r16i", 16, SINT)}, {GL_RG16I, RGI, I16, FMT("rg16i", 16, SINT)}, {GL_RGB16I, RGBI, I16, FMT("rgb16i", 16, SINT)}, {GL_RGBA16I, RGBAI, I16, FMT("rgba16i", 16, SINT)}, {GL_R32I, RI, I32, FMT("r32i", 32, SINT)}, {GL_RG32I, RGI, I32, FMT("rg32i", 32, SINT)}, {GL_RGB32I, RGBI, I32, FMT("rgb32i", 32, SINT)}, {GL_RGBA32I, RGBAI, I32, FMT("rgba32i", 32, SINT)}, */ // GL2 legacy formats const struct gl_format formats_legacy_gl2[] = { {GL_RGB8, RGB, U8, FMT("rgb8", 8, UNORM, S|L|V)}, {GL_RGBA8, RGBA, U8, FMT("rgba8", 8, UNORM, S|L|V)}, {GL_RGB16, RGB, U16, FMT("rgb16", 16, UNORM, S|L|V)}, {GL_RGBA16, RGBA, U16, FMT("rgba16", 16, UNORM, S|L|V)}, }; // GLES2 legacy formats const struct gl_format formats_legacy_gles2[] = { {GL_RGB, RGB, U8, FMT("rgb", 8, UNORM, S|L)}, {GL_RGBA, RGBA, U8, FMT("rgba", 8, UNORM, S|L)}, }; // GLES BGRA const struct gl_format formats_bgra_gles[] = { {GL_BGRA, BGRA, U8, { .name = "bgra8", .type = PL_FMT_UNORM, .caps = S|L|F|V, .sample_order = {2, 1, 0, 3}, .component_depth = {8, 8, 8, 8}, }}, }; // Fallback for vertex-only formats, as a last resort const struct gl_format formats_basic_vertex[] = { {GL_R32F, R, FLT, FMT("r32f", 32, FLOAT, V)}, {GL_RG32F, RG, FLT, FMT("rg32f", 32, FLOAT, V)}, {GL_RGB32F, RGB, FLT, FMT("rgb32f", 32, FLOAT, V)}, {GL_RGBA32F, RGBA, FLT, FMT("rgba32f", 32, FLOAT, V)}, }; static void add_format(pl_gpu pgpu, const struct gl_format *gl_fmt) { struct 
pl_gpu_t *gpu = (struct pl_gpu_t *) pgpu; struct pl_gl *p = PL_PRIV(gpu); struct pl_fmt_t *fmt = pl_alloc_obj(gpu, fmt, gl_fmt); const struct gl_format **fmtp = PL_PRIV(fmt); *fmt = gl_fmt->tmpl; *fmtp = gl_fmt; // Calculate the host size and number of components switch (gl_fmt->fmt) { case GL_RED: case GL_RED_INTEGER: fmt->num_components = 1; break; case GL_RG: case GL_RG_INTEGER: fmt->num_components = 2; break; case GL_RGB: case GL_RGB_INTEGER: fmt->num_components = 3; break; case GL_RGBA: case GL_RGBA_INTEGER: case GL_BGRA: fmt->num_components = 4; break; default: pl_unreachable(); } int size; switch (gl_fmt->type) { case GL_BYTE: case GL_UNSIGNED_BYTE: size = 1; break; case GL_SHORT: case GL_UNSIGNED_SHORT: case GL_HALF_FLOAT: size = 2; break; case GL_INT: case GL_UNSIGNED_INT: case GL_FLOAT: size = 4; break; default: pl_unreachable(); } // Host visible representation fmt->texel_size = fmt->num_components * size; fmt->texel_align = 1; for (int i = 0; i < fmt->num_components; i++) fmt->host_bits[i] = size * 8; // Compute internal size by summing up the depth int ibits = 0; for (int i = 0; i < fmt->num_components; i++) ibits += fmt->component_depth[i]; fmt->internal_size = (ibits + 7) / 8; // We're not the ones actually emulating these texture format - the // driver is - but we might as well set the hint. fmt->emulated = fmt->texel_size != fmt->internal_size; // 3-component formats are almost surely also emulated if (fmt->num_components == 3) fmt->emulated = true; // Older OpenGL most likely emulates 32-bit float formats as well if (p->gl_ver < 30 && fmt->component_depth[0] >= 32) fmt->emulated = true; // For sanity, clear the superfluous fields for (int i = fmt->num_components; i < 4; i++) { fmt->component_depth[i] = 0; fmt->sample_order[i] = 0; fmt->host_bits[i] = 0; } fmt->glsl_type = pl_var_glsl_type_name(pl_var_from_fmt(fmt, "")); fmt->glsl_format = pl_fmt_glsl_format(fmt, fmt->num_components); fmt->fourcc = pl_fmt_fourcc(fmt); pl_assert(fmt->glsl_type); #ifdef PL_HAVE_UNIX if (p->has_modifiers && fmt->fourcc && supported_fourcc(p, fmt->fourcc)) { int num_mods = 0; bool ok = eglQueryDmaBufModifiersEXT(p->egl_dpy, fmt->fourcc, 0, NULL, NULL, &num_mods); if (ok && num_mods) { // On my system eglQueryDmaBufModifiersEXT seems to never return // MOD_INVALID even though eglExportDMABUFImageQueryMESA happily // returns such modifiers. Since we handle INVALID by not // requiring modifiers at all, always add this value to the // list of supported modifiers. May result in duplicates, but // whatever. 
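            // (This is why the allocation below reserves num_mods + 1 entries:
            // slot 0 holds DRM_FORMAT_MOD_INVALID, and the queried modifiers
            // are written starting at &mods[1].)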
uint64_t *mods = pl_calloc(fmt, num_mods + 1, sizeof(uint64_t)); mods[0] = DRM_FORMAT_MOD_INVALID; ok = eglQueryDmaBufModifiersEXT(p->egl_dpy, fmt->fourcc, num_mods, &mods[1], NULL, &num_mods); if (ok) { fmt->modifiers = mods; fmt->num_modifiers = num_mods + 1; } else { pl_free(mods); } } eglGetError(); // ignore probing errors } if (!fmt->num_modifiers) { // Hacky fallback for older drivers that don't support properly // querying modifiers static const uint64_t static_mods[] = { DRM_FORMAT_MOD_INVALID, DRM_FORMAT_MOD_LINEAR, }; fmt->num_modifiers = PL_ARRAY_SIZE(static_mods); fmt->modifiers = static_mods; } #endif // Gathering requires checking the format type (and extension presence) if (fmt->caps & PL_FMT_CAP_SAMPLEABLE) fmt->gatherable = p->gather_comps >= fmt->num_components; bool host_readable = false; if (p->gl_ver && p->has_readback) host_readable = true; // Reading from textures on GLES requires FBO support for this fmt if (fmt->caps & PL_FMT_CAP_RENDERABLE) { // this combination always works in glReadPixels if ((gl_fmt->fmt == GL_RGBA && gl_fmt->type == GL_UNSIGNED_BYTE) || p->has_readback) host_readable = true; } if (host_readable) fmt->caps |= PL_FMT_CAP_HOST_READABLE; if (gpu->glsl.compute && fmt->glsl_format && p->has_storage) fmt->caps |= PL_FMT_CAP_STORABLE | PL_FMT_CAP_READWRITE; // Only float-type formats are considered blendable in OpenGL switch (fmt->type) { case PL_FMT_UNKNOWN: case PL_FMT_UINT: case PL_FMT_SINT: break; case PL_FMT_FLOAT: case PL_FMT_UNORM: case PL_FMT_SNORM: if (fmt->caps & PL_FMT_CAP_RENDERABLE) fmt->caps |= PL_FMT_CAP_BLENDABLE; break; case PL_FMT_TYPE_COUNT: pl_unreachable(); } // TODO: Texel buffers PL_ARRAY_APPEND_RAW(gpu, gpu->formats, gpu->num_formats, fmt); } #define DO_FORMATS(formats) \ do { \ for (int i = 0; i < PL_ARRAY_SIZE(formats); i++) \ add_format(gpu, &formats[i]); \ } while (0) bool gl_setup_formats(struct pl_gpu_t *gpu) { struct pl_gl *p = PL_PRIV(gpu); #ifdef PL_HAVE_UNIX if (p->has_modifiers) { EGLint num_formats = 0; bool ok = eglQueryDmaBufFormatsEXT(p->egl_dpy, 0, NULL, &num_formats); if (ok && num_formats) { p->egl_formats.elem = pl_calloc(gpu, num_formats, sizeof(EGLint)); p->egl_formats.num = num_formats; ok = eglQueryDmaBufFormatsEXT(p->egl_dpy, num_formats, p->egl_formats.elem, &num_formats); pl_assert(ok); PL_DEBUG(gpu, "EGL formats supported:"); for (int i = 0; i < num_formats; ++i) { PL_DEBUG(gpu, " 0x%08x(%.4s)", p->egl_formats.elem[i], PRINT_FOURCC(p->egl_formats.elem[i])); } } } #endif if (p->gl_ver >= 30) { // Desktop GL3+ has everything DO_FORMATS(formats_norm8); DO_FORMATS(formats_bgra8); DO_FORMATS(formats_norm16); DO_FORMATS(formats_rgb16_fbo); DO_FORMATS(formats_float32); DO_FORMATS(formats_float16); DO_FORMATS(formats_half16); DO_FORMATS(formats_uint); goto done; } if (p->gl_ver >= 21) { // If we have a reasonable set of extensions, we can enable most // things. 
Otherwise, pick simple fallback formats if (pl_opengl_has_ext(p->gl, "GL_ARB_texture_float") && pl_opengl_has_ext(p->gl, "GL_ARB_texture_rg") && pl_opengl_has_ext(p->gl, "GL_ARB_framebuffer_object")) { DO_FORMATS(formats_norm8); DO_FORMATS(formats_bgra8); DO_FORMATS(formats_norm16); DO_FORMATS(formats_rgb16_fbo); DO_FORMATS(formats_float32); DO_FORMATS(formats_float16); if (pl_opengl_has_ext(p->gl, "GL_ARB_half_float_pixel")) { DO_FORMATS(formats_half16); } } else { // Fallback for GL2 DO_FORMATS(formats_legacy_gl2); DO_FORMATS(formats_basic_vertex); } goto done; } if (p->gles_ver >= 30) { // GLES 3.0 has some basic formats, with framebuffers for float16 // depending on GL_EXT_color_buffer_(half_)float support DO_FORMATS(formats_norm8); if (pl_opengl_has_ext(p->gl, "GL_EXT_texture_norm16")) { DO_FORMATS(formats_norm16); DO_FORMATS(formats_rgb16_fallback); } if (pl_opengl_has_ext(p->gl, "GL_EXT_texture_format_BGRA8888")) DO_FORMATS(formats_bgra_gles); DO_FORMATS(formats_uint); DO_FORMATS(formats_basic_vertex); if (p->gles_ver >= 32 || pl_opengl_has_ext(p->gl, "GL_EXT_color_buffer_float")) { DO_FORMATS(formats_float16_fbo); } else { DO_FORMATS(formats_float16_fallback); } goto done; } if (p->gles_ver >= 20) { // GLES 2.0 only has some legacy fallback formats, with support for // float16 depending on GL_EXT_texture_norm16 being present DO_FORMATS(formats_legacy_gles2); DO_FORMATS(formats_basic_vertex); if (pl_opengl_has_ext(p->gl, "GL_EXT_texture_rg")) { DO_FORMATS(formats_norm8); } if (pl_opengl_has_ext(p->gl, "GL_EXT_texture_format_BGRA8888")) { DO_FORMATS(formats_bgra_gles); } goto done; } // Last resort fallback. Probably not very useful DO_FORMATS(formats_basic_vertex); goto done; done: return gl_check_err(gpu, "gl_setup_formats"); } libplacebo-v7.349.0/src/opengl/formats.h000066400000000000000000000022521463457750100200740ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #pragma once #include "common.h" struct gl_format { GLint ifmt; // sized internal format (e.g. GL_RGBA16F) GLenum fmt; // base internal format (e.g. GL_RGBA) GLenum type; // host-visible type (e.g. GL_FLOAT) struct pl_fmt_t tmpl; // pl_fmt template }; typedef void (gl_format_cb)(pl_gpu gpu, const struct gl_format *glfmt); // Add all supported formats to the `pl_gpu` format list. bool gl_setup_formats(struct pl_gpu_t *gpu); libplacebo-v7.349.0/src/opengl/gpu.c000066400000000000000000000523571463457750100172220ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. 
* * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #include "gpu.h" #include "common.h" #include "formats.h" #include "utils.h" #ifdef PL_HAVE_UNIX #include #endif #ifdef PL_HAVE_WIN32 #include #include #endif static const struct pl_gpu_fns pl_fns_gl; static void gl_gpu_destroy(pl_gpu gpu) { struct pl_gl *p = PL_PRIV(gpu); pl_gpu_finish(gpu); while (p->callbacks.num > 0) gl_poll_callbacks(gpu); pl_free((void *) gpu); } pl_opengl pl_opengl_get(pl_gpu gpu) { const struct pl_gpu_fns *impl = PL_PRIV(gpu); if (impl->destroy == gl_gpu_destroy) { struct pl_gl *p = (struct pl_gl *) impl; return p->gl; } return NULL; } static pl_handle_caps tex_handle_caps(pl_gpu gpu, bool import) { pl_handle_caps caps = 0; struct pl_gl *p = PL_PRIV(gpu); if (!p->egl_dpy || (!p->has_egl_storage && !p->has_egl_import)) return 0; if (import) { if (pl_opengl_has_ext(p->gl, "EGL_EXT_image_dma_buf_import")) caps |= PL_HANDLE_DMA_BUF; } else if (!import && p->egl_ctx) { if (pl_opengl_has_ext(p->gl, "EGL_MESA_image_dma_buf_export")) caps |= PL_HANDLE_DMA_BUF; } return caps; } static inline size_t get_page_size(void) { #ifdef PL_HAVE_UNIX return sysconf(_SC_PAGESIZE); #endif #ifdef PL_HAVE_WIN32 SYSTEM_INFO sysInfo; GetSystemInfo(&sysInfo); return sysInfo.dwAllocationGranularity; #endif pl_assert(!"Unsupported platform!"); } #define get(pname, field) \ do { \ GLint tmp = 0; \ gl->GetIntegerv((pname), &tmp); \ *(field) = tmp; \ } while (0) #define geti(pname, i, field) \ do { \ GLint tmp = 0; \ gl->GetIntegeri_v((pname), i, &tmp);\ *(field) = tmp; \ } while (0) pl_gpu pl_gpu_create_gl(pl_log log, pl_opengl pl_gl, const struct pl_opengl_params *params) { struct pl_gpu_t *gpu = pl_zalloc_obj(NULL, gpu, struct pl_gl); gpu->log = log; struct pl_gl *p = PL_PRIV(gpu); p->impl = pl_fns_gl; p->gl = pl_gl; const gl_funcs *gl = gl_funcs_get(gpu); struct pl_glsl_version *glsl = &gpu->glsl; glsl->gles = gl_is_gles(pl_gl); int ver = pl_gl->major * 10 + pl_gl->minor; p->gl_ver = glsl->gles ? 0 : ver; p->gles_ver = glsl->gles ? ver : 0; // If possible, query the GLSL version from the implementation const char *glslver_p = (char *) gl->GetString(GL_SHADING_LANGUAGE_VERSION); pl_str glslver = pl_str0(glslver_p); if (glslver.len) { PL_INFO(gpu, " GL_SHADING_LANGUAGE_VERSION: %.*s", PL_STR_FMT(glslver)); pl_str_eatstart0(&glslver, "OpenGL ES GLSL ES "); int major = 0, minor = 0; if (pl_str_sscanf(glslver, "%d.%d", &major, &minor) == 2) glsl->version = major * 100 + minor; } if (!glsl->version) { // Otherwise, use the fixed magic versions 100 and 300 for GLES. if (p->gles_ver >= 30) { glsl->version = 300; } else if (p->gles_ver >= 20) { glsl->version = 100; } else { goto error; } } static const int glsl_ver_req = 130; if (glsl->version < glsl_ver_req) { PL_FATAL(gpu, "GLSL version too old (%d < %d), please use a newer " "OpenGL implementation or downgrade libplacebo!", glsl->version, glsl_ver_req); goto error; } if (params->max_glsl_version && params->max_glsl_version >= glsl_ver_req) { glsl->version = PL_MIN(glsl->version, params->max_glsl_version); PL_INFO(gpu, "Restricting GLSL version to %d... 
new version is %d", params->max_glsl_version, glsl->version); } if (gl_test_ext(gpu, "GL_ARB_compute_shader", 43, 0) && glsl->version >= 420) { glsl->compute = true; get(GL_MAX_COMPUTE_SHARED_MEMORY_SIZE, &glsl->max_shmem_size); get(GL_MAX_COMPUTE_WORK_GROUP_INVOCATIONS, &glsl->max_group_threads); for (int i = 0; i < 3; i++) geti(GL_MAX_COMPUTE_WORK_GROUP_SIZE, i, &glsl->max_group_size[i]); } if (gl_test_ext(gpu, "GL_ARB_texture_gather", 40, 31) && glsl->version >= (p->gles_ver ? 310 : 400)) { if (p->gles_ver) p->gather_comps = 4; else get(GL_MAX_PROGRAM_TEXTURE_GATHER_COMPONENTS_ARB, &p->gather_comps); get(GL_MIN_PROGRAM_TEXTURE_GATHER_OFFSET_ARB, &glsl->min_gather_offset); get(GL_MAX_PROGRAM_TEXTURE_GATHER_OFFSET_ARB, &glsl->max_gather_offset); } // Query all device limits struct pl_gpu_limits *limits = &gpu->limits; limits->thread_safe = params->make_current; limits->callbacks = gl_test_ext(gpu, "GL_ARB_sync", 32, 30); limits->align_vertex_stride = 1; if (gl_test_ext(gpu, "GL_ARB_pixel_buffer_object", 31, 0)) { limits->max_buf_size = SIZE_MAX; // no restriction imposed by GL if (gl_test_ext(gpu, "GL_ARB_uniform_buffer_object", 31, 0)) get(GL_MAX_UNIFORM_BLOCK_SIZE, &limits->max_ubo_size); if (gl_test_ext(gpu, "GL_ARB_shader_storage_buffer_object", 43, 0) && gpu->glsl.version >= 140) { get(GL_MAX_SHADER_STORAGE_BLOCK_SIZE, &limits->max_ssbo_size); } limits->max_vbo_size = limits->max_buf_size; // No additional restrictions if (gl_test_ext(gpu, "GL_ARB_buffer_storage", 44, 0)) { const char *vendor = (char *) gl->GetString(GL_VENDOR); limits->max_mapped_size = limits->max_buf_size; limits->max_mapped_vram = limits->max_buf_size; limits->host_cached = strcmp(vendor, "AMD") == 0 || strcmp(vendor, "NVIDIA Corporation") == 0; } } get(GL_MAX_TEXTURE_SIZE, &limits->max_tex_2d_dim); if (gl_test_ext(gpu, "GL_EXT_texture3D", 21, 30)) get(GL_MAX_3D_TEXTURE_SIZE, &limits->max_tex_3d_dim); // There's no equivalent limit for 1D textures for whatever reason, so // just set it to the same as the 2D limit if (p->gl_ver >= 21) limits->max_tex_1d_dim = limits->max_tex_2d_dim; limits->buf_transfer = true; if (p->gl_ver || p->gles_ver >= 30) { get(GL_MAX_FRAGMENT_UNIFORM_COMPONENTS, &limits->max_variable_comps); } else { // fallback for GLES 2.0, which doesn't have max_comps get(GL_MAX_FRAGMENT_UNIFORM_VECTORS, &limits->max_variable_comps); limits->max_variable_comps *= 4; } if (glsl->compute) { for (int i = 0; i < 3; i++) geti(GL_MAX_COMPUTE_WORK_GROUP_COUNT, i, &limits->max_dispatch[i]); } // Query import/export support p->egl_dpy = params->egl_display; p->egl_ctx = params->egl_context; p->has_egl_storage = pl_opengl_has_ext(p->gl, "GL_EXT_EGL_image_storage"); p->has_egl_import = pl_opengl_has_ext(p->gl, "GL_OES_EGL_image_external"); gpu->export_caps.tex = tex_handle_caps(gpu, false); gpu->import_caps.tex = tex_handle_caps(gpu, true); if (p->egl_dpy) { p->has_modifiers = pl_opengl_has_ext(p->gl, "EGL_EXT_image_dma_buf_import_modifiers"); } if (pl_opengl_has_ext(pl_gl, "GL_AMD_pinned_memory")) { gpu->import_caps.buf |= PL_HANDLE_HOST_PTR; gpu->limits.align_host_ptr = get_page_size(); } // Cache some internal capability checks p->has_vao = gl_test_ext(gpu, "GL_ARB_vertex_array_object", 30, 30); p->has_invalidate_fb = gl_test_ext(gpu, "GL_ARB_invalidate_subdata", 43, 30); p->has_invalidate_tex = gl_test_ext(gpu, "GL_ARB_invalidate_subdata", 43, 0); p->has_queries = gl_test_ext(gpu, "GL_ARB_timer_query", 30, 30); p->has_storage = gl_test_ext(gpu, "GL_ARB_shader_image_load_store", 42, 31); p->has_readback = 
true; if (p->has_readback && p->gles_ver) { GLuint fbo = 0, tex = 0; GLint read_type = 0, read_fmt = 0; gl->GenTextures(1, &tex); gl->BindTexture(GL_TEXTURE_2D, tex); gl->GenFramebuffers(1, &fbo); gl->TexImage2D(GL_TEXTURE_2D, 0, GL_R8, 64, 64, 0, GL_RED, GL_UNSIGNED_BYTE, NULL); gl->BindFramebuffer(GL_DRAW_FRAMEBUFFER, fbo); gl->FramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, tex, 0); gl->GetIntegerv(GL_IMPLEMENTATION_COLOR_READ_TYPE, &read_type); gl->GetIntegerv(GL_IMPLEMENTATION_COLOR_READ_FORMAT, &read_fmt); if (read_type != GL_UNSIGNED_BYTE || read_fmt != GL_RED) { PL_INFO(gpu, "GPU does not seem to support lossless texture " "readback, restricting readback capabilities! This is a " "GLES/driver limitation, there is little we can do to " "work around it."); p->has_readback = false; } gl->BindFramebuffer(GL_DRAW_FRAMEBUFFER, 0); gl->BindTexture(GL_TEXTURE_2D, 0); gl->DeleteFramebuffers(1, &fbo); gl->DeleteTextures(1, &tex); } // We simply don't know, so make up some values limits->align_tex_xfer_offset = 32; limits->align_tex_xfer_pitch = 4; limits->fragment_queues = 1; limits->compute_queues = glsl->compute ? 1 : 0; if (!gl_check_err(gpu, "pl_gpu_create_gl")) { PL_WARN(gpu, "Encountered errors while detecting GPU capabilities... " "ignoring, but expect limitations/issues"); p->failed = false; } // Filter out error messages during format probing pl_log_level_cap(gpu->log, PL_LOG_INFO); bool formats_ok = gl_setup_formats(gpu); pl_log_level_cap(gpu->log, PL_LOG_NONE); if (!formats_ok) goto error; return pl_gpu_finalize(gpu); error: gl_gpu_destroy(gpu); return NULL; } void gl_buf_destroy(pl_gpu gpu, pl_buf buf) { const gl_funcs *gl = gl_funcs_get(gpu); if (!MAKE_CURRENT()) { PL_ERR(gpu, "Failed uninitializing buffer, leaking resources!"); return; } struct pl_buf_gl *buf_gl = PL_PRIV(buf); if (buf_gl->fence) gl->DeleteSync(buf_gl->fence); if (buf_gl->mapped) { gl->BindBuffer(GL_COPY_WRITE_BUFFER, buf_gl->buffer); gl->UnmapBuffer(GL_COPY_WRITE_BUFFER); gl->BindBuffer(GL_COPY_WRITE_BUFFER, 0); } gl->DeleteBuffers(1, &buf_gl->buffer); gl_check_err(gpu, "gl_buf_destroy"); RELEASE_CURRENT(); pl_free((void *) buf); } pl_buf gl_buf_create(pl_gpu gpu, const struct pl_buf_params *params) { const gl_funcs *gl = gl_funcs_get(gpu); if (!MAKE_CURRENT()) return NULL; struct pl_buf_t *buf = pl_zalloc_obj(NULL, buf, struct pl_buf_gl); buf->params = *params; buf->params.initial_data = NULL; struct pl_gl *p = PL_PRIV(gpu); struct pl_buf_gl *buf_gl = PL_PRIV(buf); buf_gl->id = ++p->buf_id; // Just use this since the generic GL_BUFFER doesn't work GLenum target = GL_ARRAY_BUFFER; const void *data = params->initial_data; size_t total_size = params->size; bool import = false; if (params->import_handle == PL_HANDLE_HOST_PTR) { const struct pl_shared_mem *shmem = ¶ms->shared_mem; target = GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD; data = shmem->handle.ptr; buf_gl->offset = shmem->offset; total_size = shmem->size; import = true; if (params->host_mapped) buf->data = (uint8_t *) data + buf_gl->offset; if (buf_gl->offset > 0 && params->drawable) { PL_ERR(gpu, "Cannot combine non-aligned host pointer imports with " "drawable (vertex) buffers! 
This is a design limitation, " "open an issue if you absolutely need this."); goto error; } } gl->GenBuffers(1, &buf_gl->buffer); gl->BindBuffer(target, buf_gl->buffer); if (gl_test_ext(gpu, "GL_ARB_buffer_storage", 44, 0) && !import) { GLbitfield mapflags = 0, storflags = 0; if (params->host_writable) storflags |= GL_DYNAMIC_STORAGE_BIT; if (params->host_mapped) { mapflags |= GL_MAP_READ_BIT | GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT; } if (params->memory_type == PL_BUF_MEM_HOST) storflags |= GL_CLIENT_STORAGE_BIT; // hopefully this works gl->BufferStorage(target, total_size, data, storflags | mapflags); if (params->host_mapped) { buf_gl->mapped = true; buf->data = gl->MapBufferRange(target, buf_gl->offset, params->size, mapflags); if (!buf->data) { gl->BindBuffer(target, 0); if (!gl_check_err(gpu, "gl_buf_create: map")) PL_ERR(gpu, "Failed mapping buffer: unknown reason"); goto error; } } } else { // Make a random guess based on arbitrary criteria we can't know GLenum hint = GL_STREAM_DRAW; if (params->initial_data && !params->host_writable && !params->host_mapped) hint = GL_STATIC_DRAW; if (params->host_readable && !params->host_writable && !params->host_mapped) hint = GL_STREAM_READ; if (params->storable) hint = GL_DYNAMIC_COPY; gl->BufferData(target, total_size, data, hint); if (import && gl->GetError() == GL_INVALID_OPERATION) { PL_ERR(gpu, "Failed importing host pointer!"); goto error; } } gl->BindBuffer(target, 0); if (!gl_check_err(gpu, "gl_buf_create")) goto error; if (params->storable) { buf_gl->barrier = GL_BUFFER_UPDATE_BARRIER_BIT | // for buf_copy etc. GL_PIXEL_BUFFER_BARRIER_BIT | // for tex_upload GL_SHADER_STORAGE_BARRIER_BIT; if (params->host_mapped) buf_gl->barrier |= GL_CLIENT_MAPPED_BUFFER_BARRIER_BIT; if (params->uniform) buf_gl->barrier |= GL_UNIFORM_BARRIER_BIT; if (params->drawable) buf_gl->barrier |= GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT; } RELEASE_CURRENT(); return buf; error: gl_buf_destroy(gpu, buf); RELEASE_CURRENT(); return NULL; } bool gl_buf_poll(pl_gpu gpu, pl_buf buf, uint64_t timeout) { const gl_funcs *gl = gl_funcs_get(gpu); // Non-persistently mapped buffers are always implicitly reusable in OpenGL, // the implementation will create more buffers under the hood if needed. if (!buf->data) return false; if (!MAKE_CURRENT()) return true; // conservative guess struct pl_buf_gl *buf_gl = PL_PRIV(buf); if (buf_gl->fence) { GLenum res = gl->ClientWaitSync(buf_gl->fence, timeout ? 
GL_SYNC_FLUSH_COMMANDS_BIT : 0, timeout); if (res == GL_ALREADY_SIGNALED || res == GL_CONDITION_SATISFIED) { gl->DeleteSync(buf_gl->fence); buf_gl->fence = NULL; } } gl_poll_callbacks(gpu); RELEASE_CURRENT(); return !!buf_gl->fence; } void gl_buf_write(pl_gpu gpu, pl_buf buf, size_t offset, const void *data, size_t size) { const gl_funcs *gl = gl_funcs_get(gpu); if (!MAKE_CURRENT()) return; struct pl_buf_gl *buf_gl = PL_PRIV(buf); gl->BindBuffer(GL_ARRAY_BUFFER, buf_gl->buffer); gl->BufferSubData(GL_ARRAY_BUFFER, buf_gl->offset + offset, size, data); gl->BindBuffer(GL_ARRAY_BUFFER, 0); gl_check_err(gpu, "gl_buf_write"); RELEASE_CURRENT(); } bool gl_buf_read(pl_gpu gpu, pl_buf buf, size_t offset, void *dest, size_t size) { const gl_funcs *gl = gl_funcs_get(gpu); if (!MAKE_CURRENT()) return false; struct pl_buf_gl *buf_gl = PL_PRIV(buf); gl->BindBuffer(GL_ARRAY_BUFFER, buf_gl->buffer); gl->GetBufferSubData(GL_ARRAY_BUFFER, buf_gl->offset + offset, size, dest); gl->BindBuffer(GL_ARRAY_BUFFER, 0); bool ok = gl_check_err(gpu, "gl_buf_read"); RELEASE_CURRENT(); return ok; } void gl_buf_copy(pl_gpu gpu, pl_buf dst, size_t dst_offset, pl_buf src, size_t src_offset, size_t size) { const gl_funcs *gl = gl_funcs_get(gpu); if (!MAKE_CURRENT()) return; struct pl_buf_gl *src_gl = PL_PRIV(src); struct pl_buf_gl *dst_gl = PL_PRIV(dst); gl->BindBuffer(GL_COPY_READ_BUFFER, src_gl->buffer); gl->BindBuffer(GL_COPY_WRITE_BUFFER, dst_gl->buffer); gl->CopyBufferSubData(GL_COPY_READ_BUFFER, GL_COPY_WRITE_BUFFER, src_gl->offset + src_offset, dst_gl->offset + dst_offset, size); gl_check_err(gpu, "gl_buf_copy"); RELEASE_CURRENT(); } #define QUERY_OBJECT_NUM 8 struct pl_timer_t { GLuint query[QUERY_OBJECT_NUM]; int index_write; // next index to write to int index_read; // next index to read from }; static pl_timer gl_timer_create(pl_gpu gpu) { const gl_funcs *gl = gl_funcs_get(gpu); struct pl_gl *p = PL_PRIV(gpu); if (!p->has_queries || !MAKE_CURRENT()) return NULL; pl_timer timer = pl_zalloc_ptr(NULL, timer); gl->GenQueries(QUERY_OBJECT_NUM, timer->query); RELEASE_CURRENT(); return timer; } static void gl_timer_destroy(pl_gpu gpu, pl_timer timer) { const gl_funcs *gl = gl_funcs_get(gpu); if (!MAKE_CURRENT()) { PL_ERR(gpu, "Failed uninitializing timer, leaking resources!"); return; } gl->DeleteQueries(QUERY_OBJECT_NUM, timer->query); gl_check_err(gpu, "gl_timer_destroy"); RELEASE_CURRENT(); pl_free(timer); } static uint64_t gl_timer_query(pl_gpu gpu, pl_timer timer) { if (timer->index_read == timer->index_write) return 0; // no more unprocessed results struct pl_gl *p = PL_PRIV(gpu); const gl_funcs *gl = gl_funcs_get(gpu); if (!MAKE_CURRENT()) return 0; uint64_t res = 0; GLuint query = timer->query[timer->index_read]; GLuint avail = 0; gl->GetQueryObjectuiv(query, GL_QUERY_RESULT_AVAILABLE, &avail); if (!avail) goto done; if (p->gles_ver || p->gl_ver < 33) { GLuint tmp = 0; gl->GetQueryObjectuiv(query, GL_QUERY_RESULT, &tmp); res = tmp; } else { gl->GetQueryObjectui64v(query, GL_QUERY_RESULT, &res); } timer->index_read = (timer->index_read + 1) % QUERY_OBJECT_NUM; // fall through done: RELEASE_CURRENT(); return res; } void gl_timer_begin(pl_gpu gpu, pl_timer timer) { if (!timer) return; const gl_funcs *gl = gl_funcs_get(gpu); gl->BeginQuery(GL_TIME_ELAPSED, timer->query[timer->index_write]); } void gl_timer_end(pl_gpu gpu, pl_timer timer) { if (!timer) return; const gl_funcs *gl = gl_funcs_get(gpu); gl->EndQuery(GL_TIME_ELAPSED); timer->index_write = (timer->index_write + 1) % QUERY_OBJECT_NUM; if (timer->index_write 
== timer->index_read) { // forcibly drop the least recent result to make space timer->index_read = (timer->index_read + 1) % QUERY_OBJECT_NUM; } } static void gl_gpu_flush(pl_gpu gpu) { const gl_funcs *gl = gl_funcs_get(gpu); if (!MAKE_CURRENT()) return; gl->Flush(); gl_check_err(gpu, "gl_gpu_flush"); RELEASE_CURRENT(); } static void gl_gpu_finish(pl_gpu gpu) { const gl_funcs *gl = gl_funcs_get(gpu); if (!MAKE_CURRENT()) return; gl->Finish(); gl_check_err(gpu, "gl_gpu_finish"); RELEASE_CURRENT(); } static bool gl_gpu_is_failed(pl_gpu gpu) { struct pl_gl *gl = PL_PRIV(gpu); return gl->failed; } static const struct pl_gpu_fns pl_fns_gl = { .destroy = gl_gpu_destroy, .tex_create = gl_tex_create, .tex_destroy = gl_tex_destroy, .tex_invalidate = gl_tex_invalidate, .tex_clear_ex = gl_tex_clear_ex, .tex_blit = gl_tex_blit, .tex_upload = gl_tex_upload, .tex_download = gl_tex_download, .buf_create = gl_buf_create, .buf_destroy = gl_buf_destroy, .buf_write = gl_buf_write, .buf_read = gl_buf_read, .buf_copy = gl_buf_copy, .buf_poll = gl_buf_poll, .desc_namespace = gl_desc_namespace, .pass_create = gl_pass_create, .pass_destroy = gl_pass_destroy, .pass_run = gl_pass_run, .timer_create = gl_timer_create, .timer_destroy = gl_timer_destroy, .timer_query = gl_timer_query, .gpu_flush = gl_gpu_flush, .gpu_finish = gl_gpu_finish, .gpu_is_failed = gl_gpu_is_failed, }; libplacebo-v7.349.0/src/opengl/gpu.h000066400000000000000000000075061463457750100172230ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . 
*/ #pragma once #include "../gpu.h" #include "common.h" // Thread safety: Unsafe, same as pl_gpu_destroy pl_gpu pl_gpu_create_gl(pl_log log, pl_opengl gl, const struct pl_opengl_params *params); // --- pl_gpu internal structs and functions struct pl_gl { struct pl_gpu_fns impl; pl_opengl gl; bool failed; // For import/export EGLDisplay egl_dpy; EGLContext egl_ctx; bool egl_storage; #ifdef PL_HAVE_UNIX // List of formats supported by EGL_EXT_image_dma_buf_import PL_ARRAY(EGLint) egl_formats; #endif // Sync objects and associated callbacks PL_ARRAY(struct gl_cb) callbacks; // Incrementing counters to keep track of object uniqueness int buf_id; // Cached capabilities int gl_ver; int gles_ver; bool has_storage; bool has_invalidate_fb; bool has_invalidate_tex; bool has_vao; bool has_queries; bool has_modifiers; bool has_readback; bool has_egl_storage; bool has_egl_import; int gather_comps; }; static inline const gl_funcs *gl_funcs_get(pl_gpu gpu) { struct pl_gl *p = PL_PRIV(gpu); struct gl_ctx *glctx = PL_PRIV(p->gl); return &glctx->func; } void gl_timer_begin(pl_gpu gpu, pl_timer timer); void gl_timer_end(pl_gpu gpu, pl_timer timer); static inline bool _make_current(pl_gpu gpu) { struct pl_gl *p = PL_PRIV(gpu); if (!gl_make_current(p->gl)) { p->failed = true; return false; } return true; } static inline void _release_current(pl_gpu gpu) { struct pl_gl *p = PL_PRIV(gpu); gl_release_current(p->gl); } #define MAKE_CURRENT() _make_current(gpu) #define RELEASE_CURRENT() _release_current(gpu) struct pl_tex_gl { GLenum target; GLuint texture; bool wrapped_tex; GLuint fbo; // or 0 bool wrapped_fb; GLbitfield barrier; // GL format fields GLenum format; GLint iformat; GLenum type; // For imported/exported textures EGLImageKHR image; int fd; }; pl_tex gl_tex_create(pl_gpu, const struct pl_tex_params *); void gl_tex_destroy(pl_gpu, pl_tex); void gl_tex_invalidate(pl_gpu, pl_tex); void gl_tex_clear_ex(pl_gpu, pl_tex, const union pl_clear_color); void gl_tex_blit(pl_gpu, const struct pl_tex_blit_params *); bool gl_tex_upload(pl_gpu, const struct pl_tex_transfer_params *); bool gl_tex_download(pl_gpu, const struct pl_tex_transfer_params *); struct pl_buf_gl { uint64_t id; // unique per buffer GLuint buffer; size_t offset; GLsync fence; GLbitfield barrier; bool mapped; }; pl_buf gl_buf_create(pl_gpu, const struct pl_buf_params *); void gl_buf_destroy(pl_gpu, pl_buf); void gl_buf_write(pl_gpu, pl_buf, size_t offset, const void *src, size_t size); bool gl_buf_read(pl_gpu, pl_buf, size_t offset, void *dst, size_t size); void gl_buf_copy(pl_gpu, pl_buf dst, size_t dst_offset, pl_buf src, size_t src_offset, size_t size); bool gl_buf_poll(pl_gpu, pl_buf, uint64_t timeout); struct pl_pass_gl; int gl_desc_namespace(pl_gpu, enum pl_desc_type type); pl_pass gl_pass_create(pl_gpu, const struct pl_pass_params *); void gl_pass_destroy(pl_gpu, pl_pass); void gl_pass_run(pl_gpu, const struct pl_pass_run_params *); libplacebo-v7.349.0/src/opengl/gpu_pass.c000066400000000000000000000605251463457750100202440ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #include "gpu.h" #include "cache.h" #include "formats.h" #include "utils.h" int gl_desc_namespace(pl_gpu gpu, enum pl_desc_type type) { return (int) type; } struct gl_cache_header { GLenum format; }; static GLuint load_cached_program(pl_gpu gpu, pl_cache cache, pl_cache_obj *obj) { const gl_funcs *gl = gl_funcs_get(gpu); if (!gl_test_ext(gpu, "GL_ARB_get_program_binary", 41, 30)) return 0; if (!pl_cache_get(cache, obj)) return 0; if (obj->size < sizeof(struct gl_cache_header)) return 0; GLuint prog = gl->CreateProgram(); if (!gl_check_err(gpu, "load_cached_program: glCreateProgram")) return 0; struct gl_cache_header *header = (struct gl_cache_header *) obj->data; pl_str rest = (pl_str) { obj->data, obj->size }; rest = pl_str_drop(rest, sizeof(*header)); gl->ProgramBinary(prog, header->format, rest.buf, rest.len); gl->GetError(); // discard potential useless error GLint status = 0; gl->GetProgramiv(prog, GL_LINK_STATUS, &status); if (status) return prog; gl->DeleteProgram(prog); gl_check_err(gpu, "load_cached_program: glProgramBinary"); return 0; } static enum pl_log_level gl_log_level(GLint status, GLint log_length) { if (!status) { return PL_LOG_ERR; } else if (log_length > 0) { return PL_LOG_INFO; } else { return PL_LOG_DEBUG; } } static bool gl_attach_shader(pl_gpu gpu, GLuint program, GLenum type, const char *src) { const gl_funcs *gl = gl_funcs_get(gpu); GLuint shader = gl->CreateShader(type); gl->ShaderSource(shader, 1, &src, NULL); gl->CompileShader(shader); GLint status = 0; gl->GetShaderiv(shader, GL_COMPILE_STATUS, &status); GLint log_length = 0; gl->GetShaderiv(shader, GL_INFO_LOG_LENGTH, &log_length); enum pl_log_level level = gl_log_level(status, log_length); if (pl_msg_test(gpu->log, level)) { GLchar *logstr = pl_zalloc(NULL, log_length + 1); gl->GetShaderInfoLog(shader, log_length, NULL, logstr); PL_MSG(gpu, level, "shader compile log (status=%d): %s", status, logstr); pl_free(logstr); } if (!status || !gl_check_err(gpu, "gl_attach_shader")) goto error; gl->AttachShader(program, shader); gl->DeleteShader(shader); return true; error: gl->DeleteShader(shader); return false; } static GLuint gl_compile_program(pl_gpu gpu, const struct pl_pass_params *params) { const gl_funcs *gl = gl_funcs_get(gpu); GLuint prog = gl->CreateProgram(); bool ok = true; switch (params->type) { case PL_PASS_COMPUTE: ok &= gl_attach_shader(gpu, prog, GL_COMPUTE_SHADER, params->glsl_shader); break; case PL_PASS_RASTER: ok &= gl_attach_shader(gpu, prog, GL_VERTEX_SHADER, params->vertex_shader); ok &= gl_attach_shader(gpu, prog, GL_FRAGMENT_SHADER, params->glsl_shader); for (int i = 0; i < params->num_vertex_attribs; i++) gl->BindAttribLocation(prog, i, params->vertex_attribs[i].name); break; case PL_PASS_INVALID: case PL_PASS_TYPE_COUNT: pl_unreachable(); } if (!ok || !gl_check_err(gpu, "gl_compile_program: attach shader")) goto error; gl->LinkProgram(prog); GLint status = 0; gl->GetProgramiv(prog, GL_LINK_STATUS, &status); GLint log_length = 0; gl->GetProgramiv(prog, GL_INFO_LOG_LENGTH, &log_length); enum pl_log_level level = gl_log_level(status, log_length); if (pl_msg_test(gpu->log, level)) { GLchar *logstr = pl_zalloc(NULL, log_length + 1); gl->GetProgramInfoLog(prog, log_length, NULL, logstr); PL_MSG(gpu, level, "shader link log (status=%d): %s", status, logstr); pl_free(logstr); } if (!gl_check_err(gpu, 
"gl_compile_program: link program")) goto error; return prog; error: gl->DeleteProgram(prog); PL_ERR(gpu, "Failed compiling/linking GLSL program"); return 0; } // For pl_pass.priv struct pl_pass_gl { GLuint program; GLuint vao; // the VAO object uint64_t vao_id; // buf_gl.id of VAO size_t vao_offset; // VBO offset of VAO GLuint buffer; // VBO for raw vertex pointers GLuint index_buffer; GLint *var_locs; }; void gl_pass_destroy(pl_gpu gpu, pl_pass pass) { const gl_funcs *gl = gl_funcs_get(gpu); if (!MAKE_CURRENT()) { PL_ERR(gpu, "Failed uninitializing pass, leaking resources!"); return; } struct pl_pass_gl *pass_gl = PL_PRIV(pass); if (pass_gl->vao) gl->DeleteVertexArrays(1, &pass_gl->vao); gl->DeleteBuffers(1, &pass_gl->index_buffer); gl->DeleteBuffers(1, &pass_gl->buffer); gl->DeleteProgram(pass_gl->program); gl_check_err(gpu, "gl_pass_destroy"); RELEASE_CURRENT(); pl_free((void *) pass); } static void gl_update_va(pl_gpu gpu, pl_pass pass, size_t vbo_offset) { const gl_funcs *gl = gl_funcs_get(gpu); for (int i = 0; i < pass->params.num_vertex_attribs; i++) { const struct pl_vertex_attrib *va = &pass->params.vertex_attribs[i]; const struct gl_format **glfmtp = PL_PRIV(va->fmt); const struct gl_format *glfmt = *glfmtp; bool norm = false; switch (va->fmt->type) { case PL_FMT_UNORM: case PL_FMT_SNORM: norm = true; break; case PL_FMT_UNKNOWN: case PL_FMT_FLOAT: case PL_FMT_UINT: case PL_FMT_SINT: break; case PL_FMT_TYPE_COUNT: pl_unreachable(); } gl->EnableVertexAttribArray(i); gl->VertexAttribPointer(i, va->fmt->num_components, glfmt->type, norm, pass->params.vertex_stride, (void *) (va->offset + vbo_offset)); } } pl_pass gl_pass_create(pl_gpu gpu, const struct pl_pass_params *params) { const gl_funcs *gl = gl_funcs_get(gpu); if (!MAKE_CURRENT()) return NULL; struct pl_gl *p = PL_PRIV(gpu); struct pl_pass_t *pass = pl_zalloc_obj(NULL, pass, struct pl_pass_gl); struct pl_pass_gl *pass_gl = PL_PRIV(pass); pl_cache cache = pl_gpu_cache(gpu); pass->params = pl_pass_params_copy(pass, params); pl_cache_obj obj = { .key = CACHE_KEY_GL_PROG }; if (cache) { pl_hash_merge(&obj.key, pl_str0_hash(params->glsl_shader)); if (params->type == PL_PASS_RASTER) pl_hash_merge(&obj.key, pl_str0_hash(params->vertex_shader)); } // Load/Compile program if ((pass_gl->program = load_cached_program(gpu, cache, &obj))) { PL_DEBUG(gpu, "Using cached GL program"); } else { pl_clock_t start = pl_clock_now(); pass_gl->program = gl_compile_program(gpu, params); pl_log_cpu_time(gpu->log, start, pl_clock_now(), "compiling shader"); } if (!pass_gl->program) goto error; // Update program cache if possible if (cache && gl_test_ext(gpu, "GL_ARB_get_program_binary", 41, 30)) { GLint buf_size = 0; gl->GetProgramiv(pass_gl->program, GL_PROGRAM_BINARY_LENGTH, &buf_size); if (buf_size > 0) { buf_size += sizeof(struct gl_cache_header); pl_cache_obj_resize(NULL, &obj, buf_size); struct gl_cache_header *header = obj.data; void *buffer = &header[1]; GLsizei binary_size = 0; gl->GetProgramBinary(pass_gl->program, buf_size, &binary_size, &header->format, buffer); bool ok = gl_check_err(gpu, "gl_pass_create: get program binary"); if (ok) { obj.size = sizeof(*header) + binary_size; pl_assert(obj.size <= buf_size); pl_cache_set(cache, &obj); } } } gl->UseProgram(pass_gl->program); pass_gl->var_locs = pl_calloc(pass, params->num_variables, sizeof(GLint)); for (int i = 0; i < params->num_variables; i++) { pass_gl->var_locs[i] = gl->GetUniformLocation(pass_gl->program, params->variables[i].name); // Due to OpenGL API restrictions, we need to 
ensure that this is a // variable type we can actually *update*. Fortunately, this is easily // checked by virtue of the fact that all legal combinations of // parameters will have a valid GLSL type name if (!pl_var_glsl_type_name(params->variables[i])) { gl->UseProgram(0); PL_ERR(gpu, "Input variable '%s' does not match any known type!", params->variables[i].name); goto error; } } for (int i = 0; i < params->num_descriptors; i++) { const struct pl_desc *desc = ¶ms->descriptors[i]; switch (desc->type) { case PL_DESC_SAMPLED_TEX: case PL_DESC_STORAGE_IMG: { // For compatibility with older OpenGL, we need to explicitly // update the texture/image unit bindings after creating the shader // program, since specifying it directly requires GLSL 4.20+ GLint loc = gl->GetUniformLocation(pass_gl->program, desc->name); gl->Uniform1i(loc, desc->binding); break; } case PL_DESC_BUF_UNIFORM: { GLuint idx = gl->GetUniformBlockIndex(pass_gl->program, desc->name); gl->UniformBlockBinding(pass_gl->program, idx, desc->binding); break; } case PL_DESC_BUF_STORAGE: { GLuint idx = gl->GetProgramResourceIndex(pass_gl->program, GL_SHADER_STORAGE_BLOCK, desc->name); gl->ShaderStorageBlockBinding(pass_gl->program, idx, desc->binding); break; } case PL_DESC_BUF_TEXEL_UNIFORM: case PL_DESC_BUF_TEXEL_STORAGE: assert(!"unimplemented"); // TODO case PL_DESC_INVALID: case PL_DESC_TYPE_COUNT: pl_unreachable(); } } gl->UseProgram(0); // Initialize the VAO and single vertex buffer gl->GenBuffers(1, &pass_gl->buffer); if (p->has_vao) { gl->GenVertexArrays(1, &pass_gl->vao); gl->BindBuffer(GL_ARRAY_BUFFER, pass_gl->buffer); gl->BindVertexArray(pass_gl->vao); gl_update_va(gpu, pass, 0); gl->BindVertexArray(0); gl->BindBuffer(GL_ARRAY_BUFFER, 0); } if (!gl_check_err(gpu, "gl_pass_create")) goto error; pl_cache_obj_free(&obj); RELEASE_CURRENT(); return pass; error: PL_ERR(gpu, "Failed creating pass"); pl_cache_obj_free(&obj); gl_pass_destroy(gpu, pass); RELEASE_CURRENT(); return NULL; } static void update_var(pl_gpu gpu, pl_pass pass, const struct pl_var_update *vu) { const gl_funcs *gl = gl_funcs_get(gpu); struct pl_pass_gl *pass_gl = PL_PRIV(pass); const struct pl_var *var = &pass->params.variables[vu->index]; GLint loc = pass_gl->var_locs[vu->index]; switch (var->type) { case PL_VAR_SINT: { const int *i = vu->data; pl_assert(var->dim_m == 1); switch (var->dim_v) { case 1: gl->Uniform1iv(loc, var->dim_a, i); break; case 2: gl->Uniform2iv(loc, var->dim_a, i); break; case 3: gl->Uniform3iv(loc, var->dim_a, i); break; case 4: gl->Uniform4iv(loc, var->dim_a, i); break; default: pl_unreachable(); } return; } case PL_VAR_UINT: { const unsigned int *u = vu->data; pl_assert(var->dim_m == 1); switch (var->dim_v) { case 1: gl->Uniform1uiv(loc, var->dim_a, u); break; case 2: gl->Uniform2uiv(loc, var->dim_a, u); break; case 3: gl->Uniform3uiv(loc, var->dim_a, u); break; case 4: gl->Uniform4uiv(loc, var->dim_a, u); break; default: pl_unreachable(); } return; } case PL_VAR_FLOAT: { const float *f = vu->data; if (var->dim_m == 1) { switch (var->dim_v) { case 1: gl->Uniform1fv(loc, var->dim_a, f); break; case 2: gl->Uniform2fv(loc, var->dim_a, f); break; case 3: gl->Uniform3fv(loc, var->dim_a, f); break; case 4: gl->Uniform4fv(loc, var->dim_a, f); break; default: pl_unreachable(); } } else if (var->dim_m == 2 && var->dim_v == 2) { gl->UniformMatrix2fv(loc, var->dim_a, GL_FALSE, f); } else if (var->dim_m == 3 && var->dim_v == 3) { gl->UniformMatrix3fv(loc, var->dim_a, GL_FALSE, f); } else if (var->dim_m == 4 && var->dim_v == 4) { 
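// Square 4x4 case; the remaining branches below cover the non-square
// glUniformMatrixNxMfv variants.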
gl->UniformMatrix4fv(loc, var->dim_a, GL_FALSE, f); } else if (var->dim_m == 2 && var->dim_v == 3) { gl->UniformMatrix2x3fv(loc, var->dim_a, GL_FALSE, f); } else if (var->dim_m == 3 && var->dim_v == 2) { gl->UniformMatrix3x2fv(loc, var->dim_a, GL_FALSE, f); } else if (var->dim_m == 2 && var->dim_v == 4) { gl->UniformMatrix2x4fv(loc, var->dim_a, GL_FALSE, f); } else if (var->dim_m == 4 && var->dim_v == 2) { gl->UniformMatrix4x2fv(loc, var->dim_a, GL_FALSE, f); } else if (var->dim_m == 3 && var->dim_v == 4) { gl->UniformMatrix3x4fv(loc, var->dim_a, GL_FALSE, f); } else if (var->dim_m == 4 && var->dim_v == 3) { gl->UniformMatrix4x3fv(loc, var->dim_a, GL_FALSE, f); } else { pl_unreachable(); } return; } case PL_VAR_INVALID: case PL_VAR_TYPE_COUNT: break; } pl_unreachable(); } static void update_desc(pl_gpu gpu, pl_pass pass, int index, const struct pl_desc_binding *db) { const gl_funcs *gl = gl_funcs_get(gpu); const struct pl_desc *desc = &pass->params.descriptors[index]; static const GLenum access[] = { [PL_DESC_ACCESS_READWRITE] = GL_READ_WRITE, [PL_DESC_ACCESS_READONLY] = GL_READ_ONLY, [PL_DESC_ACCESS_WRITEONLY] = GL_WRITE_ONLY, }; static const GLint wraps[PL_TEX_ADDRESS_MODE_COUNT] = { [PL_TEX_ADDRESS_CLAMP] = GL_CLAMP_TO_EDGE, [PL_TEX_ADDRESS_REPEAT] = GL_REPEAT, [PL_TEX_ADDRESS_MIRROR] = GL_MIRRORED_REPEAT, }; static const GLint filters[PL_TEX_SAMPLE_MODE_COUNT] = { [PL_TEX_SAMPLE_NEAREST] = GL_NEAREST, [PL_TEX_SAMPLE_LINEAR] = GL_LINEAR, }; switch (desc->type) { case PL_DESC_SAMPLED_TEX: { pl_tex tex = db->object; struct pl_tex_gl *tex_gl = PL_PRIV(tex); gl->ActiveTexture(GL_TEXTURE0 + desc->binding); gl->BindTexture(tex_gl->target, tex_gl->texture); GLint filter = filters[db->sample_mode]; GLint wrap = wraps[db->address_mode]; gl->TexParameteri(tex_gl->target, GL_TEXTURE_MIN_FILTER, filter); gl->TexParameteri(tex_gl->target, GL_TEXTURE_MAG_FILTER, filter); switch (pl_tex_params_dimension(tex->params)) { case 3: gl->TexParameteri(tex_gl->target, GL_TEXTURE_WRAP_R, wrap); // fall through case 2: gl->TexParameteri(tex_gl->target, GL_TEXTURE_WRAP_T, wrap); // fall through case 1: gl->TexParameteri(tex_gl->target, GL_TEXTURE_WRAP_S, wrap); break; } return; } case PL_DESC_STORAGE_IMG: { pl_tex tex = db->object; struct pl_tex_gl *tex_gl = PL_PRIV(tex); gl->BindImageTexture(desc->binding, tex_gl->texture, 0, GL_FALSE, 0, access[desc->access], tex_gl->iformat); return; } case PL_DESC_BUF_UNIFORM: { pl_buf buf = db->object; struct pl_buf_gl *buf_gl = PL_PRIV(buf); gl->BindBufferRange(GL_UNIFORM_BUFFER, desc->binding, buf_gl->buffer, buf_gl->offset, buf->params.size); return; } case PL_DESC_BUF_STORAGE: { pl_buf buf = db->object; struct pl_buf_gl *buf_gl = PL_PRIV(buf); gl->BindBufferRange(GL_SHADER_STORAGE_BUFFER, desc->binding, buf_gl->buffer, buf_gl->offset, buf->params.size); return; } case PL_DESC_BUF_TEXEL_UNIFORM: case PL_DESC_BUF_TEXEL_STORAGE: assert(!"unimplemented"); // TODO case PL_DESC_INVALID: case PL_DESC_TYPE_COUNT: break; } pl_unreachable(); } static void unbind_desc(pl_gpu gpu, pl_pass pass, int index, const struct pl_desc_binding *db) { const gl_funcs *gl = gl_funcs_get(gpu); const struct pl_desc *desc = &pass->params.descriptors[index]; switch (desc->type) { case PL_DESC_SAMPLED_TEX: { pl_tex tex = db->object; struct pl_tex_gl *tex_gl = PL_PRIV(tex); gl->ActiveTexture(GL_TEXTURE0 + desc->binding); gl->BindTexture(tex_gl->target, 0); return; } case PL_DESC_STORAGE_IMG: { pl_tex tex = db->object; struct pl_tex_gl *tex_gl = PL_PRIV(tex); gl->BindImageTexture(desc->binding, 0, 0, 
GL_FALSE, 0, GL_WRITE_ONLY, GL_R32F); if (desc->access != PL_DESC_ACCESS_READONLY) gl->MemoryBarrier(tex_gl->barrier); return; } case PL_DESC_BUF_UNIFORM: gl->BindBufferBase(GL_UNIFORM_BUFFER, desc->binding, 0); return; case PL_DESC_BUF_STORAGE: { pl_buf buf = db->object; struct pl_buf_gl *buf_gl = PL_PRIV(buf); gl->BindBufferBase(GL_SHADER_STORAGE_BUFFER, desc->binding, 0); if (desc->access != PL_DESC_ACCESS_READONLY) gl->MemoryBarrier(buf_gl->barrier); return; } case PL_DESC_BUF_TEXEL_UNIFORM: case PL_DESC_BUF_TEXEL_STORAGE: assert(!"unimplemented"); // TODO case PL_DESC_INVALID: case PL_DESC_TYPE_COUNT: break; } pl_unreachable(); } void gl_pass_run(pl_gpu gpu, const struct pl_pass_run_params *params) { const gl_funcs *gl = gl_funcs_get(gpu); if (!MAKE_CURRENT()) return; pl_pass pass = params->pass; struct pl_pass_gl *pass_gl = PL_PRIV(pass); struct pl_gl *p = PL_PRIV(gpu); gl->UseProgram(pass_gl->program); for (int i = 0; i < params->num_var_updates; i++) update_var(gpu, pass, ¶ms->var_updates[i]); for (int i = 0; i < pass->params.num_descriptors; i++) update_desc(gpu, pass, i, ¶ms->desc_bindings[i]); gl->ActiveTexture(GL_TEXTURE0); if (!gl_check_err(gpu, "gl_pass_run: updating uniforms")) { RELEASE_CURRENT(); return; } switch (pass->params.type) { case PL_PASS_RASTER: { struct pl_tex_gl *target_gl = PL_PRIV(params->target); gl->BindFramebuffer(GL_DRAW_FRAMEBUFFER, target_gl->fbo); if (!pass->params.load_target && p->has_invalidate_fb) { GLenum fb = target_gl->fbo ? GL_COLOR_ATTACHMENT0 : GL_COLOR; gl->InvalidateFramebuffer(GL_DRAW_FRAMEBUFFER, 1, &fb); } gl->Viewport(params->viewport.x0, params->viewport.y0, pl_rect_w(params->viewport), pl_rect_h(params->viewport)); gl->Scissor(params->scissors.x0, params->scissors.y0, pl_rect_w(params->scissors), pl_rect_h(params->scissors)); gl->Enable(GL_SCISSOR_TEST); gl->Disable(GL_DEPTH_TEST); gl->Disable(GL_CULL_FACE); gl_check_err(gpu, "gl_pass_run: enabling viewport/scissor"); const struct pl_blend_params *blend = pass->params.blend_params; if (blend) { static const GLenum map_blend[] = { [PL_BLEND_ZERO] = GL_ZERO, [PL_BLEND_ONE] = GL_ONE, [PL_BLEND_SRC_ALPHA] = GL_SRC_ALPHA, [PL_BLEND_ONE_MINUS_SRC_ALPHA] = GL_ONE_MINUS_SRC_ALPHA, }; gl->BlendFuncSeparate(map_blend[blend->src_rgb], map_blend[blend->dst_rgb], map_blend[blend->src_alpha], map_blend[blend->dst_alpha]); gl->Enable(GL_BLEND); gl_check_err(gpu, "gl_pass_run: enabling blend"); } // Update VBO and VAO pl_buf vert = params->vertex_buf; struct pl_buf_gl *vert_gl = vert ? PL_PRIV(vert) : NULL; gl->BindBuffer(GL_ARRAY_BUFFER, vert ? vert_gl->buffer : pass_gl->buffer); if (!vert) { // Update the buffer directly. In theory we could also do a memcmp // cache here to avoid unnecessary updates. gl->BufferData(GL_ARRAY_BUFFER, pl_vertex_buf_size(params), params->vertex_data, GL_STREAM_DRAW); } if (pass_gl->vao) gl->BindVertexArray(pass_gl->vao); uint64_t vert_id = vert ? vert_gl->id : 0; size_t vert_offset = vert ? 
params->buf_offset : 0; if (!pass_gl->vao || pass_gl->vao_id != vert_id || pass_gl->vao_offset != vert_offset) { // We need to update the VAO when the buffer ID or offset changes gl_update_va(gpu, pass, vert_offset); pass_gl->vao_id = vert_id; pass_gl->vao_offset = vert_offset; } gl_check_err(gpu, "gl_pass_run: update/bind vertex buffer"); static const GLenum map_prim[PL_PRIM_TYPE_COUNT] = { [PL_PRIM_TRIANGLE_LIST] = GL_TRIANGLES, [PL_PRIM_TRIANGLE_STRIP] = GL_TRIANGLE_STRIP, }; GLenum mode = map_prim[pass->params.vertex_type]; gl_timer_begin(gpu, params->timer); if (params->index_data) { static const GLenum index_fmts[PL_INDEX_FORMAT_COUNT] = { [PL_INDEX_UINT16] = GL_UNSIGNED_SHORT, [PL_INDEX_UINT32] = GL_UNSIGNED_INT, }; // Upload indices to temporary buffer object if (!pass_gl->index_buffer) gl->GenBuffers(1, &pass_gl->index_buffer); // lazily allocated gl->BindBuffer(GL_ELEMENT_ARRAY_BUFFER, pass_gl->index_buffer); gl->BufferData(GL_ELEMENT_ARRAY_BUFFER, pl_index_buf_size(params), params->index_data, GL_STREAM_DRAW); gl->DrawElements(mode, params->vertex_count, index_fmts[params->index_fmt], 0); gl->BindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0); } else if (params->index_buf) { // The pointer argument becomes the index buffer offset struct pl_buf_gl *index_gl = PL_PRIV(params->index_buf); gl->BindBuffer(GL_ELEMENT_ARRAY_BUFFER, index_gl->buffer); gl->DrawElements(mode, params->vertex_count, GL_UNSIGNED_SHORT, (void *) params->index_offset); gl->BindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0); } else { // Note: the VBO offset is handled in the VAO gl->DrawArrays(mode, 0, params->vertex_count); } gl_timer_end(gpu, params->timer); gl_check_err(gpu, "gl_pass_run: drawing"); if (pass_gl->vao) { gl->BindVertexArray(0); } else { for (int i = 0; i < pass->params.num_vertex_attribs; i++) gl->DisableVertexAttribArray(i); } gl->BindBuffer(GL_ARRAY_BUFFER, 0); gl->Disable(GL_SCISSOR_TEST); gl->Disable(GL_BLEND); gl->BindFramebuffer(GL_DRAW_FRAMEBUFFER, 0); break; } case PL_PASS_COMPUTE: gl_timer_begin(gpu, params->timer); gl->DispatchCompute(params->compute_groups[0], params->compute_groups[1], params->compute_groups[2]); gl_timer_end(gpu, params->timer); break; case PL_PASS_INVALID: case PL_PASS_TYPE_COUNT: pl_unreachable(); } for (int i = 0; i < pass->params.num_descriptors; i++) unbind_desc(gpu, pass, i, ¶ms->desc_bindings[i]); gl->ActiveTexture(GL_TEXTURE0); gl->UseProgram(0); gl_check_err(gpu, "gl_pass_run"); RELEASE_CURRENT(); } libplacebo-v7.349.0/src/opengl/gpu_tex.c000066400000000000000000001070361463457750100200750ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . 
*/ #include "gpu.h" #include "formats.h" #include "utils.h" #ifdef PL_HAVE_UNIX #include #include #endif void gl_tex_destroy(pl_gpu gpu, pl_tex tex) { const gl_funcs *gl = gl_funcs_get(gpu); if (!MAKE_CURRENT()) { PL_ERR(gpu, "Failed uninitializing texture, leaking resources!"); return; } struct pl_tex_gl *tex_gl = PL_PRIV(tex); if (tex_gl->fbo && !tex_gl->wrapped_fb) gl->DeleteFramebuffers(1, &tex_gl->fbo); if (tex_gl->image) { struct pl_gl *p = PL_PRIV(gpu); eglDestroyImageKHR(p->egl_dpy, tex_gl->image); } if (!tex_gl->wrapped_tex) gl->DeleteTextures(1, &tex_gl->texture); #ifdef PL_HAVE_UNIX if (tex_gl->fd != -1) close(tex_gl->fd); #endif gl_check_err(gpu, "gl_tex_destroy"); RELEASE_CURRENT(); pl_free((void *) tex); } static GLbitfield tex_barrier(pl_tex tex) { GLbitfield barrier = 0; const struct pl_tex_params *params = &tex->params; if (params->sampleable) barrier |= GL_TEXTURE_FETCH_BARRIER_BIT; if (params->renderable || params->blit_src || params->blit_dst) barrier |= GL_FRAMEBUFFER_BARRIER_BIT; if (params->storable) barrier |= GL_SHADER_IMAGE_ACCESS_BARRIER_BIT; if (params->host_writable || params->host_readable) barrier |= GL_TEXTURE_UPDATE_BARRIER_BIT; return barrier; } #define ADD_ATTRIB(name, value) \ do { \ assert(num_attribs + 3 < PL_ARRAY_SIZE(attribs)); \ attribs[num_attribs++] = (name); \ attribs[num_attribs++] = (value); \ } while (0) #define ADD_DMABUF_PLANE_ATTRIBS(plane, fd, offset, stride) \ do { \ ADD_ATTRIB(EGL_DMA_BUF_PLANE ## plane ## _FD_EXT, \ fd); \ ADD_ATTRIB(EGL_DMA_BUF_PLANE ## plane ## _OFFSET_EXT, \ offset); \ ADD_ATTRIB(EGL_DMA_BUF_PLANE ## plane ## _PITCH_EXT, \ stride); \ } while (0) #define ADD_DMABUF_PLANE_MODIFIERS(plane, mod) \ do { \ ADD_ATTRIB(EGL_DMA_BUF_PLANE ## plane ## _MODIFIER_LO_EXT, \ (uint32_t) ((mod) & 0xFFFFFFFFlu)); \ ADD_ATTRIB(EGL_DMA_BUF_PLANE ## plane ## _MODIFIER_HI_EXT, \ (uint32_t) (((mod) >> 32u) & 0xFFFFFFFFlu)); \ } while (0) static bool gl_tex_import(pl_gpu gpu, enum pl_handle_type handle_type, const struct pl_shared_mem *shared_mem, struct pl_tex_t *tex) { const gl_funcs *gl = gl_funcs_get(gpu); struct pl_gl *p = PL_PRIV(gpu); if (!MAKE_CURRENT()) return false; struct pl_tex_gl *tex_gl = PL_PRIV(tex); const struct pl_tex_params *params = &tex->params; int attribs[20] = {}; int num_attribs = 0; ADD_ATTRIB(EGL_WIDTH, params->w); ADD_ATTRIB(EGL_HEIGHT, params->h); switch (handle_type) { #ifdef PL_HAVE_UNIX case PL_HANDLE_DMA_BUF: if (shared_mem->handle.fd == -1) { PL_ERR(gpu, "%s: invalid fd", __func__); goto error; } tex_gl->fd = dup(shared_mem->handle.fd); if (tex_gl->fd == -1) { PL_ERR(gpu, "%s: cannot duplicate fd %d for importing: %s", __func__, shared_mem->handle.fd, strerror(errno)); goto error; } ADD_ATTRIB(EGL_LINUX_DRM_FOURCC_EXT, params->format->fourcc); ADD_DMABUF_PLANE_ATTRIBS(0, tex_gl->fd, shared_mem->offset, PL_DEF(shared_mem->stride_w, params->w)); if (p->has_modifiers) ADD_DMABUF_PLANE_MODIFIERS(0, shared_mem->drm_format_mod); attribs[num_attribs] = EGL_NONE; // EGL_LINUX_DMA_BUF_EXT requires EGL_NO_CONTEXT tex_gl->image = eglCreateImageKHR(p->egl_dpy, EGL_NO_CONTEXT, EGL_LINUX_DMA_BUF_EXT, (EGLClientBuffer) NULL, attribs); break; #else // !PL_HAVE_UNIX case PL_HANDLE_DMA_BUF: pl_unreachable(); #endif case PL_HANDLE_WIN32: case PL_HANDLE_WIN32_KMT: case PL_HANDLE_HOST_PTR: case PL_HANDLE_FD: case PL_HANDLE_MTL_TEX: case PL_HANDLE_IOSURFACE: pl_unreachable(); } if (!egl_check_err(gpu, "eglCreateImageKHR") || !tex_gl->image) goto error; // tex_gl->image should be already bound if (p->has_egl_storage) { 
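// GL_EXT_EGL_image_storage path: bind the imported EGLImage as immutable
// texture storage; otherwise fall back to the older GL_OES_EGL_image entry
// point below.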
gl->EGLImageTargetTexStorageEXT(GL_TEXTURE_2D, tex_gl->image, NULL); } else { gl->EGLImageTargetTexture2DOES(GL_TEXTURE_2D, tex_gl->image); } if (!egl_check_err(gpu, "EGLImageTargetTexture2DOES")) goto error; RELEASE_CURRENT(); return true; error: PL_ERR(gpu, "Failed importing GL texture!"); RELEASE_CURRENT(); return false; } static EGLenum egl_from_gl_target(pl_gpu gpu, int target) { switch(target) { case GL_TEXTURE_2D: return EGL_GL_TEXTURE_2D; case GL_TEXTURE_3D: return EGL_GL_TEXTURE_3D; default: PL_ERR(gpu, "%s: unsupported texture target 0x%x", __func__, target); return 0; } } static bool gl_tex_export(pl_gpu gpu, enum pl_handle_type handle_type, bool preserved, struct pl_tex_t *tex) { struct pl_tex_gl *tex_gl = PL_PRIV(tex); struct pl_gl *p = PL_PRIV(gpu); EGLenum egltarget = egl_from_gl_target(gpu, tex_gl->target); if (!egltarget) goto error; int attribs[] = { EGL_IMAGE_PRESERVED, preserved, EGL_NONE, }; // We assume that tex_gl->texture is already bound tex_gl->image = eglCreateImageKHR(p->egl_dpy, p->egl_ctx, egltarget, (EGLClientBuffer) (uintptr_t) tex_gl->texture, attribs); if (!egl_check_err(gpu, "eglCreateImageKHR") || !tex_gl->image) goto error; switch (handle_type) { #ifdef PL_HAVE_UNIX case PL_HANDLE_DMA_BUF: { int fourcc = 0; int num_planes = 0; EGLuint64KHR modifier = 0; bool ok; ok = eglExportDMABUFImageQueryMESA(p->egl_dpy, tex_gl->image, &fourcc, &num_planes, &modifier); if (!egl_check_err(gpu, "eglExportDMABUFImageQueryMESA") || !ok) goto error; if (fourcc != tex->params.format->fourcc) { PL_ERR(gpu, "Exported DRM format %s does not match fourcc of " "specified pl_fmt %s? Please open a bug.", PRINT_FOURCC(fourcc), PRINT_FOURCC(tex->params.format->fourcc)); goto error; } if (num_planes != 1) { PL_ERR(gpu, "Unsupported number of planes: %d", num_planes); goto error; } int offset = 0, stride = 0; ok = eglExportDMABUFImageMESA(p->egl_dpy, tex_gl->image, &tex_gl->fd, &stride, &offset); if (!egl_check_err(gpu, "eglExportDMABUFImageMesa") || !ok) goto error; off_t fdsize = lseek(tex_gl->fd, 0, SEEK_END); off_t err = fdsize > 0 && lseek(tex_gl->fd, 0, SEEK_SET); if (fdsize <= 0 || err < 0) { PL_ERR(gpu, "Failed querying FD size: %s", strerror(errno)); goto error; } tex->shared_mem = (struct pl_shared_mem) { .handle.fd = tex_gl->fd, .size = fdsize, .offset = offset, .drm_format_mod = modifier, .stride_w = stride, }; break; } #else // !PL_HAVE_UNIX case PL_HANDLE_DMA_BUF: pl_unreachable(); #endif case PL_HANDLE_WIN32: case PL_HANDLE_WIN32_KMT: case PL_HANDLE_HOST_PTR: case PL_HANDLE_FD: case PL_HANDLE_MTL_TEX: case PL_HANDLE_IOSURFACE: pl_unreachable(); } return true; error: PL_ERR(gpu, "Failed exporting GL texture!"); return false; } static const char *fb_err_str(GLenum err) { switch (err) { #define CASE(name) case name: return #name CASE(GL_FRAMEBUFFER_COMPLETE); CASE(GL_FRAMEBUFFER_UNDEFINED); CASE(GL_FRAMEBUFFER_INCOMPLETE_ATTACHMENT); CASE(GL_FRAMEBUFFER_INCOMPLETE_MISSING_ATTACHMENT); CASE(GL_FRAMEBUFFER_INCOMPLETE_DIMENSIONS); CASE(GL_FRAMEBUFFER_INCOMPLETE_DRAW_BUFFER); CASE(GL_FRAMEBUFFER_INCOMPLETE_READ_BUFFER); CASE(GL_FRAMEBUFFER_UNSUPPORTED); CASE(GL_FRAMEBUFFER_INCOMPLETE_MULTISAMPLE); CASE(GL_FRAMEBUFFER_INCOMPLETE_LAYER_TARGETS); #undef CASE default: return "unknown error"; } } pl_tex gl_tex_create(pl_gpu gpu, const struct pl_tex_params *params) { const gl_funcs *gl = gl_funcs_get(gpu); if (!MAKE_CURRENT()) return NULL; struct pl_gl *p = PL_PRIV(gpu); struct pl_tex_t *tex = pl_zalloc_obj(NULL, tex, struct pl_tex_gl); tex->params = *params; 
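// Don't hold on to the caller's initial_data pointer; it is only valid for
// the duration of this call.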
tex->params.initial_data = NULL; tex->sampler_type = PL_SAMPLER_NORMAL; struct pl_tex_gl *tex_gl = PL_PRIV(tex); const struct gl_format **fmtp = PL_PRIV(params->format); const struct gl_format *fmt = *fmtp; *tex_gl = (struct pl_tex_gl) { .format = fmt->fmt, .iformat = fmt->ifmt, .type = fmt->type, .barrier = tex_barrier(tex), .fd = -1, }; static const GLint targets[] = { [1] = GL_TEXTURE_1D, [2] = GL_TEXTURE_2D, [3] = GL_TEXTURE_3D, }; int dims = pl_tex_params_dimension(*params); pl_assert(dims >= 1 && dims <= 3); tex_gl->target = targets[dims]; gl->GenTextures(1, &tex_gl->texture); gl->BindTexture(tex_gl->target, tex_gl->texture); if (params->import_handle) { if (!gl_tex_import(gpu, params->import_handle, ¶ms->shared_mem, tex)) goto error; } else { gl->PixelStorei(GL_UNPACK_ALIGNMENT, 1); switch (dims) { case 1: gl->TexImage1D(tex_gl->target, 0, tex_gl->iformat, params->w, 0, tex_gl->format, tex_gl->type, params->initial_data); break; case 2: gl->TexImage2D(tex_gl->target, 0, tex_gl->iformat, params->w, params->h, 0, tex_gl->format, tex_gl->type, params->initial_data); break; case 3: gl->TexImage3D(tex_gl->target, 0, tex_gl->iformat, params->w, params->h, params->d, 0, tex_gl->format, tex_gl->type, params->initial_data); break; } gl->PixelStorei(GL_UNPACK_ALIGNMENT, 4); } if (params->export_handle) { if (!gl_tex_export(gpu, params->export_handle, params->initial_data, tex)) goto error; } gl->BindTexture(tex_gl->target, 0); if (!gl_check_err(gpu, "gl_tex_create: texture")) goto error; bool need_fbo = tex->params.renderable; if (tex->params.blit_src || tex->params.blit_dst) { if (dims != 2) { PL_ERR(gpu, "Blittable textures may only be 2D!"); goto error; } need_fbo = true; } bool can_fbo = tex->params.format->caps & PL_FMT_CAP_RENDERABLE && tex->params.d == 0; // Try creating an FBO for host-readable textures, since this allows // reading back with glReadPixels instead of glGetTexImage. (Additionally, // GLES does not support glGetTexImage) if (tex->params.host_readable && (can_fbo || p->gles_ver)) need_fbo = true; if (need_fbo) { if (!can_fbo) { PL_ERR(gpu, "Trying to create a renderable/blittable/readable " "texture with an incompatible (non-renderable) format!"); goto error; } gl->GenFramebuffers(1, &tex_gl->fbo); gl->BindFramebuffer(GL_DRAW_FRAMEBUFFER, tex_gl->fbo); switch (dims) { case 1: gl->FramebufferTexture1D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_1D, tex_gl->texture, 0); break; case 2: gl->FramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, tex_gl->texture, 0); break; case 3: pl_unreachable(); } GLenum err = gl->CheckFramebufferStatus(GL_DRAW_FRAMEBUFFER); if (err != GL_FRAMEBUFFER_COMPLETE) { gl->BindFramebuffer(GL_DRAW_FRAMEBUFFER, 0); PL_ERR(gpu, "Failed creating framebuffer: %s", fb_err_str(err)); goto error; } if (params->host_readable && p->gles_ver) { GLint read_type = 0, read_fmt = 0; gl->GetIntegerv(GL_IMPLEMENTATION_COLOR_READ_TYPE, &read_type); gl->GetIntegerv(GL_IMPLEMENTATION_COLOR_READ_FORMAT, &read_fmt); if (read_type != tex_gl->type || read_fmt != tex_gl->format) { gl->BindFramebuffer(GL_DRAW_FRAMEBUFFER, 0); PL_ERR(gpu, "Trying to create host_readable texture whose " "implementation-defined pixel read format " "(type=0x%X, fmt=0x%X) does not match the texture's " "internal format (type=0x%X, fmt=0x%X)! 
This is a " "GLES/driver limitation, there's little we can do " "about it.", read_type, read_fmt, tex_gl->type, tex_gl->format); goto error; } } gl->BindFramebuffer(GL_DRAW_FRAMEBUFFER, 0); if (!gl_check_err(gpu, "gl_tex_create: fbo")) goto error; } RELEASE_CURRENT(); return tex; error: gl_tex_destroy(gpu, tex); RELEASE_CURRENT(); return NULL; } static bool gl_fb_query(pl_gpu gpu, int fbo, struct pl_fmt_t *fmt, struct gl_format *glfmt) { const gl_funcs *gl = gl_funcs_get(gpu); struct pl_gl *p = PL_PRIV(gpu); *fmt = (struct pl_fmt_t) { .name = "fbo", .type = PL_FMT_UNKNOWN, .caps = PL_FMT_CAP_RENDERABLE | PL_FMT_CAP_BLITTABLE | PL_FMT_CAP_BLENDABLE, .num_components = 4, .component_depth = {8, 8, 8, 8}, // default to rgba8 .sample_order = {0, 1, 2, 3}, }; *glfmt = (struct gl_format) { .fmt = GL_RGBA, }; bool can_query = gl_test_ext(gpu, "GL_ARB_framebuffer_object", 30, 20); if (!fbo && p->gles_ver && p->gles_ver < 30) can_query = false; // can't query default framebuffer on GLES 2.0 if (can_query) { gl->BindFramebuffer(GL_DRAW_FRAMEBUFFER, fbo); GLenum obj = p->gles_ver ? GL_BACK : GL_BACK_LEFT; if (fbo != 0) obj = GL_COLOR_ATTACHMENT0; GLint type = 0; gl->GetFramebufferAttachmentParameteriv(GL_DRAW_FRAMEBUFFER, obj, GL_FRAMEBUFFER_ATTACHMENT_COMPONENT_TYPE, &type); switch (type) { case GL_FLOAT: fmt->type = PL_FMT_FLOAT; break; case GL_INT: fmt->type = PL_FMT_SINT; break; case GL_UNSIGNED_INT: fmt->type = PL_FMT_UINT; break; case GL_SIGNED_NORMALIZED: fmt->type = PL_FMT_SNORM; break; case GL_UNSIGNED_NORMALIZED: fmt->type = PL_FMT_UNORM; break; default: fmt->type = PL_FMT_UNKNOWN; break; } gl->GetFramebufferAttachmentParameteriv(GL_DRAW_FRAMEBUFFER, obj, GL_FRAMEBUFFER_ATTACHMENT_RED_SIZE, &fmt->component_depth[0]); gl->GetFramebufferAttachmentParameteriv(GL_DRAW_FRAMEBUFFER, obj, GL_FRAMEBUFFER_ATTACHMENT_GREEN_SIZE, &fmt->component_depth[1]); gl->GetFramebufferAttachmentParameteriv(GL_DRAW_FRAMEBUFFER, obj, GL_FRAMEBUFFER_ATTACHMENT_BLUE_SIZE, &fmt->component_depth[2]); gl->GetFramebufferAttachmentParameteriv(GL_DRAW_FRAMEBUFFER, obj, GL_FRAMEBUFFER_ATTACHMENT_ALPHA_SIZE, &fmt->component_depth[3]); gl->BindFramebuffer(GL_DRAW_FRAMEBUFFER, 0); gl_check_err(gpu, "gl_fb_query"); if (!fmt->component_depth[0]) { PL_INFO(gpu, "OpenGL framebuffer did not export depth information," "assuming 8-bit framebuffer"); for (int i = 0; i < PL_ARRAY_SIZE(fmt->component_depth); i++) fmt->component_depth[i] = 8; } // Strip missing components from component map while (!fmt->component_depth[fmt->num_components - 1]) { fmt->num_components--; pl_assert(fmt->num_components); } } int gpu_bits = 0; for (int i = 0; i < 4; i++) gpu_bits += fmt->component_depth[i]; fmt->internal_size = (gpu_bits + 7) / 8; size_t host_size = 0; switch (fmt->type) { case PL_FMT_UNKNOWN: fmt->opaque = true; return true; case PL_FMT_FLOAT: glfmt->type = GL_FLOAT; host_size = sizeof(float); break; case PL_FMT_UNORM: case PL_FMT_UINT: if (gpu_bits > 32) { glfmt->type = GL_UNSIGNED_SHORT; host_size = sizeof(uint16_t); } else { glfmt->type = GL_UNSIGNED_BYTE; host_size = sizeof(uint8_t); } break; case PL_FMT_SNORM: case PL_FMT_SINT: if (gpu_bits > 32) { glfmt->type = GL_SHORT; host_size = sizeof(int16_t); } else { glfmt->type = GL_BYTE; host_size = sizeof(int8_t); } break; case PL_FMT_TYPE_COUNT: pl_unreachable(); } fmt->texel_size = fmt->num_components * host_size; for (int i = 0; i < fmt->num_components; i++) fmt->host_bits[i] = 8 * host_size; fmt->caps |= PL_FMT_CAP_HOST_READABLE; return true; } pl_tex pl_opengl_wrap(pl_gpu gpu, const 
struct pl_opengl_wrap_params *params) { const gl_funcs *gl = gl_funcs_get(gpu); if (!MAKE_CURRENT()) return NULL; struct pl_gl *p = PL_PRIV(gpu); struct pl_tex_t *tex = pl_alloc_obj(NULL, tex, struct pl_tex_gl); struct pl_tex_gl *tex_gl = PL_PRIV(tex); *tex = (struct pl_tex_t) { .params = { .w = params->width, .h = params->height, .d = params->depth, }, }; pl_fmt fmt = NULL; const struct gl_format *glfmt = NULL; if (params->texture) { // Wrapping texture: Require matching iformat pl_assert(params->iformat); for (int i = 0; i < gpu->num_formats; i++) { const struct gl_format **glfmtp = PL_PRIV(gpu->formats[i]); if ((*glfmtp)->ifmt == params->iformat) { fmt = gpu->formats[i]; glfmt = *glfmtp; break; } } if (!fmt) { PL_ERR(gpu, "Failed mapping iformat %d to any equivalent `pl_fmt`", params->iformat); goto error; } } else { // Wrapping framebuffer: Allocate/infer generic FBO format fmt = pl_alloc_obj((void *) gpu, fmt, const struct gl_format *); glfmt = pl_alloc_ptr((void *) fmt, glfmt); const struct gl_format **glfmtp = PL_PRIV(fmt); *glfmtp = glfmt; if (!gl_fb_query(gpu, params->framebuffer, (struct pl_fmt_t *) fmt, (struct gl_format *) glfmt)) { PL_ERR(gpu, "Failed querying framebuffer specifics!"); pl_free((void *) fmt); goto error; } } *tex_gl = (struct pl_tex_gl) { .target = params->target, .texture = params->texture, .fbo = params->framebuffer, .wrapped_tex = !!params->texture, .wrapped_fb = params->framebuffer || !params->texture, .iformat = glfmt->ifmt, .format = glfmt->fmt, .type = glfmt->type, .fd = -1, }; int dims = pl_tex_params_dimension(tex->params); if (!tex_gl->target) { switch (dims) { case 1: tex_gl->target = GL_TEXTURE_1D; break; case 2: tex_gl->target = GL_TEXTURE_2D; break; case 3: tex_gl->target = GL_TEXTURE_3D; break; } } // Map texture-specific sampling metadata if (params->texture) { switch (params->target) { case GL_TEXTURE_1D: if (params->width || params->depth) { PL_ERR(gpu, "Invalid texture dimensions for GL_TEXTURE_1D"); goto error; } // fall through case GL_TEXTURE_2D: if (params->depth) { PL_ERR(gpu, "Invalid texture dimensions for GL_TEXTURE_2D"); goto error; } // fall through case 0: case GL_TEXTURE_3D: tex->sampler_type = PL_SAMPLER_NORMAL; break; case GL_TEXTURE_RECTANGLE: tex->sampler_type = PL_SAMPLER_RECT; break; case GL_TEXTURE_EXTERNAL_OES: tex->sampler_type = PL_SAMPLER_EXTERNAL; break; default: PL_ERR(gpu, "Failed mapping texture target %u to any equivalent " "`pl_sampler_type`", params->target); goto error; } } // Create optional extra fbo if needed/possible bool can_fbo = tex_gl->texture && (fmt->caps & PL_FMT_CAP_RENDERABLE) && tex->sampler_type != PL_SAMPLER_EXTERNAL && dims < 3; if (can_fbo && !tex_gl->fbo) { gl->GenFramebuffers(1, &tex_gl->fbo); gl->BindFramebuffer(GL_DRAW_FRAMEBUFFER, tex_gl->fbo); switch (dims) { case 1: gl->FramebufferTexture1D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, tex_gl->target, tex_gl->texture, 0); break; case 2: gl->FramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, tex_gl->target, tex_gl->texture, 0); break; } GLenum err = gl->CheckFramebufferStatus(GL_DRAW_FRAMEBUFFER); if (err != GL_FRAMEBUFFER_COMPLETE) { gl->BindFramebuffer(GL_DRAW_FRAMEBUFFER, 0); PL_ERR(gpu, "Failed creating framebuffer: error code %d", err); goto error; } if (p->gles_ver) { GLint read_type = 0, read_fmt = 0; gl->GetIntegerv(GL_IMPLEMENTATION_COLOR_READ_TYPE, &read_type); gl->GetIntegerv(GL_IMPLEMENTATION_COLOR_READ_FORMAT, &read_fmt); tex->params.host_readable = read_type == tex_gl->type && read_fmt == tex_gl->format; } else { 
tex->params.host_readable = true; } gl->BindFramebuffer(GL_DRAW_FRAMEBUFFER, 0); if (!gl_check_err(gpu, "pl_opengl_wrap: fbo")) goto error; } // Complete the process of inferring the texture capabilities tex->params.format = fmt; if (tex_gl->texture) { tex->params.sampleable = fmt->caps & PL_FMT_CAP_SAMPLEABLE; tex->params.storable = fmt->caps & PL_FMT_CAP_STORABLE; tex->params.host_writable = !fmt->opaque; tex->params.host_readable |= fmt->caps & PL_FMT_CAP_HOST_READABLE; } if (tex_gl->fbo || tex_gl->wrapped_fb) { tex->params.renderable = fmt->caps & PL_FMT_CAP_RENDERABLE; tex->params.host_readable |= fmt->caps & PL_FMT_CAP_HOST_READABLE; if (dims == 2 && (fmt->caps & PL_FMT_CAP_BLITTABLE)) { tex->params.blit_src = true; tex->params.blit_dst = true; } } tex_gl->barrier = tex_barrier(tex); RELEASE_CURRENT(); return tex; error: gl_tex_destroy(gpu, tex); RELEASE_CURRENT(); return NULL; } unsigned int pl_opengl_unwrap(pl_gpu gpu, pl_tex tex, unsigned int *out_target, int *out_iformat, unsigned int *out_fbo) { struct pl_tex_gl *tex_gl = PL_PRIV(tex); if (!tex_gl->texture) { PL_ERR(gpu, "Trying to call `pl_opengl_unwrap` on a pseudo-texture " "(perhaps obtained by `pl_swapchain_start_frame`?)"); return 0; } if (out_target) *out_target = tex_gl->target; if (out_iformat) *out_iformat = tex_gl->iformat; if (out_fbo) *out_fbo = tex_gl->fbo; return tex_gl->texture; } void gl_tex_invalidate(pl_gpu gpu, pl_tex tex) { const gl_funcs *gl = gl_funcs_get(gpu); struct pl_gl *p = PL_PRIV(gpu); struct pl_tex_gl *tex_gl = PL_PRIV(tex); if (!MAKE_CURRENT()) return; if (tex_gl->texture && p->has_invalidate_tex) gl->InvalidateTexImage(tex_gl->texture, 0); if ((tex_gl->wrapped_fb || tex_gl->fbo) && p->has_invalidate_fb) { GLenum attachment = tex_gl->fbo ? GL_COLOR_ATTACHMENT0 : GL_COLOR; gl->BindFramebuffer(GL_DRAW_FRAMEBUFFER, tex_gl->fbo); gl->InvalidateFramebuffer(GL_DRAW_FRAMEBUFFER, 1, &attachment); gl->BindFramebuffer(GL_DRAW_FRAMEBUFFER, 0); } gl_check_err(gpu, "gl_tex_invalidate"); RELEASE_CURRENT(); } void gl_tex_clear_ex(pl_gpu gpu, pl_tex tex, const union pl_clear_color color) { const gl_funcs *gl = gl_funcs_get(gpu); if (!MAKE_CURRENT()) return; struct pl_tex_gl *tex_gl = PL_PRIV(tex); pl_assert(tex_gl->fbo || tex_gl->wrapped_fb); switch (tex->params.format->type) { case PL_FMT_UNKNOWN: case PL_FMT_FLOAT: case PL_FMT_UNORM: case PL_FMT_SNORM: gl->ClearColor(color.f[0], color.f[1], color.f[2], color.f[3]); break; case PL_FMT_UINT: gl->ClearColorIuiEXT(color.u[0], color.u[1], color.u[2], color.u[3]); break; case PL_FMT_SINT: gl->ClearColorIiEXT(color.i[0], color.i[1], color.i[2], color.i[3]); break; case PL_FMT_TYPE_COUNT: pl_unreachable(); } gl->BindFramebuffer(GL_DRAW_FRAMEBUFFER, tex_gl->fbo); gl->Clear(GL_COLOR_BUFFER_BIT); gl->BindFramebuffer(GL_DRAW_FRAMEBUFFER, 0); gl_check_err(gpu, "gl_tex_clear"); RELEASE_CURRENT(); } void gl_tex_blit(pl_gpu gpu, const struct pl_tex_blit_params *params) { const gl_funcs *gl = gl_funcs_get(gpu); if (!MAKE_CURRENT()) return; struct pl_tex_gl *src_gl = PL_PRIV(params->src); struct pl_tex_gl *dst_gl = PL_PRIV(params->dst); pl_assert(src_gl->fbo || src_gl->wrapped_fb); pl_assert(dst_gl->fbo || dst_gl->wrapped_fb); gl->BindFramebuffer(GL_READ_FRAMEBUFFER, src_gl->fbo); gl->BindFramebuffer(GL_DRAW_FRAMEBUFFER, dst_gl->fbo); static const GLint filters[PL_TEX_SAMPLE_MODE_COUNT] = { [PL_TEX_SAMPLE_NEAREST] = GL_NEAREST, [PL_TEX_SAMPLE_LINEAR] = GL_LINEAR, }; pl_rect3d src_rc = params->src_rc, dst_rc = params->dst_rc; gl->BlitFramebuffer(src_rc.x0, src_rc.y0, src_rc.x1, 
src_rc.y1, dst_rc.x0, dst_rc.y0, dst_rc.x1, dst_rc.y1, GL_COLOR_BUFFER_BIT, filters[params->sample_mode]); gl->BindFramebuffer(GL_READ_FRAMEBUFFER, 0); gl->BindFramebuffer(GL_DRAW_FRAMEBUFFER, 0); gl_check_err(gpu, "gl_tex_blit"); RELEASE_CURRENT(); } static int get_alignment(size_t pitch) { if (pitch % 8 == 0) return 8; if (pitch % 4 == 0) return 4; if (pitch % 2 == 0) return 2; return 1; } bool gl_tex_upload(pl_gpu gpu, const struct pl_tex_transfer_params *params) { const gl_funcs *gl = gl_funcs_get(gpu); struct pl_gl *p = PL_PRIV(gpu); pl_tex tex = params->tex; pl_fmt fmt = tex->params.format; pl_buf buf = params->buf; struct pl_tex_gl *tex_gl = PL_PRIV(tex); struct pl_buf_gl *buf_gl = buf ? PL_PRIV(buf) : NULL; // If the user requests asynchronous uploads, it's more efficient to do // them via a PBO - this allows us to skip blocking the caller, especially // when the host pointer can be imported directly. if (params->callback && !buf) { size_t buf_size = pl_tex_transfer_size(params); const size_t min_size = 32*1024; // 32 KiB if (buf_size >= min_size && buf_size <= gpu->limits.max_buf_size) return pl_tex_upload_pbo(gpu, params); } if (!MAKE_CURRENT()) return false; uintptr_t src = (uintptr_t) params->ptr; if (buf) { gl->BindBuffer(GL_PIXEL_UNPACK_BUFFER, buf_gl->buffer); src = buf_gl->offset + params->buf_offset; } bool misaligned = params->row_pitch % fmt->texel_size; int stride_w = params->row_pitch / fmt->texel_size; int stride_h = params->depth_pitch / params->row_pitch; int dims = pl_tex_params_dimension(tex->params); if (dims > 1) gl->PixelStorei(GL_UNPACK_ALIGNMENT, get_alignment(params->row_pitch)); int rows = pl_rect_h(params->rc); if (misaligned) { rows = 1; } else if (stride_w != pl_rect_w(params->rc)) { gl->PixelStorei(GL_UNPACK_ROW_LENGTH, stride_w); } int imgs = pl_rect_d(params->rc); if (stride_h != pl_rect_h(params->rc) || rows < stride_h) gl->PixelStorei(GL_UNPACK_IMAGE_HEIGHT, stride_h); gl->BindTexture(tex_gl->target, tex_gl->texture); gl_timer_begin(gpu, params->timer); switch (dims) { case 1: gl->TexSubImage1D(tex_gl->target, 0, params->rc.x0, pl_rect_w(params->rc), tex_gl->format, tex_gl->type, (void *) src); break; case 2: for (int y = params->rc.y0; y < params->rc.y1; y += rows) { gl->TexSubImage2D(tex_gl->target, 0, params->rc.x0, y, pl_rect_w(params->rc), rows, tex_gl->format, tex_gl->type, (void *) src); src += params->row_pitch * rows; } break; case 3: for (int z = params->rc.z0; z < params->rc.z1; z += imgs) { uintptr_t row_src = src; for (int y = params->rc.y0; y < params->rc.y1; y += rows) { gl->TexSubImage3D(tex_gl->target, 0, params->rc.x0, y, z, pl_rect_w(params->rc), rows, imgs, tex_gl->format, tex_gl->type, (void *) row_src); row_src = (uintptr_t) row_src + params->row_pitch * rows; } src += params->depth_pitch * imgs; } break; } gl_timer_end(gpu, params->timer); gl->BindTexture(tex_gl->target, 0); gl->PixelStorei(GL_UNPACK_ALIGNMENT, 4); gl->PixelStorei(GL_UNPACK_ROW_LENGTH, 0); gl->PixelStorei(GL_UNPACK_IMAGE_HEIGHT, 0); if (buf) { gl->BindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); if (buf->params.host_mapped) { // Make sure the PBO is not reused until GL is done with it. If a // previous operation is pending, "update" it by creating a new // fence that will cover the previous operation as well. 
gl->DeleteSync(buf_gl->fence); buf_gl->fence = gl->FenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); } } if (params->callback) { PL_ARRAY_APPEND(gpu, p->callbacks, (struct gl_cb) { .sync = gl->FenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0), .callback = params->callback, .priv = params->priv, }); } bool ok = gl_check_err(gpu, "gl_tex_upload"); RELEASE_CURRENT(); return ok; } bool gl_tex_download(pl_gpu gpu, const struct pl_tex_transfer_params *params) { const gl_funcs *gl = gl_funcs_get(gpu); struct pl_gl *p = PL_PRIV(gpu); pl_tex tex = params->tex; pl_fmt fmt = tex->params.format; pl_buf buf = params->buf; struct pl_tex_gl *tex_gl = PL_PRIV(tex); struct pl_buf_gl *buf_gl = buf ? PL_PRIV(buf) : NULL; bool ok = true; if (params->callback && !buf) { size_t buf_size = pl_tex_transfer_size(params); const size_t min_size = 32*1024; // 32 KiB if (buf_size >= min_size && buf_size <= gpu->limits.max_buf_size) return pl_tex_download_pbo(gpu, params); } if (!MAKE_CURRENT()) return false; uintptr_t dst = (uintptr_t) params->ptr; if (buf) { gl->BindBuffer(GL_PIXEL_PACK_BUFFER, buf_gl->buffer); dst = buf_gl->offset + params->buf_offset; } pl_rect3d full = { 0, 0, 0, tex->params.w, PL_DEF(tex->params.h, 1), PL_DEF(tex->params.d, 1), }; bool misaligned = params->row_pitch % fmt->texel_size; int stride_w = params->row_pitch / fmt->texel_size; int stride_h = params->depth_pitch / params->row_pitch; int dims = pl_tex_params_dimension(tex->params); bool is_copy = pl_rect3d_eq(params->rc, full) && stride_w == tex->params.w && stride_h == PL_DEF(tex->params.h, 1) && !misaligned; gl_timer_begin(gpu, params->timer); if (tex_gl->fbo || tex_gl->wrapped_fb) { // We can use a more efficient path when we have an FBO available if (dims > 1) gl->PixelStorei(GL_PACK_ALIGNMENT, get_alignment(params->row_pitch)); int rows = pl_rect_h(params->rc); if (misaligned) { rows = 1; } else if (stride_w != tex->params.w) { gl->PixelStorei(GL_PACK_ROW_LENGTH, stride_w); } // No 3D framebuffers pl_assert(pl_rect_d(params->rc) == 1); gl->BindFramebuffer(GL_READ_FRAMEBUFFER, tex_gl->fbo); for (int y = params->rc.y0; y < params->rc.y1; y += rows) { gl->ReadPixels(params->rc.x0, y, pl_rect_w(params->rc), rows, tex_gl->format, tex_gl->type, (void *) dst); dst += params->row_pitch * rows; } gl->BindFramebuffer(GL_READ_FRAMEBUFFER, 0); gl->PixelStorei(GL_PACK_ALIGNMENT, 4); gl->PixelStorei(GL_PACK_ROW_LENGTH, 0); } else if (is_copy) { // We're downloading the entire texture gl->BindTexture(tex_gl->target, tex_gl->texture); gl->GetTexImage(tex_gl->target, 0, tex_gl->format, tex_gl->type, (void *) dst); gl->BindTexture(tex_gl->target, 0); } else { PL_ERR(gpu, "Partial downloads of 3D textures not implemented!"); ok = false; } gl_timer_end(gpu, params->timer); if (buf) { gl->BindBuffer(GL_PIXEL_PACK_BUFFER, 0); if (ok && buf->params.host_mapped) { gl->DeleteSync(buf_gl->fence); buf_gl->fence = gl->FenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); } } if (params->callback) { PL_ARRAY_APPEND(gpu, p->callbacks, (struct gl_cb) { .sync = gl->FenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0), .callback = params->callback, .priv = params->priv, }); } ok &= gl_check_err(gpu, "gl_tex_download"); RELEASE_CURRENT(); return ok; } 
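/*
 * [Editorial illustration - not part of the original libplacebo sources.]
 * The upload/download paths above derive GL_UNPACK_ALIGNMENT (via
 * get_alignment()) and GL_UNPACK_ROW_LENGTH from the caller-supplied
 * row_pitch. The following caller-side sketch shows how such a transfer
 * might be issued through the public pl_tex_create()/pl_tex_upload() API;
 * the 100x50 "rgba8" texture and the 12 bytes of per-row padding are
 * purely illustrative assumptions.
 */
#if 0 /* illustrative sketch only */
#include <libplacebo/gpu.h>

static bool upload_padded_rgba8(pl_gpu gpu, const uint8_t *pixels)
{
    const int w = 100, h = 50;
    const size_t row_pitch = w * 4 + 12; // 412 bytes -> get_alignment() picks 4

    pl_fmt fmt = pl_find_named_fmt(gpu, "rgba8");
    if (!fmt)
        return false;

    pl_tex tex = pl_tex_create(gpu, &(struct pl_tex_params) {
        .w             = w,
        .h             = h,
        .format        = fmt,
        .sampleable    = true,
        .host_writable = true,
    });
    if (!tex)
        return false;

    // row_pitch != w * texel_size, so gl_tex_upload() above ends up setting
    // GL_UNPACK_ROW_LENGTH = row_pitch / texel_size = 103 for this transfer.
    bool ok = pl_tex_upload(gpu, &(struct pl_tex_transfer_params) {
        .tex       = tex,
        .ptr       = (void *) pixels,
        .row_pitch = row_pitch,
    });

    pl_tex_destroy(gpu, &tex);
    return ok;
}
#endif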
libplacebo-v7.349.0/src/opengl/include/000077500000000000000000000000001463457750100176725ustar00rootroot00000000000000libplacebo-v7.349.0/src/opengl/include/glad/000077500000000000000000000000001463457750100206015ustar00rootroot00000000000000libplacebo-v7.349.0/src/opengl/include/glad/meson.build000066400000000000000000000016141463457750100227450ustar00rootroot00000000000000glad_check = run_command([ python, '-c', 'import glad; print(glad.__version__)' ], env: python_env, capture: true, check: false, ) glad_ver = glad_check.returncode() == 0 ? glad_check.stdout().strip() : 'none' glad_req = '>= 2.0' if not glad_ver.version_compare(glad_req) error(f'glad (required: @glad_req@, found: @glad_ver@) was not found in ' + 'PYTHONPATH or `3rdparty`. Please run `git submodule update --init` ' + 'followed by `meson --wipe`.') endif glad = custom_target('gl.h', output: 'gl.h', env: python_env, command: [ python, '-m', 'glad', '--out-path=@OUTDIR@/../../', '--reproducible', '--merge', '--api=gl:core,gles2,egl', '--extensions=' + ','.join(gl_extensions), 'c', '--header-only', '--mx' ] + (opengl_link.allowed() ? ['--loader'] : []) ) glad_dep = declare_dependency( include_directories: include_directories('..'), sources: glad, ) libplacebo-v7.349.0/src/opengl/loader_egl.c000066400000000000000000000000641463457750100205100ustar00rootroot00000000000000#define GLAD_EGL_IMPLEMENTATION #include "common.h" libplacebo-v7.349.0/src/opengl/loader_gl.c000066400000000000000000000000631463457750100203420ustar00rootroot00000000000000#define GLAD_GL_IMPLEMENTATION #include "common.h" libplacebo-v7.349.0/src/opengl/meson.build000066400000000000000000000040171463457750100204130ustar00rootroot00000000000000opengl_build = get_option('opengl') opengl_link = get_option('gl-proc-addr') if host_machine.system() == 'windows' or host_machine.system().endswith('bsd') or \ host_machine.system() == 'dragonfly' libdl = declare_dependency() else libdl = cc.find_library('dl', required : opengl_link) endif opengl_link = opengl_link.require(libdl.found()) components.set('opengl', opengl_build.allowed()) components.set('gl-proc-addr', opengl_link.allowed()) if opengl_build.allowed() sources += [ 'opengl/context.c', 'opengl/formats.c', 'opengl/loader_gl.c', 'opengl/loader_egl.c', 'opengl/gpu.c', 'opengl/gpu_tex.c', 'opengl/gpu_pass.c', 'opengl/swapchain.c', 'opengl/utils.c', ] if opengl_link.allowed() build_deps += libdl tests += 'opengl_surfaceless.c' endif gl_extensions = [ 'GL_AMD_pinned_memory', 'GL_ARB_buffer_storage', 'GL_ARB_compute_shader', 'GL_ARB_framebuffer_object', 'GL_ARB_get_program_binary', 'GL_ARB_invalidate_subdata', 'GL_ARB_pixel_buffer_object', 'GL_ARB_program_interface_query', 'GL_ARB_shader_image_load_store', 'GL_ARB_shader_storage_buffer_object', 'GL_ARB_sync', 'GL_ARB_texture_float', 'GL_ARB_texture_gather', 'GL_ARB_texture_rg', 'GL_ARB_timer_query', 'GL_ARB_uniform_buffer_object', 'GL_ARB_vertex_array_object', 'GL_ARB_half_float_pixel', 'GL_EXT_EGL_image_storage', 'GL_EXT_color_buffer_float', 'GL_EXT_texture3D', 'GL_EXT_texture_format_BGRA8888', 'GL_EXT_texture_integer', 'GL_EXT_texture_norm16', 'GL_EXT_texture_rg', 'GL_EXT_unpack_subimage', 'GL_KHR_debug', 'GL_OES_EGL_image', 'GL_OES_EGL_image_external', 'EGL_EXT_image_dma_buf_import', 'EGL_EXT_image_dma_buf_import_modifiers', 'EGL_EXT_platform_base', 'EGL_KHR_debug', 'EGL_KHR_image_base', 'EGL_MESA_image_dma_buf_export', 'EGL_MESA_platform_surfaceless', ] # Generate GL loader subdir('include/glad') else glad_dep = [] sources += 'opengl/stubs.c' endif 
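/*
 * [Editorial illustration - not part of the original libplacebo sources.]
 * The meson logic above only bundles glad's own GL/EGL loader when the
 * `gl-proc-addr` option is allowed; otherwise the application must hand
 * libplacebo a symbol loader itself. A minimal sketch of doing that via
 * the public pl_opengl_create() API is shown below. `my_get_proc_addr` is
 * a hypothetical stand-in for whatever the windowing system provides
 * (glXGetProcAddress, eglGetProcAddress, SDL_GL_GetProcAddress, ...), and
 * the use of the get_proc_addr field assumes a libplacebo release recent
 * enough to expose it (as this v7 tree does).
 */
#if 0 /* illustrative sketch only */
#include <libplacebo/opengl.h>

extern pl_voidfunc_t my_get_proc_addr(const char *procname); /* hypothetical */

static pl_opengl create_gl_context(pl_log log)
{
    /* When built with gl-proc-addr disabled, supplying get_proc_addr is the
     * only way for libplacebo to resolve GL entry points; with it enabled,
     * glad can fall back to its built-in loader. */
    return pl_opengl_create(log, pl_opengl_params(
        .get_proc_addr = my_get_proc_addr,
        .debug         = true,
    ));
}
#endif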
libplacebo-v7.349.0/src/opengl/stubs.c000066400000000000000000000033301463457750100175520ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #include "../common.h" #include "log.h" #include const struct pl_opengl_params pl_opengl_default_params = {0}; pl_opengl pl_opengl_create(pl_log log, const struct pl_opengl_params *params) { pl_fatal(log, "libplacebo compiled without OpenGL support!"); return NULL; } void pl_opengl_destroy(pl_opengl *pgl) { pl_opengl gl = *pgl; pl_assert(!gl); } pl_opengl pl_opengl_get(pl_gpu gpu) { return NULL; } pl_swapchain pl_opengl_create_swapchain(pl_opengl gl, const struct pl_opengl_swapchain_params *params) { pl_unreachable(); } void pl_opengl_swapchain_update_fb(pl_swapchain sw, const struct pl_opengl_framebuffer *fb) { pl_unreachable(); } pl_tex pl_opengl_wrap(pl_gpu gpu, const struct pl_opengl_wrap_params *params) { pl_unreachable(); } unsigned int pl_opengl_unwrap(pl_gpu gpu, pl_tex tex, unsigned int *out_target, int *out_iformat, unsigned int *out_fbo) { pl_unreachable(); } libplacebo-v7.349.0/src/opengl/swapchain.c000066400000000000000000000176721463457750100204050ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . 
*/ #include "common.h" #include "formats.h" #include "gpu.h" #include "swapchain.h" #include "utils.h" #include "pl_thread.h" struct priv { struct pl_sw_fns impl; struct pl_opengl_swapchain_params params; pl_opengl gl; pl_mutex lock; bool has_sync; // current parameters pl_tex fb; bool frame_started; // vsync fences int swapchain_depth; PL_ARRAY(GLsync) vsync_fences; }; static const struct pl_sw_fns opengl_swapchain; pl_swapchain pl_opengl_create_swapchain(pl_opengl pl_gl, const struct pl_opengl_swapchain_params *params) { pl_gpu gpu = pl_gl->gpu; if (params->max_swapchain_depth < 0) { PL_ERR(gpu, "Tried specifying negative swapchain depth?"); return NULL; } if (!gl_make_current(pl_gl)) return NULL; struct pl_swapchain_t *sw = pl_zalloc_obj(NULL, sw, struct priv); sw->log = gpu->log; sw->gpu = gpu; struct priv *p = PL_PRIV(sw); pl_mutex_init(&p->lock); p->impl = opengl_swapchain; p->params = *params; p->has_sync = pl_opengl_has_ext(pl_gl, "GL_ARB_sync"); p->gl = pl_gl; gl_release_current(pl_gl); return sw; } static void gl_sw_destroy(pl_swapchain sw) { pl_gpu gpu = sw->gpu; struct priv *p = PL_PRIV(sw); pl_gpu_flush(gpu); pl_tex_destroy(gpu, &p->fb); pl_mutex_destroy(&p->lock); pl_free((void *) sw); } static int gl_sw_latency(pl_swapchain sw) { struct priv *p = PL_PRIV(sw); return p->params.max_swapchain_depth; } static bool gl_sw_resize(pl_swapchain sw, int *width, int *height) { struct priv *p = PL_PRIV(sw); const int w = *width, h = *height; pl_mutex_lock(&p->lock); if (p->fb && w == p->fb->params.w && h == p->fb->params.h) { pl_mutex_unlock(&p->lock); return true; } if (p->frame_started && (w || h)) { PL_ERR(sw, "Tried resizing the swapchain while a frame was in progress! " "Please submit the current frame first."); pl_mutex_unlock(&p->lock); return false; } if (w && h) { pl_tex_destroy(sw->gpu, &p->fb); p->fb = pl_opengl_wrap(sw->gpu, pl_opengl_wrap_params( .framebuffer = p->params.framebuffer.id, .width = w, .height = h, )); if (!p->fb) { PL_ERR(sw, "Failed wrapping OpenGL framebuffer!"); pl_mutex_unlock(&p->lock); return false; } } if (!p->fb) { PL_ERR(sw, "Tried calling `pl_swapchain_resize` with unknown size! " "This is forbidden for OpenGL. The first call to " "`pl_swapchain_resize` must include the width and height of the " "swapchain, because there's no way to figure this out from " "within the API."); pl_mutex_unlock(&p->lock); return false; } *width = p->fb->params.w; *height = p->fb->params.h; pl_mutex_unlock(&p->lock); return true; } void pl_opengl_swapchain_update_fb(pl_swapchain sw, const struct pl_opengl_framebuffer *fb) { struct priv *p = PL_PRIV(sw); pl_mutex_lock(&p->lock); if (p->frame_started) { PL_ERR(sw,"Tried calling `pl_opengl_swapchain_update_fb` while a frame " "was in progress! Please submit the current frame first."); pl_mutex_unlock(&p->lock); return; } if (p->params.framebuffer.id != fb->id) pl_tex_destroy(sw->gpu, &p->fb); p->params.framebuffer = *fb; pl_mutex_unlock(&p->lock); } static bool gl_sw_start_frame(pl_swapchain sw, struct pl_swapchain_frame *out_frame) { struct priv *p = PL_PRIV(sw); pl_mutex_lock(&p->lock); bool ok = false; if (!p->fb) { PL_ERR(sw, "Unknown framebuffer size. Please call `pl_swapchain_resize` " "before `pl_swapchain_start_frame` for OpenGL swapchains!"); goto error; } if (p->frame_started) { PL_ERR(sw, "Attempted calling `pl_swapchain_start` while a frame was " "already in progress! 
Call `pl_swapchain_submit_frame` first."); goto error; } if (!gl_make_current(p->gl)) goto error; *out_frame = (struct pl_swapchain_frame) { .fbo = p->fb, .flipped = !p->params.framebuffer.flipped, .color_repr = { .sys = PL_COLOR_SYSTEM_RGB, .levels = PL_COLOR_LEVELS_FULL, .alpha = p->fb->params.format->num_components == 4 ? PL_ALPHA_PREMULTIPLIED : PL_ALPHA_NONE, .bits = { // Just use the red channel in the absence of anything more // sane to do, because the red channel is both guaranteed to // exist and also typically has the minimum number of bits // (which is arguably what matters for dithering) .sample_depth = p->fb->params.format->component_depth[0], .color_depth = p->fb->params.format->component_depth[0], }, }, .color_space = pl_color_space_monitor, }; p->frame_started = gl_check_err(sw->gpu, "gl_sw_start_frame"); if (!p->frame_started) goto error; // keep p->lock held gl_release_current(p->gl); return true; error: gl_release_current(p->gl); pl_mutex_unlock(&p->lock); return ok; } static bool gl_sw_submit_frame(pl_swapchain sw) { struct priv *p = PL_PRIV(sw); struct gl_ctx *glctx = PL_PRIV(p->gl); const gl_funcs *gl = &glctx->func; if (!gl_make_current(p->gl)) { p->frame_started = false; pl_mutex_unlock(&p->lock); return false; } pl_assert(p->frame_started); if (p->has_sync && p->params.max_swapchain_depth) { GLsync fence = gl->FenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); if (fence) PL_ARRAY_APPEND(sw, p->vsync_fences, fence); } gl->Flush(); p->frame_started = false; bool ok = gl_check_err(sw->gpu, "gl_sw_submit_frame"); gl_release_current(p->gl); pl_mutex_unlock(&p->lock); return ok; } static void gl_sw_swap_buffers(pl_swapchain sw) { struct priv *p = PL_PRIV(sw); struct gl_ctx *glctx = PL_PRIV(p->gl); const gl_funcs *gl = &glctx->func; if (!p->params.swap_buffers) { PL_ERR(sw, "`pl_swapchain_swap_buffers` called but no " "`params.swap_buffers` callback set!"); return; } pl_mutex_lock(&p->lock); if (!gl_make_current(p->gl)) { pl_mutex_unlock(&p->lock); return; } p->params.swap_buffers(p->params.priv); const int max_depth = p->params.max_swapchain_depth; while (max_depth && p->vsync_fences.num >= max_depth) { gl->ClientWaitSync(p->vsync_fences.elem[0], GL_SYNC_FLUSH_COMMANDS_BIT, 1e9); gl->DeleteSync(p->vsync_fences.elem[0]); PL_ARRAY_REMOVE_AT(p->vsync_fences, 0); } gl_check_err(sw->gpu, "gl_sw_swap_buffers"); gl_release_current(p->gl); pl_mutex_unlock(&p->lock); } static const struct pl_sw_fns opengl_swapchain = { .destroy = gl_sw_destroy, .latency = gl_sw_latency, .resize = gl_sw_resize, .start_frame = gl_sw_start_frame, .submit_frame = gl_sw_submit_frame, .swap_buffers = gl_sw_swap_buffers, }; libplacebo-v7.349.0/src/opengl/utils.c000066400000000000000000000103351463457750100175550ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . 
*/ #include "common.h" #include "gpu.h" #include "utils.h" const char *gl_err_str(GLenum err) { switch (err) { #define CASE(name) case name: return #name CASE(GL_NO_ERROR); CASE(GL_INVALID_ENUM); CASE(GL_INVALID_VALUE); CASE(GL_INVALID_OPERATION); CASE(GL_INVALID_FRAMEBUFFER_OPERATION); CASE(GL_OUT_OF_MEMORY); CASE(GL_STACK_UNDERFLOW); CASE(GL_STACK_OVERFLOW); #undef CASE default: return "unknown error"; } } void gl_poll_callbacks(pl_gpu gpu) { const gl_funcs *gl = gl_funcs_get(gpu); struct pl_gl *p = PL_PRIV(gpu); while (p->callbacks.num) { struct gl_cb cb = p->callbacks.elem[0]; GLenum res = gl->ClientWaitSync(cb.sync, 0, 0); switch (res) { case GL_ALREADY_SIGNALED: case GL_CONDITION_SATISFIED: PL_ARRAY_REMOVE_AT(p->callbacks, 0); cb.callback(cb.priv); continue; case GL_WAIT_FAILED: PL_ARRAY_REMOVE_AT(p->callbacks, 0); gl->DeleteSync(cb.sync); p->failed = true; gl_check_err(gpu, "gl_poll_callbacks"); // NOTE: will recurse! return; case GL_TIMEOUT_EXPIRED: return; default: pl_unreachable(); } } } bool gl_check_err(pl_gpu gpu, const char *fun) { const gl_funcs *gl = gl_funcs_get(gpu); struct pl_gl *p = PL_PRIV(gpu); bool ret = true; while (true) { GLenum error = gl->GetError(); if (error == GL_NO_ERROR) break; PL_ERR(gpu, "%s: OpenGL error: %s", fun, gl_err_str(error)); ret = false; p->failed = true; } gl_poll_callbacks(gpu); return ret; } bool gl_is_software(pl_opengl pl_gl) { struct gl_ctx *glctx = PL_PRIV(pl_gl); const gl_funcs *gl = &glctx->func; const char *renderer = (char *) gl->GetString(GL_RENDERER); return !renderer || strcmp(renderer, "Software Rasterizer") == 0 || strstr(renderer, "llvmpipe") || strstr(renderer, "softpipe") || strcmp(renderer, "Mesa X11") == 0 || strcmp(renderer, "Apple Software Renderer") == 0; } bool gl_is_gles(pl_opengl pl_gl) { struct gl_ctx *glctx = PL_PRIV(pl_gl); const gl_funcs *gl = &glctx->func; const char *version = (char *) gl->GetString(GL_VERSION); return pl_str_startswith0(pl_str0(version), "OpenGL ES"); } bool gl_test_ext(pl_gpu gpu, const char *ext, int gl_ver, int gles_ver) { struct pl_gl *p = PL_PRIV(gpu); if (gl_ver && p->gl_ver >= gl_ver) return true; if (gles_ver && p->gles_ver >= gles_ver) return true; return ext ? pl_opengl_has_ext(p->gl, ext) : false; } const char *egl_err_str(EGLenum err) { switch (err) { #define CASE(name) case name: return #name CASE(EGL_SUCCESS); CASE(EGL_NOT_INITIALIZED); CASE(EGL_BAD_ACCESS); CASE(EGL_BAD_ALLOC); CASE(EGL_BAD_ATTRIBUTE); CASE(EGL_BAD_CONFIG); CASE(EGL_BAD_CONTEXT); CASE(EGL_BAD_CURRENT_SURFACE); CASE(EGL_BAD_DISPLAY); CASE(EGL_BAD_MATCH); CASE(EGL_BAD_NATIVE_PIXMAP); CASE(EGL_BAD_NATIVE_WINDOW); CASE(EGL_BAD_PARAMETER); CASE(EGL_BAD_SURFACE); #undef CASE default: return "unknown error"; } } bool egl_check_err(pl_gpu gpu, const char *fun) { struct pl_gl *p = PL_PRIV(gpu); bool ret = true; while (true) { GLenum error = eglGetError(); if (error == EGL_SUCCESS) return ret; PL_ERR(gpu, "%s: EGL error: %s", fun, egl_err_str(error)); ret = false; p->failed = true; } } libplacebo-v7.349.0/src/opengl/utils.h000066400000000000000000000033021463457750100175560ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. 
* * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #pragma once #include "common.h" // Iterate through callbacks attached to the `pl_gl` and execute all of the // ones that have completed. // // Thread-safety: Unsafe void gl_poll_callbacks(pl_gpu gpu); // Return a human-readable name for various OpenGL errors // // Thread-safety: Safe const char *gl_err_str(GLenum err); // Check for errors and log them + return false if detected // // Thread-safety: Unsafe bool gl_check_err(pl_gpu gpu, const char *fun); // Returns true if the context is a suspected software rasterizer // // Thread-safety: Unsafe bool gl_is_software(pl_opengl gl); // Returns true if the context is detected as OpenGL ES // // Thread-safety: Unsafe bool gl_is_gles(pl_opengl gl); // Check for presence of an extension, alternatively a minimum GL version // // Thread-safety: Unsafe bool gl_test_ext(pl_gpu gpu, const char *ext, int gl_ver, int gles_ver); // Thread-safety: Safe const char *egl_err_str(EGLenum err); // Thread-safety: Unsafe bool egl_check_err(pl_gpu gpu, const char *fun); libplacebo-v7.349.0/src/options.c000066400000000000000000001505011463457750100166240ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . 
*/ #include #include "common.h" #include "log.h" #include struct priv { pl_log log; // for pl_options_get struct pl_opt_data_t data; pl_str data_text; // for pl_options_save pl_str saved; // internally managed hooks array PL_ARRAY(const struct pl_hook *) hooks; }; static const struct pl_options_t defaults = { .params = { PL_RENDER_DEFAULTS }, .deband_params = { PL_DEBAND_DEFAULTS }, .sigmoid_params = { PL_SIGMOID_DEFAULTS }, .color_adjustment = { PL_COLOR_ADJUSTMENT_NEUTRAL }, .peak_detect_params = { PL_PEAK_DETECT_DEFAULTS }, .color_map_params = { PL_COLOR_MAP_DEFAULTS }, .dither_params = { PL_DITHER_DEFAULTS }, .icc_params = { PL_ICC_DEFAULTS }, .cone_params = { PL_CONE_NONE, 1.0 }, .deinterlace_params = { PL_DEINTERLACE_DEFAULTS }, .distort_params = { PL_DISTORT_DEFAULTS }, .upscaler = { .name = "custom", .description = "Custom upscaler", .allowed = PL_FILTER_UPSCALING, }, .downscaler = { .name = "custom", .description = "Custom downscaler", .allowed = PL_FILTER_DOWNSCALING, }, .plane_upscaler = { .name = "custom", .description = "Custom plane upscaler", .allowed = PL_FILTER_UPSCALING, }, .plane_downscaler = { .name = "custom", .description = "Custom plane downscaler", .allowed = PL_FILTER_DOWNSCALING, }, .frame_mixer = { .name = "custom", .description = "Custom frame mixer", .allowed = PL_FILTER_FRAME_MIXING, }, }; // Copies only whitelisted fields static inline void copy_filter(struct pl_filter_config *dst, const struct pl_filter_config *src) { dst->kernel = src->kernel; dst->window = src->window; dst->radius = src->radius; dst->clamp = src->clamp; dst->blur = src->blur; dst->taper = src->taper; dst->polar = src->polar; for (int i = 0; i < PL_FILTER_MAX_PARAMS; i++) { dst->params[i] = src->params[i]; dst->wparams[i] = src->wparams[i]; } } static inline void redirect_params(pl_options opts) { // Copy all non-NULL params structs into pl_options and redirect them #define REDIRECT_PARAMS(field) do \ { \ if (opts->params.field) { \ opts->field = *opts->params.field; \ opts->params.field = &opts->field; \ } \ } while (0) REDIRECT_PARAMS(deband_params); REDIRECT_PARAMS(sigmoid_params); REDIRECT_PARAMS(color_adjustment); REDIRECT_PARAMS(peak_detect_params); REDIRECT_PARAMS(color_map_params); REDIRECT_PARAMS(dither_params); REDIRECT_PARAMS(icc_params); REDIRECT_PARAMS(cone_params); REDIRECT_PARAMS(deinterlace_params); REDIRECT_PARAMS(distort_params); } void pl_options_reset(pl_options opts, const struct pl_render_params *preset) { *opts = defaults; if (preset) opts->params = *preset; redirect_params(opts); // Make a copy of all scaler configurations that aren't built-in filters struct { bool upscaler; bool downscaler; bool plane_upscaler; bool plane_downscaler; bool frame_mixer; } fixed = {0}; for (int i = 0; i < pl_num_filter_configs; i++) { const struct pl_filter_config *f = pl_filter_configs[i]; fixed.upscaler |= f == opts->params.upscaler; fixed.downscaler |= f == opts->params.downscaler; fixed.plane_upscaler |= f == opts->params.plane_upscaler; fixed.plane_downscaler |= f == opts->params.plane_downscaler; fixed.frame_mixer |= f == opts->params.frame_mixer; } #define REDIRECT_SCALER(scaler) do \ { \ if (opts->params.scaler && !fixed.scaler) { \ copy_filter(&opts->scaler, opts->params.scaler); \ opts->params.scaler = &opts->scaler; \ } \ } while (0) REDIRECT_SCALER(upscaler); REDIRECT_SCALER(downscaler); REDIRECT_SCALER(plane_upscaler); REDIRECT_SCALER(plane_downscaler); REDIRECT_SCALER(frame_mixer); } pl_options pl_options_alloc(pl_log log) { struct pl_options_t *opts = pl_zalloc_obj(NULL, 
opts, struct priv); struct priv *p = PL_PRIV(opts); pl_options_reset(opts, NULL); p->log = log; return opts; } void pl_options_free(pl_options *popts) { pl_free_ptr((void **) popts); } static void make_hooks_internal(pl_options opts) { struct priv *p = PL_PRIV(opts); struct pl_render_params *params = &opts->params; if (params->num_hooks && params->hooks != p->hooks.elem) { PL_ARRAY_MEMDUP(opts, p->hooks, params->hooks, params->num_hooks); params->hooks = p->hooks.elem; } } void pl_options_add_hook(pl_options opts, const struct pl_hook *hook) { struct priv *p = PL_PRIV(opts); make_hooks_internal(opts); PL_ARRAY_APPEND(opts, p->hooks, hook); opts->params.hooks = p->hooks.elem; } void pl_options_insert_hook(pl_options opts, const struct pl_hook *hook, int idx) { struct priv *p = PL_PRIV(opts); make_hooks_internal(opts); PL_ARRAY_INSERT_AT(opts, p->hooks, idx, hook); opts->params.hooks = p->hooks.elem; } void pl_options_remove_hook_at(pl_options opts, int idx) { struct priv *p = PL_PRIV(opts); make_hooks_internal(opts); PL_ARRAY_REMOVE_AT(p->hooks, idx); opts->params.hooks = p->hooks.elem; } // Options printing/parsing context typedef const struct opt_ctx_t { pl_log log; // as a convenience, only needed when parsing pl_opt opt; void *alloc; // for printing only pl_options opts; // current base ptr } *opt_ctx; struct enum_val { const char *name; unsigned val; }; struct preset { const char *name; const void *val; }; struct named { const char *name; }; typedef const struct opt_priv_t { int (*compare)(opt_ctx p, const void *a, const void *b); // optional void (*print)(opt_ctx p, pl_str *out, const void *val); // apends to `out` bool (*parse)(opt_ctx p, pl_str str, void *out_val); const struct enum_val *values; // for enums, terminated by {0} const struct preset *presets; // for preset lists, terminated by {0} const struct named * const *names; // for array-backed options, terminated by NULL // Offset and size of option in `struct pl_options_t` size_t offset; size_t size; size_t offset_params; // offset of actual struct (for params toggles) } *opt_priv; static pl_opt_data get_opt_data(opt_ctx ctx) { pl_options opts = ctx->opts; struct priv *p = PL_PRIV(opts); opt_priv priv = ctx->opt->priv; const void *val = (void *) ((uintptr_t) opts + priv->offset); p->data_text.len = 0; priv->print(ctx, &p->data_text, val); p->data = (struct pl_opt_data_t) { .opts = opts, .opt = ctx->opt, .value = val, .text = (char *) p->data_text.buf, }; return &p->data; } pl_opt_data pl_options_get(pl_options opts, const char *key) { struct priv *p = PL_PRIV(opts); pl_opt opt = pl_find_option(key); if (!opt || opt->preset) { PL_ERR(p, "Unrecognized or invalid option '%s'", key); return NULL; } return get_opt_data(&(struct opt_ctx_t) { .alloc = opts, .opts = opts, .opt = opt, }); } void pl_options_iterate(pl_options opts, void (*cb)(void *priv, pl_opt_data data), void *cb_priv) { for (pl_opt opt = pl_option_list; opt->key; opt++) { if (opt->preset) continue; struct opt_ctx_t ctx = { .alloc = opts, .opts = opts, .opt = opt, }; opt_priv priv = opt->priv; const void *val = (void *) ((uintptr_t) opts + priv->offset); const void *ref = (void *) ((uintptr_t) &defaults + priv->offset); int cmp = priv->compare ? 
priv->compare(&ctx, val, ref) : memcmp(val, ref, priv->size); if (cmp != 0) cb(cb_priv, get_opt_data(&ctx)); } } static void save_cb(void *priv, pl_opt_data data) { pl_opt opt = data->opt; void *alloc = data->opts; pl_str *out = priv; if (out->len) pl_str_append_raw(alloc, out, ",", 1); pl_str_append_raw(alloc, out, opt->key, strlen(opt->key)); pl_str_append_raw(alloc, out, "=", 1); pl_str_append(alloc, out, pl_str0(data->text)); } const char *pl_options_save(pl_options opts) { struct priv *p = PL_PRIV(opts); p->saved.len = 0; pl_options_iterate(opts, save_cb, &p->saved); return p->saved.len ? (char *) p->saved.buf : ""; } static bool option_set_raw(pl_options opts, pl_str k, pl_str v) { struct priv *p = PL_PRIV(opts); k = pl_str_strip(k); v = pl_str_strip(v); pl_opt opt; for (opt = pl_option_list; opt->key; opt++) { if (pl_str_equals0(k, opt->key)) goto found; } PL_ERR(p, "Unrecognized option '%.*s', in '%.*s=%.*s'", PL_STR_FMT(k), PL_STR_FMT(k), PL_STR_FMT(v)); return false; found: PL_TRACE(p, "Parsing option '%s' = '%.*s'", opt->key, PL_STR_FMT(v)); if (opt->deprecated) PL_WARN(p, "Option '%s' is deprecated", opt->key); struct opt_ctx_t ctx = { .log = p->log, .opts = opts, .opt = opt, }; opt_priv priv = opt->priv; void *val = (void *) ((uintptr_t) opts + priv->offset); return priv->parse(&ctx, v, val); } bool pl_options_set_str(pl_options opts, const char *key, const char *value) { return option_set_raw(opts, pl_str0(key), pl_str0(value)); } bool pl_options_load(pl_options opts, const char *str) { bool ret = true; pl_str rest = pl_str0(str); while (rest.len) { pl_str kv = pl_str_strip(pl_str_split_chars(rest, " ,;:\n", &rest)); if (!kv.len) continue; pl_str v, k = pl_str_split_char(kv, '=', &v); ret &= option_set_raw(opts, k, v); } return ret; } // Individual option types static void print_bool(opt_ctx p, pl_str *out, const void *ptr) { const bool *val = ptr; if (*val) { pl_str_append(p->alloc, out, pl_str0("yes")); } else { pl_str_append(p->alloc, out, pl_str0("no")); } } static bool parse_bool(opt_ctx p, pl_str str, void *out) { bool *res = out; if (pl_str_equals0(str, "yes") || pl_str_equals0(str, "y") || pl_str_equals0(str, "on") || pl_str_equals0(str, "true") || pl_str_equals0(str, "enabled") || !str.len) // accept naked option name as well { *res = true; return true; } else if (pl_str_equals0(str, "no") || pl_str_equals0(str, "n") || pl_str_equals0(str, "off") || pl_str_equals0(str, "false") || pl_str_equals0(str, "disabled")) { *res = false; return true; } PL_ERR(p, "Invalid value '%.*s' for option '%s', expected boolean", PL_STR_FMT(str), p->opt->key); return false; } static void print_int(opt_ctx p, pl_str *out, const void *ptr) { pl_opt opt = p->opt; const int *val = ptr; pl_assert(opt->min == opt->max || (*val >= opt->min && *val <= opt->max)); pl_str_append_asprintf_c(p->alloc, out, "%d", *val); } static bool parse_int(opt_ctx p, pl_str str, void *out) { pl_opt opt = p->opt; int val; if (!pl_str_parse_int(str, &val)) { PL_ERR(p, "Invalid value '%.*s' for option '%s', expected integer", PL_STR_FMT(str), opt->key); return false; } if (opt->min != opt->max) { if (val < opt->min || val > opt->max) { PL_ERR(p, "Value of %d out of range for option '%s': [%d, %d]", val, opt->key, (int) opt->min, (int) opt->max); return false; } } *(int *) out = val; return true; } static void print_float(opt_ctx p, pl_str *out, const void *ptr) { pl_opt opt = p->opt; const float *val = ptr; pl_assert(opt->min == opt->max || (*val >= opt->min && *val <= opt->max)); pl_str_append_asprintf_c(p->alloc, 
out, "%f", *val); } static bool parse_fraction(pl_str str, float *val) { pl_str denom, num = pl_str_split_char(str, '/', &denom); float n, d; bool ok = denom.buf && denom.len && pl_str_parse_float(num, &n) && pl_str_parse_float(denom, &d); if (ok) *val = n / d; return ok; } static bool parse_float(opt_ctx p, pl_str str, void *out) { pl_opt opt = p->opt; float val; if (!parse_fraction(str, &val) && !pl_str_parse_float(str, &val)) { PL_ERR(p, "Invalid value '%.*s' for option '%s', expected floating point " "or fraction", PL_STR_FMT(str), opt->key); return false; } switch (fpclassify(val)) { case FP_NAN: case FP_INFINITE: case FP_SUBNORMAL: PL_ERR(p, "Invalid value '%f' for option '%s', non-normal float", val, opt->key); return false; case FP_ZERO: case FP_NORMAL: break; } if (opt->min != opt->max) { if (val < opt->min || val > opt->max) { PL_ERR(p, "Value of %.3f out of range for option '%s': [%.2f, %.2f]", val, opt->key, opt->min, opt->max); return false; } } *(float *) out = val; return true; } static int compare_params(opt_ctx p, const void *pa, const void *pb) { const bool a = *(const void * const *) pa; const bool b = *(const void * const *) pb; return PL_CMP(a, b); } static void print_params(opt_ctx p, pl_str *out, const void *ptr) { const bool value = *(const void * const *) ptr; print_bool(p, out, &value); } static bool parse_params(opt_ctx p, pl_str str, void *out) { pl_opt opt = p->opt; opt_priv priv = opt->priv; const void **res = out; bool set; if (!parse_bool(p, str, &set)) return false; if (set) { *res = (const void *) ((uintptr_t) p->opts + priv->offset_params); } else { *res = NULL; } return true; } static void print_enum(opt_ctx p, pl_str *out, const void *ptr) { pl_opt opt = p->opt; opt_priv priv = opt->priv; const unsigned value = *(const unsigned *) ptr; for (int i = 0; priv->values[i].name; i++) { if (priv->values[i].val == value) { pl_str_append(p->alloc, out, pl_str0(priv->values[i].name)); return; } } pl_unreachable(); } static bool parse_enum(opt_ctx p, pl_str str, void *out) { pl_opt opt = p->opt; opt_priv priv = opt->priv; for (int i = 0; priv->values[i].name; i++) { if (pl_str_equals0(str, priv->values[i].name)) { *(unsigned *) out = priv->values[i].val; return true; } } PL_ERR(p, "Value of '%.*s' unrecognized for option '%s', valid values:", PL_STR_FMT(str), opt->key); for (int i = 0; priv->values[i].name; i++) PL_ERR(p, " %s", priv->values[i].name); return false; } static bool parse_preset(opt_ctx p, pl_str str, void *out) { pl_opt opt = p->opt; opt_priv priv = opt->priv; for (int i = 0; priv->presets[i].name; i++) { if (pl_str_equals0(str, priv->presets[i].name)) { if (priv->offset == offsetof(struct pl_options_t, params)) { const struct pl_render_params *preset = priv->presets[i].val; pl_assert(priv->size == sizeof(*preset)); // Redirect params structs into internal system after loading struct pl_render_params *params = out, prev = *params; *params = *preset; redirect_params(p->opts); // Re-apply excluded options params->lut = prev.lut; params->hooks = prev.hooks; params->num_hooks = prev.num_hooks; params->info_callback = prev.info_callback; params->info_priv = prev.info_priv; } else { memcpy(out, priv->presets[i].val, priv->size); } return true; } } PL_ERR(p, "Value of '%.*s' unrecognized for option '%s', valid values:", PL_STR_FMT(str), opt->key); for (int i = 0; priv->presets[i].name; i++) PL_ERR(p, " %s", priv->presets[i].name); return false; } static void print_named(opt_ctx p, pl_str *out, const void *ptr) { const struct named *value = *(const struct 
named **) ptr; if (value) { pl_str_append(p->alloc, out, pl_str0(value->name)); } else { pl_str_append(p->alloc, out, pl_str0("none")); } } static bool parse_named(opt_ctx p, pl_str str, void *out) { pl_opt opt = p->opt; opt_priv priv = opt->priv; const struct named **res = out; if (pl_str_equals0(str, "none")) { *res = NULL; return true; } for (int i = 0; priv->names[i]; i++) { if (pl_str_equals0(str, priv->names[i]->name)) { *res = priv->names[i]; return true; } } PL_ERR(p, "Value of '%.*s' unrecognized for option '%s', valid values:", PL_STR_FMT(str), opt->key); PL_ERR(p, " none"); for (int i = 0; priv->names[i]; i++) PL_ERR(p, " %s", priv->names[i]->name); return false; } static void print_scaler(opt_ctx p, pl_str *out, const void *ptr) { const struct pl_filter_config *f = *(const struct pl_filter_config **) ptr; if (f) { pl_assert(f->name); // this is either a built-in scaler or ptr to custom pl_str_append(p->alloc, out, pl_str0(f->name)); } else { pl_str_append(p->alloc, out, pl_str0("none")); } } static enum pl_filter_usage scaler_usage(pl_opt opt) { opt_priv priv = opt->priv; switch (priv->offset) { case offsetof(struct pl_options_t, params.upscaler): case offsetof(struct pl_options_t, params.plane_upscaler): case offsetof(struct pl_options_t, upscaler): case offsetof(struct pl_options_t, plane_upscaler): return PL_FILTER_UPSCALING; case offsetof(struct pl_options_t, params.downscaler): case offsetof(struct pl_options_t, params.plane_downscaler): case offsetof(struct pl_options_t, downscaler): case offsetof(struct pl_options_t, plane_downscaler): return PL_FILTER_DOWNSCALING; case offsetof(struct pl_options_t, params.frame_mixer): case offsetof(struct pl_options_t, frame_mixer): return PL_FILTER_FRAME_MIXING; } pl_unreachable(); } static bool parse_scaler(opt_ctx p, pl_str str, void *out) { pl_opt opt = p->opt; opt_priv priv = opt->priv; const struct pl_filter_config **res = out; if (pl_str_equals0(str, "none")) { *res = NULL; return true; } else if (pl_str_equals0(str, "custom")) { *res = (void *) ((uintptr_t) p->opts + priv->offset_params); return true; } const enum pl_filter_usage usage = scaler_usage(opt); for (int i = 0; i < pl_num_filter_configs; i++) { if (!(pl_filter_configs[i]->allowed & usage)) continue; if (pl_str_equals0(str, pl_filter_configs[i]->name)) { *res = pl_filter_configs[i]; return true; } } PL_ERR(p, "Value of '%.*s' unrecognized for option '%s', valid values:", PL_STR_FMT(str), opt->key); PL_ERR(p, " none"); PL_ERR(p, " custom"); for (int i = 0; i < pl_num_filter_configs; i++) { if (pl_filter_configs[i]->allowed & usage) PL_ERR(p, " %s", pl_filter_configs[i]->name); } return false; } static bool parse_scaler_preset(opt_ctx p, pl_str str, void *out) { pl_opt opt = p->opt; struct pl_filter_config *res = out; if (pl_str_equals0(str, "none")) { *res = (struct pl_filter_config) { .name = "custom" }; return true; } const enum pl_filter_usage usage = scaler_usage(opt); for (int i = 0; i < pl_num_filter_configs; i++) { if (!(pl_filter_configs[i]->allowed & usage)) continue; if (pl_str_equals0(str, pl_filter_configs[i]->name)) { copy_filter(res, pl_filter_configs[i]); return true; } } PL_ERR(p, "Value of '%.*s' unrecognized for option '%s', valid values:", PL_STR_FMT(str), opt->key); PL_ERR(p, " none"); for (int i = 0; i < pl_num_filter_configs; i++) { if (pl_filter_configs[i]->allowed & usage) PL_ERR(p, " %s", pl_filter_configs[i]->name); } return false; } #define OPT_BOOL(KEY, NAME, FIELD, ...) 
\ { \ .key = KEY, \ .name = NAME, \ .type = PL_OPT_BOOL, \ .priv = &(struct opt_priv_t) { \ .print = print_bool, \ .parse = parse_bool, \ .offset = offsetof(struct pl_options_t, FIELD), \ .size = sizeof(struct { \ bool dummy; \ pl_static_assert(sizeof(defaults.FIELD) == sizeof(bool)); \ }), \ }, \ __VA_ARGS__ \ } #define OPT_INT(KEY, NAME, FIELD, ...) \ { \ .key = KEY, \ .name = NAME, \ .type = PL_OPT_INT, \ .priv = &(struct opt_priv_t) { \ .print = print_int, \ .parse = parse_int, \ .offset = offsetof(struct pl_options_t, FIELD), \ .size = sizeof(struct { \ int dummy; \ pl_static_assert(sizeof(defaults.FIELD) == sizeof(int)); \ }), \ }, \ __VA_ARGS__ \ } #define OPT_FLOAT(KEY, NAME, FIELD, ...) \ { \ .key = KEY, \ .name = NAME, \ .type = PL_OPT_FLOAT, \ .priv = &(struct opt_priv_t) { \ .print = print_float, \ .parse = parse_float, \ .offset = offsetof(struct pl_options_t, FIELD), \ .size = sizeof(struct { \ float dummy; \ pl_static_assert(sizeof(defaults.FIELD) == sizeof(float)); \ }), \ }, \ __VA_ARGS__ \ } #define OPT_ENABLE_PARAMS(KEY, NAME, PARAMS, ...) \ { \ .key = KEY, \ .name = NAME, \ .type = PL_OPT_BOOL, \ .priv = &(struct opt_priv_t) { \ .compare = compare_params, \ .print = print_params, \ .parse = parse_params, \ .offset = offsetof(struct pl_options_t, params.PARAMS), \ .offset_params = offsetof(struct pl_options_t, PARAMS), \ .size = sizeof(struct { \ void *dummy; \ pl_static_assert(sizeof(defaults.params.PARAMS) == sizeof(void*));\ }), \ }, \ __VA_ARGS__ \ } #define OPT_ENUM(KEY, NAME, FIELD, VALUES, ...) \ { \ .key = KEY, \ .name = NAME, \ .type = PL_OPT_STRING, \ .priv = &(struct opt_priv_t) { \ .print = print_enum, \ .parse = parse_enum, \ .offset = offsetof(struct pl_options_t, FIELD), \ .size = sizeof(struct { \ unsigned dummy; \ pl_static_assert(sizeof(defaults.FIELD) == sizeof(unsigned)); \ }), \ .values = (struct enum_val[]) { VALUES } \ }, \ __VA_ARGS__ \ } #define OPT_PRESET(KEY, NAME, PARAMS, PRESETS, ...) \ { \ .key = KEY, \ .name = NAME, \ .type = PL_OPT_STRING, \ .preset = true, \ .priv = &(struct opt_priv_t) { \ .parse = parse_preset, \ .offset = offsetof(struct pl_options_t, PARAMS), \ .size = sizeof(defaults.PARAMS), \ .presets = (struct preset[]) { PRESETS }, \ }, \ __VA_ARGS__ \ } #define OPT_NAMED(KEY, NAME, FIELD, NAMES, ...) \ { \ .key = KEY, \ .name = NAME, \ .type = PL_OPT_STRING, \ .priv = &(struct opt_priv_t) { \ .print = print_named, \ .parse = parse_named, \ .offset = offsetof(struct pl_options_t, FIELD), \ .names = (const struct named * const * ) NAMES, \ .size = sizeof(struct { \ const struct named *dummy; \ pl_static_assert(offsetof(__typeof__(*NAMES[0]), name) == 0); \ pl_static_assert(sizeof(defaults.FIELD) == \ sizeof(const struct named *)); \ }), \ }, \ __VA_ARGS__ \ } #define OPT_SCALER(KEY, NAME, SCALER, ...) \ { \ .key = KEY, \ .name = NAME, \ .type = PL_OPT_STRING, \ .priv = &(struct opt_priv_t) { \ .print = print_scaler, \ .parse = parse_scaler, \ .offset = offsetof(struct pl_options_t, params.SCALER), \ .offset_params = offsetof(struct pl_options_t, SCALER), \ .size = sizeof(struct { \ const struct pl_filter_config *dummy; \ pl_static_assert(sizeof(defaults.SCALER) == \ sizeof(struct pl_filter_config)); \ }), \ }, \ __VA_ARGS__ \ } #define OPT_SCALER_PRESET(KEY, NAME, SCALER, ...) 
\ { \ .key = KEY, \ .name = NAME, \ .type = PL_OPT_STRING, \ .preset = true, \ .priv = &(struct opt_priv_t) { \ .parse = parse_scaler_preset, \ .offset = offsetof(struct pl_options_t, SCALER), \ .size = sizeof(struct { \ struct pl_filter_config dummy; \ pl_static_assert(sizeof(defaults.SCALER) == \ sizeof(struct pl_filter_config)); \ }), \ }, \ __VA_ARGS__ \ } #define LIST(...) __VA_ARGS__, {0} #define SCALE_OPTS(PREFIX, NAME, FIELD) \ OPT_SCALER(PREFIX, NAME, FIELD), \ OPT_SCALER_PRESET(PREFIX"_preset", NAME "preset", FIELD), \ OPT_NAMED(PREFIX"_kernel", NAME" kernel", FIELD.kernel, pl_filter_functions), \ OPT_NAMED(PREFIX"_window", NAME" window", FIELD.window, pl_filter_functions), \ OPT_FLOAT(PREFIX"_radius", NAME" radius", FIELD.radius, .min = 0.0, .max = 16.0), \ OPT_FLOAT(PREFIX"_clamp", NAME" clamping", FIELD.clamp, .max = 1.0), \ OPT_FLOAT(PREFIX"_blur", NAME" blur factor", FIELD.blur, .max = 100.0), \ OPT_FLOAT(PREFIX"_taper", NAME" taper factor", FIELD.taper, .max = 1.0), \ OPT_FLOAT(PREFIX"_antiring", NAME" antiringing", FIELD.antiring, .max = 1.0), \ OPT_FLOAT(PREFIX"_param1", NAME" parameter 1", FIELD.params[0]), \ OPT_FLOAT(PREFIX"_param2", NAME" parameter 2", FIELD.params[1]), \ OPT_FLOAT(PREFIX"_wparam1", NAME" window parameter 1", FIELD.wparams[0]), \ OPT_FLOAT(PREFIX"_wparam2", NAME" window parameter 2", FIELD.wparams[1]), \ OPT_BOOL(PREFIX"_polar", NAME" polar", FIELD.polar) const struct pl_opt_t pl_option_list[] = { OPT_PRESET("preset", "Global preset", params, LIST( {"default", &pl_render_default_params}, {"fast", &pl_render_fast_params}, {"high_quality", &pl_render_high_quality_params})), // Scalers SCALE_OPTS("upscaler", "Upscaler", upscaler), SCALE_OPTS("downscaler", "Downscaler", downscaler), SCALE_OPTS("plane_upscaler", "Plane upscaler", plane_upscaler), SCALE_OPTS("plane_downscaler", "Plane downscaler", plane_downscaler), SCALE_OPTS("frame_mixer", "Frame mixer", frame_mixer), OPT_FLOAT("antiringing_strength", "Anti-ringing strength", params.antiringing_strength, .max = 1.0), // Debanding OPT_ENABLE_PARAMS("deband", "Enable debanding", deband_params), OPT_PRESET("deband_preset", "Debanding preset", deband_params, LIST( {"default", &pl_deband_default_params})), OPT_INT("deband_iterations", "Debanding iterations", deband_params.iterations, .max = 16), OPT_FLOAT("deband_threshold", "Debanding threshold", deband_params.threshold, .max = 1000.0), OPT_FLOAT("deband_radius", "Debanding radius", deband_params.radius, .max = 1000.0), OPT_FLOAT("deband_grain", "Debanding grain", deband_params.grain, .max = 1000.0), OPT_FLOAT("deband_grain_neutral_r", "Debanding grain neutral R", deband_params.grain_neutral[0]), OPT_FLOAT("deband_grain_neutral_g", "Debanding grain neutral G", deband_params.grain_neutral[1]), OPT_FLOAT("deband_grain_neutral_b", "Debanding grain neutral B", deband_params.grain_neutral[2]), // Sigmodization OPT_ENABLE_PARAMS("sigmoid", "Enable sigmoidization", sigmoid_params), OPT_PRESET("sigmoid_preset", "Sigmoidization preset", sigmoid_params, LIST( {"default", &pl_sigmoid_default_params})), OPT_FLOAT("sigmoid_center", "Sigmoidization center", sigmoid_params.center, .max = 1.0), OPT_FLOAT("sigmoid_slope", "Sigmoidization slope", sigmoid_params.slope, .min = 1.0, .max = 20.0), // Color adjustment OPT_ENABLE_PARAMS("color_adjustment", "Enable color adjustment", color_adjustment), OPT_PRESET("color_adjustment_preset", "Color adjustment preset", color_adjustment, LIST( {"neutral", &pl_color_adjustment_neutral})), OPT_FLOAT("brightness", "Brightness boost", 
color_adjustment.brightness, .min = -1.0, .max = 1.0), OPT_FLOAT("contrast", "Contrast boost", color_adjustment.contrast, .max = 100.0), OPT_FLOAT("saturation", "Saturation gain", color_adjustment.saturation, .max = 100.0), OPT_FLOAT("hue", "Hue shift", color_adjustment.hue), OPT_FLOAT("gamma", "Gamma adjustment", color_adjustment.gamma, .max = 100.0), OPT_FLOAT("temperature", "Color temperature shift", color_adjustment.temperature, .min = (2500 - 6500) / 3500.0, // see `pl_white_from_temp` .max = (25000 - 6500) / 3500.0), // Peak detection OPT_ENABLE_PARAMS("peak_detect", "Enable peak detection", peak_detect_params), OPT_PRESET("peak_detect_preset", "Peak detection preset", peak_detect_params, LIST( {"default", &pl_peak_detect_default_params}, {"high_quality", &pl_peak_detect_high_quality_params})), OPT_FLOAT("peak_smoothing_period", "Peak detection smoothing coefficient", peak_detect_params.smoothing_period, .max = 1000.0), OPT_FLOAT("scene_threshold_low", "Scene change threshold low", peak_detect_params.scene_threshold_low, .max = 100.0), OPT_FLOAT("scene_threshold_high", "Scene change threshold high", peak_detect_params.scene_threshold_high, .max = 100.0), OPT_FLOAT("minimum_peak", "Minimum detected peak", peak_detect_params.minimum_peak, .max = 100.0, .deprecated = true), OPT_FLOAT("peak_percentile", "Peak detection percentile", peak_detect_params.percentile, .max = 100.0), OPT_FLOAT("black_cutoff", "Peak detection black cutoff", peak_detect_params.black_cutoff, .max = 100.0), OPT_BOOL("allow_delayed_peak", "Allow delayed peak detection", peak_detect_params.allow_delayed), // Color mapping OPT_ENABLE_PARAMS("color_map", "Enable color mapping", color_map_params), OPT_PRESET("color_map_preset", "Color mapping preset", color_map_params, LIST( {"default", &pl_color_map_default_params}, {"high_quality", &pl_color_map_high_quality_params})), OPT_NAMED("gamut_mapping", "Gamut mapping function", color_map_params.gamut_mapping, pl_gamut_map_functions), OPT_FLOAT("perceptual_deadzone", "Gamut mapping perceptual deadzone", color_map_params.gamut_constants.perceptual_deadzone, .max = 1.0f), OPT_FLOAT("perceptual_strength", "Gamut mapping perceptual strength", color_map_params.gamut_constants.perceptual_strength, .max = 1.0f), OPT_FLOAT("colorimetric_gamma", "Gamut mapping colorimetric gamma", color_map_params.gamut_constants.colorimetric_gamma, .max = 10.0f), OPT_FLOAT("softclip_knee", "Gamut mapping softclip knee point", color_map_params.gamut_constants.softclip_knee, .max = 1.0f), OPT_FLOAT("softclip_desat", "Gamut mapping softclip desaturation strength", color_map_params.gamut_constants.softclip_desat, .max = 1.0f), OPT_INT("lut3d_size_I", "Gamut 3DLUT size I", color_map_params.lut3d_size[0], .max = 1024), OPT_INT("lut3d_size_C", "Gamut 3DLUT size C", color_map_params.lut3d_size[1], .max = 1024), OPT_INT("lut3d_size_h", "Gamut 3DLUT size h", color_map_params.lut3d_size[2], .max = 1024), OPT_BOOL("lut3d_tricubic", "Gamut 3DLUT tricubic interpolation", color_map_params.lut3d_tricubic), OPT_BOOL("gamut_expansion", "Gamut expansion", color_map_params.gamut_expansion), OPT_NAMED("tone_mapping", "Tone mapping function", color_map_params.tone_mapping_function, pl_tone_map_functions), OPT_FLOAT("knee_adaptation", "Tone mapping knee point adaptation", color_map_params.tone_constants.knee_adaptation, .max = 1.0f), OPT_FLOAT("knee_minimum", "Tone mapping knee point minimum", color_map_params.tone_constants.knee_minimum, .max = 0.5f), OPT_FLOAT("knee_maximum", "Tone mapping knee point maximum", 
color_map_params.tone_constants.knee_maximum, .min = 0.5f, .max = 1.0f), OPT_FLOAT("knee_default", "Tone mapping knee point default", color_map_params.tone_constants.knee_default, .max = 1.0f), OPT_FLOAT("knee_offset", "BT.2390 knee point offset", color_map_params.tone_constants.knee_offset, .min = 0.5f, .max = 2.0f), OPT_FLOAT("slope_tuning", "Spline slope tuning strength", color_map_params.tone_constants.slope_tuning, .max = 10.0f), OPT_FLOAT("slope_offset", "Spline slope tuning offset", color_map_params.tone_constants.slope_offset, .max = 1.0f), OPT_FLOAT("spline_contrast", "Spline slope contrast", color_map_params.tone_constants.spline_contrast, .max = 1.5f), OPT_FLOAT("reinhard_contrast", "Reinhard contrast", color_map_params.tone_constants.reinhard_contrast, .max = 1.0f), OPT_FLOAT("linear_knee", "Tone mapping linear knee point", color_map_params.tone_constants.linear_knee, .max = 1.0f), OPT_FLOAT("exposure", "Tone mapping linear exposure", color_map_params.tone_constants.exposure, .max = 10.0f), OPT_BOOL("inverse_tone_mapping", "Inverse tone mapping", color_map_params.inverse_tone_mapping), OPT_ENUM("tone_map_metadata", "Source of HDR metadata to use", color_map_params.metadata, LIST( {"any", PL_HDR_METADATA_ANY}, {"none", PL_HDR_METADATA_NONE}, {"hdr10", PL_HDR_METADATA_HDR10}, {"hdr10plus", PL_HDR_METADATA_HDR10PLUS}, {"cie_y", PL_HDR_METADATA_CIE_Y})), OPT_INT("tone_lut_size", "Tone mapping LUT size", color_map_params.lut_size, .max = 4096), OPT_FLOAT("contrast_recovery", "HDR contrast recovery strength", color_map_params.contrast_recovery, .max = 2.0), OPT_FLOAT("contrast_smoothness", "HDR contrast recovery smoothness", color_map_params.contrast_smoothness, .min = 1.0, .max = 32.0), OPT_BOOL("force_tone_mapping_lut", "Force tone mapping LUT", color_map_params.force_tone_mapping_lut), OPT_BOOL("visualize_lut", "Visualize tone mapping LUTs", color_map_params.visualize_lut), OPT_FLOAT("visualize_lut_x0", "Visualization rect x0", color_map_params.visualize_rect.x0), OPT_FLOAT("visualize_lut_y0", "Visualization rect y0", color_map_params.visualize_rect.y0), OPT_FLOAT("visualize_lut_x1", "Visualization rect x0", color_map_params.visualize_rect.x1), OPT_FLOAT("visualize_lut_y1", "Visualization rect y0", color_map_params.visualize_rect.y1), OPT_FLOAT("visualize_hue", "Visualization hue slice", color_map_params.visualize_hue), OPT_FLOAT("visualize_theta", "Visualization rotation", color_map_params.visualize_theta), OPT_BOOL("show_clipping", "Highlight clipped pixels", color_map_params.show_clipping), OPT_FLOAT("tone_mapping_param", "Tone mapping function parameter", color_map_params.tone_mapping_param, .deprecated = true), // Dithering OPT_ENABLE_PARAMS("dither", "Enable dithering", dither_params), OPT_PRESET("dither_preset", "Dithering preset", dither_params, LIST( {"default", &pl_dither_default_params})), OPT_ENUM("dither_method", "Dither method", dither_params.method, LIST( {"blue", PL_DITHER_BLUE_NOISE}, {"ordered_lut", PL_DITHER_ORDERED_LUT}, {"ordered", PL_DITHER_ORDERED_FIXED}, {"white", PL_DITHER_WHITE_NOISE})), OPT_INT("dither_lut_size", "Dither LUT size", dither_params.lut_size, .min = 1, .max = 8), OPT_BOOL("dither_temporal", "Temporal dithering", dither_params.temporal), // ICC OPT_ENABLE_PARAMS("icc", "Enable ICC settings", icc_params, .deprecated = true), OPT_PRESET("icc_preset", "ICC preset", icc_params, LIST( {"default", &pl_icc_default_params}), .deprecated = true), OPT_ENUM("icc_intent", "ICC rendering intent", icc_params.intent, LIST( {"auto", PL_INTENT_AUTO}, 
{"perceptual", PL_INTENT_PERCEPTUAL}, {"relative", PL_INTENT_RELATIVE_COLORIMETRIC}, {"saturation", PL_INTENT_SATURATION}, {"absolute", PL_INTENT_ABSOLUTE_COLORIMETRIC}), .deprecated = true), OPT_INT("icc_size_r", "ICC 3DLUT size R", icc_params.size_r, .max = 256, .deprecated = true), OPT_INT("icc_size_g", "ICC 3DLUT size G", icc_params.size_g, .max = 256, .deprecated = true), OPT_INT("icc_size_b", "ICC 3DLUT size G", icc_params.size_b, .max = 256, .deprecated = true), OPT_FLOAT("icc_max_luma", "ICC profile luma override", icc_params.max_luma, .max = 10000, .deprecated = true), OPT_BOOL("icc_force_bpc", "Force ICC black point compensation", icc_params.force_bpc, .deprecated = true), // Cone distortion OPT_ENABLE_PARAMS("cone", "Enable cone distortion", cone_params), OPT_PRESET("cone_preset", "Cone distortion preset", cone_params, LIST( {"normal", &pl_vision_normal}, {"protanomaly", &pl_vision_protanomaly}, {"protanopia", &pl_vision_protanopia}, {"deuteranomaly", &pl_vision_deuteranomaly}, {"deuteranopia", &pl_vision_deuteranopia}, {"tritanomaly", &pl_vision_tritanomaly}, {"tritanopia", &pl_vision_tritanopia}, {"monochromacy", &pl_vision_monochromacy}, {"achromatopsia", &pl_vision_achromatopsia})), OPT_ENUM("cones", "Cone selection", cone_params.cones, LIST( {"none", PL_CONE_NONE}, {"l", PL_CONE_L}, {"m", PL_CONE_M}, {"s", PL_CONE_S}, {"lm", PL_CONE_LM}, {"ms", PL_CONE_MS}, {"ls", PL_CONE_LS}, {"lms", PL_CONE_LMS})), OPT_FLOAT("cone_strength", "Cone distortion gain", cone_params.strength), // Blending #define BLEND_VALUES LIST( \ {"zero", PL_BLEND_ZERO}, \ {"one", PL_BLEND_ONE}, \ {"alpha", PL_BLEND_SRC_ALPHA}, \ {"one_minus_alpha", PL_BLEND_ONE_MINUS_SRC_ALPHA}) OPT_ENABLE_PARAMS("blend", "Enable output blending", blend_params), OPT_PRESET("blend_preset", "Output blending preset", blend_params, LIST( {"alpha_overlay", &pl_alpha_overlay})), OPT_ENUM("blend_src_rgb", "Source RGB blend mode", blend_params.src_rgb, BLEND_VALUES), OPT_ENUM("blend_src_alpha", "Source alpha blend mode", blend_params.src_alpha, BLEND_VALUES), OPT_ENUM("blend_dst_rgb", "Target RGB blend mode", blend_params.dst_rgb, BLEND_VALUES), OPT_ENUM("blend_dst_alpha", "Target alpha blend mode", blend_params.dst_alpha, BLEND_VALUES), // Deinterlacing OPT_ENABLE_PARAMS("deinterlace", "Enable deinterlacing", deinterlace_params), OPT_PRESET("deinterlace_preset", "Deinterlacing preset", deinterlace_params, LIST( {"default", &pl_deinterlace_default_params})), OPT_ENUM("deinterlace_algo", "Deinterlacing algorithm", deinterlace_params.algo, LIST( {"weave", PL_DEINTERLACE_WEAVE}, {"bob", PL_DEINTERLACE_BOB}, {"yadif", PL_DEINTERLACE_YADIF})), OPT_BOOL("deinterlace_skip_spatial", "Skip spatial interlacing check", deinterlace_params.skip_spatial_check), // Distortion OPT_ENABLE_PARAMS("distort", "Enable distortion", distort_params), OPT_PRESET("distort_preset", "Distortion preset", distort_params, LIST( {"default", &pl_distort_default_params})), OPT_FLOAT("distort_scale_x", "Distortion X scale", distort_params.transform.mat.m[0][0]), OPT_FLOAT("distort_scale_y", "Distortion Y scale", distort_params.transform.mat.m[1][1]), OPT_FLOAT("distort_shear_x", "Distortion X shear", distort_params.transform.mat.m[0][1]), OPT_FLOAT("distort_shear_y", "Distortion Y shear", distort_params.transform.mat.m[1][0]), OPT_FLOAT("distort_offset_x", "Distortion X offset", distort_params.transform.c[0]), OPT_FLOAT("distort_offset_y", "Distortion Y offset", distort_params.transform.c[1]), OPT_BOOL("distort_unscaled", "Distortion unscaled", 
distort_params.unscaled), OPT_BOOL("distort_constrain", "Constrain distortion", distort_params.constrain), OPT_BOOL("distort_bicubic", "Distortion bicubic interpolation", distort_params.bicubic), OPT_ENUM("distort_address_mode", "Distortion texture address mode", distort_params.address_mode, LIST( {"clamp", PL_TEX_ADDRESS_CLAMP}, {"repeat", PL_TEX_ADDRESS_REPEAT}, {"mirror", PL_TEX_ADDRESS_MIRROR})), OPT_ENUM("distort_alpha_mode", "Distortion alpha blending mode", distort_params.alpha_mode, LIST( {"unknown", PL_ALPHA_UNKNOWN}, {"independent", PL_ALPHA_INDEPENDENT}, {"premultiplied", PL_ALPHA_PREMULTIPLIED}, {"none", PL_ALPHA_NONE})), // Misc renderer settings OPT_NAMED("error_diffusion", "Error diffusion kernel", params.error_diffusion, pl_error_diffusion_kernels), OPT_ENUM("lut_type", "Color mapping LUT type", params.lut_type, LIST( {"unknown", PL_LUT_UNKNOWN}, {"native", PL_LUT_NATIVE}, {"normalized", PL_LUT_NORMALIZED}, {"conversion", PL_LUT_CONVERSION})), OPT_ENUM("background", "Background clearing mode", params.background, LIST( {"color", PL_CLEAR_COLOR}, {"tiles", PL_CLEAR_TILES}, {"skip", PL_CLEAR_SKIP})), OPT_ENUM("border", "Border clearing mode", params.border, LIST( {"color", PL_CLEAR_COLOR}, {"tiles", PL_CLEAR_TILES}, {"skip", PL_CLEAR_SKIP})), OPT_FLOAT("background_r", "Background color R", params.background_color[0], .max = 1.0), OPT_FLOAT("background_g", "Background color G", params.background_color[1], .max = 1.0), OPT_FLOAT("background_b", "Background color B", params.background_color[2], .max = 1.0), OPT_FLOAT("background_transparency", "Background color transparency", params.background_transparency, .max = 1), OPT_BOOL("skip_target_clearing", "Skip target clearing", params.skip_target_clearing, .deprecated = true), OPT_FLOAT("corner_rounding", "Corner rounding", params.corner_rounding, .max = 1.0), OPT_BOOL("blend_against_tiles", "Blend against tiles", params.blend_against_tiles, .deprecated = true), OPT_FLOAT("tile_color_hi_r", "Bright tile R", params.tile_colors[0][0], .max = 1.0), OPT_FLOAT("tile_color_hi_g", "Bright tile G", params.tile_colors[0][1], .max = 1.0), OPT_FLOAT("tile_color_hi_b", "Bright tile B", params.tile_colors[0][2], .max = 1.0), OPT_FLOAT("tile_color_lo_r", "Dark tile R", params.tile_colors[1][0], .max = 1.0), OPT_FLOAT("tile_color_lo_g", "Dark tile G", params.tile_colors[1][1], .max = 1.0), OPT_FLOAT("tile_color_lo_b", "Dark tile B", params.tile_colors[1][2], .max = 1.0), OPT_INT("tile_size", "Tile size", params.tile_size, .min = 2, .max = 256), // Performance / quality trade-offs and debugging options OPT_BOOL("skip_anti_aliasing", "Skip anti-aliasing", params.skip_anti_aliasing), OPT_INT("lut_entries", "Scaler LUT entries", params.lut_entries, .max = 256, .deprecated = true), OPT_FLOAT("polar_cutoff", "Polar LUT cutoff", params.polar_cutoff, .max = 1.0, .deprecated = true), OPT_BOOL("preserve_mixing_cache", "Preserve mixing cache", params.preserve_mixing_cache), OPT_BOOL("skip_caching_single_frame", "Skip caching single frame", params.skip_caching_single_frame), OPT_BOOL("disable_linear_scaling", "Disable linear scaling", params.disable_linear_scaling), OPT_BOOL("disable_builtin_scalers", "Disable built-in scalers", params.disable_builtin_scalers), OPT_BOOL("correct_subpixel_offset", "Correct subpixel offsets", params.correct_subpixel_offsets), OPT_BOOL("ignore_icc_profiles", "Ignore ICC profiles", params.ignore_icc_profiles, .deprecated = true), OPT_BOOL("force_dither", "Force-enable dithering", params.force_dither), 
OPT_BOOL("disable_dither_gamma_correction", "Disable gamma-correct dithering", params.disable_dither_gamma_correction), OPT_BOOL("disable_fbos", "Disable FBOs", params.disable_fbos), OPT_BOOL("force_low_bit_depth_fbos", "Force 8-bit FBOs", params.force_low_bit_depth_fbos), OPT_BOOL("dynamic_constants", "Dynamic constants", params.dynamic_constants), {0}, }; const int pl_option_count = PL_ARRAY_SIZE(pl_option_list) - 1; pl_opt pl_find_option(const char *key) { for (int i = 0; i < pl_option_count; i++) { if (!strcmp(key, pl_option_list[i].key)) return &pl_option_list[i]; } return NULL; } libplacebo-v7.349.0/src/os.h000066400000000000000000000015321463457750100155560ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #pragma once #ifdef __unix__ #define PL_HAVE_UNIX #endif #ifdef _WIN32 #define PL_HAVE_WIN32 #endif #ifdef __APPLE__ #define PL_HAVE_APPLE #endif libplacebo-v7.349.0/src/pl_alloc.c000066400000000000000000000156351463457750100167260ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . 
*/ #include "common.h" struct header { #ifndef NDEBUG #define MAGIC 0x20210119LU uint32_t magic; #endif size_t size; struct header *parent; struct ext *ext; // Pointer to actual data, for alignment purposes max_align_t data[]; }; // Lazily allocated, to save space for leaf allocations and allocations which // don't need fancy requirements struct ext { size_t num_children; size_t children_size; // total allocated size of `children` struct header *children[]; }; #define PTR_OFFSET offsetof(struct header, data) #define MAX_ALLOC (SIZE_MAX - PTR_OFFSET) #define MINIMUM_CHILDREN 4 static inline struct header *get_header(void *ptr) { if (!ptr) return NULL; struct header *hdr = (struct header *) ((uintptr_t) ptr - PTR_OFFSET); #ifndef NDEBUG assert(hdr->magic == MAGIC); #endif return hdr; } static inline void *oom(void) { fprintf(stderr, "out of memory\n"); abort(); } static inline struct ext *alloc_ext(struct header *h) { if (!h) return NULL; if (!h->ext) { h->ext = malloc(sizeof(struct ext) + MINIMUM_CHILDREN * sizeof(void *)); if (!h->ext) oom(); h->ext->num_children = 0; h->ext->children_size = MINIMUM_CHILDREN; } return h->ext; } static inline void attach_child(struct header *parent, struct header *child) { child->parent = parent; if (!parent) return; struct ext *ext = alloc_ext(parent); if (ext->num_children == ext->children_size) { size_t new_size = ext->children_size * 2; ext = realloc(ext, sizeof(struct ext) + new_size * sizeof(void *)); if (!ext) oom(); ext->children_size = new_size; parent->ext = ext; } ext->children[ext->num_children++] = child; } static inline void unlink_child(struct header *parent, struct header *child) { child->parent = NULL; if (!parent) return; struct ext *ext = parent->ext; for (size_t i = 0; i < ext->num_children; i++) { if (ext->children[i] == child) { memmove(&ext->children[i], &ext->children[i + 1], (--ext->num_children - i) * sizeof(ext->children[0])); return; } } assert(!"unlinking orphaned child?"); } void *pl_alloc(void *parent, size_t size) { if (size >= MAX_ALLOC) return oom(); struct header *h = malloc(PTR_OFFSET + size); if (!h) return oom(); #ifndef NDEBUG h->magic = MAGIC; #endif h->size = size; h->ext = NULL; attach_child(get_header(parent), h); return h->data; } void *pl_zalloc(void *parent, size_t size) { if (size >= MAX_ALLOC) return oom(); struct header *h = calloc(1, PTR_OFFSET + size); if (!h) return oom(); #ifndef NDEBUG h->magic = MAGIC; #endif h->size = size; attach_child(get_header(parent), h); return h->data; } void *pl_realloc(void *parent, void *ptr, size_t size) { if (size >= MAX_ALLOC) return oom(); if (!ptr) return pl_alloc(parent, size); struct header *h = get_header(ptr); assert(get_header(parent) == h->parent); if (h->size == size) return ptr; struct header *old_h = h; h = realloc(h, PTR_OFFSET + size); if (!h) return oom(); h->size = size; if (h != old_h) { if (h->parent) { struct ext *ext = h->parent->ext; for (size_t i = 0; i < ext->num_children; i++) { if (ext->children[i] == old_h) { ext->children[i] = h; goto done_reparenting; } } assert(!"reallocating orphaned child?"); } done_reparenting: if (h->ext) { for (size_t i = 0; i < h->ext->num_children; i++) h->ext->children[i]->parent = h; } } return h->data; } void pl_free(void *ptr) { struct header *h = get_header(ptr); if (!h) return; pl_free_children(ptr); unlink_child(h->parent, h); free(h->ext); free(h); } void pl_free_children(void *ptr) { struct header *h = get_header(ptr); if (!h || !h->ext) return; #ifndef NDEBUG // this detects recursive hierarchies h->magic = 0; 
#endif for (size_t i = 0; i < h->ext->num_children; i++) { h->ext->children[i]->parent = NULL; // prevent recursive access pl_free(h->ext->children[i]->data); } h->ext->num_children = 0; #ifndef NDEBUG h->magic = MAGIC; #endif } size_t pl_get_size(const void *ptr) { const struct header *h = get_header((void *) ptr); return h ? h->size : 0; } void *pl_steal(void *parent, void *ptr) { struct header *h = get_header(ptr); if (!h) return NULL; struct header *new_par = get_header(parent); if (new_par != h->parent) { unlink_child(h->parent, h); attach_child(new_par, h); } return h->data; } void *pl_memdup(void *parent, const void *ptr, size_t size) { if (!size) return NULL; void *new = pl_alloc(parent, size); if (!new) return oom(); assert(ptr); memcpy(new, ptr, size); return new; } char *pl_str0dup0(void *parent, const char *str) { if (!str) return NULL; return pl_memdup(parent, str, strlen(str) + 1); } char *pl_strndup0(void *parent, const char *str, size_t size) { if (!str) return NULL; size_t str_size = strnlen(str, size); char *new = pl_alloc(parent, str_size + 1); if (!new) return oom(); memcpy(new, str, str_size); new[str_size] = '\0'; return new; } char *pl_asprintf(void *parent, const char *fmt, ...) { char *str; va_list ap; va_start(ap, fmt); str = pl_vasprintf(parent, fmt, ap); va_end(ap); return str; } char *pl_vasprintf(void *parent, const char *fmt, va_list ap) { // First, we need to determine the size that will be required for // printing the entire string. Do this by making a copy of the va_list // and printing it to a null buffer. va_list copy; va_copy(copy, ap); int size = vsnprintf(NULL, 0, fmt, copy); va_end(copy); if (size < 0) return NULL; char *str = pl_alloc(parent, size + 1); vsnprintf(str, size + 1, fmt, ap); return str; } libplacebo-v7.349.0/src/pl_alloc.h000066400000000000000000000223621463457750100167260ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #pragma once #include #include #include #include #include // Unlike standard malloc, `size` may be 0, in which case this returns an empty // allocation which can still be used as a parent for other allocations. void *pl_alloc(void *parent, size_t size); void *pl_zalloc(void *parent, size_t size); void *pl_realloc(void *parent, void *ptr, size_t size); static inline void *pl_calloc(void *parent, size_t count, size_t size) { return pl_zalloc(parent, count * size); } #define pl_tmp(parent) pl_alloc(parent, 0) // Variants of the above which resolve to sizeof(*ptr) #define pl_alloc_ptr(parent, ptr) \ (__typeof__(ptr)) pl_alloc(parent, sizeof(*(ptr))) #define pl_zalloc_ptr(parent, ptr) \ (__typeof__(ptr)) pl_zalloc(parent, sizeof(*(ptr))) #define pl_calloc_ptr(parent, num, ptr) \ (__typeof__(ptr)) pl_calloc(parent, num, sizeof(*(ptr))) // Helper function to allocate a struct and immediately assign it #define pl_alloc_struct(parent, type, ...) 
\ (type *) pl_memdup(parent, &(type) __VA_ARGS__, sizeof(type)) // Free an allocation and its children (recursively) void pl_free(void *ptr); void pl_free_children(void *ptr); #define pl_free_ptr(ptr) \ do { \ pl_free(*(ptr)); \ *(ptr) = NULL; \ } while (0) // Get the current size of an allocation. size_t pl_get_size(const void *ptr); #define pl_grow(parent, ptr, size) \ do { \ size_t _size = (size); \ if (_size > pl_get_size(*(ptr))) \ *(ptr) = pl_realloc(parent, *(ptr), _size); \ } while (0) // Reparent an allocation onto a new parent void *pl_steal(void *parent, void *ptr); // Wrapper functions around common string utilities void *pl_memdup(void *parent, const void *ptr, size_t size); char *pl_str0dup0(void *parent, const char *str); char *pl_strndup0(void *parent, const char *str, size_t size); #define pl_memdup_ptr(parent, ptr) \ (__typeof__(ptr)) pl_memdup(parent, ptr, sizeof(*(ptr))) // Helper functions for allocating public/private pairs, done by allocating // `priv` at the address of `pub` + sizeof(pub), rounded up to the maximum // alignment requirements. #define PL_ALIGN_MEM(size) PL_ALIGN2(size, alignof(max_align_t)) #define PL_PRIV(pub) \ (void *) ((uintptr_t) (pub) + PL_ALIGN_MEM(sizeof(*(pub)))) #define pl_alloc_obj(parent, ptr, priv) \ (__typeof__(ptr)) pl_alloc(parent, PL_ALIGN_MEM(sizeof(*(ptr))) + sizeof(priv)) #define pl_zalloc_obj(parent, ptr, priv) \ (__typeof__(ptr)) pl_zalloc(parent, PL_ALIGN_MEM(sizeof(*(ptr))) + sizeof(priv)) // Helper functions for dealing with arrays #define PL_ARRAY(type) struct { type *elem; int num; } #define PL_ARRAY_REALLOC(parent, arr, len) \ do { \ size_t _new_size = (len) * sizeof((arr).elem[0]); \ (arr).elem = pl_realloc((void *) parent, (arr).elem, _new_size); \ } while (0) #define PL_ARRAY_RESIZE(parent, arr, len) \ do { \ size_t _avail = pl_get_size((arr).elem) / sizeof((arr).elem[0]); \ size_t _min_len = (len); \ if (_avail < _min_len) \ PL_ARRAY_REALLOC(parent, arr, _min_len); \ } while (0) #define PL_ARRAY_MEMDUP(parent, arr, ptr, len) \ do { \ size_t _len = (len); \ PL_ARRAY_RESIZE(parent, arr, _len); \ memcpy((arr).elem, ptr, _len * sizeof((arr).elem[0])); \ (arr).num = _len; \ } while (0) #define PL_ARRAY_GROW(parent, arr) \ do { \ size_t _avail = pl_get_size((arr).elem) / sizeof((arr).elem[0]); \ if (_avail < 10) { \ PL_ARRAY_REALLOC(parent, arr, 10); \ } else if ((arr).num == _avail) { \ PL_ARRAY_REALLOC(parent, arr, (arr).num * 1.5); \ } else { \ assert((arr).elem); \ } \ } while (0) #define PL_ARRAY_APPEND(parent, arr, ...) \ do { \ PL_ARRAY_GROW(parent, arr); \ (arr).elem[(arr).num++] = __VA_ARGS__; \ } while (0) #define PL_ARRAY_CONCAT(parent, to, from) \ do { \ if ((from).num) { \ PL_ARRAY_RESIZE(parent, to, (to).num + (from).num); \ memmove(&(to).elem[(to).num], (from).elem, \ (from).num * sizeof((from).elem[0])); \ (to).num += (from).num; \ } \ } while (0) #define PL_ARRAY_REMOVE_RANGE(arr, idx, count) \ do { \ ptrdiff_t _idx = (idx); \ if (_idx < 0) \ _idx += (arr).num; \ size_t _count = (count); \ assert(_idx >= 0 && _idx + _count <= (arr).num); \ memmove(&(arr).elem[_idx], &(arr).elem[_idx + _count], \ ((arr).num - _idx - _count) * sizeof((arr).elem[0])); \ (arr).num -= _count; \ } while (0) #define PL_ARRAY_REMOVE_AT(arr, idx) PL_ARRAY_REMOVE_RANGE(arr, idx, 1) #define PL_ARRAY_INSERT_AT(parent, arr, idx, ...) 
\ do { \ ptrdiff_t _idx = (idx); \ if (_idx < 0) \ _idx += (arr).num + 1; \ assert(_idx >= 0 && _idx <= (arr).num); \ PL_ARRAY_GROW(parent, arr); \ memmove(&(arr).elem[_idx + 1], &(arr).elem[_idx], \ ((arr).num++ - _idx) * sizeof((arr).elem[0])); \ (arr).elem[_idx] = __VA_ARGS__; \ } while (0) // Returns whether or not there was any element to pop #define PL_ARRAY_POP(arr, out) \ ((arr).num > 0 \ ? (*(out) = (arr).elem[--(arr).num], true) \ : false \ ) // Wrapper for dealing with non-PL_ARRAY arrays #define PL_ARRAY_APPEND_RAW(parent, arr, idxvar, ...) \ do { \ PL_ARRAY(__typeof__((arr)[0])) _arr = { (arr), (idxvar) }; \ PL_ARRAY_APPEND(parent, _arr, __VA_ARGS__); \ (arr) = _arr.elem; \ (idxvar) = _arr.num; \ } while (0) libplacebo-v7.349.0/src/pl_assert.h000066400000000000000000000025761463457750100171420ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #pragma once #include #include #ifndef NDEBUG # define pl_assert assert #else # define pl_assert(expr) \ do { \ if (!(expr)) { \ fprintf(stderr, "Assertion failed: %s in %s:%d\n", \ #expr, __FILE__, __LINE__); \ abort(); \ } \ } while (0) #endif // In C11, static asserts must have a string message #define pl_static_assert(expr) static_assert(expr, #expr) libplacebo-v7.349.0/src/pl_clock.h000066400000000000000000000052211463457750100167220ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #pragma once #include #include #include "os.h" #ifdef PL_HAVE_WIN32 # include # define PL_CLOCK_QPC #elif defined(PL_HAVE_APPLE) # include # if (defined(__MAC_OS_X_VERSION_MIN_REQUIRED) && __MAC_OS_X_VERSION_MIN_REQUIRED < 101200) || \ (defined(__IPHONE_OS_VERSION_MIN_REQUIRED) && __IPHONE_OS_VERSION_MIN_REQUIRED < 100000) || \ (defined(__TV_OS_VERSION_MIN_REQUIRED) && __TV_OS_VERSION_MIN_REQUIRED < 100000) || \ (defined(__WATCH_OS_VERSION_MIN_REQUIRED) && __WATCH_OS_VERSION_MIN_REQUIRED < 30000) || \ !defined(CLOCK_MONOTONIC_RAW) # include # define PL_CLOCK_MACH # else # define PL_CLOCK_MONOTONIC_RAW # endif #elif defined(CLOCK_MONOTONIC_RAW) # define PL_CLOCK_MONOTONIC_RAW #elif defined(TIME_UTC) # define PL_CLOCK_TIMESPEC_GET #else # warning "pl_clock not implemented for this platform!" 
#endif typedef uint64_t pl_clock_t; static inline pl_clock_t pl_clock_now(void) { #if defined(PL_CLOCK_QPC) LARGE_INTEGER counter; QueryPerformanceCounter(&counter); return counter.QuadPart; #elif defined(PL_CLOCK_MACH) return mach_absolute_time(); #else struct timespec tp = { .tv_sec = 0, .tv_nsec = 0 }; #if defined(PL_CLOCK_MONOTONIC_RAW) clock_gettime(CLOCK_MONOTONIC_RAW, &tp); #elif defined(PL_CLOCK_TIMESPEC_GET) timespec_get(&tp, TIME_UTC); #endif return tp.tv_sec * UINT64_C(1000000000) + tp.tv_nsec; #endif } static inline double pl_clock_diff(pl_clock_t a, pl_clock_t b) { double frequency = 1e9; #if defined(PL_CLOCK_QPC) LARGE_INTEGER freq; QueryPerformanceFrequency(&freq); frequency = freq.QuadPart; #elif defined(PL_CLOCK_MACH) mach_timebase_info_data_t time_base; if (mach_timebase_info(&time_base) != KERN_SUCCESS) return 0; frequency = (time_base.denom * 1e9) / time_base.numer; #endif if (b > a) return (b - a) / -frequency; else return (a - b) / frequency; } libplacebo-v7.349.0/src/pl_string.c000066400000000000000000000251561463457750100171410ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #include "common.h" #include "hash.h" static void grow_str(void *alloc, pl_str *str, size_t len) { // Like pl_grow, but with some extra headroom if (len > pl_get_size(str->buf)) str->buf = pl_realloc(alloc, str->buf, len * 1.5); } void pl_str_append(void *alloc, pl_str *str, pl_str append) { // Also append an extra \0 for convenience, since a lot of the time // this function will be used to generate a string buffer grow_str(alloc, str, str->len + append.len + 1); if (append.len) { memcpy(str->buf + str->len, append.buf, append.len); str->len += append.len; } str->buf[str->len] = '\0'; } void pl_str_append_raw(void *alloc, pl_str *str, const void *ptr, size_t size) { if (!size) return; grow_str(alloc, str, str->len + size); memcpy(str->buf + str->len, ptr, size); str->len += size; } void pl_str_append_asprintf(void *alloc, pl_str *str, const char *fmt, ...) { va_list ap; va_start(ap, fmt); pl_str_append_vasprintf(alloc, str, fmt, ap); va_end(ap); } void pl_str_append_vasprintf(void *alloc, pl_str *str, const char *fmt, va_list ap) { // First, we need to determine the size that will be required for // printing the entire string. Do this by making a copy of the va_list // and printing it to a null buffer. va_list copy; va_copy(copy, ap); int size = vsnprintf(NULL, 0, fmt, copy); va_end(copy); if (size < 0) return; // Make room in `str` and format to there directly grow_str(alloc, str, str->len + size + 1); str->len += vsnprintf((char *) (str->buf + str->len), size + 1, fmt, ap); } int pl_str_sscanf(pl_str str, const char *fmt, ...) 
{ char *tmp = pl_strdup0(NULL, str); va_list va; va_start(va, fmt); int ret = vsscanf(tmp, fmt, va); va_end(va); pl_free(tmp); return ret; } int pl_strchr(pl_str str, int c) { if (!str.len) return -1; void *pos = memchr(str.buf, c, str.len); if (pos) return (intptr_t) pos - (intptr_t) str.buf; return -1; } size_t pl_strspn(pl_str str, const char *accept) { for (size_t i = 0; i < str.len; i++) { if (!strchr(accept, str.buf[i])) return i; } return str.len; } size_t pl_strcspn(pl_str str, const char *reject) { for (size_t i = 0; i < str.len; i++) { if (strchr(reject, str.buf[i])) return i; } return str.len; } static inline bool pl_isspace(char c) { switch (c) { case ' ': case '\n': case '\r': case '\t': case '\v': case '\f': return true; default: return false; } } pl_str pl_str_strip(pl_str str) { while (str.len && pl_isspace(str.buf[0])) { str.buf++; str.len--; } while (str.len && pl_isspace(str.buf[str.len - 1])) str.len--; return str; } int pl_str_find(pl_str haystack, pl_str needle) { if (!needle.len) return 0; for (size_t i = 0; i + needle.len <= haystack.len; i++) { if (memcmp(&haystack.buf[i], needle.buf, needle.len) == 0) return i; } return -1; } pl_str pl_str_split_char(pl_str str, char sep, pl_str *out_rest) { int pos = pl_strchr(str, sep); if (pos < 0) { if (out_rest) *out_rest = (pl_str) {0}; return str; } else { if (out_rest) *out_rest = pl_str_drop(str, pos + 1); return pl_str_take(str, pos); } } pl_str pl_str_split_chars(pl_str str, const char *seps, pl_str *out_rest) { int pos = pl_strcspn(str, seps); if (pos < 0) { if (out_rest) *out_rest = (pl_str) {0}; return str; } else { if (out_rest) *out_rest = pl_str_drop(str, pos + 1); return pl_str_take(str, pos); } } pl_str pl_str_split_str(pl_str str, pl_str sep, pl_str *out_rest) { int pos = pl_str_find(str, sep); if (pos < 0) { if (out_rest) *out_rest = (pl_str) {0}; return str; } else { if (out_rest) *out_rest = pl_str_drop(str, pos + sep.len); return pl_str_take(str, pos); } } static bool get_hexdigit(pl_str *str, int *digit) { while (str->len && pl_isspace(str->buf[0])) { str->buf++; str->len--; } if (!str->len) { *digit = -1; // EOF return true; } char c = str->buf[0]; str->buf++; str->len--; if (c >= '0' && c <= '9') { *digit = c - '0'; } else if (c >= 'a' && c <= 'f') { *digit = c - 'a' + 10; } else if (c >= 'A' && c <= 'F') { *digit = c - 'A' + 10; } else { return false; // invalid char } return true; } bool pl_str_decode_hex(void *alloc, pl_str hex, pl_str *out) { if (!out) return false; uint8_t *buf = pl_alloc(alloc, hex.len / 2); int len = 0; while (hex.len) { int a, b; if (!get_hexdigit(&hex, &a) || !get_hexdigit(&hex, &b)) goto error; // invalid char if (a < 0) // EOF break; if (b < 0) // only one digit goto error; buf[len++] = (a << 4) | b; } *out = (pl_str) { buf, len }; return true; error: pl_free(buf); return false; } struct pl_str_builder_t { PL_ARRAY(pl_str_template) templates; pl_str args; pl_str output; }; pl_str_builder pl_str_builder_alloc(void *alloc) { pl_str_builder b = pl_zalloc_ptr(alloc, b); return b; } void pl_str_builder_free(pl_str_builder *b) { if (*b) pl_free_ptr(b); } void pl_str_builder_reset(pl_str_builder b) { *b = (struct pl_str_builder_t) { .templates.elem = b->templates.elem, .args.buf = b->args.buf, .output.buf = b->output.buf, }; } uint64_t pl_str_builder_hash(const pl_str_builder b) { size_t size = b->templates.num * sizeof(b->templates.elem[0]); uint64_t hash = pl_mem_hash(b->templates.elem, size); pl_hash_merge(&hash, pl_str_hash(b->args)); return hash; } pl_str 
pl_str_builder_exec(pl_str_builder b) { pl_str args = b->args; b->output.len = 0; for (int i = 0; i < b->templates.num; i++) { size_t consumed = b->templates.elem[i](b, &b->output, args.buf); pl_assert(consumed <= args.len); args = pl_str_drop(args, consumed); } // Terminate with an extra \0 byte for convenience grow_str(b, &b->output, b->output.len + 1); b->output.buf[b->output.len] = '\0'; return b->output; } void pl_str_builder_append(pl_str_builder b, pl_str_template tmpl, const void *args, size_t size) { PL_ARRAY_APPEND(b, b->templates, tmpl); pl_str_append_raw(b, &b->args, args, size); } void pl_str_builder_concat(pl_str_builder b, const pl_str_builder append) { PL_ARRAY_CONCAT(b, b->templates, append->templates); pl_str_append_raw(b, &b->args, append->args.buf, append->args.len); } static size_t template_str_ptr(void *alloc, pl_str *buf, const uint8_t *args) { const char *str; memcpy(&str, args, sizeof(str)); pl_str_append_raw(alloc, buf, str, strlen(str)); return sizeof(str); } void pl_str_builder_const_str(pl_str_builder b, const char *str) { pl_str_builder_append(b, template_str_ptr, &str, sizeof(str)); } static size_t template_str(void *alloc, pl_str *buf, const uint8_t *args) { pl_str str; memcpy(&str.len, args, sizeof(str.len)); pl_str_append_raw(alloc, buf, args + sizeof(str.len), str.len); return sizeof(str.len) + str.len; } void pl_str_builder_str(pl_str_builder b, const pl_str str) { pl_str_builder_append(b, template_str, &str.len, sizeof(str.len)); pl_str_append_raw(b, &b->args, str.buf, str.len); } void pl_str_builder_printf_c(pl_str_builder b, const char *fmt, ...) { va_list ap; va_start(ap, fmt); pl_str_builder_vprintf_c(b, fmt, ap); va_end(ap); } static size_t template_printf(void *alloc, pl_str *str, const uint8_t *args) { const char *fmt; memcpy(&fmt, args, sizeof(fmt)); args += sizeof(fmt); return sizeof(fmt) + pl_str_append_memprintf_c(alloc, str, fmt, args); } void pl_str_builder_vprintf_c(pl_str_builder b, const char *fmt, va_list ap) { pl_str_builder_append(b, template_printf, &fmt, sizeof(fmt)); // Push all of the variadic arguments directly onto `b->args` for (const char *c; (c = strchr(fmt, '%')) != NULL; fmt = c + 1) { c++; switch (c[0]) { #define WRITE(T, x) pl_str_append_raw(b, &b->args, &(T) {x}, sizeof(T)) case '%': continue; case 'c': WRITE(char, va_arg(ap, int)); break; case 'd': WRITE(int, va_arg(ap, int)); break; case 'u': WRITE(unsigned, va_arg(ap, unsigned)); break; case 'f': WRITE(double, va_arg(ap, double)); break; case 'h': assert(c[1] == 'x'); WRITE(unsigned short, va_arg(ap, unsigned)); c++; break; case 'l': assert(c[1] == 'l'); switch (c[2]) { case 'u': WRITE(long long unsigned, va_arg(ap, long long unsigned)); break; case 'd': WRITE(long long int, va_arg(ap, long long int)); break; default: abort(); } c += 2; break; case 'z': assert(c[1] == 'u'); WRITE(size_t, va_arg(ap, size_t)); c++; break; case 's': { pl_str str = pl_str0(va_arg(ap, const char *)); pl_str_append(b, &b->args, str); b->args.len++; // expand to include \0 byte (from pl_str_append) break; } case '.': { assert(c[1] == '*'); assert(c[2] == 's'); int len = va_arg(ap, int); const char *str = va_arg(ap, const char *); WRITE(int, len); pl_str_append_raw(b, &b->args, str, len); c += 2; break; } default: fprintf(stderr, "Invalid conversion character: '%c'!\n", c[0]); abort(); } #undef WRITE } } libplacebo-v7.349.0/src/pl_string.h000066400000000000000000000262151463457750100171430ustar00rootroot00000000000000/* * This file is part of libplacebo. 
* * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #pragma once #include "common.h" PL_API_BEGIN typedef struct pl_str { uint8_t *buf; size_t len; } pl_str; // For formatting with "%.*s" #define PL_STR_FMT(str) (int)((str).len), ((str).buf ? (char *)((str).buf) : "") static inline pl_str pl_str0(const char *str) { return (pl_str) { .buf = (uint8_t *) str, .len = str ? strlen(str) : 0, }; } // Macro version of pl_str0, for constants #define PL_STR0(str) ((pl_str) { (uint8_t *) (str), (str) ? strlen(str) : 0 }) static inline pl_str pl_strdup(void *alloc, pl_str str) { return (pl_str) { .buf = (uint8_t *) (str.len ? pl_memdup(alloc, str.buf, str.len) : NULL), .len = str.len, }; } // Always returns a valid string static inline char *pl_strdup0(void *alloc, pl_str str) { return pl_strndup0(alloc, str.len ? (char *) str.buf : "", str.len); } // Adds a trailing \0 for convenience, even if `append` is an empty string void pl_str_append(void *alloc, pl_str *str, pl_str append); // Like `pl_str_append` but for raw memory, omits trailing \0 void pl_str_append_raw(void *alloc, pl_str *str, const void *ptr, size_t size); // Locale-sensitive string functions char *pl_asprintf(void *parent, const char *fmt, ...) PL_PRINTF(2, 3); char *pl_vasprintf(void *parent, const char *fmt, va_list ap) PL_PRINTF(2, 0); void pl_str_append_asprintf(void *alloc, pl_str *str, const char *fmt, ...) PL_PRINTF(3, 4); void pl_str_append_vasprintf(void *alloc, pl_str *str, const char *fmt, va_list va) PL_PRINTF(3, 0); int pl_str_sscanf(pl_str str, const char *fmt, ...); // Locale-invariant versions of append_(v)asprintf // // NOTE: These only support a small handful of modifiers. Check `format.c` // for a list. Calling them on an invalid string will abort! void pl_str_append_asprintf_c(void *alloc, pl_str *str, const char *fmt, ...) PL_PRINTF(3, 4); void pl_str_append_vasprintf_c(void *alloc, pl_str *str, const char *fmt, va_list va) PL_PRINTF(3, 0); // Variant of the above which takes arguments directly from a pointer in memory, // reading them incrementally (tightly packed). 
Returns the amount of bytes // read from `args`, as determined by the following table: // // %c: sizeof(char) // %d, %u: sizeof(int) // %f: sizeof(double) // %lld, %llu: sizeof(long long int) // %zu: sizeof(size_t) // %s: \0 terminated string // %.*s: sizeof(int) + that many bytes (no \0 terminator) size_t pl_str_append_memprintf_c(void *alloc, pl_str *str, const char *fmt, const void *args) PL_PRINTF(3, 0); // Locale-invariant number printing int pl_str_print_hex(char *buf, size_t len, unsigned short n); int pl_str_print_int(char *buf, size_t len, int n); int pl_str_print_uint(char *buf, size_t len, unsigned int n); int pl_str_print_int64(char *buf, size_t len, int64_t n); int pl_str_print_uint64(char *buf, size_t len, uint64_t n); int pl_str_print_float(char *buf, size_t len, float n); int pl_str_print_double(char *buf, size_t len, double n); // Locale-invariant number parsing bool pl_str_parse_hex(pl_str str, unsigned short *out); bool pl_str_parse_int(pl_str str, int *out); bool pl_str_parse_uint(pl_str str, unsigned int *out); bool pl_str_parse_int64(pl_str str, int64_t *out); bool pl_str_parse_uint64(pl_str str, uint64_t *out); bool pl_str_parse_float(pl_str str, float *out); bool pl_str_parse_double(pl_str str, double *out); // Variants of string.h functions int pl_strchr(pl_str str, int c); size_t pl_strspn(pl_str str, const char *accept); size_t pl_strcspn(pl_str str, const char *reject); // Strip leading/trailing whitespace pl_str pl_str_strip(pl_str str); // Generic functions for cutting up strings static inline pl_str pl_str_take(pl_str str, size_t len) { if (len < str.len) str.len = len; return str; } static inline pl_str pl_str_drop(pl_str str, size_t len) { if (len >= str.len) return (pl_str) { .buf = NULL, .len = 0 }; str.buf += len; str.len -= len; return str; } // Find a substring in another string, and return its index (or -1) int pl_str_find(pl_str haystack, pl_str needle); // String splitting functions. These return the part of the string before // the separator, and optionally the rest (in `out_rest`). // // Note that the separator is not included as part of either string. pl_str pl_str_split_char(pl_str str, char sep, pl_str *out_rest); pl_str pl_str_split_str(pl_str str, pl_str sep, pl_str *out_rest); // Like `pl_str_split_char`, but splits on any char in `seps` pl_str pl_str_split_chars(pl_str str, const char *seps, pl_str *out_rest); static inline pl_str pl_str_getline(pl_str str, pl_str *out_rest) { return pl_str_split_char(str, '\n', out_rest); } // Decode a string containing hexadecimal data. All whitespace will be silently // ignored. When successful, this allocates a new array to store the output. 
bool pl_str_decode_hex(void *alloc, pl_str hex, pl_str *out); static inline bool pl_str_equals(pl_str str1, pl_str str2) { if (str1.len != str2.len) return false; if (str1.buf == str2.buf || !str1.len) return true; return memcmp(str1.buf, str2.buf, str1.len) == 0; } static inline bool pl_str_startswith(pl_str str, pl_str prefix) { if (!prefix.len) return true; if (str.len < prefix.len) return false; return memcmp(str.buf, prefix.buf, prefix.len) == 0; } static inline bool pl_str_endswith(pl_str str, pl_str suffix) { if (!suffix.len) return true; if (str.len < suffix.len) return false; return memcmp(str.buf + str.len - suffix.len, suffix.buf, suffix.len) == 0; } static inline bool pl_str_eatstart(pl_str *str, pl_str prefix) { if (!pl_str_startswith(*str, prefix)) return false; str->buf += prefix.len; str->len -= prefix.len; return true; } static inline bool pl_str_eatend(pl_str *str, pl_str suffix) { if (!pl_str_endswith(*str, suffix)) return false; str->len -= suffix.len; return true; } // Convenience wrappers for the above which save the use of a pl_str0 static inline pl_str pl_str_split_str0(pl_str str, const char *sep, pl_str *out_rest) { return pl_str_split_str(str, pl_str0(sep), out_rest); } static inline bool pl_str_startswith0(pl_str str, const char *prefix) { return pl_str_startswith(str, pl_str0(prefix)); } static inline bool pl_str_endswith0(pl_str str, const char *suffix) { return pl_str_endswith(str, pl_str0(suffix)); } static inline bool pl_str_equals0(pl_str str1, const char *str2) { return pl_str_equals(str1, pl_str0(str2)); } static inline bool pl_str_eatstart0(pl_str *str, const char *prefix) { return pl_str_eatstart(str, pl_str0(prefix)); } static inline bool pl_str_eatend0(pl_str *str, const char *prefix) { return pl_str_eatend(str, pl_str0(prefix)); } // String building helpers, used to lazily construct a string by appending a // series of string templates which can be executed on-demand into a final // output buffer. typedef struct pl_str_builder_t *pl_str_builder; // Returns the number of bytes consumed from `args`. Be warned that the pointer // given will not necessarily be aligned to the type you need it as, so make // sure to use `memcpy` or some other method of safely loading arbitrary data // from memory. typedef size_t (*pl_str_template)(void *alloc, pl_str *buf, const uint8_t *args); pl_str_builder pl_str_builder_alloc(void *alloc); void pl_str_builder_free(pl_str_builder *builder); // Resets string builder without destroying buffer void pl_str_builder_reset(pl_str_builder builder); // Returns a representative hash of the string builder's output, without // actually executing it. Note that this is *not* the same as a pl_str_hash of // the string builder's output. // // Note also that the output of this may not survive a process restart because // of position-independent code and address randomization moving around the // locatons of template functions, so special care must be taken not to // compare such hashes across process invocations. uint64_t pl_str_builder_hash(const pl_str_builder builder); // Executes a string builder, dispatching all templates. The resulting string // is guaranteed to be \0-terminated, as a minor convenience. // // Calling any other `pl_str_builder_*` function on this builder causes the // contents of the returned string to become undefined. 
pl_str pl_str_builder_exec(pl_str_builder builder); // Append a template and its arguments to a string builder void pl_str_builder_append(pl_str_builder builder, pl_str_template tmpl, const void *args, size_t args_size); // Append an entire other `pl_str_builder` onto `builder` void pl_str_builder_concat(pl_str_builder builder, const pl_str_builder append); // Append a constant string. This will only record &str into the buffer, which // may have a number of unwanted consequences if the memory pointed at by // `str` mutates at any point in time in the future, or if `str` is not // at a stable location in memory. // // This is intended for strings which are compile-time constants. void pl_str_builder_const_str(pl_str_builder builder, const char *str); // Append a string. This will make a full copy of `str` void pl_str_builder_str(pl_str_builder builder, const pl_str str); #define pl_str_builder_str0(b, str) pl_str_builder_str(b, pl_str0(str)) // Append a string printf-style. This will preprocess `fmt` to determine the // number and type of arguments. Supports the same format conversion characters // as `pl_str_append_asprintf_c`. void pl_str_builder_printf_c(pl_str_builder builder, const char *fmt, ...) PL_PRINTF(2, 3); void pl_str_builder_vprintf_c(pl_str_builder builder, const char *fmt, va_list ap) PL_PRINTF(2, 0); // Helper macro to specialize `pl_str_builder_printf_c` to // `pl_str_builder_const_str` if it contains no format characters. #define pl_str_builder_addf(builder, ...) do \ { \ if (_contains_fmt_chars(__VA_ARGS__)) { \ pl_str_builder_printf_c(builder, __VA_ARGS__); \ } else { \ pl_str_builder_const_str(builder, _get_fmt(__VA_ARGS__)); \ } \ } while (0) // Helper macros to deal with the non-portability of __VA_OPT__(,) #define _contains_fmt_chars(fmt, ...) (strchr(fmt, '%')) #define _get_fmt(fmt, ...) fmt PL_API_END libplacebo-v7.349.0/src/pl_thread.h000066400000000000000000000043611463457750100171020ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #pragma once #include "os.h" enum pl_mutex_type { PL_MUTEX_NORMAL = 0, PL_MUTEX_RECURSIVE, }; #define pl_mutex_init(mutex) \ pl_mutex_init_type(mutex, PL_MUTEX_NORMAL) // Note: This is never compiled, and only documents the API. The actual // implementations of these prototypes may be macros. 
#ifdef PL_API_REFERENCE typedef void pl_mutex; void pl_mutex_init_type(pl_mutex *mutex, enum pl_mutex_type mtype); int pl_mutex_destroy(pl_mutex *mutex); int pl_mutex_lock(pl_mutex *mutex); int pl_mutex_unlock(pl_mutex *mutex); typedef void pl_cond; int pl_cond_init(pl_cond *cond); int pl_cond_destroy(pl_cond *cond); int pl_cond_broadcast(pl_cond *cond); int pl_cond_signal(pl_cond *cond); // `timeout` is in nanoseconds, or UINT64_MAX to block forever int pl_cond_timedwait(pl_cond *cond, pl_mutex *mutex, uint64_t timeout); int pl_cond_wait(pl_cond *cond, pl_mutex *mutex); typedef void pl_static_mutex; #define PL_STATIC_MUTEX_INITIALIZER int pl_static_mutex_lock(pl_static_mutex *mutex); int pl_static_mutex_unlock(pl_static_mutex *mutex); typedef void pl_thread; #define PL_THREAD_VOID void #define PL_THREAD_RETURN() return int pl_thread_create(pl_thread *thread, PL_THREAD_VOID (*fun)(void *), void *arg); int pl_thread_join(pl_thread thread); // Returns true if slept the full time, false otherwise bool pl_thread_sleep(double t); #endif // Actual platform-specific implementation #ifdef PL_HAVE_WIN32 #include "pl_thread_win32.h" #elif defined(PL_HAVE_PTHREAD) #include "pl_thread_pthread.h" #else #error No threading implementation available! #endif libplacebo-v7.349.0/src/pl_thread_pthread.h000066400000000000000000000073461463457750100206170ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . 
*/ #pragma once #include #include #include #include #include typedef pthread_mutex_t pl_mutex; typedef pthread_cond_t pl_cond; typedef pthread_mutex_t pl_static_mutex; typedef pthread_t pl_thread; #define PL_STATIC_MUTEX_INITIALIZER PTHREAD_MUTEX_INITIALIZER static inline int pl_mutex_init_type_internal(pl_mutex *mutex, enum pl_mutex_type mtype) { int mutex_type; switch (mtype) { case PL_MUTEX_RECURSIVE: mutex_type = PTHREAD_MUTEX_RECURSIVE; break; case PL_MUTEX_NORMAL: default: #ifndef NDEBUG mutex_type = PTHREAD_MUTEX_ERRORCHECK; #else mutex_type = PTHREAD_MUTEX_DEFAULT; #endif break; } int ret = 0; pthread_mutexattr_t attr; ret = pthread_mutexattr_init(&attr); if (ret != 0) return ret; pthread_mutexattr_settype(&attr, mutex_type); ret = pthread_mutex_init(mutex, &attr); pthread_mutexattr_destroy(&attr); return ret; } #define pl_mutex_init_type(mutex, mtype) \ pl_assert(!pl_mutex_init_type_internal(mutex, mtype)) #define pl_mutex_destroy pthread_mutex_destroy #define pl_mutex_lock pthread_mutex_lock #define pl_mutex_unlock pthread_mutex_unlock static inline int pl_cond_init(pl_cond *cond) { int ret = 0; pthread_condattr_t attr; ret = pthread_condattr_init(&attr); if (ret != 0) return ret; #ifdef PTHREAD_HAS_SETCLOCK pthread_condattr_setclock(&attr, CLOCK_MONOTONIC); #endif ret = pthread_cond_init(cond, &attr); pthread_condattr_destroy(&attr); return ret; } #define pl_cond_destroy pthread_cond_destroy #define pl_cond_broadcast pthread_cond_broadcast #define pl_cond_signal pthread_cond_signal #define pl_cond_wait pthread_cond_wait static inline int pl_cond_timedwait(pl_cond *cond, pl_mutex *mutex, uint64_t timeout) { if (timeout == UINT64_MAX) return pthread_cond_wait(cond, mutex); struct timespec ts; #ifdef PTHREAD_HAS_SETCLOCK if (clock_gettime(CLOCK_MONOTONIC, &ts) < 0) return errno; #else struct timeval tv; if (gettimeofday(&tv, NULL) < 0) // equivalent to CLOCK_REALTIME return errno; ts.tv_sec = tv.tv_sec; ts.tv_nsec = tv.tv_usec * 1000; #endif ts.tv_sec += timeout / 1000000000LLU; ts.tv_nsec += timeout % 1000000000LLU; if (ts.tv_nsec > 1000000000L) { ts.tv_nsec -= 1000000000L; ts.tv_sec++; } return pthread_cond_timedwait(cond, mutex, &ts); } #define pl_static_mutex_lock pthread_mutex_lock #define pl_static_mutex_unlock pthread_mutex_unlock #define PL_THREAD_VOID void * #define PL_THREAD_RETURN() return NULL #define pl_thread_create(t, f, a) pthread_create(t, NULL, f, a) #define pl_thread_join(t) pthread_join(t, NULL) static inline bool pl_thread_sleep(double t) { if (t <= 0.0) return true; struct timespec ts; ts.tv_sec = (time_t) t; ts.tv_nsec = (t - ts.tv_sec) * 1e9; return nanosleep(&ts, NULL) == 0; } libplacebo-v7.349.0/src/pl_thread_win32.h000066400000000000000000000106711463457750100201250ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . 
*/ #pragma once #include #include #include #include #include typedef CRITICAL_SECTION pl_mutex; typedef CONDITION_VARIABLE pl_cond; static inline int pl_mutex_init_type_internal(pl_mutex *mutex, enum pl_mutex_type mtype) { (void) mtype; return !InitializeCriticalSectionEx(mutex, 0, 0); } #define pl_mutex_init_type(mutex, mtype) \ pl_assert(!pl_mutex_init_type_internal(mutex, mtype)) static inline int pl_mutex_destroy(pl_mutex *mutex) { DeleteCriticalSection(mutex); return 0; } static inline int pl_mutex_lock(pl_mutex *mutex) { EnterCriticalSection(mutex); return 0; } static inline int pl_mutex_unlock(pl_mutex *mutex) { LeaveCriticalSection(mutex); return 0; } static inline int pl_cond_init(pl_cond *cond) { InitializeConditionVariable(cond); return 0; } static inline int pl_cond_destroy(pl_cond *cond) { // condition variables are not destroyed (void) cond; return 0; } static inline int pl_cond_broadcast(pl_cond *cond) { WakeAllConditionVariable(cond); return 0; } static inline int pl_cond_signal(pl_cond *cond) { WakeConditionVariable(cond); return 0; } static inline int pl_cond_wait(pl_cond *cond, pl_mutex *mutex) { return !SleepConditionVariableCS(cond, mutex, INFINITE); } static inline int pl_cond_timedwait(pl_cond *cond, pl_mutex *mutex, uint64_t timeout) { if (timeout == UINT64_MAX) return pl_cond_wait(cond, mutex); timeout /= UINT64_C(1000000); if (timeout > INFINITE - 1) timeout = INFINITE - 1; BOOL bRet = SleepConditionVariableCS(cond, mutex, timeout); if (bRet == FALSE) { if (GetLastError() == ERROR_TIMEOUT) return ETIMEDOUT; else return EINVAL; } return 0; } typedef SRWLOCK pl_static_mutex; #define PL_STATIC_MUTEX_INITIALIZER SRWLOCK_INIT static inline int pl_static_mutex_lock(pl_static_mutex *mutex) { AcquireSRWLockExclusive(mutex); return 0; } static inline int pl_static_mutex_unlock(pl_static_mutex *mutex) { ReleaseSRWLockExclusive(mutex); return 0; } typedef HANDLE pl_thread; #define PL_THREAD_VOID unsigned __stdcall #define PL_THREAD_RETURN() return 0 static inline int pl_thread_create(pl_thread *thread, PL_THREAD_VOID (*fun)(void *), void *__restrict arg) { *thread = (HANDLE) _beginthreadex(NULL, 0, fun, arg, 0, NULL); return *thread ? 0 : -1; } static inline int pl_thread_join(pl_thread thread) { DWORD ret = WaitForSingleObject(thread, INFINITE); if (ret != WAIT_OBJECT_0) return ret == WAIT_ABANDONED ? EINVAL : EDEADLK; CloseHandle(thread); return 0; } static inline bool pl_thread_sleep(double t) { // Time is expected in 100 nanosecond intervals. // Negative values indicate relative time. LARGE_INTEGER time = { .QuadPart = -(LONGLONG) (t * 1e7) }; if (time.QuadPart >= 0) return true; bool ret = false; #ifndef CREATE_WAITABLE_TIMER_HIGH_RESOLUTION # define CREATE_WAITABLE_TIMER_HIGH_RESOLUTION 0x2 #endif HANDLE timer = CreateWaitableTimerEx(NULL, NULL, CREATE_WAITABLE_TIMER_HIGH_RESOLUTION, TIMER_ALL_ACCESS); // CREATE_WAITABLE_TIMER_HIGH_RESOLUTION is supported in Windows 10 1803+, // retry without it. if (!timer) timer = CreateWaitableTimerEx(NULL, NULL, 0, TIMER_ALL_ACCESS); if (!timer) goto end; if (!SetWaitableTimer(timer, &time, 0, NULL, NULL, 0)) goto end; if (WaitForSingleObject(timer, INFINITE) != WAIT_OBJECT_0) goto end; ret = true; end: if (timer) CloseHandle(timer); return ret; } libplacebo-v7.349.0/src/renderer.c000066400000000000000000004212621463457750100167440ustar00rootroot00000000000000/* * This file is part of libplacebo. 
* * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #include #include "common.h" #include "filters.h" #include "hash.h" #include "shaders.h" #include "dispatch.h" #include struct cached_frame { uint64_t signature; uint64_t params_hash; // for detecting `pl_render_params` changes struct pl_color_space color; struct pl_icc_profile profile; pl_rect2df crop; pl_tex tex; int comps; bool evict; // for garbage collection }; struct sampler { pl_shader_obj upscaler_state; pl_shader_obj downscaler_state; }; struct osd_vertex { float pos[2]; float coord[2]; float color[4]; }; struct icc_state { pl_icc_object icc; uint64_t error; // set to profile signature on failure }; struct pl_renderer_t { pl_gpu gpu; pl_dispatch dp; pl_log log; // Cached feature checks (inverted) enum pl_render_error errors; // List containing signatures of disabled hooks PL_ARRAY(uint64_t) disabled_hooks; // Shader resource objects and intermediate textures (FBOs) pl_shader_obj tone_map_state; pl_shader_obj dither_state; pl_shader_obj grain_state[4]; pl_shader_obj lut_state[3]; pl_shader_obj icc_state[2]; PL_ARRAY(pl_tex) fbos; struct sampler sampler_main; struct sampler sampler_contrast; struct sampler samplers_src[4]; struct sampler samplers_dst[4]; // Temporary storage for vertex/index data PL_ARRAY(struct osd_vertex) osd_vertices; PL_ARRAY(uint16_t) osd_indices; struct pl_vertex_attrib osd_attribs[3]; // Frame cache (for frame mixing / interpolation) PL_ARRAY(struct cached_frame) frames; PL_ARRAY(pl_tex) frame_fbos; // For debugging / logging purposes int prev_dither; // For backwards compatibility struct icc_state icc_fallback[2]; }; enum { // Index into `lut_state` LUT_IMAGE, LUT_TARGET, LUT_PARAMS, }; enum { // Index into `icc_state` ICC_IMAGE, ICC_TARGET }; pl_renderer pl_renderer_create(pl_log log, pl_gpu gpu) { pl_renderer rr = pl_alloc_ptr(NULL, rr); *rr = (struct pl_renderer_t) { .gpu = gpu, .log = log, .dp = pl_dispatch_create(log, gpu), .osd_attribs = { { .name = "pos", .offset = offsetof(struct osd_vertex, pos), .fmt = pl_find_vertex_fmt(gpu, PL_FMT_FLOAT, 2), }, { .name = "coord", .offset = offsetof(struct osd_vertex, coord), .fmt = pl_find_vertex_fmt(gpu, PL_FMT_FLOAT, 2), }, { .name = "osd_color", .offset = offsetof(struct osd_vertex, color), .fmt = pl_find_vertex_fmt(gpu, PL_FMT_FLOAT, 4), } }, }; assert(rr->dp); return rr; } static void sampler_destroy(pl_renderer rr, struct sampler *sampler) { pl_shader_obj_destroy(&sampler->upscaler_state); pl_shader_obj_destroy(&sampler->downscaler_state); } void pl_renderer_destroy(pl_renderer *p_rr) { pl_renderer rr = *p_rr; if (!rr) return; // Free all intermediate FBOs for (int i = 0; i < rr->fbos.num; i++) pl_tex_destroy(rr->gpu, &rr->fbos.elem[i]); for (int i = 0; i < rr->frames.num; i++) pl_tex_destroy(rr->gpu, &rr->frames.elem[i].tex); for (int i = 0; i < rr->frame_fbos.num; i++) pl_tex_destroy(rr->gpu, &rr->frame_fbos.elem[i]); // Free all shader resource objects 
pl_shader_obj_destroy(&rr->tone_map_state); pl_shader_obj_destroy(&rr->dither_state); for (int i = 0; i < PL_ARRAY_SIZE(rr->lut_state); i++) pl_shader_obj_destroy(&rr->lut_state[i]); for (int i = 0; i < PL_ARRAY_SIZE(rr->grain_state); i++) pl_shader_obj_destroy(&rr->grain_state[i]); for (int i = 0; i < PL_ARRAY_SIZE(rr->icc_state); i++) pl_shader_obj_destroy(&rr->icc_state[i]); // Free all samplers sampler_destroy(rr, &rr->sampler_main); sampler_destroy(rr, &rr->sampler_contrast); for (int i = 0; i < PL_ARRAY_SIZE(rr->samplers_src); i++) sampler_destroy(rr, &rr->samplers_src[i]); for (int i = 0; i < PL_ARRAY_SIZE(rr->samplers_dst); i++) sampler_destroy(rr, &rr->samplers_dst[i]); // Free fallback ICC profiles for (int i = 0; i < PL_ARRAY_SIZE(rr->icc_fallback); i++) pl_icc_close(&rr->icc_fallback[i].icc); pl_dispatch_destroy(&rr->dp); pl_free_ptr(p_rr); } size_t pl_renderer_save(pl_renderer rr, uint8_t *out) { return pl_cache_save(pl_gpu_cache(rr->gpu), out, out ? SIZE_MAX : 0); } void pl_renderer_load(pl_renderer rr, const uint8_t *cache) { pl_cache_load(pl_gpu_cache(rr->gpu), cache, SIZE_MAX); } void pl_renderer_flush_cache(pl_renderer rr) { for (int i = 0; i < rr->frames.num; i++) pl_tex_destroy(rr->gpu, &rr->frames.elem[i].tex); rr->frames.num = 0; pl_reset_detected_peak(rr->tone_map_state); } const struct pl_render_params pl_render_fast_params = { PL_RENDER_DEFAULTS }; const struct pl_render_params pl_render_default_params = { PL_RENDER_DEFAULTS .upscaler = &pl_filter_lanczos, .downscaler = &pl_filter_hermite, .frame_mixer = &pl_filter_oversample, .sigmoid_params = &pl_sigmoid_default_params, .dither_params = &pl_dither_default_params, .peak_detect_params = &pl_peak_detect_default_params, }; const struct pl_render_params pl_render_high_quality_params = { PL_RENDER_DEFAULTS .upscaler = &pl_filter_ewa_lanczossharp, .downscaler = &pl_filter_hermite, .frame_mixer = &pl_filter_oversample, .sigmoid_params = &pl_sigmoid_default_params, .peak_detect_params = &pl_peak_detect_high_quality_params, .color_map_params = &pl_color_map_high_quality_params, .dither_params = &pl_dither_default_params, .deband_params = &pl_deband_default_params, }; const struct pl_filter_preset pl_frame_mixers[] = { { "none", NULL, "No frame mixing" }, { "linear", &pl_filter_bilinear, "Linear frame mixing" }, { "oversample", &pl_filter_oversample, "Oversample (AKA SmoothMotion)" }, { "mitchell_clamp", &pl_filter_mitchell_clamp, "Clamped Mitchell spline" }, { "hermite", &pl_filter_hermite, "Cubic spline (Hermite)" }, {0} }; const int pl_num_frame_mixers = PL_ARRAY_SIZE(pl_frame_mixers) - 1; const struct pl_filter_preset pl_scale_filters[] = { {"none", NULL, "Built-in sampling"}, {"oversample", &pl_filter_oversample, "Oversample (Aspect-preserving NN)"}, COMMON_FILTER_PRESETS, {0} }; const int pl_num_scale_filters = PL_ARRAY_SIZE(pl_scale_filters) - 1; // Represents a "in-flight" image, which is either a shader that's in the // process of producing some sort of image, or a texture that needs to be // sampled from struct img { // Effective texture size, always set int w, h; // Recommended format (falls back to fbofmt otherwise), only for shaders pl_fmt fmt; // Exactly *one* of these two is set: pl_shader sh; pl_tex tex; // If true, created shaders will be set to unique bool unique; // Information about what to log/disable/fallback to if the shader fails const char *err_msg; enum pl_render_error err_enum; pl_tex err_tex; // Current effective source area, will be sampled by the main scaler pl_rect2df rect; // The current 
effective colorspace struct pl_color_repr repr; struct pl_color_space color; int comps; }; // Plane 'type', ordered by incrementing priority enum plane_type { PLANE_INVALID = 0, PLANE_ALPHA, PLANE_CHROMA, PLANE_LUMA, PLANE_RGB, PLANE_XYZ, }; static inline enum plane_type detect_plane_type(const struct pl_plane *plane, const struct pl_color_repr *repr) { if (pl_color_system_is_ycbcr_like(repr->sys)) { int t = PLANE_INVALID; for (int c = 0; c < plane->components; c++) { switch (plane->component_mapping[c]) { case PL_CHANNEL_Y: t = PL_MAX(t, PLANE_LUMA); continue; case PL_CHANNEL_A: t = PL_MAX(t, PLANE_ALPHA); continue; case PL_CHANNEL_CB: case PL_CHANNEL_CR: t = PL_MAX(t, PLANE_CHROMA); continue; default: continue; } } pl_assert(t); return t; } // Extra test for exclusive / separated alpha plane if (plane->components == 1 && plane->component_mapping[0] == PL_CHANNEL_A) return PLANE_ALPHA; switch (repr->sys) { case PL_COLOR_SYSTEM_UNKNOWN: // fall through to RGB case PL_COLOR_SYSTEM_RGB: return PLANE_RGB; case PL_COLOR_SYSTEM_XYZ: return PLANE_XYZ; // For the switch completeness check case PL_COLOR_SYSTEM_BT_601: case PL_COLOR_SYSTEM_BT_709: case PL_COLOR_SYSTEM_SMPTE_240M: case PL_COLOR_SYSTEM_BT_2020_NC: case PL_COLOR_SYSTEM_BT_2020_C: case PL_COLOR_SYSTEM_BT_2100_PQ: case PL_COLOR_SYSTEM_BT_2100_HLG: case PL_COLOR_SYSTEM_DOLBYVISION: case PL_COLOR_SYSTEM_YCGCO: case PL_COLOR_SYSTEM_COUNT: break; } pl_unreachable(); } struct pass_state { void *tmp; pl_renderer rr; const struct pl_render_params *params; struct pl_render_info info; // for info callback // Represents the "current" image which we're in the process of rendering. // This is initially set by pass_read_image, and all of the subsequent // rendering steps will mutate this in-place. struct img img; // Represents the "reference rect". Canonically, this is functionally // equivalent to `image.crop`, but also updates as the refplane evolves // (e.g. due to user hook prescalers) pl_rect2df ref_rect; // Integer version of `target.crop`. Semantically identical. pl_rect2d dst_rect; // Logical end-to-end rotation pl_rotation rotation; // Cached copies of the `image` / `target` for this rendering pass, // corrected to make sure all rects etc. are properly defaulted/inferred. struct pl_frame image; struct pl_frame target; // Cached copies of the `prev` / `next` frames, for deinterlacing. 
struct pl_frame prev, next; // Some extra plane metadata, inferred from `planes` enum plane_type src_type[4]; int src_ref, dst_ref; // index into `planes` // Metadata for `rr->fbos` pl_fmt fbofmt[5]; bool *fbos_used; bool need_peak_fbo; // need indirection for peak detection // Map of acquired frames struct { bool target, image, prev, next; } acquired; }; static void find_fbo_format(struct pass_state *pass) { const struct pl_render_params *params = pass->params; pl_renderer rr = pass->rr; if (params->disable_fbos || (rr->errors & PL_RENDER_ERR_FBO) || pass->fbofmt[4]) return; struct { enum pl_fmt_type type; int depth; enum pl_fmt_caps caps; } configs[] = { // Prefer floating point formats first {PL_FMT_FLOAT, 16, PL_FMT_CAP_LINEAR}, {PL_FMT_FLOAT, 16, PL_FMT_CAP_SAMPLEABLE}, // Otherwise, fall back to unorm/snorm, preferring linearly sampleable {PL_FMT_UNORM, 16, PL_FMT_CAP_LINEAR}, {PL_FMT_SNORM, 16, PL_FMT_CAP_LINEAR}, {PL_FMT_UNORM, 16, PL_FMT_CAP_SAMPLEABLE}, {PL_FMT_SNORM, 16, PL_FMT_CAP_SAMPLEABLE}, // As a final fallback, allow 8-bit FBO formats (for UNORM only) {PL_FMT_UNORM, 8, PL_FMT_CAP_LINEAR}, {PL_FMT_UNORM, 8, PL_FMT_CAP_SAMPLEABLE}, }; pl_fmt fmt = NULL; for (int i = 0; i < PL_ARRAY_SIZE(configs); i++) { if (params->force_low_bit_depth_fbos && configs[i].depth > 8) continue; fmt = pl_find_fmt(rr->gpu, configs[i].type, 4, configs[i].depth, 0, PL_FMT_CAP_RENDERABLE | configs[i].caps); if (!fmt) continue; pass->fbofmt[4] = fmt; // Probe the right variant for each number of channels, falling // back to the next biggest format for (int c = 1; c < 4; c++) { pass->fbofmt[c] = pl_find_fmt(rr->gpu, configs[i].type, c, configs[i].depth, 0, fmt->caps); pass->fbofmt[c] = PL_DEF(pass->fbofmt[c], pass->fbofmt[c+1]); } return; } PL_WARN(rr, "Found no renderable FBO format! Most features disabled"); rr->errors |= PL_RENDER_ERR_FBO; } static void info_callback(void *priv, const struct pl_dispatch_info *dinfo) { struct pass_state *pass = priv; const struct pl_render_params *params = pass->params; if (!params->info_callback) return; pass->info.pass = dinfo; params->info_callback(params->info_priv, &pass->info); pass->info.index++; } static pl_tex get_fbo(struct pass_state *pass, int w, int h, pl_fmt fmt, int comps, pl_debug_tag debug_tag) { pl_renderer rr = pass->rr; comps = PL_DEF(comps, 4); fmt = PL_DEF(fmt, pass->fbofmt[comps]); if (!fmt) return NULL; struct pl_tex_params params = { .w = w, .h = h, .format = fmt, .sampleable = true, .renderable = true, .blit_src = fmt->caps & PL_FMT_CAP_BLITTABLE, .storable = fmt->caps & PL_FMT_CAP_STORABLE, .debug_tag = debug_tag, }; int best_idx = -1; int best_diff = 0; // Find the best-fitting texture out of rr->fbos for (int i = 0; i < rr->fbos.num; i++) { if (pass->fbos_used[i]) continue; // Orthogonal distance, with penalty for format mismatches int diff = abs(rr->fbos.elem[i]->params.w - w) + abs(rr->fbos.elem[i]->params.h - h) + ((rr->fbos.elem[i]->params.format != fmt) ? 
1000 : 0); if (best_idx < 0 || diff < best_diff) { best_idx = i; best_diff = diff; } } // No texture found at all, add a new one if (best_idx < 0) { best_idx = rr->fbos.num; PL_ARRAY_APPEND(rr, rr->fbos, NULL); pl_grow(pass->tmp, &pass->fbos_used, rr->fbos.num * sizeof(bool)); pass->fbos_used[best_idx] = false; } if (!pl_tex_recreate(rr->gpu, &rr->fbos.elem[best_idx], ¶ms)) return NULL; pass->fbos_used[best_idx] = true; return rr->fbos.elem[best_idx]; } // Forcibly convert an img to `tex`, dispatching where necessary static pl_tex _img_tex(struct pass_state *pass, struct img *img, pl_debug_tag tag) { if (img->tex) { pl_assert(!img->sh); return img->tex; } pl_renderer rr = pass->rr; pl_tex tex = get_fbo(pass, img->w, img->h, img->fmt, img->comps, tag); img->fmt = NULL; if (!tex) { PL_ERR(rr, "Failed creating FBO texture! Disabling advanced rendering.."); memset(pass->fbofmt, 0, sizeof(pass->fbofmt)); pl_dispatch_abort(rr->dp, &img->sh); rr->errors |= PL_RENDER_ERR_FBO; return img->err_tex; } pl_assert(img->sh); bool ok = pl_dispatch_finish(rr->dp, pl_dispatch_params( .shader = &img->sh, .target = tex, )); const char *err_msg = img->err_msg; enum pl_render_error err_enum = img->err_enum; pl_tex err_tex = img->err_tex; img->err_msg = NULL; img->err_enum = PL_RENDER_ERR_NONE; img->err_tex = NULL; if (!ok) { PL_ERR(rr, "%s", PL_DEF(err_msg, "Failed dispatching intermediate pass!")); rr->errors |= err_enum; img->sh = pl_dispatch_begin(rr->dp); img->tex = err_tex; return img->tex; } img->tex = tex; return img->tex; } #define img_tex(pass, img) _img_tex(pass, img, PL_DEBUG_TAG) // Forcibly convert an img to `sh`, sampling where necessary static pl_shader img_sh(struct pass_state *pass, struct img *img) { if (img->sh) { pl_assert(!img->tex); return img->sh; } pl_assert(img->tex); img->sh = pl_dispatch_begin_ex(pass->rr->dp, img->unique); pl_shader_sample_direct(img->sh, pl_sample_src( .tex = img->tex )); img->tex = NULL; return img->sh; } enum sampler_type { SAMPLER_DIRECT, // pick based on texture caps SAMPLER_NEAREST, // direct sampling, force nearest SAMPLER_BICUBIC, // fast bicubic scaling SAMPLER_HERMITE, // fast hermite scaling SAMPLER_GAUSSIAN, // fast gaussian scaling SAMPLER_COMPLEX, // complex custom filters SAMPLER_OVERSAMPLE, }; enum sampler_dir { SAMPLER_NOOP, // 1:1 scaling SAMPLER_UP, // upscaling SAMPLER_DOWN, // downscaling }; enum sampler_usage { SAMPLER_MAIN, SAMPLER_PLANE, SAMPLER_CONTRAST, }; struct sampler_info { const struct pl_filter_config *config; // if applicable enum sampler_usage usage; enum sampler_type type; enum sampler_dir dir; enum sampler_dir dir_sep[2]; }; static struct sampler_info sample_src_info(struct pass_state *pass, const struct pl_sample_src *src, enum sampler_usage usage) { const struct pl_render_params *params = pass->params; struct sampler_info info = { .usage = usage }; pl_renderer rr = pass->rr; float rx = src->new_w / fabsf(pl_rect_w(src->rect)); if (rx < 1.0 - 1e-6) { info.dir_sep[0] = SAMPLER_DOWN; } else if (rx > 1.0 + 1e-6) { info.dir_sep[0] = SAMPLER_UP; } float ry = src->new_h / fabsf(pl_rect_h(src->rect)); if (ry < 1.0 - 1e-6) { info.dir_sep[1] = SAMPLER_DOWN; } else if (ry > 1.0 + 1e-6) { info.dir_sep[1] = SAMPLER_UP; } if (params->correct_subpixel_offsets) { if (!info.dir_sep[0] && fabsf(src->rect.x0) > 1e-6f) info.dir_sep[0] = SAMPLER_UP; if (!info.dir_sep[1] && fabsf(src->rect.y0) > 1e-6f) info.dir_sep[1] = SAMPLER_UP; } // We use PL_MAX so downscaling overrides upscaling when choosing scalers info.dir = PL_MAX(info.dir_sep[0], 
info.dir_sep[1]); switch (info.dir) { case SAMPLER_DOWN: if (usage == SAMPLER_CONTRAST) { info.config = &pl_filter_bicubic; } else if (usage == SAMPLER_PLANE && params->plane_downscaler) { info.config = params->plane_downscaler; } else { info.config = params->downscaler; } break; case SAMPLER_UP: if (usage == SAMPLER_PLANE && params->plane_upscaler) { info.config = params->plane_upscaler; } else { pl_assert(usage != SAMPLER_CONTRAST); info.config = params->upscaler; } break; case SAMPLER_NOOP: info.type = SAMPLER_NEAREST; return info; } if ((rr->errors & PL_RENDER_ERR_SAMPLING) || !info.config) { info.type = SAMPLER_DIRECT; } else if (info.config->kernel == &pl_filter_function_oversample) { info.type = SAMPLER_OVERSAMPLE; } else { info.type = SAMPLER_COMPLEX; // Try using faster replacements for GPU built-in scalers pl_fmt texfmt = src->tex ? src->tex->params.format : pass->fbofmt[4]; bool can_linear = texfmt->caps & PL_FMT_CAP_LINEAR; bool can_fast = info.dir == SAMPLER_UP || params->skip_anti_aliasing; if (can_fast && !params->disable_builtin_scalers) { if (can_linear && pl_filter_config_eq(info.config, &pl_filter_bicubic)) info.type = SAMPLER_BICUBIC; if (can_linear && pl_filter_config_eq(info.config, &pl_filter_hermite)) info.type = SAMPLER_HERMITE; if (can_linear && pl_filter_config_eq(info.config, &pl_filter_gaussian)) info.type = SAMPLER_GAUSSIAN; if (can_linear && pl_filter_config_eq(info.config, &pl_filter_bilinear)) info.type = SAMPLER_DIRECT; if (pl_filter_config_eq(info.config, &pl_filter_nearest)) info.type = can_linear ? SAMPLER_NEAREST : SAMPLER_DIRECT; } } // Disable advanced scaling without FBOs if (!pass->fbofmt[4] && info.type == SAMPLER_COMPLEX) info.type = SAMPLER_DIRECT; return info; } static void dispatch_sampler(struct pass_state *pass, pl_shader sh, struct sampler *sampler, enum sampler_usage usage, pl_tex target_tex, const struct pl_sample_src *src) { const struct pl_render_params *params = pass->params; if (!sampler) goto fallback; pl_renderer rr = pass->rr; struct sampler_info info = sample_src_info(pass, src, usage); pl_shader_obj *lut = NULL; switch (info.dir) { case SAMPLER_NOOP: goto fallback; case SAMPLER_DOWN: lut = &sampler->downscaler_state; break; case SAMPLER_UP: lut = &sampler->upscaler_state; break; } switch (info.type) { case SAMPLER_DIRECT: goto fallback; case SAMPLER_NEAREST: pl_shader_sample_nearest(sh, src); return; case SAMPLER_OVERSAMPLE: pl_shader_sample_oversample(sh, src, info.config->kernel->params[0]); return; case SAMPLER_BICUBIC: pl_shader_sample_bicubic(sh, src); return; case SAMPLER_HERMITE: pl_shader_sample_hermite(sh, src); return; case SAMPLER_GAUSSIAN: pl_shader_sample_gaussian(sh, src); return; case SAMPLER_COMPLEX: break; // continue below } pl_assert(lut); struct pl_sample_filter_params fparams = { .filter = *info.config, .antiring = params->antiringing_strength, .no_widening = params->skip_anti_aliasing && usage != SAMPLER_CONTRAST, .lut = lut, }; if (target_tex) { fparams.no_compute = !target_tex->params.storable; } else { fparams.no_compute = !(pass->fbofmt[4]->caps & PL_FMT_CAP_STORABLE); } bool ok; if (info.config->polar) { // Polar samplers are always a single function call ok = pl_shader_sample_polar(sh, src, &fparams); } else if (info.dir_sep[0] && info.dir_sep[1]) { // Scaling is needed in both directions struct pl_sample_src src1 = *src, src2 = *src; src1.new_w = src->tex->params.w; src1.rect.x0 = 0; src1.rect.x1 = src1.new_w;; src2.rect.y0 = 0; src2.rect.y1 = src1.new_h; pl_shader tsh = pl_dispatch_begin(rr->dp); ok 
= pl_shader_sample_ortho2(tsh, &src1, &fparams); if (!ok) { pl_dispatch_abort(rr->dp, &tsh); goto done; } struct img img = { .sh = tsh, .w = src1.new_w, .h = src1.new_h, .comps = src->components, }; src2.tex = img_tex(pass, &img); src2.scale = 1.0; ok = src2.tex && pl_shader_sample_ortho2(sh, &src2, &fparams); } else { // Scaling is needed only in one direction ok = pl_shader_sample_ortho2(sh, src, &fparams); } done: if (!ok) { PL_ERR(rr, "Failed dispatching scaler.. disabling"); rr->errors |= PL_RENDER_ERR_SAMPLING; goto fallback; } return; fallback: // If all else fails, fall back to auto sampling pl_shader_sample_direct(sh, src); } static void swizzle_color(pl_shader sh, int comps, const int comp_map[4], bool force_alpha) { ident_t orig = sh_fresh(sh, "orig_color"); GLSL("vec4 "$" = color; \n" "color = vec4(0.0, 0.0, 0.0, 1.0); \n", orig); static const int def_map[4] = {0, 1, 2, 3}; comp_map = PL_DEF(comp_map, def_map); for (int c = 0; c < comps; c++) { if (comp_map[c] >= 0) GLSL("color[%d] = "$"[%d]; \n", c, orig, comp_map[c]); } if (force_alpha) GLSL("color.a = "$".a; \n", orig); } // `scale` adapts from `pass->dst_rect` to the plane being rendered to static void draw_overlays(struct pass_state *pass, pl_tex fbo, int comps, const int comp_map[4], const struct pl_overlay *overlays, int num, struct pl_color_space color, struct pl_color_repr repr, const pl_transform2x2 *output_shift) { pl_renderer rr = pass->rr; if (num <= 0 || (rr->errors & PL_RENDER_ERR_OVERLAY)) return; enum pl_fmt_caps caps = fbo->params.format->caps; if (!(rr->errors & PL_RENDER_ERR_BLENDING) && !(caps & PL_FMT_CAP_BLENDABLE)) { PL_WARN(rr, "Trying to draw an overlay to a non-blendable target. " "Alpha blending is disabled, results may be incorrect!"); rr->errors |= PL_RENDER_ERR_BLENDING; } const struct pl_frame *image = pass->src_ref >= 0 ? &pass->image : NULL; pl_transform2x2 src_to_dst; if (image) { float rx = pl_rect_w(pass->dst_rect) / pl_rect_w(image->crop); float ry = pl_rect_h(pass->dst_rect) / pl_rect_h(image->crop); src_to_dst = (pl_transform2x2) { .mat.m = {{ rx, 0 }, { 0, ry }}, .c = { pass->dst_rect.x0 - rx * image->crop.x0, pass->dst_rect.y0 - ry * image->crop.y0, }, }; if (pass->rotation % PL_ROTATION_180 == PL_ROTATION_90) { PL_SWAP(src_to_dst.c[0], src_to_dst.c[1]); src_to_dst.mat = (pl_matrix2x2) {{{ 0, ry }, { rx, 0 }}}; } } const struct pl_frame *target = &pass->target; pl_rect2df dst_crop = target->crop; pl_rect2df_rotate(&dst_crop, -pass->rotation); pl_rect2df_normalize(&dst_crop); for (int n = 0; n < num; n++) { struct pl_overlay ol = overlays[n]; if (!ol.num_parts) continue; if (!ol.coords) { ol.coords = overlays == target->overlays ? 
PL_OVERLAY_COORDS_DST_FRAME : PL_OVERLAY_COORDS_SRC_FRAME; } pl_transform2x2 tf = pl_transform2x2_identity; switch (ol.coords) { case PL_OVERLAY_COORDS_SRC_CROP: if (!image) continue; tf.c[0] = image->crop.x0; tf.c[1] = image->crop.y0; // fall through case PL_OVERLAY_COORDS_SRC_FRAME: if (!image) continue; pl_transform2x2_rmul(&src_to_dst, &tf); break; case PL_OVERLAY_COORDS_DST_CROP: tf.c[0] = dst_crop.x0; tf.c[1] = dst_crop.y0; break; case PL_OVERLAY_COORDS_DST_FRAME: break; case PL_OVERLAY_COORDS_AUTO: case PL_OVERLAY_COORDS_COUNT: pl_unreachable(); } if (output_shift) pl_transform2x2_rmul(output_shift, &tf); // Construct vertex/index buffers rr->osd_vertices.num = 0; rr->osd_indices.num = 0; for (int i = 0; i < ol.num_parts; i++) { const struct pl_overlay_part *part = &ol.parts[i]; #define EMIT_VERT(x, y) \ do { \ float pos[2] = { part->dst.x, part->dst.y }; \ pl_transform2x2_apply(&tf, pos); \ PL_ARRAY_APPEND(rr, rr->osd_vertices, (struct osd_vertex) { \ .pos = { \ 2.0 * (pos[0] / fbo->params.w) - 1.0, \ 2.0 * (pos[1] / fbo->params.h) - 1.0, \ }, \ .coord = { \ part->src.x / ol.tex->params.w, \ part->src.y / ol.tex->params.h, \ }, \ .color = { \ part->color[0], part->color[1], \ part->color[2], part->color[3], \ }, \ }); \ } while (0) int idx_base = rr->osd_vertices.num; EMIT_VERT(x0, y0); // idx 0: top left EMIT_VERT(x1, y0); // idx 1: top right EMIT_VERT(x0, y1); // idx 2: bottom left EMIT_VERT(x1, y1); // idx 3: bottom right PL_ARRAY_APPEND(rr, rr->osd_indices, idx_base + 0); PL_ARRAY_APPEND(rr, rr->osd_indices, idx_base + 1); PL_ARRAY_APPEND(rr, rr->osd_indices, idx_base + 2); PL_ARRAY_APPEND(rr, rr->osd_indices, idx_base + 2); PL_ARRAY_APPEND(rr, rr->osd_indices, idx_base + 1); PL_ARRAY_APPEND(rr, rr->osd_indices, idx_base + 3); } // Draw parts pl_shader sh = pl_dispatch_begin(rr->dp); ident_t tex = sh_desc(sh, (struct pl_shader_desc) { .desc = { .name = "osd_tex", .type = PL_DESC_SAMPLED_TEX, }, .binding = { .object = ol.tex, .sample_mode = (ol.tex->params.format->caps & PL_FMT_CAP_LINEAR) ? PL_TEX_SAMPLE_LINEAR : PL_TEX_SAMPLE_NEAREST, }, }); sh_describe(sh, "overlay"); GLSL("// overlay \n"); switch (ol.mode) { case PL_OVERLAY_NORMAL: GLSL("vec4 color = textureLod("$", coord, 0.0); \n", tex); break; case PL_OVERLAY_MONOCHROME: GLSL("vec4 color = osd_color; \n"); break; case PL_OVERLAY_MODE_COUNT: pl_unreachable(); }; static const struct pl_color_map_params osd_params = { PL_COLOR_MAP_DEFAULTS .tone_mapping_function = &pl_tone_map_linear, .gamut_mapping = &pl_gamut_map_saturation, }; sh->output = PL_SHADER_SIG_COLOR; pl_shader_decode_color(sh, &ol.repr, NULL); if (target->icc) color.transfer = PL_COLOR_TRC_LINEAR; pl_shader_color_map_ex(sh, &osd_params, pl_color_map_args(ol.color, color)); if (target->icc) pl_icc_encode(sh, target->icc, &rr->icc_state[ICC_TARGET]); bool premul = repr.alpha == PL_ALPHA_PREMULTIPLIED; pl_shader_encode_color(sh, &repr); if (ol.mode == PL_OVERLAY_MONOCHROME) { GLSL("color.%s *= textureLod("$", coord, 0.0).r; \n", premul ? "rgba" : "a", tex); } swizzle_color(sh, comps, comp_map, true); struct pl_blend_params blend_params = { .src_rgb = premul ? PL_BLEND_ONE : PL_BLEND_SRC_ALPHA, .src_alpha = PL_BLEND_ONE, .dst_rgb = PL_BLEND_ONE_MINUS_SRC_ALPHA, .dst_alpha = PL_BLEND_ONE_MINUS_SRC_ALPHA, }; bool ok = pl_dispatch_vertex(rr->dp, pl_dispatch_vertex_params( .shader = &sh, .target = fbo, .blend_params = (rr->errors & PL_RENDER_ERR_BLENDING) ? 
NULL : &blend_params, .vertex_stride = sizeof(struct osd_vertex), .num_vertex_attribs = ol.mode == PL_OVERLAY_NORMAL ? 2 : 3, .vertex_attribs = rr->osd_attribs, .vertex_position_idx = 0, .vertex_coords = PL_COORDS_NORMALIZED, .vertex_type = PL_PRIM_TRIANGLE_LIST, .vertex_count = rr->osd_indices.num, .vertex_data = rr->osd_vertices.elem, .index_data = rr->osd_indices.elem, )); if (!ok) { PL_ERR(rr, "Failed rendering overlays!"); rr->errors |= PL_RENDER_ERR_OVERLAY; return; } } } static pl_tex get_hook_tex(void *priv, int width, int height) { struct pass_state *pass = priv; return get_fbo(pass, width, height, NULL, 4, PL_DEBUG_TAG); } // Returns if any hook was applied (even if there were errors) static bool pass_hook(struct pass_state *pass, struct img *img, enum pl_hook_stage stage) { const struct pl_render_params *params = pass->params; pl_renderer rr = pass->rr; if (!pass->fbofmt[4] || !stage) return false; bool ret = false; for (int n = 0; n < params->num_hooks; n++) { const struct pl_hook *hook = params->hooks[n]; if (!(hook->stages & stage)) continue; // Hopefully the list of disabled hooks is small, search linearly. for (int i = 0; i < rr->disabled_hooks.num; i++) { if (rr->disabled_hooks.elem[i] != hook->signature) continue; PL_TRACE(rr, "Skipping hook %d (0x%"PRIx64") stage 0x%x", n, hook->signature, stage); goto hook_skip; } PL_TRACE(rr, "Dispatching hook %d (0x%"PRIx64") stage 0x%x", n, hook->signature, stage); struct pl_hook_params hparams = { .gpu = rr->gpu, .dispatch = rr->dp, .get_tex = get_hook_tex, .priv = pass, .stage = stage, .rect = img->rect, .repr = img->repr, .color = img->color, .orig_repr = &pass->image.repr, .orig_color = &pass->image.color, .components = img->comps, .src_rect = pass->ref_rect, .dst_rect = pass->dst_rect, }; // TODO: Add some sort of `test` API function to the hooks that allows // us to skip having to touch the `img` state at all for no-ops switch (hook->input) { case PL_HOOK_SIG_NONE: break; case PL_HOOK_SIG_TEX: { hparams.tex = img_tex(pass, img); if (!hparams.tex) { PL_ERR(rr, "Failed dispatching shader prior to hook!"); goto hook_error; } break; } case PL_HOOK_SIG_COLOR: hparams.sh = img_sh(pass, img); break; case PL_HOOK_SIG_COUNT: pl_unreachable(); } struct pl_hook_res res = hook->hook(hook->priv, &hparams); if (res.failed) { PL_ERR(rr, "Failed executing hook, disabling"); goto hook_error; } bool resizable = pl_hook_stage_resizable(stage); switch (res.output) { case PL_HOOK_SIG_NONE: break; case PL_HOOK_SIG_TEX: if (!resizable) { if (res.tex->params.w != img->w || res.tex->params.h != img->h || !pl_rect2d_eq(res.rect, img->rect)) { PL_ERR(rr, "User hook tried resizing non-resizable stage!"); goto hook_error; } } *img = (struct img) { .tex = res.tex, .repr = res.repr, .color = res.color, .comps = res.components, .rect = res.rect, .w = res.tex->params.w, .h = res.tex->params.h, .unique = img->unique, }; break; case PL_HOOK_SIG_COLOR: if (!resizable) { if (res.sh->output_w != img->w || res.sh->output_h != img->h || !pl_rect2d_eq(res.rect, img->rect)) { PL_ERR(rr, "User hook tried resizing non-resizable stage!"); goto hook_error; } } *img = (struct img) { .sh = res.sh, .repr = res.repr, .color = res.color, .comps = res.components, .rect = res.rect, .w = res.sh->output_w, .h = res.sh->output_h, .unique = img->unique, .err_enum = PL_RENDER_ERR_HOOKS, .err_msg = "Failed applying user hook", .err_tex = hparams.tex, // if any }; break; case PL_HOOK_SIG_COUNT: pl_unreachable(); } // a hook was performed successfully ret = true; hook_skip: continue; 
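// Error path (label below): remember this hook's signature in
// `rr->disabled_hooks` so the linear search at the top of the loop skips it
// on subsequent frames, and record PL_RENDER_ERR_HOOKS on the renderer.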
hook_error: PL_ARRAY_APPEND(rr, rr->disabled_hooks, hook->signature); rr->errors |= PL_RENDER_ERR_HOOKS; } // Make sure the state remains as valid as possible, even if the resulting // shaders might end up nonsensical, to prevent segfaults if (!img->tex && !img->sh) img->sh = pl_dispatch_begin(rr->dp); return ret; } static void hdr_update_peak(struct pass_state *pass) { const struct pl_render_params *params = pass->params; pl_renderer rr = pass->rr; if (!params->peak_detect_params || !pl_color_space_is_hdr(&pass->img.color)) goto cleanup; if (rr->errors & PL_RENDER_ERR_PEAK_DETECT) goto cleanup; if (pass->fbofmt[4] && !(pass->fbofmt[4]->caps & PL_FMT_CAP_STORABLE)) goto cleanup; if (!rr->gpu->limits.max_ssbo_size) goto cleanup; float max_peak = pl_color_transfer_nominal_peak(pass->img.color.transfer) * PL_COLOR_SDR_WHITE; if (pass->img.color.transfer == PL_COLOR_TRC_HLG) max_peak = pass->img.color.hdr.max_luma; if (max_peak <= pass->target.color.hdr.max_luma + 1e-6) goto cleanup; // no adaptation needed if (pass->img.color.hdr.avg_pq_y) goto cleanup; // DV metadata already present enum pl_hdr_metadata_type metadata = PL_HDR_METADATA_ANY; if (params->color_map_params) metadata = params->color_map_params->metadata; if (metadata && metadata != PL_HDR_METADATA_CIE_Y) goto cleanup; // metadata will be unused const struct pl_color_map_params *cpars = params->color_map_params; bool uses_ootf = cpars && cpars->tone_mapping_function == &pl_tone_map_st2094_40; if (uses_ootf && pass->img.color.hdr.ootf.num_anchors) goto cleanup; // HDR10+ OOTF is being used if (params->lut && params->lut_type == PL_LUT_CONVERSION) goto cleanup; // LUT handles tone mapping if (!pass->fbofmt[4] && !params->peak_detect_params->allow_delayed) { PL_WARN(rr, "Disabling peak detection because " "`pl_peak_detect_params.allow_delayed` is false, but lack of " "FBOs forces the result to be delayed."); rr->errors |= PL_RENDER_ERR_PEAK_DETECT; goto cleanup; } bool ok = pl_shader_detect_peak(img_sh(pass, &pass->img), pass->img.color, &rr->tone_map_state, params->peak_detect_params); if (!ok) { PL_WARN(rr, "Failed creating HDR peak detection shader.. 
disabling"); rr->errors |= PL_RENDER_ERR_PEAK_DETECT; goto cleanup; } pass->need_peak_fbo = !params->peak_detect_params->allow_delayed; return; cleanup: // No peak detection required or supported, so clean up the state to avoid // confusing it with later frames where peak detection is enabled again pl_reset_detected_peak(rr->tone_map_state); } bool pl_renderer_get_hdr_metadata(pl_renderer rr, struct pl_hdr_metadata *metadata) { return pl_get_detected_hdr_metadata(rr->tone_map_state, metadata); } struct plane_state { enum plane_type type; struct pl_plane plane; struct img img; // for per-plane shaders float plane_w, plane_h; // logical plane dimensions }; static const char *plane_type_names[] = { [PLANE_INVALID] = "invalid", [PLANE_ALPHA] = "alpha", [PLANE_CHROMA] = "chroma", [PLANE_LUMA] = "luma", [PLANE_RGB] = "rgb", [PLANE_XYZ] = "xyz", }; static void log_plane_info(pl_renderer rr, const struct plane_state *st) { const struct pl_plane *plane = &st->plane; PL_TRACE(rr, " Type: %s", plane_type_names[st->type]); switch (plane->components) { case 0: PL_TRACE(rr, " Components: (none)"); break; case 1: PL_TRACE(rr, " Components: {%d}", plane->component_mapping[0]); break; case 2: PL_TRACE(rr, " Components: {%d %d}", plane->component_mapping[0], plane->component_mapping[1]); break; case 3: PL_TRACE(rr, " Components: {%d %d %d}", plane->component_mapping[0], plane->component_mapping[1], plane->component_mapping[2]); break; case 4: PL_TRACE(rr, " Components: {%d %d %d %d}", plane->component_mapping[0], plane->component_mapping[1], plane->component_mapping[2], plane->component_mapping[3]); break; } PL_TRACE(rr, " Rect: {%f %f} -> {%f %f}", st->img.rect.x0, st->img.rect.y0, st->img.rect.x1, st->img.rect.y1); PL_TRACE(rr, " Bits: %d (used) / %d (sampled), shift %d", st->img.repr.bits.color_depth, st->img.repr.bits.sample_depth, st->img.repr.bits.bit_shift); } // Returns true if debanding was applied static bool plane_deband(struct pass_state *pass, struct img *img, float neutral[3]) { const struct pl_render_params *params = pass->params; const struct pl_frame *image = &pass->image; pl_renderer rr = pass->rr; if ((rr->errors & PL_RENDER_ERR_DEBANDING) || !params->deband_params || !pass->fbofmt[4]) { return false; } struct pl_color_repr repr = img->repr; struct pl_sample_src src = { .tex = img_tex(pass, img), .components = img->comps, .scale = pl_color_repr_normalize(&repr), }; // Divide the deband grain scale by the effective current colorspace nominal // peak, to make sure the output intensity of the grain is as independent // of the source as possible, even though it happens this early in the // process (well before any linearization / output adaptation) struct pl_deband_params dparams = *params->deband_params; dparams.grain /= image->color.hdr.max_luma / PL_COLOR_SDR_WHITE; memcpy(dparams.grain_neutral, neutral, sizeof(dparams.grain_neutral)); img->tex = NULL; img->sh = pl_dispatch_begin_ex(rr->dp, true); pl_shader_deband(img->sh, &src, &dparams); img->err_msg = "Failed applying debanding... 
disabling!"; img->err_enum = PL_RENDER_ERR_DEBANDING; img->err_tex = src.tex; img->repr = repr; return true; } // Returns true if grain was applied static bool plane_film_grain(struct pass_state *pass, int plane_idx, struct plane_state *st, const struct plane_state *ref) { const struct pl_frame *image = &pass->image; pl_renderer rr = pass->rr; if (rr->errors & PL_RENDER_ERR_FILM_GRAIN) return false; struct img *img = &st->img; struct pl_plane *plane = &st->plane; struct pl_color_repr repr = image->repr; bool is_orig_repr = pl_color_repr_equal(&st->img.repr, &image->repr); if (!is_orig_repr) { // Propagate the original color depth to the film grain algorithm, but // update the sample depth and effective bit shift based on the state // of the current texture, which is guaranteed to already be // normalized. pl_assert(st->img.repr.bits.bit_shift == 0); repr.bits.sample_depth = st->img.repr.bits.sample_depth; repr.bits.bit_shift = repr.bits.sample_depth - repr.bits.color_depth; } struct pl_film_grain_params grain_params = { .data = image->film_grain, .luma_tex = ref->plane.texture, .repr = &repr, .components = plane->components, }; switch (image->film_grain.type) { case PL_FILM_GRAIN_NONE: return false; case PL_FILM_GRAIN_H274: break; case PL_FILM_GRAIN_AV1: grain_params.luma_tex = ref->plane.texture; for (int c = 0; c < ref->plane.components; c++) { if (ref->plane.component_mapping[c] == PL_CHANNEL_Y) grain_params.luma_comp = c; } break; default: pl_unreachable(); } for (int c = 0; c < plane->components; c++) grain_params.component_mapping[c] = plane->component_mapping[c]; if (!pl_needs_film_grain(&grain_params)) return false; if (!pass->fbofmt[plane->components]) { PL_ERR(rr, "Film grain required but no renderable format available.. " "disabling!"); rr->errors |= PL_RENDER_ERR_FILM_GRAIN; return false; } grain_params.tex = img_tex(pass, img); if (!grain_params.tex) return false; img->sh = pl_dispatch_begin_ex(rr->dp, true); if (!pl_shader_film_grain(img->sh, &rr->grain_state[plane_idx], &grain_params)) { pl_dispatch_abort(rr->dp, &img->sh); rr->errors |= PL_RENDER_ERR_FILM_GRAIN; return false; } img->tex = NULL; img->err_msg = "Failed applying film grain.. disabling!"; img->err_enum = PL_RENDER_ERR_FILM_GRAIN; img->err_tex = grain_params.tex; if (is_orig_repr) img->repr = repr; return true; } static const enum pl_hook_stage plane_hook_stages[] = { [PLANE_ALPHA] = PL_HOOK_ALPHA_INPUT, [PLANE_CHROMA] = PL_HOOK_CHROMA_INPUT, [PLANE_LUMA] = PL_HOOK_LUMA_INPUT, [PLANE_RGB] = PL_HOOK_RGB_INPUT, [PLANE_XYZ] = PL_HOOK_XYZ_INPUT, }; static const enum pl_hook_stage plane_scaled_hook_stages[] = { [PLANE_ALPHA] = PL_HOOK_ALPHA_SCALED, [PLANE_CHROMA] = PL_HOOK_CHROMA_SCALED, [PLANE_LUMA] = 0, // never hooked [PLANE_RGB] = 0, [PLANE_XYZ] = 0, }; static enum pl_lut_type guess_frame_lut_type(const struct pl_frame *frame, bool reversed) { if (!frame->lut) return PL_LUT_UNKNOWN; if (frame->lut_type) return frame->lut_type; enum pl_color_system sys_in = frame->lut->repr_in.sys; enum pl_color_system sys_out = frame->lut->repr_out.sys; if (reversed) PL_SWAP(sys_in, sys_out); if (sys_in == PL_COLOR_SYSTEM_RGB && sys_out == sys_in) return PL_LUT_NORMALIZED; if (sys_in == frame->repr.sys && sys_out == PL_COLOR_SYSTEM_RGB) return PL_LUT_CONVERSION; // Unknown, just fall back to the default return PL_LUT_NATIVE; } static pl_fmt merge_fmt(struct pass_state *pass, const struct img *a, const struct img *b) { pl_renderer rr = pass->rr; pl_fmt fmta = a->tex ? 
a->tex->params.format : PL_DEF(a->fmt, pass->fbofmt[a->comps]); pl_fmt fmtb = b->tex ? b->tex->params.format : PL_DEF(b->fmt, pass->fbofmt[b->comps]); pl_assert(fmta && fmtb); if (fmta->type != fmtb->type) return NULL; int num_comps = PL_MIN(4, a->comps + b->comps); int min_depth = PL_MAX(a->repr.bits.sample_depth, b->repr.bits.sample_depth); // Only return formats that support all relevant caps of both formats const enum pl_fmt_caps mask = PL_FMT_CAP_SAMPLEABLE | PL_FMT_CAP_LINEAR; enum pl_fmt_caps req_caps = (fmta->caps & mask) | (fmtb->caps & mask); return pl_find_fmt(rr->gpu, fmta->type, num_comps, min_depth, 0, req_caps); } // Applies a series of rough heuristics to figure out whether we expect any // performance gains from plane merging. This is basically a series of checks // for operations that we *know* benefit from merged planes static bool want_merge(struct pass_state *pass, const struct plane_state *st, const struct plane_state *ref) { const struct pl_render_params *params = pass->params; const pl_renderer rr = pass->rr; if (!pass->fbofmt[4]) return false; // Debanding if (!(rr->errors & PL_RENDER_ERR_DEBANDING) && params->deband_params) return true; // Other plane hooks, which are generally nontrivial enum pl_hook_stage stage = plane_hook_stages[st->type]; for (int i = 0; i < params->num_hooks; i++) { if (params->hooks[i]->stages & stage) return true; } // Non-trivial scaling struct pl_sample_src src = { .new_w = ref->img.w, .new_h = ref->img.h, .rect = { .x1 = st->img.w, .y1 = st->img.h, }, }; struct sampler_info info = sample_src_info(pass, &src, SAMPLER_PLANE); if (info.type == SAMPLER_COMPLEX) return true; // Film grain synthesis, can be merged for compatible channels, saving on // redundant sampling of the grain/offset textures struct pl_film_grain_params grain_params = { .data = pass->image.film_grain, .repr = (struct pl_color_repr *) &st->img.repr, .components = st->plane.components, }; for (int c = 0; c < st->plane.components; c++) grain_params.component_mapping[c] = st->plane.component_mapping[c]; if (!(rr->errors & PL_RENDER_ERR_FILM_GRAIN) && pl_needs_film_grain(&grain_params)) { return true; } return false; } // This scales and merges all of the source images, and initializes pass->img. 
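// Rough order of operations below: optional per-plane deinterlacing, merging
// of compatible planes into combined shaders, debanding, film grain
// synthesis, per-plane user hooks, alignment/scaling of every plane against
// the rounded reference rect, and finally LUT / ICC decoding into the working
// color representation (with alpha premultiplied at the end).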
static bool pass_read_image(struct pass_state *pass) { const struct pl_render_params *params = pass->params; struct pl_frame *image = &pass->image; pl_renderer rr = pass->rr; struct plane_state planes[4]; struct plane_state *ref = &planes[pass->src_ref]; pl_assert(pass->src_ref >= 0 && pass->src_ref < image->num_planes); for (int i = 0; i < image->num_planes; i++) { planes[i] = (struct plane_state) { .type = detect_plane_type(&image->planes[i], &image->repr), .plane = image->planes[i], .img = { .w = image->planes[i].texture->params.w, .h = image->planes[i].texture->params.h, .tex = image->planes[i].texture, .repr = image->repr, .color = image->color, .comps = image->planes[i].components, }, }; // Explicitly skip alpha channel when overridden if (image->repr.alpha == PL_ALPHA_NONE) { if (planes[i].type == PLANE_ALPHA) { planes[i].type = PLANE_INVALID; continue; } else { for (int j = 0; j < planes[i].plane.components; j++) { if (planes[i].plane.component_mapping[j] == PL_CHANNEL_A) planes[i].plane.component_mapping[j] = PL_CHANNEL_NONE; } } } // Deinterlace plane if needed if (image->field != PL_FIELD_NONE && params->deinterlace_params && pass->fbofmt[4] && !(rr->errors & PL_RENDER_ERR_DEINTERLACING)) { struct img *img = &planes[i].img; struct pl_deinterlace_source src = { .cur.top = img->tex, .prev.top = image->prev ? image->prev->planes[i].texture : NULL, .next.top = image->next ? image->next->planes[i].texture : NULL, .field = image->field, .first_field = image->first_field, .component_mask = (1 << img->comps) - 1, }; img->tex = NULL; img->sh = pl_dispatch_begin_ex(pass->rr->dp, true); pl_shader_deinterlace(img->sh, &src, params->deinterlace_params); img->err_msg = "Failed deinterlacing plane.. disabling!"; img->err_enum = PL_RENDER_ERR_DEINTERLACING; img->err_tex = planes[i].plane.texture; } } // Original ref texture, even after preprocessing pl_tex ref_tex = ref->plane.texture; // Merge all compatible planes into 'combined' shaders for (int i = 0; i < image->num_planes; i++) { struct plane_state *sti = &planes[i]; if (!sti->type) continue; if (!want_merge(pass, sti, ref)) continue; bool did_merge = false; for (int j = i+1; j < image->num_planes; j++) { struct plane_state *stj = &planes[j]; bool merge = sti->type == stj->type && sti->img.w == stj->img.w && sti->img.h == stj->img.h && sti->plane.shift_x == stj->plane.shift_x && sti->plane.shift_y == stj->plane.shift_y; if (!merge) continue; pl_fmt fmt = merge_fmt(pass, &sti->img, &stj->img); if (!fmt) continue; PL_TRACE(rr, "Merging plane %d into plane %d", j, i); pl_shader sh = sti->img.sh; if (!sh) { sh = sti->img.sh = pl_dispatch_begin_ex(pass->rr->dp, true); pl_shader_sample_direct(sh, pl_sample_src( .tex = sti->img.tex )); sti->img.tex = NULL; } pl_shader psh = NULL; if (!stj->img.sh) { psh = pl_dispatch_begin_ex(pass->rr->dp, true); pl_shader_sample_direct(psh, pl_sample_src( .tex = stj->img.tex )); } ident_t sub = sh_subpass(sh, psh ? 
psh : stj->img.sh); pl_dispatch_abort(rr->dp, &psh); if (!sub) break; // skip merging sh_describe(sh, "merging planes"); GLSL("{ \n" "vec4 tmp = "$"(); \n", sub); for (int jc = 0; jc < stj->img.comps; jc++) { int map = stj->plane.component_mapping[jc]; if (map == PL_CHANNEL_NONE) continue; int ic = sti->img.comps++; pl_assert(ic < 4); GLSL("color[%d] = tmp[%d]; \n", ic, jc); sti->plane.components = sti->img.comps; sti->plane.component_mapping[ic] = map; } GLSL("} \n"); sti->img.fmt = fmt; pl_dispatch_abort(rr->dp, &stj->img.sh); *stj = (struct plane_state) {0}; did_merge = true; } if (!did_merge) continue; if (!img_tex(pass, &sti->img)) { PL_ERR(rr, "Failed dispatching plane merging shader, disabling FBOs!"); memset(pass->fbofmt, 0, sizeof(pass->fbofmt)); rr->errors |= PL_RENDER_ERR_FBO; return false; } } int bits = image->repr.bits.sample_depth; float out_scale = bits ? (1llu << bits) / ((1llu << bits) - 1.0f) : 1.0f; float neutral_luma = 0.0, neutral_chroma = 0.5f * out_scale; if (pl_color_levels_guess(&image->repr) == PL_COLOR_LEVELS_LIMITED) neutral_luma = 16 / 256.0f * out_scale; if (!pl_color_system_is_ycbcr_like(image->repr.sys)) neutral_chroma = neutral_luma; // Compute the sampling rc of each plane for (int i = 0; i < image->num_planes; i++) { struct plane_state *st = &planes[i]; if (!st->type) continue; float rx = (float) st->plane.texture->params.w / ref_tex->params.w, ry = (float) st->plane.texture->params.h / ref_tex->params.h; // Only accept integer scaling ratios. This accounts for the fact that // fractionally subsampled planes get rounded up to the nearest integer // size, which we want to discard. float rrx = rx >= 1 ? roundf(rx) : 1.0 / roundf(1.0 / rx), rry = ry >= 1 ? roundf(ry) : 1.0 / roundf(1.0 / ry); float sx = st->plane.shift_x, sy = st->plane.shift_y; st->img.rect = (pl_rect2df) { .x0 = (image->crop.x0 - sx) * rrx, .y0 = (image->crop.y0 - sy) * rry, .x1 = (image->crop.x1 - sx) * rrx, .y1 = (image->crop.y1 - sy) * rry, }; st->plane_w = ref_tex->params.w * rrx; st->plane_h = ref_tex->params.h * rry; PL_TRACE(rr, "Plane %d:", i); log_plane_info(rr, st); float neutral[3] = {0.0}; for (int c = 0, idx = 0; c < st->plane.components; c++) { switch (st->plane.component_mapping[c]) { case PL_CHANNEL_Y: neutral[idx++] = neutral_luma; break; case PL_CHANNEL_U: // fall through case PL_CHANNEL_V: neutral[idx++] = neutral_chroma; break; } } // The order of operations (deband -> film grain -> user hooks) is // chosen to maximize quality. Note that film grain requires unmodified // plane sizes, so it has to be before user hooks. As for debanding, // it's reduced in quality after e.g. plane scalers as well. It's also // made less effective by performing film grain synthesis first. if (plane_deband(pass, &st->img, neutral)) { PL_TRACE(rr, "After debanding:"); log_plane_info(rr, st); } if (plane_film_grain(pass, i, st, ref)) { PL_TRACE(rr, "After film grain:"); log_plane_info(rr, st); } if (pass_hook(pass, &st->img, plane_hook_stages[st->type])) { PL_TRACE(rr, "After user hooks:"); log_plane_info(rr, st); } } pl_shader sh = pl_dispatch_begin_ex(rr->dp, true); sh_require(sh, PL_SHADER_SIG_NONE, 0, 0); // Initialize the color to black GLSL("vec4 color = vec4("$", vec2("$"), 1.0); \n" "// pass_read_image \n" "{ \n" "vec4 tmp; \n", SH_FLOAT(neutral_luma), SH_FLOAT(neutral_chroma)); // For quality reasons, explicitly drop subpixel offsets from the ref rect // and re-add them as part of `pass->img.rect`, always rounding towards 0. // Additionally, drop anamorphic subpixel mismatches. 
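// Worked example with made-up numbers: a reference crop of
// {12.3, 0.0, 112.3, 56.7} rounds to {12, 0, 112, 57}, so off_x = 0.3,
// off_y = 0.0, stretch_x = 100 / 100.0 = 1.0 and stretch_y = 57 / 56.7.
// The dropped subpixel offsets are re-added as part of `pass->img.rect`
// further below, so the main scaler can still compensate for them.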
const pl_rect2df ref_rc = ref->img.rect; pl_rect2d ref_rounded; ref_rounded.x0 = truncf(ref_rc.x0); ref_rounded.y0 = truncf(ref_rc.y0); ref_rounded.x1 = ref_rounded.x0 + roundf(pl_rect_w(ref_rc)); ref_rounded.y1 = ref_rounded.y0 + roundf(pl_rect_h(ref_rc)); PL_TRACE(rr, "Rounded reference rect: {%d %d %d %d}", ref_rounded.x0, ref_rounded.y0, ref_rounded.x1, ref_rounded.y1); float off_x = ref_rc.x0 - ref_rounded.x0, off_y = ref_rc.y0 - ref_rounded.y0, stretch_x = pl_rect_w(ref_rounded) / pl_rect_w(ref_rc), stretch_y = pl_rect_h(ref_rounded) / pl_rect_h(ref_rc); for (int i = 0; i < image->num_planes; i++) { struct plane_state *st = &planes[i]; const struct pl_plane *plane = &st->plane; if (!st->type) continue; float scale_x = pl_rect_w(st->img.rect) / pl_rect_w(ref_rc), scale_y = pl_rect_h(st->img.rect) / pl_rect_h(ref_rc), base_x = st->img.rect.x0 - scale_x * off_x, base_y = st->img.rect.y0 - scale_y * off_y; struct pl_sample_src src = { .components = plane->components, .address_mode = plane->address_mode, .scale = pl_color_repr_normalize(&st->img.repr), .new_w = pl_rect_w(ref_rounded), .new_h = pl_rect_h(ref_rounded), .rect = { base_x, base_y, base_x + stretch_x * pl_rect_w(st->img.rect), base_y + stretch_y * pl_rect_h(st->img.rect), }, }; if (plane->flipped) { src.rect.y0 = st->plane_h - src.rect.y0; src.rect.y1 = st->plane_h - src.rect.y1; } PL_TRACE(rr, "Aligning plane %d: {%f %f %f %f} -> {%f %f %f %f}%s", i, st->img.rect.x0, st->img.rect.y0, st->img.rect.x1, st->img.rect.y1, src.rect.x0, src.rect.y0, src.rect.x1, src.rect.y1, plane->flipped ? " (flipped) " : ""); st->img.unique = true; pl_rect2d unscaled = { .x1 = src.new_w, .y1 = src.new_h }; if (st->img.sh && st->img.w == src.new_w && st->img.h == src.new_h && pl_rect2d_eq(src.rect, unscaled)) { // Image rects are already equal, no indirect scaling needed } else { src.tex = img_tex(pass, &st->img); st->img.tex = NULL; st->img.sh = pl_dispatch_begin_ex(rr->dp, true); dispatch_sampler(pass, st->img.sh, &rr->samplers_src[i], SAMPLER_PLANE, NULL, &src); st->img.err_enum |= PL_RENDER_ERR_SAMPLING; st->img.rect.x0 = st->img.rect.y0 = 0.0f; st->img.w = st->img.rect.x1 = src.new_w; st->img.h = st->img.rect.y1 = src.new_h; } pass_hook(pass, &st->img, plane_scaled_hook_stages[st->type]); ident_t sub = sh_subpass(sh, img_sh(pass, &st->img)); if (!sub) { if (!img_tex(pass, &st->img)) { pl_dispatch_abort(rr->dp, &sh); return false; } sub = sh_subpass(sh, img_sh(pass, &st->img)); pl_assert(sub); } GLSL("tmp = "$"(); \n", sub); for (int c = 0; c < src.components; c++) { if (plane->component_mapping[c] < 0) continue; GLSL("color[%d] = tmp[%d];\n", plane->component_mapping[c], c); } // we don't need it anymore pl_dispatch_abort(rr->dp, &st->img.sh); } GLSL("}\n"); pass->img = (struct img) { .sh = sh, .w = pl_rect_w(ref_rounded), .h = pl_rect_h(ref_rounded), .repr = ref->img.repr, .color = image->color, .comps = ref->img.repr.alpha == PL_ALPHA_NONE ? 
3 : 4, .rect = { off_x, off_y, off_x + pl_rect_w(ref_rc), off_y + pl_rect_h(ref_rc), }, }; // Update the reference rect to our adjusted image coordinates pass->ref_rect = pass->img.rect; pass_hook(pass, &pass->img, PL_HOOK_NATIVE); // Apply LUT logic and colorspace conversion enum pl_lut_type lut_type = guess_frame_lut_type(image, false); sh = img_sh(pass, &pass->img); bool needs_conversion = true; if (lut_type == PL_LUT_NATIVE || lut_type == PL_LUT_CONVERSION) { // Fix bit depth normalization before applying LUT float scale = pl_color_repr_normalize(&pass->img.repr); GLSL("color *= vec4("$"); \n", SH_FLOAT(scale)); pl_shader_set_alpha(sh, &pass->img.repr, PL_ALPHA_INDEPENDENT); pl_shader_custom_lut(sh, image->lut, &rr->lut_state[LUT_IMAGE]); if (lut_type == PL_LUT_CONVERSION) { pass->img.repr.sys = PL_COLOR_SYSTEM_RGB; pass->img.repr.levels = PL_COLOR_LEVELS_FULL; needs_conversion = false; } } if (needs_conversion) { if (pass->img.repr.sys == PL_COLOR_SYSTEM_XYZ) pass->img.color.transfer = PL_COLOR_TRC_LINEAR; pl_shader_decode_color(sh, &pass->img.repr, params->color_adjustment); } if (lut_type == PL_LUT_NORMALIZED) pl_shader_custom_lut(sh, image->lut, &rr->lut_state[LUT_IMAGE]); // A main PL_LUT_CONVERSION LUT overrides ICC profiles bool main_lut_override = params->lut && params->lut_type == PL_LUT_CONVERSION; if (image->icc && !main_lut_override) { pl_shader_set_alpha(sh, &pass->img.repr, PL_ALPHA_INDEPENDENT); pl_icc_decode(sh, image->icc, &rr->icc_state[ICC_IMAGE], &pass->img.color); } // Pre-multiply alpha channel before the rest of the pipeline, to avoid // bleeding colors from transparent regions into non-transparent regions pl_shader_set_alpha(sh, &pass->img.repr, PL_ALPHA_PREMULTIPLIED); pass_hook(pass, &pass->img, PL_HOOK_RGB); sh = NULL; return true; } static bool pass_scale_main(struct pass_state *pass) { const struct pl_render_params *params = pass->params; pl_renderer rr = pass->rr; pl_fmt fbofmt = pass->fbofmt[pass->img.comps]; if (!fbofmt) { PL_TRACE(rr, "Skipping main scaler (no FBOs)"); return true; } const pl_rect2df new_rect = { .x1 = abs(pl_rect_w(pass->dst_rect)), .y1 = abs(pl_rect_h(pass->dst_rect)), }; struct img *img = &pass->img; struct pl_sample_src src = { .components = img->comps, .new_w = pl_rect_w(new_rect), .new_h = pl_rect_h(new_rect), .rect = img->rect, }; const struct pl_frame *image = &pass->image; bool need_fbo = false; // Force FBO indirection if this shader is non-resizable int out_w, out_h; if (img->sh && pl_shader_output_size(img->sh, &out_w, &out_h)) need_fbo |= out_w != src.new_w || out_h != src.new_h; struct sampler_info info = sample_src_info(pass, &src, SAMPLER_MAIN); bool use_sigmoid = info.dir == SAMPLER_UP && params->sigmoid_params; bool use_linear = info.dir == SAMPLER_DOWN; // Opportunistically update peak here if it would save performance if (info.dir == SAMPLER_UP) hdr_update_peak(pass); // We need to enable the full rendering pipeline if there are any user // shaders / hooks that might depend on it. 
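// Concretely (see the masks below): hooks at PL_HOOK_PRE_KERNEL or
// PL_HOOK_POST_KERNEL merely force FBO indirection, while PL_HOOK_LINEAR
// requests linearization and PL_HOOK_SIGMOID requests sigmoidization (which
// implies linearization), both still subject to the hard-disable checks
// further down (low-depth FBOs, HDR content, etc.).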
uint64_t scaling_hooks = PL_HOOK_PRE_KERNEL | PL_HOOK_POST_KERNEL; uint64_t linear_hooks = PL_HOOK_LINEAR | PL_HOOK_SIGMOID; for (int i = 0; i < params->num_hooks; i++) { if (params->hooks[i]->stages & (scaling_hooks | linear_hooks)) { need_fbo = true; if (params->hooks[i]->stages & linear_hooks) use_linear = true; if (params->hooks[i]->stages & PL_HOOK_SIGMOID) use_sigmoid = true; } } if (info.dir == SAMPLER_NOOP && !need_fbo) { pl_assert(src.new_w == img->w && src.new_h == img->h); PL_TRACE(rr, "Skipping main scaler (would be no-op)"); goto done; } if (info.type == SAMPLER_DIRECT && !need_fbo) { img->w = src.new_w; img->h = src.new_h; img->rect = new_rect; PL_TRACE(rr, "Skipping main scaler (free sampling)"); goto done; } // Hard-disable both sigmoidization and linearization when required if (params->disable_linear_scaling || fbofmt->component_depth[0] < 16) use_sigmoid = use_linear = false; // Avoid sigmoidization for HDR content because it clips to [0,1], and // linearization because it causes very nasty ringing artefacts. if (pl_color_space_is_hdr(&img->color)) use_sigmoid = use_linear = false; if (!(use_linear || use_sigmoid) && img->color.transfer == PL_COLOR_TRC_LINEAR) { img->color.transfer = image->color.transfer; if (image->color.transfer == PL_COLOR_TRC_LINEAR) img->color.transfer = PL_COLOR_TRC_GAMMA22; // arbitrary fallback pl_shader_delinearize(img_sh(pass, img), &img->color); } if (use_linear || use_sigmoid) { pl_shader_linearize(img_sh(pass, img), &img->color); img->color.transfer = PL_COLOR_TRC_LINEAR; pass_hook(pass, img, PL_HOOK_LINEAR); } if (use_sigmoid) { pl_shader_sigmoidize(img_sh(pass, img), params->sigmoid_params); pass_hook(pass, img, PL_HOOK_SIGMOID); } pass_hook(pass, img, PL_HOOK_PRE_KERNEL); src.tex = img_tex(pass, img); if (!src.tex) return false; pass->need_peak_fbo = false; pl_shader sh = pl_dispatch_begin_ex(rr->dp, true); dispatch_sampler(pass, sh, &rr->sampler_main, SAMPLER_MAIN, NULL, &src); img->tex = NULL; img->sh = sh; img->w = src.new_w; img->h = src.new_h; img->rect = new_rect; pass_hook(pass, img, PL_HOOK_POST_KERNEL); if (use_sigmoid) pl_shader_unsigmoidize(img_sh(pass, img), params->sigmoid_params); done: if (info.dir != SAMPLER_UP) hdr_update_peak(pass); pass_hook(pass, img, PL_HOOK_SCALED); return true; } static pl_tex get_feature_map(struct pass_state *pass) { const struct pl_render_params *params = pass->params; pl_renderer rr = pass->rr; const struct pl_color_map_params *cparams = params->color_map_params; cparams = PL_DEF(cparams, &pl_color_map_default_params); if (!cparams->contrast_recovery || cparams->contrast_smoothness <= 1) return NULL; if (!pass->fbofmt[4]) return NULL; if (!pl_color_space_is_hdr(&pass->img.color)) return NULL; if (rr->errors & (PL_RENDER_ERR_SAMPLING | PL_RENDER_ERR_CONTRAST_RECOVERY)) return NULL; if (pass->img.color.hdr.max_luma <= pass->target.color.hdr.max_luma + 1e-6) return NULL; // no adaptation needed if (params->lut && params->lut_type == PL_LUT_CONVERSION) return NULL; // LUT handles tone mapping struct img *img = &pass->img; if (!img_tex(pass, img)) return NULL; const float ratio = cparams->contrast_smoothness; const int cr_w = ceilf(abs(pl_rect_w(pass->dst_rect)) / ratio); const int cr_h = ceilf(abs(pl_rect_h(pass->dst_rect)) / ratio); pl_tex inter_tex = get_fbo(pass, img->w, img->h, NULL, 1, PL_DEBUG_TAG); pl_tex out_tex = get_fbo(pass, cr_w, cr_h, NULL, 1, PL_DEBUG_TAG); if (!inter_tex || !out_tex) goto error; pl_shader sh = pl_dispatch_begin(rr->dp); pl_shader_sample_direct(sh, pl_sample_src( .tex 
= img->tex )); pl_shader_extract_features(sh, img->color); bool ok = pl_dispatch_finish(rr->dp, pl_dispatch_params( .shader = &sh, .target = inter_tex, )); if (!ok) goto error; const struct pl_sample_src src = { .tex = inter_tex, .rect = img->rect, .address_mode = PL_TEX_ADDRESS_MIRROR, .components = 1, .new_w = cr_w, .new_h = cr_h, }; sh = pl_dispatch_begin(rr->dp); dispatch_sampler(pass, sh, &rr->sampler_contrast, SAMPLER_CONTRAST, out_tex, &src); ok = pl_dispatch_finish(rr->dp, pl_dispatch_params( .shader = &sh, .target = out_tex, )); if (!ok) goto error; return out_tex; error: PL_ERR(rr, "Failed extracting luma for contrast recovery, disabling"); rr->errors |= PL_RENDER_ERR_CONTRAST_RECOVERY; return NULL; } // Transforms image into the output color space (tone-mapping, ICC 3DLUT, etc) static void pass_convert_colors(struct pass_state *pass) { const struct pl_render_params *params = pass->params; const struct pl_frame *image = &pass->image; const struct pl_frame *target = &pass->target; pl_renderer rr = pass->rr; struct img *img = &pass->img; pl_shader sh = img_sh(pass, img); bool prelinearized = false; bool need_conversion = true; assert(image->color.primaries == img->color.primaries); if (img->color.transfer == PL_COLOR_TRC_LINEAR) { if (img->repr.alpha == PL_ALPHA_PREMULTIPLIED) { // Very annoying edge case: since prelinerization happens with // premultiplied alpha, but color mapping happens with independent // alpha, we need to go back to non-linear representation *before* // alpha mode conversion, to avoid distortion img->color.transfer = image->color.transfer; pl_shader_delinearize(sh, &img->color); } else { prelinearized = true; } } else if (img->color.transfer != image->color.transfer) { if (image->color.transfer == PL_COLOR_TRC_LINEAR) { // Another annoying edge case: if the input is linear light, but we // decide to un-linearize it for scaling purposes, we need to // re-linearize before passing it into `pl_shader_color_map` pl_shader_linearize(sh, &img->color); img->color.transfer = PL_COLOR_TRC_LINEAR; } } // Do all processing in independent alpha, to avoid nonlinear distortions pl_shader_set_alpha(sh, &img->repr, PL_ALPHA_INDEPENDENT); // Apply color blindness simulation if requested if (params->cone_params) pl_shader_cone_distort(sh, img->color, params->cone_params); if (params->lut) { struct pl_color_space lut_in = params->lut->color_in; struct pl_color_space lut_out = params->lut->color_out; switch (params->lut_type) { case PL_LUT_UNKNOWN: case PL_LUT_NATIVE: pl_color_space_merge(&lut_in, &image->color); pl_color_space_merge(&lut_out, &image->color); break; case PL_LUT_CONVERSION: pl_color_space_merge(&lut_in, &image->color); need_conversion = false; // conversion LUT the highest priority break; case PL_LUT_NORMALIZED: if (!prelinearized) { // PL_LUT_NORMALIZED wants linear input data pl_shader_linearize(sh, &img->color); img->color.transfer = PL_COLOR_TRC_LINEAR; prelinearized = true; } pl_color_space_merge(&lut_in, &img->color); pl_color_space_merge(&lut_out, &img->color); break; } pl_shader_color_map_ex(sh, params->color_map_params, pl_color_map_args( .src = image->color, .dst = lut_in, .prelinearized = prelinearized, )); if (params->lut_type == PL_LUT_NORMALIZED) { GLSLF("color.rgb *= vec3(1.0/"$"); \n", SH_FLOAT(pl_color_transfer_nominal_peak(lut_in.transfer))); } pl_shader_custom_lut(sh, params->lut, &rr->lut_state[LUT_PARAMS]); if (params->lut_type == PL_LUT_NORMALIZED) { GLSLF("color.rgb *= vec3("$"); \n", 
SH_FLOAT(pl_color_transfer_nominal_peak(lut_out.transfer))); } if (params->lut_type != PL_LUT_CONVERSION) { pl_shader_color_map_ex(sh, params->color_map_params, pl_color_map_args( .src = lut_out, .dst = img->color, )); } } if (need_conversion) { struct pl_color_space target_csp = target->color; if (target->icc) target_csp.transfer = PL_COLOR_TRC_LINEAR; if (pass->need_peak_fbo && !img_tex(pass, img)) return; // generate HDR feature map if required pl_tex feature_map = get_feature_map(pass); sh = img_sh(pass, img); // `get_feature_map` dispatches previous shader // current -> target pl_shader_color_map_ex(sh, params->color_map_params, pl_color_map_args( .src = image->color, .dst = target_csp, .prelinearized = prelinearized, .state = &rr->tone_map_state, .feature_map = feature_map, )); if (target->icc) pl_icc_encode(sh, target->icc, &rr->icc_state[ICC_TARGET]); } enum pl_lut_type lut_type = guess_frame_lut_type(target, true); if (lut_type == PL_LUT_NORMALIZED || lut_type == PL_LUT_CONVERSION) pl_shader_custom_lut(sh, target->lut, &rr->lut_state[LUT_TARGET]); img->color = target->color; } // Returns true if error diffusion was successfully performed static bool pass_error_diffusion(struct pass_state *pass, pl_shader *sh, int new_depth, int comps, int out_w, int out_h) { const struct pl_render_params *params = pass->params; pl_renderer rr = pass->rr; if (!params->error_diffusion || (rr->errors & PL_RENDER_ERR_ERROR_DIFFUSION)) return false; size_t shmem_req = pl_error_diffusion_shmem_req(params->error_diffusion, out_h); if (shmem_req > rr->gpu->glsl.max_shmem_size) { PL_TRACE(rr, "Disabling error diffusion due to shmem requirements (%zu) " "exceeding capabilities (%zu)", shmem_req, rr->gpu->glsl.max_shmem_size); return false; } pl_fmt fmt = pass->fbofmt[comps]; if (!fmt || !(fmt->caps & PL_FMT_CAP_STORABLE)) { PL_ERR(rr, "Error diffusion requires storable FBOs but GPU does not " "provide them... disabling!"); goto error; } struct pl_error_diffusion_params edpars = { .new_depth = new_depth, .kernel = params->error_diffusion, }; // Create temporary framebuffers edpars.input_tex = get_fbo(pass, out_w, out_h, fmt, comps, PL_DEBUG_TAG); edpars.output_tex = get_fbo(pass, out_w, out_h, fmt, comps, PL_DEBUG_TAG); if (!edpars.input_tex || !edpars.output_tex) goto error; pl_shader dsh = pl_dispatch_begin(rr->dp); if (!pl_shader_error_diffusion(dsh, &edpars)) { pl_dispatch_abort(rr->dp, &dsh); goto error; } // Everything was okay, run the shaders bool ok = pl_dispatch_finish(rr->dp, pl_dispatch_params( .shader = sh, .target = edpars.input_tex, )); if (ok) { ok = pl_dispatch_compute(rr->dp, pl_dispatch_compute_params( .shader = &dsh, .dispatch_size = {1, 1, 1}, )); } *sh = pl_dispatch_begin(rr->dp); pl_shader_sample_direct(*sh, pl_sample_src( .tex = ok ? 
edpars.output_tex : edpars.input_tex, )); return ok; error: rr->errors |= PL_RENDER_ERR_ERROR_DIFFUSION; return false; } #define CLEAR_COL(params) \ (float[4]) { \ (params)->background_color[0], \ (params)->background_color[1], \ (params)->background_color[2], \ 1.0 - (params)->background_transparency, \ } static void clear_target(pl_renderer rr, const struct pl_frame *target, const struct pl_render_params *params) { enum pl_clear_mode border = params->border; if (params->skip_target_clearing) border = PL_CLEAR_SKIP; switch (border) { case PL_CLEAR_COLOR: pl_frame_clear_rgba(rr->gpu, target, CLEAR_COL(params)); break; case PL_CLEAR_TILES: pl_frame_clear_tiles(rr->gpu, target, params->tile_colors, params->tile_size); break; case PL_CLEAR_SKIP: break; case PL_CLEAR_MODE_COUNT: pl_unreachable(); } } static bool pass_output_target(struct pass_state *pass) { const struct pl_render_params *params = pass->params; const struct pl_frame *image = &pass->image; const struct pl_frame *target = &pass->target; pl_renderer rr = pass->rr; struct img *img = &pass->img; pl_shader sh = img_sh(pass, img); if (params->corner_rounding > 0.0f) { const float out_w2 = fabsf(pl_rect_w(target->crop)) / 2.0f; const float out_h2 = fabsf(pl_rect_h(target->crop)) / 2.0f; const float radius = fminf(params->corner_rounding, 1.0f) * fminf(out_w2, out_h2); const struct pl_rect2df relpos = { .x0 = -out_w2, .y0 = -out_h2, .x1 = out_w2, .y1 = out_h2, }; GLSL("float radius = "$"; \n" "vec2 size2 = vec2("$", "$"); \n" "vec2 relpos = "$"; \n" "vec2 rd = abs(relpos) - size2 + vec2(radius); \n" "float rdist = length(max(rd, 0.0)) - radius; \n" "float border = smoothstep(2.0f, 0.0f, rdist); \n", SH_FLOAT_DYN(radius), SH_FLOAT_DYN(out_w2), SH_FLOAT_DYN(out_h2), sh_attr_vec2(sh, "relpos", &relpos)); switch (img->repr.alpha) { case PL_ALPHA_UNKNOWN: case PL_ALPHA_NONE: GLSL("color.a = border; \n"); img->repr.alpha = PL_ALPHA_INDEPENDENT; img->comps = 4; break; case PL_ALPHA_INDEPENDENT: GLSL("color.a *= border; \n"); break; case PL_ALPHA_PREMULTIPLIED: GLSL("color *= border; \n"); break; case PL_ALPHA_MODE_COUNT: pl_unreachable(); } } const struct pl_plane *ref = &target->planes[pass->dst_ref]; pl_rect2d dst_rect = pass->dst_rect; if (params->distort_params) { struct pl_distort_params dpars = *params->distort_params; if (dpars.alpha_mode) { pl_shader_set_alpha(sh, &img->repr, dpars.alpha_mode); img->repr.alpha = dpars.alpha_mode; img->comps = 4; } pl_tex tex = img_tex(pass, img); if (!tex) return false; // Expand canvas to fit result of distortion const float ar = pl_rect2df_aspect(&target->crop); const float sx = fminf(ar, 1.0f); const float sy = fminf(1.0f / ar, 1.0f); pl_rect2df bb = pl_transform2x2_bounds(&dpars.transform, &(pl_rect2df) { .x0 = -sx, .x1 = sx, .y0 = -sy, .y1 = sy, }); // Clamp to output size and adjust as needed when constraining output pl_rect2df tmp = target->crop; pl_rect2df_stretch(&tmp, pl_rect_w(bb) / (2*sx), pl_rect_h(bb) / (2*sy)); const float tmp_w = pl_rect_w(tmp), tmp_h = pl_rect_h(tmp); int canvas_w = ref->texture->params.w, canvas_h = ref->texture->params.h; if (pass->rotation % PL_ROTATION_180 == PL_ROTATION_90) PL_SWAP(canvas_w, canvas_h); tmp.x0 = PL_CLAMP(tmp.x0, 0.0f, canvas_w); tmp.x1 = PL_CLAMP(tmp.x1, 0.0f, canvas_w); tmp.y0 = PL_CLAMP(tmp.y0, 0.0f, canvas_h); tmp.y1 = PL_CLAMP(tmp.y1, 0.0f, canvas_h); if (dpars.constrain) { const float rx = pl_rect_w(tmp) / tmp_w; const float ry = pl_rect_h(tmp) / tmp_h; pl_rect2df_stretch(&tmp, fminf(ry / rx, 1.0f), fminf(rx / ry, 1.0f)); } dst_rect.x0 = 
roundf(tmp.x0); dst_rect.x1 = roundf(tmp.x1); dst_rect.y0 = roundf(tmp.y0); dst_rect.y1 = roundf(tmp.y1); dpars.unscaled = true; img->w = abs(pl_rect_w(dst_rect)); img->h = abs(pl_rect_h(dst_rect)); img->tex = NULL; img->sh = sh = pl_dispatch_begin(rr->dp); pl_shader_distort(sh, tex, img->w, img->h, &dpars); } pass_hook(pass, img, PL_HOOK_PRE_OUTPUT); enum pl_clear_mode background = params->background; if (params->blend_against_tiles) background = PL_CLEAR_TILES; bool has_alpha = target->repr.alpha != PL_ALPHA_NONE || params->blend_params; bool need_blend = background != PL_CLEAR_SKIP || !has_alpha; if (img->comps == 4 && need_blend) { pl_shader_set_alpha(sh, &img->repr, PL_ALPHA_PREMULTIPLIED); switch (background) { case PL_CLEAR_COLOR: GLSL("color += (1.0 - color.a) * vec4("$", "$", "$", "$"); \n", SH_FLOAT(params->background_color[0]), SH_FLOAT(params->background_color[1]), SH_FLOAT(params->background_color[2]), SH_FLOAT(1.0 - params->background_transparency)); if (!params->background_transparency) { img->repr.alpha = PL_ALPHA_NONE; img->comps = 3; } break; case PL_CLEAR_TILES:; static const float zero[2][3] = {0}; const float (*color)[3] = params->tile_colors; if (memcmp(color, zero, sizeof(zero)) == 0) color = pl_render_default_params.tile_colors; GLSL("vec2 outcoord = gl_FragCoord.xy * "$"; \n" "bvec2 tile = lessThan(fract(outcoord), vec2(0.5)); \n" "vec3 tile_color = tile.x == tile.y ? vec3("$", "$", "$") \n" " : vec3("$", "$", "$"); \n" "color.rgb += (1.0 - color.a) * tile_color; \n" "color.a = 1.0; \n", SH_FLOAT(1.0 / PL_DEF(params->tile_size, pl_render_default_params.tile_size)), SH_FLOAT(color[0][0]), SH_FLOAT(color[0][1]), SH_FLOAT(color[0][2]), SH_FLOAT(color[1][0]), SH_FLOAT(color[1][1]), SH_FLOAT(color[1][2])); img->repr.alpha = PL_ALPHA_NONE; img->comps = 3; break; case PL_CLEAR_SKIP: break; case PL_CLEAR_MODE_COUNT: pl_unreachable(); } } // Apply the color scale separately, after encoding is done, to make sure // that the intermediate FBO (if any) has the correct precision. struct pl_color_repr repr = target->repr; float scale = pl_color_repr_normalize(&repr); enum pl_lut_type lut_type = guess_frame_lut_type(target, true); if (lut_type != PL_LUT_CONVERSION) pl_shader_encode_color(sh, &repr); if (lut_type == PL_LUT_NATIVE) { pl_shader_set_alpha(sh, &img->repr, PL_ALPHA_INDEPENDENT); pl_shader_custom_lut(sh, target->lut, &rr->lut_state[LUT_TARGET]); pl_shader_set_alpha(sh, &img->repr, PL_ALPHA_PREMULTIPLIED); } // Rotation handling if (pass->rotation % PL_ROTATION_180 == PL_ROTATION_90) { PL_SWAP(dst_rect.x0, dst_rect.y0); PL_SWAP(dst_rect.x1, dst_rect.y1); PL_SWAP(img->w, img->h); sh->transpose = true; } pass_hook(pass, img, PL_HOOK_OUTPUT); sh = NULL; bool flipped_x = dst_rect.x1 < dst_rect.x0, flipped_y = dst_rect.y1 < dst_rect.y0; if (pl_frame_is_cropped(target)) clear_target(rr, target, params); for (int p = 0; p < target->num_planes; p++) { const struct pl_plane *plane = &target->planes[p]; float rx = (float) plane->texture->params.w / ref->texture->params.w, ry = (float) plane->texture->params.h / ref->texture->params.h; // Only accept integer scaling ratios. This accounts for the fact // that fractionally subsampled planes get rounded up to the // nearest integer size, which we want to over-render. float rrx = rx >= 1 ? roundf(rx) : 1.0 / roundf(1.0 / rx), rry = ry >= 1 ? 
roundf(ry) : 1.0 / roundf(1.0 / ry); float sx = plane->shift_x, sy = plane->shift_y; pl_rect2df plane_rectf = { .x0 = (dst_rect.x0 - sx) * rrx, .y0 = (dst_rect.y0 - sy) * rry, .x1 = (dst_rect.x1 - sx) * rrx, .y1 = (dst_rect.y1 - sy) * rry, }; // Normalize to make the math easier pl_rect2df_normalize(&plane_rectf); // Round the output rect int rx0 = floorf(plane_rectf.x0), ry0 = floorf(plane_rectf.y0), rx1 = ceilf(plane_rectf.x1), ry1 = ceilf(plane_rectf.y1); PL_TRACE(rr, "Subsampled target %d: {%f %f %f %f} -> {%d %d %d %d}", p, plane_rectf.x0, plane_rectf.y0, plane_rectf.x1, plane_rectf.y1, rx0, ry0, rx1, ry1); if (target->num_planes > 1) { // Planar output, so we need to sample from an intermediate FBO struct pl_sample_src src = { .tex = img_tex(pass, img), .new_w = rx1 - rx0, .new_h = ry1 - ry0, .rect = { .x0 = (rx0 - plane_rectf.x0) / rrx, .x1 = (rx1 - plane_rectf.x0) / rrx, .y0 = (ry0 - plane_rectf.y0) / rry, .y1 = (ry1 - plane_rectf.y0) / rry, }, }; if (!src.tex) { PL_ERR(rr, "Output requires multiple planes, but FBOs are " "unavailable. This combination is unsupported."); return false; } PL_TRACE(rr, "Sampling %dx%d img aligned from {%f %f %f %f}", pass->img.w, pass->img.h, src.rect.x0, src.rect.y0, src.rect.x1, src.rect.y1); for (int c = 0; c < plane->components; c++) { if (plane->component_mapping[c] < 0) continue; src.component_mask |= 1 << plane->component_mapping[c]; } sh = pl_dispatch_begin(rr->dp); dispatch_sampler(pass, sh, &rr->samplers_dst[p], SAMPLER_PLANE, plane->texture, &src); } else { // Single plane, so we can directly re-use the img shader unless // it's incompatible with the FBO capabilities bool is_comp = pl_shader_is_compute(img_sh(pass, img)); if (is_comp && !plane->texture->params.storable) { if (!img_tex(pass, img)) { PL_ERR(rr, "Rendering requires compute shaders, but output " "is not storable, and FBOs are unavailable. This " "combination is unsupported."); return false; } } sh = img_sh(pass, img); img->sh = NULL; } // Ignore dithering for > 16-bit outputs by default, since it makes // little sense to do so (and probably just adds errors) int depth = target->repr.bits.color_depth, applied_dither = 0; if (depth && (depth < 16 || params->force_dither)) { if (pass_error_diffusion(pass, &sh, depth, plane->components, rx1 - rx0, ry1 - ry0)) { applied_dither = depth; } else if (params->dither_params) { struct pl_dither_params dparams = *params->dither_params; if (!params->disable_dither_gamma_correction) dparams.transfer = target->color.transfer; pl_shader_dither(sh, depth, &rr->dither_state, &dparams); applied_dither = depth; } } if (applied_dither != rr->prev_dither) { if (applied_dither) { PL_INFO(rr, "Dithering to %d bit depth", applied_dither); } else { PL_INFO(rr, "Dithering disabled"); } rr->prev_dither = applied_dither; } GLSL("color *= vec4(1.0 / "$"); \n", SH_FLOAT(scale)); swizzle_color(sh, plane->components, plane->component_mapping, params->blend_params); pl_rect2d plane_rect = { .x0 = flipped_x ? rx1 : rx0, .x1 = flipped_x ? rx0 : rx1, .y0 = flipped_y ? ry1 : ry0, .y1 = flipped_y ? 
ry0 : ry1, }; pl_transform2x2 tscale = { .mat = {{{ rrx, 0.0 }, { 0.0, rry }}}, .c = { -sx, -sy }, }; if (plane->flipped) { int plane_h = rry * ref->texture->params.h; plane_rect.y0 = plane_h - plane_rect.y0; plane_rect.y1 = plane_h - plane_rect.y1; tscale.mat.m[1][1] = -tscale.mat.m[1][1]; tscale.c[1] += plane->texture->params.h; } bool ok = pl_dispatch_finish(rr->dp, pl_dispatch_params( .shader = &sh, .target = plane->texture, .blend_params = params->blend_params, .rect = plane_rect, )); if (!ok) return false; if (pass->info.stage != PL_RENDER_STAGE_BLEND) { draw_overlays(pass, plane->texture, plane->components, plane->component_mapping, image->overlays, image->num_overlays, target->color, target->repr, &tscale); } draw_overlays(pass, plane->texture, plane->components, plane->component_mapping, target->overlays, target->num_overlays, target->color, target->repr, &tscale); } *img = (struct img) {0}; return true; } #define require(expr) pl_require(rr, expr) #define validate_plane(plane, param) \ do { \ require((plane).texture); \ require((plane).texture->params.param); \ require((plane).components > 0 && (plane).components <= 4); \ for (int c = 0; c < (plane).components; c++) { \ require((plane).component_mapping[c] >= PL_CHANNEL_NONE && \ (plane).component_mapping[c] <= PL_CHANNEL_A); \ } \ } while (0) #define validate_overlay(overlay) \ do { \ require((overlay).tex); \ require((overlay).tex->params.sampleable); \ require((overlay).num_parts >= 0); \ for (int n = 0; n < (overlay).num_parts; n++) { \ const struct pl_overlay_part *p = &(overlay).parts[n]; \ require(pl_rect_w(p->dst) && pl_rect_h(p->dst)); \ } \ } while (0) #define validate_deinterlace_ref(image, ref) \ do { \ require((image)->num_planes == (ref)->num_planes); \ const struct pl_tex_params *imgp, *refp; \ for (int p = 0; p < (image)->num_planes; p++) { \ validate_plane((ref)->planes[p], sampleable); \ imgp = &(image)->planes[p].texture->params; \ refp = &(ref)->planes[p].texture->params; \ require(imgp->w == refp->w); \ require(imgp->h == refp->h); \ require(imgp->format->num_components == refp->format->num_components);\ } \ } while (0) // Perform some basic validity checks on incoming structs to help catch invalid // API usage. This is not an exhaustive check. In particular, enums are not // bounds checked. This is because most functions accepting enums already // abort() in the default case, and because it's not the intent of this check // to catch all instances of memory corruption - just common logic bugs. static bool validate_structs(pl_renderer rr, const struct pl_frame *image, const struct pl_frame *target) { // Rendering to/from a frame with no planes is technically allowed, but so // pointless that it's more likely to be a user error worth catching. 
require(target->num_planes > 0 && target->num_planes <= PL_MAX_PLANES); for (int i = 0; i < target->num_planes; i++) validate_plane(target->planes[i], renderable); require(!pl_rect_w(target->crop) == !pl_rect_h(target->crop)); require(target->num_overlays >= 0); for (int i = 0; i < target->num_overlays; i++) validate_overlay(target->overlays[i]); if (!image) return true; require(image->num_planes > 0 && image->num_planes <= PL_MAX_PLANES); for (int i = 0; i < image->num_planes; i++) validate_plane(image->planes[i], sampleable); require(!pl_rect_w(image->crop) == !pl_rect_h(image->crop)); require(image->num_overlays >= 0); for (int i = 0; i < image->num_overlays; i++) validate_overlay(image->overlays[i]); if (image->field != PL_FIELD_NONE) { require(image->first_field != PL_FIELD_NONE); if (image->prev) validate_deinterlace_ref(image, image->prev); if (image->next) validate_deinterlace_ref(image, image->next); } return true; error: return false; } // returns index static int frame_ref(const struct pl_frame *frame) { pl_assert(frame->num_planes); for (int i = 0; i < frame->num_planes; i++) { switch (detect_plane_type(&frame->planes[i], &frame->repr)) { case PLANE_RGB: case PLANE_LUMA: case PLANE_XYZ: return i; case PLANE_CHROMA: case PLANE_ALPHA: continue; case PLANE_INVALID: pl_unreachable(); } } return 0; } static void fix_refs_and_rects(struct pass_state *pass) { struct pl_frame *target = &pass->target; pl_rect2df *dst = &target->crop; pass->dst_ref = frame_ref(target); pl_tex dst_ref = target->planes[pass->dst_ref].texture; int dst_w = dst_ref->params.w, dst_h = dst_ref->params.h; if ((!dst->x0 && !dst->x1) || (!dst->y0 && !dst->y1)) { dst->x1 = dst_w; dst->y1 = dst_h; } if (pass->src_ref < 0) { // Simplified version of the below code which only rounds the target // rect but doesn't retroactively apply the crop to the image pass->rotation = pl_rotation_normalize(-target->rotation); pl_rect2df_rotate(dst, -pass->rotation); if (pass->rotation % PL_ROTATION_180 == PL_ROTATION_90) PL_SWAP(dst_w, dst_h); *dst = (pl_rect2df) { .x0 = roundf(PL_CLAMP(dst->x0, 0.0, dst_w)), .y0 = roundf(PL_CLAMP(dst->y0, 0.0, dst_h)), .x1 = roundf(PL_CLAMP(dst->x1, 0.0, dst_w)), .y1 = roundf(PL_CLAMP(dst->y1, 0.0, dst_h)), }; pass->dst_rect = (pl_rect2d) { dst->x0, dst->y0, dst->x1, dst->y1, }; return; } struct pl_frame *image = &pass->image; pl_rect2df *src = &image->crop; pass->src_ref = frame_ref(image); pl_tex src_ref = image->planes[pass->src_ref].texture; if ((!src->x0 && !src->x1) || (!src->y0 && !src->y1)) { src->x1 = src_ref->params.w; src->y1 = src_ref->params.h; }; // Compute end-to-end rotation pass->rotation = pl_rotation_normalize(image->rotation - target->rotation); pl_rect2df_rotate(dst, -pass->rotation); // normalize by counter-rotating if (pass->rotation % PL_ROTATION_180 == PL_ROTATION_90) PL_SWAP(dst_w, dst_h); // Keep track of whether the end-to-end rendering is flipped bool flipped_x = (src->x0 > src->x1) != (dst->x0 > dst->x1), flipped_y = (src->y0 > src->y1) != (dst->y0 > dst->y1); // Normalize both rects to make the math easier pl_rect2df_normalize(src); pl_rect2df_normalize(dst); // Round the output rect and clip it to the framebuffer dimensions float rx0 = roundf(PL_CLAMP(dst->x0, 0.0, dst_w)), ry0 = roundf(PL_CLAMP(dst->y0, 0.0, dst_h)), rx1 = roundf(PL_CLAMP(dst->x1, 0.0, dst_w)), ry1 = roundf(PL_CLAMP(dst->y1, 0.0, dst_h)); // Adjust the src rect corresponding to the rounded crop float scale_x = pl_rect_w(*src) / pl_rect_w(*dst), scale_y = pl_rect_h(*src) / pl_rect_h(*dst), base_x = 
src->x0, base_y = src->y0; src->x0 = base_x + (rx0 - dst->x0) * scale_x; src->x1 = base_x + (rx1 - dst->x0) * scale_x; src->y0 = base_y + (ry0 - dst->y0) * scale_y; src->y1 = base_y + (ry1 - dst->y0) * scale_y; // Update dst_rect to the rounded values and re-apply flip if needed. We // always do this in the `dst` rather than the `src` because this allows // e.g. polar sampling compute shaders to work. *dst = (pl_rect2df) { .x0 = flipped_x ? rx1 : rx0, .y0 = flipped_y ? ry1 : ry0, .x1 = flipped_x ? rx0 : rx1, .y1 = flipped_y ? ry0 : ry1, }; // Copies of the above, for convenience pass->ref_rect = *src; pass->dst_rect = (pl_rect2d) { dst->x0, dst->y0, dst->x1, dst->y1, }; } static void fix_frame(struct pl_frame *frame) { pl_tex tex = frame->planes[frame_ref(frame)].texture; if (frame->repr.sys == PL_COLOR_SYSTEM_XYZ) { // XYZ is implicitly converted to linear DCI-P3 in pl_color_repr_decode frame->color.primaries = PL_COLOR_PRIM_DCI_P3; frame->color.transfer = PL_COLOR_TRC_ST428; } // If the primaries are not known, guess them based on the resolution if (tex && !frame->color.primaries) frame->color.primaries = pl_color_primaries_guess(tex->params.w, tex->params.h); // For UNORM formats, we can infer the sampled bit depth from the texture // itself. This is ignored for other format types, because the logic // doesn't really work out for them anyways, and it's best not to do // anything too crazy unless the user provides explicit details. struct pl_bit_encoding *bits = &frame->repr.bits; if (!bits->sample_depth && tex && tex->params.format->type == PL_FMT_UNORM) { // Just assume the first component's depth is canonical. This works in // practice, since for cases like rgb565 we want to use the lower depth // anyway. Plus, every format has at least one component. bits->sample_depth = tex->params.format->component_depth[0]; // If we don't know the color depth, assume it spans the full range of // the texture. Otherwise, clamp it to the texture depth. bits->color_depth = PL_DEF(bits->color_depth, bits->sample_depth); bits->color_depth = PL_MIN(bits->color_depth, bits->sample_depth); // If the texture depth is higher than the known color depth, assume // the colors were left-shifted. bits->bit_shift += bits->sample_depth - bits->color_depth; } } static bool acquire_frame(struct pass_state *pass, struct pl_frame *frame, bool *acquired) { if (!frame || !frame->acquire || *acquired) return true; *acquired = true; return frame->acquire(pass->rr->gpu, frame); } static void release_frame(struct pass_state *pass, struct pl_frame *frame, bool *acquired) { if (frame && frame->release && *acquired) frame->release(pass->rr->gpu, frame); *acquired = false; } static void pass_uninit(struct pass_state *pass) { pl_renderer rr = pass->rr; pl_dispatch_abort(rr->dp, &pass->img.sh); release_frame(pass, &pass->next, &pass->acquired.next); release_frame(pass, &pass->prev, &pass->acquired.prev); release_frame(pass, &pass->image, &pass->acquired.image); release_frame(pass, &pass->target, &pass->acquired.target); pl_free_ptr(&pass->tmp); } static void icc_fallback(struct pass_state *pass, struct pl_frame *frame, struct icc_state *fallback) { if (!frame || frame->icc || !frame->profile.data) return; // Don't re-attempt opening already failed profiles if (fallback->error && fallback->error == frame->profile.signature) return; #ifdef PL_HAVE_LCMS pl_renderer rr = pass->rr; if (pl_icc_update(rr->log, &fallback->icc, &frame->profile, NULL)) { frame->icc = fallback->icc; } else { PL_WARN(rr, "Failed opening ICC profile...
ignoring"); fallback->error = frame->profile.signature; } #endif } static void pass_fix_frames(struct pass_state *pass) { pl_renderer rr = pass->rr; struct pl_frame *image = pass->src_ref < 0 ? NULL : &pass->image; struct pl_frame *target = &pass->target; fix_refs_and_rects(pass); // Fallback for older ICC profile API icc_fallback(pass, image, &rr->icc_fallback[ICC_IMAGE]); icc_fallback(pass, target, &rr->icc_fallback[ICC_TARGET]); // Force colorspace metadata to ICC profile values, if present if (image && image->icc) { image->color.primaries = image->icc->containing_primaries; image->color.hdr = image->icc->csp.hdr; if (image->icc->csp.transfer) image->color.transfer = image->icc->csp.transfer; } if (target->icc) { target->color.primaries = target->icc->containing_primaries; target->color.hdr = target->icc->csp.hdr; if (target->icc->csp.transfer) target->color.transfer = target->icc->csp.transfer; } // Infer the target color space info based on the image's if (image) { fix_frame(image); pl_color_space_infer_map(&image->color, &target->color); fix_frame(target); // do this only after infer_map } else { fix_frame(target); pl_color_space_infer(&target->color); } // Detect the presence of an alpha channel in the frames and explicitly // default the alpha mode in this case, so we can use it to detect whether // or not to strip the alpha channel during rendering. // // Note the different defaults for the image and target, because files // are usually independent but windowing systems usually expect // premultiplied. (We also premultiply for internal rendering, so this // way of doing it avoids a possible division-by-zero path!) if (image && !image->repr.alpha) { image->repr.alpha = PL_ALPHA_NONE; for (int i = 0; i < image->num_planes; i++) { const struct pl_plane *plane = &image->planes[i]; for (int c = 0; c < plane->components; c++) { if (plane->component_mapping[c] == PL_CHANNEL_A) image->repr.alpha = PL_ALPHA_INDEPENDENT; } } } if (!target->repr.alpha) { target->repr.alpha = PL_ALPHA_NONE; for (int i = 0; i < target->num_planes; i++) { const struct pl_plane *plane = &target->planes[i]; for (int c = 0; c < plane->components; c++) { if (plane->component_mapping[c] == PL_CHANNEL_A) target->repr.alpha = PL_ALPHA_PREMULTIPLIED; } } } } void pl_frames_infer(pl_renderer rr, struct pl_frame *image, struct pl_frame *target) { struct pass_state pass = { .rr = rr, .image = *image, .target = *target, }; pass_fix_frames(&pass); *image = pass.image; *target = pass.target; } static bool pass_init(struct pass_state *pass, bool acquire_image) { struct pl_frame *image = pass->src_ref < 0 ? NULL : &pass->image; struct pl_frame *target = &pass->target; if (!acquire_frame(pass, target, &pass->acquired.target)) goto error; if (acquire_image && image) { if (!acquire_frame(pass, image, &pass->acquired.image)) goto error; const struct pl_render_params *params = pass->params; const struct pl_deinterlace_params *deint = params->deinterlace_params; bool needs_refs = image->field != PL_FIELD_NONE && deint && pl_deinterlace_needs_refs(deint->algo); if (image->prev && needs_refs) { // Move into local copy so we can acquire/release it pass->prev = *image->prev; image->prev = &pass->prev; if (!acquire_frame(pass, &pass->prev, &pass->acquired.prev)) goto error; } if (image->next && needs_refs) { pass->next = *image->next; image->next = &pass->next; if (!acquire_frame(pass, &pass->next, &pass->acquired.next)) goto error; } } if (!validate_structs(pass->rr, acquire_image ? 
image : NULL, target)) goto error; find_fbo_format(pass); pass_fix_frames(pass); pass->tmp = pl_tmp(NULL); return true; error: pass_uninit(pass); return false; } static void pass_begin_frame(struct pass_state *pass) { pl_renderer rr = pass->rr; const struct pl_render_params *params = pass->params; pl_dispatch_callback(rr->dp, pass, info_callback); pl_dispatch_reset_frame(rr->dp); for (int i = 0; i < params->num_hooks; i++) { if (params->hooks[i]->reset) params->hooks[i]->reset(params->hooks[i]->priv); } size_t size = rr->fbos.num * sizeof(bool); pass->fbos_used = pl_realloc(pass->tmp, pass->fbos_used, size); memset(pass->fbos_used, 0, size); } static bool draw_empty_overlays(pl_renderer rr, const struct pl_frame *ptarget, const struct pl_render_params *params) { clear_target(rr, ptarget, params); if (!ptarget->num_overlays) return true; struct pass_state pass = { .rr = rr, .params = params, .src_ref = -1, .target = *ptarget, .info.stage = PL_RENDER_STAGE_BLEND, .info.count = 0, }; if (!pass_init(&pass, false)) return false; pass_begin_frame(&pass); struct pl_frame *target = &pass.target; pl_tex ref = target->planes[pass.dst_ref].texture; for (int p = 0; p < target->num_planes; p++) { const struct pl_plane *plane = &target->planes[p]; // Math replicated from `pass_output_target` float rx = (float) plane->texture->params.w / ref->params.w, ry = (float) plane->texture->params.h / ref->params.h; float rrx = rx >= 1 ? roundf(rx) : 1.0 / roundf(1.0 / rx), rry = ry >= 1 ? roundf(ry) : 1.0 / roundf(1.0 / ry); float sx = plane->shift_x, sy = plane->shift_y; pl_transform2x2 tscale = { .mat = {{{ rrx, 0.0 }, { 0.0, rry }}}, .c = { -sx, -sy }, }; if (plane->flipped) { tscale.mat.m[1][1] = -tscale.mat.m[1][1]; tscale.c[1] += plane->texture->params.h; } draw_overlays(&pass, plane->texture, plane->components, plane->component_mapping, target->overlays, target->num_overlays, target->color, target->repr, &tscale); } pass_uninit(&pass); return true; } bool pl_render_image(pl_renderer rr, const struct pl_frame *pimage, const struct pl_frame *ptarget, const struct pl_render_params *params) { params = PL_DEF(params, &pl_render_default_params); pl_dispatch_mark_dynamic(rr->dp, params->dynamic_constants); if (!pimage) return draw_empty_overlays(rr, ptarget, params); struct pass_state pass = { .rr = rr, .params = params, .image = *pimage, .target = *ptarget, .info.stage = PL_RENDER_STAGE_FRAME, }; if (!pass_init(&pass, true)) return false; // No-op (empty crop) if (!pl_rect_w(pass.dst_rect) || !pl_rect_h(pass.dst_rect)) { pass_uninit(&pass); return draw_empty_overlays(rr, ptarget, params); } pass_begin_frame(&pass); if (!pass_read_image(&pass)) goto error; if (!pass_scale_main(&pass)) goto error; pass_convert_colors(&pass); if (!pass_output_target(&pass)) goto error; pass_uninit(&pass); return true; error: PL_ERR(rr, "Failed rendering image!"); pass_uninit(&pass); return false; } const struct pl_frame *pl_frame_mix_current(const struct pl_frame_mix *mix) { const struct pl_frame *cur = NULL; for (int i = 0; i < mix->num_frames; i++) { if (mix->timestamps[i] > 0.0f) break; cur = mix->frames[i]; } return cur; } const struct pl_frame *pl_frame_mix_nearest(const struct pl_frame_mix *mix) { if (!mix->num_frames) return NULL; const struct pl_frame *best = mix->frames[0]; float best_dist = fabsf(mix->timestamps[0]); for (int i = 1; i < mix->num_frames; i++) { float dist = fabsf(mix->timestamps[i]); if (dist < best_dist) { best = mix->frames[i]; best_dist = dist; continue; } else { break; } } return best; } struct 
params_info { uint64_t hash; bool trivial; }; static struct params_info render_params_info(const struct pl_render_params *params_orig) { struct pl_render_params params = *params_orig; struct params_info info = { .trivial = true, .hash = 0, }; #define HASH_PTR(ptr, def, ptr_trivial) \ do { \ if (ptr) { \ pl_hash_merge(&info.hash, pl_mem_hash(ptr, sizeof(*ptr))); \ info.trivial &= (ptr_trivial); \ ptr = NULL; \ } else if ((def) != NULL) { \ pl_hash_merge(&info.hash, pl_mem_hash(def, sizeof(*ptr))); \ } \ } while (0) #define HASH_FILTER(scaler) \ do { \ if ((scaler == &pl_filter_bilinear || scaler == &pl_filter_nearest) && \ params.skip_anti_aliasing) \ { \ /* treat as NULL */ \ } else if (scaler) { \ struct pl_filter_config filter = *scaler; \ HASH_PTR(filter.kernel, NULL, false); \ HASH_PTR(filter.window, NULL, false); \ pl_hash_merge(&info.hash, pl_var_hash(filter)); \ scaler = NULL; \ } \ } while (0) HASH_FILTER(params.upscaler); HASH_FILTER(params.downscaler); HASH_PTR(params.deband_params, NULL, false); HASH_PTR(params.sigmoid_params, NULL, false); HASH_PTR(params.deinterlace_params, NULL, false); HASH_PTR(params.cone_params, NULL, true); HASH_PTR(params.icc_params, &pl_icc_default_params, true); HASH_PTR(params.color_adjustment, &pl_color_adjustment_neutral, true); HASH_PTR(params.color_map_params, &pl_color_map_default_params, true); HASH_PTR(params.peak_detect_params, NULL, false); // Hash all hooks for (int i = 0; i < params.num_hooks; i++) { const struct pl_hook *hook = params.hooks[i]; if (hook->stages == PL_HOOK_OUTPUT) continue; // ignore hooks only relevant to pass_output_target pl_hash_merge(&info.hash, pl_var_hash(*hook)); info.trivial = false; } params.hooks = NULL; // Hash the LUT by only looking at the signature if (params.lut) { pl_hash_merge(&info.hash, params.lut->signature); info.trivial = false; params.lut = NULL; } #define CLEAR(field) field = (__typeof__(field)) {0} // Clear out fields only relevant to pl_render_image_mix CLEAR(params.frame_mixer); CLEAR(params.preserve_mixing_cache); CLEAR(params.skip_caching_single_frame); // Clear out fields only relevant to pass_output_target CLEAR(params.background); CLEAR(params.border); CLEAR(params.skip_target_clearing); CLEAR(params.blend_against_tiles); memset(params.background_color, 0, sizeof(params.background_color)); CLEAR(params.background_transparency); memset(params.tile_colors, 0, sizeof(params.tile_colors)); CLEAR(params.tile_size); CLEAR(params.blend_params); CLEAR(params.distort_params); CLEAR(params.dither_params); CLEAR(params.error_diffusion); CLEAR(params.force_dither); CLEAR(params.corner_rounding); // Clear out other irrelevant fields CLEAR(params.dynamic_constants); CLEAR(params.info_callback); CLEAR(params.info_priv); pl_hash_merge(&info.hash, pl_var_hash(params)); return info; } #define MAX_MIX_FRAMES 16 bool pl_render_image_mix(pl_renderer rr, const struct pl_frame_mix *images, const struct pl_frame *ptarget, const struct pl_render_params *params) { if (!images->num_frames) return pl_render_image(rr, NULL, ptarget, params); params = PL_DEF(params, &pl_render_default_params); struct params_info par_info = render_params_info(params); pl_dispatch_mark_dynamic(rr->dp, params->dynamic_constants); require(images->num_frames >= 1); require(images->vsync_duration > 0.0); for (int i = 0; i < images->num_frames - 1; i++) require(images->timestamps[i] <= images->timestamps[i+1]); const struct pl_frame *refimg = pl_frame_mix_nearest(images); struct pass_state pass = { .rr = rr, .params = params, .image = *refimg, 
.target = *ptarget, .info.stage = PL_RENDER_STAGE_BLEND, }; if (rr->errors & PL_RENDER_ERR_FRAME_MIXING) goto fallback; if (!pass_init(&pass, false)) return false; if (!pass.fbofmt[4]) goto fallback; const struct pl_frame *target = &pass.target; int out_w = abs(pl_rect_w(pass.dst_rect)), out_h = abs(pl_rect_h(pass.dst_rect)); if (!out_w || !out_h) goto fallback; int fidx = 0; struct cached_frame frames[MAX_MIX_FRAMES]; float weights[MAX_MIX_FRAMES]; float wsum = 0.0; // Garbage collect the cache by evicting all frames from the cache that are // not determined to still be required for (int i = 0; i < rr->frames.num; i++) rr->frames.elem[i].evict = true; // Blur frame mixer according to vsync ratio (source / display) struct pl_filter_config mixer; if (params->frame_mixer) { mixer = *params->frame_mixer; mixer.blur = PL_DEF(mixer.blur, 1.0); for (int i = 1; i < images->num_frames; i++) { if (images->timestamps[i] >= 0.0 && images->timestamps[i - 1] < 0) { float frame_dur = images->timestamps[i] - images->timestamps[i - 1]; if (images->vsync_duration > frame_dur && !params->skip_anti_aliasing) mixer.blur *= images->vsync_duration / frame_dur; break; } } } // Traverse the input frames and determine/prepare the ones we need bool single_frame = !params->frame_mixer || images->num_frames == 1; retry: for (int i = 0; i < images->num_frames; i++) { uint64_t sig = images->signatures[i]; float rts = images->timestamps[i]; const struct pl_frame *img = images->frames[i]; PL_TRACE(rr, "Considering image with signature 0x%llx, rts %f", (unsigned long long) sig, rts); // Combining images with different rotations is basically unfeasible if (pl_rotation_normalize(img->rotation - refimg->rotation)) { PL_TRACE(rr, " -> Skipping: incompatible rotation"); continue; } float weight; if (single_frame) { // Only render the refimg, ignore others if (img == refimg) { weight = 1.0; } else { PL_TRACE(rr, " -> Skipping: no frame mixer"); continue; } // For backwards compatibility, treat !kernel as oversample } else if (!mixer.kernel || mixer.kernel == &pl_filter_function_oversample) { // Compute the visible interval [rts, end] of this frame float end = i+1 < images->num_frames ? images->timestamps[i+1] : INFINITY; if (rts > images->vsync_duration || end < 0.0) { PL_TRACE(rr, " -> Skipping: no intersection with vsync"); continue; } else { rts = PL_MAX(rts, 0.0); end = PL_MIN(end, images->vsync_duration); pl_assert(end >= rts); } // Weight is the fraction of vsync interval that frame is visible weight = (end - rts) / images->vsync_duration; PL_TRACE(rr, " -> Frame [%f, %f] intersects [%f, %f] = weight %f", rts, end, 0.0, images->vsync_duration, weight); if (weight < mixer.kernel->params[0]) { PL_TRACE(rr, " (culling due to threshold)"); weight = 0.0; } } else { const float radius = pl_filter_radius_bound(&mixer); if (fabsf(rts) >= radius) { PL_TRACE(rr, " -> Skipping: outside filter radius (%f)", radius); continue; } // Weight is directly sampled from the filter weight = pl_filter_sample(&mixer, rts); PL_TRACE(rr, " -> Filter offset %f = weight %f", rts, weight); } struct cached_frame *f = NULL; for (int j = 0; j < rr->frames.num; j++) { if (rr->frames.elem[j].signature == sig) { f = &rr->frames.elem[j]; f->evict = false; break; } } // Skip frames with negligible contributions. Do this after the loop // above to make sure these frames don't get evicted just yet, and // also exclude the reference image from this optimization to ensure // that we always have at least one frame. 
const float cutoff = 1e-3; if (fabsf(weight) <= cutoff && img != refimg) { PL_TRACE(rr, " -> Skipping: weight (%f) below threshold (%f)", weight, cutoff); continue; } bool skip_cache = single_frame && (params->skip_caching_single_frame || par_info.trivial); if (!f && skip_cache) { PL_TRACE(rr, "Single frame not found in cache, bypassing"); goto fallback; } if (!f) { // Signature does not exist in the cache at all yet, // so grow the cache by this entry. PL_ARRAY_GROW(rr, rr->frames); f = &rr->frames.elem[rr->frames.num++]; *f = (struct cached_frame) { .signature = sig, }; } // Check to see if we can blindly reuse this cache entry. This is the // case if either the params are compatible, or the user doesn't care bool can_reuse = f->tex; bool strict_reuse = skip_cache || single_frame || !params->preserve_mixing_cache; if (can_reuse && strict_reuse) { can_reuse = f->tex->params.w == out_w && f->tex->params.h == out_h && pl_rect2d_eq(f->crop, img->crop) && f->params_hash == par_info.hash && pl_color_space_equal(&f->color, &target->color) && pl_icc_profile_equal(&f->profile, &target->profile); } if (!can_reuse && skip_cache) { PL_TRACE(rr, "Single frame cache entry invalid, bypassing"); goto fallback; } if (!can_reuse) { // If we can't reuse the entry, we need to re-render this frame PL_TRACE(rr, " -> Cached texture missing or invalid.. (re)creating"); if (!f->tex) { if (PL_ARRAY_POP(rr->frame_fbos, &f->tex)) pl_tex_invalidate(rr->gpu, f->tex); } bool ok = pl_tex_recreate(rr->gpu, &f->tex, pl_tex_params( .w = out_w, .h = out_h, .format = pass.fbofmt[4], .sampleable = true, .renderable = true, .blit_dst = pass.fbofmt[4]->caps & PL_FMT_CAP_BLITTABLE, .storable = pass.fbofmt[4]->caps & PL_FMT_CAP_STORABLE, )); if (!ok) { PL_ERR(rr, "Could not create intermediate texture for " "frame mixing.. 
disabling!"); rr->errors |= PL_RENDER_ERR_FRAME_MIXING; goto fallback; } struct pass_state inter_pass = { .rr = rr, .params = pass.params, .image = *img, .target = *ptarget, .info.stage = PL_RENDER_STAGE_FRAME, .acquired = pass.acquired, }; // Render a single frame up to `pass_output_target` memcpy(inter_pass.fbofmt, pass.fbofmt, sizeof(pass.fbofmt)); if (!pass_init(&inter_pass, true)) goto fail; pass_begin_frame(&inter_pass); if (!(ok = pass_read_image(&inter_pass))) goto inter_pass_error; if (!(ok = pass_scale_main(&inter_pass))) goto inter_pass_error; pass_convert_colors(&inter_pass); pl_assert(inter_pass.img.sh); // guaranteed by `pass_convert_colors` pl_shader_set_alpha(inter_pass.img.sh, &inter_pass.img.repr, PL_ALPHA_PREMULTIPLIED); // for frame mixing pl_assert(inter_pass.img.w == out_w && inter_pass.img.h == out_h); ok = pl_dispatch_finish(rr->dp, pl_dispatch_params( .shader = &inter_pass.img.sh, .target = f->tex, )); if (!ok) goto inter_pass_error; float sx = out_w / pl_rect_w(inter_pass.dst_rect), sy = out_h / pl_rect_h(inter_pass.dst_rect); pl_transform2x2 shift = { .mat.m = {{ sx, 0, }, { 0, sy, }}, .c = { -sx * inter_pass.dst_rect.x0, -sy * inter_pass.dst_rect.y0 }, }; if (inter_pass.rotation % PL_ROTATION_180 == PL_ROTATION_90) { PL_SWAP(shift.mat.m[0][0], shift.mat.m[0][1]); PL_SWAP(shift.mat.m[1][0], shift.mat.m[1][1]); } draw_overlays(&inter_pass, f->tex, inter_pass.img.comps, NULL, inter_pass.image.overlays, inter_pass.image.num_overlays, inter_pass.img.color, inter_pass.img.repr, &shift); f->params_hash = par_info.hash; f->crop = img->crop; f->color = inter_pass.img.color; f->comps = inter_pass.img.comps; f->profile = target->profile; // fall through inter_pass_error: inter_pass.acquired.target = false; // don't release target pass_uninit(&inter_pass); if (!ok) goto fail; } pl_assert(fidx < MAX_MIX_FRAMES); frames[fidx] = *f; weights[fidx] = weight; wsum += weight; fidx++; } // Evict the frames we *don't* need for (int i = 0; i < rr->frames.num; ) { if (rr->frames.elem[i].evict) { PL_TRACE(rr, "Evicting frame with signature %llx from cache", (unsigned long long) rr->frames.elem[i].signature); PL_ARRAY_APPEND(rr, rr->frame_fbos, rr->frames.elem[i].tex); PL_ARRAY_REMOVE_AT(rr->frames, i); continue; } else { i++; } } // If we got back no frames, retry with ZOH semantics if (!fidx) { pl_assert(!single_frame); single_frame = true; goto retry; } // Sample and mix the output color pass_begin_frame(&pass); pass.info.count = fidx; pl_assert(fidx > 0); pl_shader sh = pl_dispatch_begin(rr->dp); sh_describef(sh, "frame mixing (%d frame%s)", fidx, fidx > 1 ? "s" : ""); sh->output = PL_SHADER_SIG_COLOR; sh->output_w = out_w; sh->output_h = out_h; GLSL("vec4 color; \n" "// pl_render_image_mix \n" "{ \n" "vec4 mix_color = vec4(0.0); \n"); int comps = 0; for (int i = 0; i < fidx; i++) { const struct pl_tex_params *tpars = &frames[i].tex->params; // Use linear sampling if desired and possible enum pl_tex_sample_mode sample_mode = PL_TEX_SAMPLE_NEAREST; if ((tpars->w != out_w || tpars->h != out_h) && (tpars->format->caps & PL_FMT_CAP_LINEAR)) { sample_mode = PL_TEX_SAMPLE_LINEAR; } ident_t pos, tex = sh_bind(sh, frames[i].tex, PL_TEX_ADDRESS_CLAMP, sample_mode, "frame", NULL, &pos, NULL); GLSL("color = textureLod("$", "$", 0.0); \n", tex, pos); // Note: This ignores differences in ICC profile, which we decide to // just simply not care about. 
Doing that properly would require // converting between different image profiles, and the headache of // finagling that state is just not worth it because this is an // exceptionally unlikely hypothetical. // // This also ignores differences in HDR metadata, which we deliberately // ignore because it causes aggressive shader recompilation. struct pl_color_space frame_csp = frames[i].color; struct pl_color_space mix_csp = target->color; frame_csp.hdr = mix_csp.hdr = (struct pl_hdr_metadata) {0}; pl_shader_color_map_ex(sh, NULL, pl_color_map_args(frame_csp, mix_csp)); float weight = weights[i] / wsum; GLSL("mix_color += vec4("$") * color; \n", SH_FLOAT_DYN(weight)); comps = PL_MAX(comps, frames[i].comps); } GLSL("color = mix_color; \n" "} \n"); // Dispatch this to the destination pass.img = (struct img) { .sh = sh, .w = out_w, .h = out_h, .comps = comps, .color = target->color, .repr = { .sys = PL_COLOR_SYSTEM_RGB, .levels = PL_COLOR_LEVELS_PC, .alpha = comps >= 4 ? PL_ALPHA_PREMULTIPLIED : PL_ALPHA_NONE, }, }; if (!pass_output_target(&pass)) goto fallback; pass_uninit(&pass); return true; fail: PL_ERR(rr, "Could not render image for frame mixing.. disabling!"); rr->errors |= PL_RENDER_ERR_FRAME_MIXING; // fall through fallback: pass_uninit(&pass); return pl_render_image(rr, refimg, ptarget, params); error: // for parameter validation failures return false; } void pl_frames_infer_mix(pl_renderer rr, const struct pl_frame_mix *mix, struct pl_frame *target, struct pl_frame *out_ref) { struct pass_state pass = { .rr = rr, .target = *target, }; const struct pl_frame *refimg = pl_frame_mix_nearest(mix); if (refimg) { pass.image = *refimg; } else { pass.src_ref = -1; } pass_fix_frames(&pass); *target = pass.target; if (out_ref) *out_ref = pass.image; } void pl_frame_set_chroma_location(struct pl_frame *frame, enum pl_chroma_location chroma_loc) { pl_tex ref = frame->planes[frame_ref(frame)].texture; if (ref) { // Texture dimensions are already known, so apply the chroma location // only to subsampled planes int ref_w = ref->params.w, ref_h = ref->params.h; for (int i = 0; i < frame->num_planes; i++) { struct pl_plane *plane = &frame->planes[i]; pl_tex tex = plane->texture; bool subsampled = tex->params.w < ref_w || tex->params.h < ref_h; if (subsampled) pl_chroma_location_offset(chroma_loc, &plane->shift_x, &plane->shift_y); } } else { // Texture dimensions are not yet known, so apply the chroma location // to all chroma planes, regardless of subsampling for (int i = 0; i < frame->num_planes; i++) { struct pl_plane *plane = &frame->planes[i]; if (detect_plane_type(plane, &frame->repr) == PLANE_CHROMA) pl_chroma_location_offset(chroma_loc, &plane->shift_x, &plane->shift_y); } } } void pl_frame_from_swapchain(struct pl_frame *out_frame, const struct pl_swapchain_frame *frame) { pl_tex fbo = frame->fbo; int num_comps = fbo->params.format->num_components; if (frame->color_repr.alpha == PL_ALPHA_NONE) num_comps = PL_MIN(num_comps, 3); *out_frame = (struct pl_frame) { .num_planes = 1, .planes = {{ .texture = fbo, .flipped = frame->flipped, .components = num_comps, .component_mapping = {0, 1, 2, 3}, }}, .crop = { 0, 0, fbo->params.w, fbo->params.h }, .repr = frame->color_repr, .color = frame->color_space, }; } bool pl_frame_is_cropped(const struct pl_frame *frame) { int x0 = roundf(PL_MIN(frame->crop.x0, frame->crop.x1)), y0 = roundf(PL_MIN(frame->crop.y0, frame->crop.y1)), x1 = roundf(PL_MAX(frame->crop.x0, frame->crop.x1)), y1 = roundf(PL_MAX(frame->crop.y0, frame->crop.y1)); pl_tex ref = 
frame->planes[frame_ref(frame)].texture; pl_assert(ref); if (!x0 && !x1) x1 = ref->params.w; if (!y0 && !y1) y1 = ref->params.h; return x0 > 0 || y0 > 0 || x1 < ref->params.w || y1 < ref->params.h; } void pl_frame_clear_tiles(pl_gpu gpu, const struct pl_frame *frame, const float tile_colors[2][3], int tile_size) { struct pl_color_repr repr = frame->repr; pl_transform3x3 tr = pl_color_repr_decode(&repr, NULL); pl_transform3x3_invert(&tr); float encoded[2][3]; memcpy(encoded, tile_colors, sizeof(encoded)); pl_transform3x3_apply(&tr, encoded[0]); pl_transform3x3_apply(&tr, encoded[1]); pl_tex ref = frame->planes[frame_ref(frame)].texture; for (int p = 0; p < frame->num_planes; p++) { const struct pl_plane *plane = &frame->planes[p]; float tiles[2][3] = {0}; for (int c = 0; c < plane->components; c++) { int ch = plane->component_mapping[c]; if (ch >= 0 && ch < 3) { tiles[0][c] = encoded[0][plane->component_mapping[c]]; tiles[1][c] = encoded[1][plane->component_mapping[c]]; } } float rx = (float) plane->texture->params.w / ref->params.w, ry = (float) plane->texture->params.h / ref->params.h; float rrx = rx >= 1 ? roundf(rx) : 1.0 / roundf(1.0 / rx), rry = ry >= 1 ? roundf(ry) : 1.0 / roundf(1.0 / ry); int size_x = tile_size * rrx, size_y = tile_size * rry; pl_dispatch dp = pl_gpu_dispatch(gpu); pl_shader sh = pl_dispatch_begin(dp); sh->output = PL_SHADER_SIG_COLOR; GLSL("// pl_frame_clear_tiles (plane %d) \n" "vec4 color; \n" "vec2 outcoord = gl_FragCoord.xy * vec2("$", "$"); \n" "bvec2 tile = lessThan(fract(outcoord), vec2(0.5)); \n" "color.rgb = tile.x == tile.y ? vec3("$", "$", "$") \n" " : vec3("$", "$", "$"); \n" "color.a = 1.0; \n", p, SH_FLOAT(1.0 / size_x), SH_FLOAT(1.0 / size_y), SH_FLOAT(tiles[0][0]), SH_FLOAT(tiles[0][1]), SH_FLOAT(tiles[0][2]), SH_FLOAT(tiles[1][0]), SH_FLOAT(tiles[1][1]), SH_FLOAT(tiles[1][2])); pl_dispatch_finish(dp, pl_dispatch_params( .shader = &sh, .target = plane->texture, )); } } void pl_frame_clear_rgba(pl_gpu gpu, const struct pl_frame *frame, const float rgba[4]) { struct pl_color_repr repr = frame->repr; pl_transform3x3 tr = pl_color_repr_decode(&repr, NULL); pl_transform3x3_invert(&tr); float encoded[3] = { rgba[0], rgba[1], rgba[2] }; pl_transform3x3_apply(&tr, encoded); float mult = frame->repr.alpha == PL_ALPHA_PREMULTIPLIED ? 
rgba[3] : 1.0; for (int p = 0; p < frame->num_planes; p++) { const struct pl_plane *plane = &frame->planes[p]; float clear[4] = { 0.0, 0.0, 0.0, rgba[3] }; for (int c = 0; c < plane->components; c++) { int ch = plane->component_mapping[c]; if (ch >= 0 && ch < 3) clear[c] = mult * encoded[plane->component_mapping[c]]; } pl_tex_clear(gpu, plane->texture, clear); } } struct pl_render_errors pl_renderer_get_errors(pl_renderer rr) { return (struct pl_render_errors) { .errors = rr->errors, .disabled_hooks = rr->disabled_hooks.elem, .num_disabled_hooks = rr->disabled_hooks.num, }; } void pl_renderer_reset_errors(pl_renderer rr, const struct pl_render_errors *errors) { if (!errors) { // Reset everything rr->errors = PL_RENDER_ERR_NONE; rr->disabled_hooks.num = 0; return; } // Reset only requested errors rr->errors &= ~errors->errors; // Not clearing hooks if (!(errors->errors & PL_RENDER_ERR_HOOKS)) goto done; // Remove all hook signatures if (!errors->num_disabled_hooks) { rr->disabled_hooks.num = 0; goto done; } // At this point we require valid array of hooks if (!errors->disabled_hooks) { assert(errors->disabled_hooks); goto done; } for (int i = 0; i < errors->num_disabled_hooks; i++) { for (int j = 0; j < rr->disabled_hooks.num; j++) { // Remove only requested hook signatures if (rr->disabled_hooks.elem[j] == errors->disabled_hooks[i]) { PL_ARRAY_REMOVE_AT(rr->disabled_hooks, j); break; } } } done: if (rr->disabled_hooks.num) rr->errors |= PL_RENDER_ERR_HOOKS; return; } libplacebo-v7.349.0/src/shaders.c000066400000000000000000000707341463457750100165730ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . 
*/ #include #include #include "common.h" #include "log.h" #include "shaders.h" pl_shader_info pl_shader_info_ref(pl_shader_info pinfo) { struct sh_info *info = (struct sh_info *) pinfo; if (!info) return NULL; pl_rc_ref(&info->rc); return &info->info; } void pl_shader_info_deref(pl_shader_info *pinfo) { struct sh_info *info = (struct sh_info *) *pinfo; if (!info) return; if (pl_rc_deref(&info->rc)) pl_free(info); *pinfo = NULL; } static struct sh_info *sh_info_alloc(void *alloc) { struct sh_info *info = pl_zalloc_ptr(alloc, info); info->tmp = pl_tmp(info); pl_rc_init(&info->rc); return info; } // Re-use `sh_info` allocation if possible, allocate new otherwise static struct sh_info *sh_info_recycle(struct sh_info *info) { if (!pl_rc_deref(&info->rc)) return sh_info_alloc(NULL); memset(&info->info, 0, sizeof(info->info)); // reset public fields pl_free_children(info->tmp); pl_rc_ref(&info->rc); info->desc.len = 0; info->steps.num = 0; return info; } static uint8_t reverse_bits(uint8_t x) { static const uint8_t reverse_nibble[16] = { 0x0, 0x8, 0x4, 0xc, 0x2, 0xa, 0x6, 0xe, 0x1, 0x9, 0x5, 0xd, 0x3, 0xb, 0x7, 0xf, }; return reverse_nibble[x & 0xF] << 4 | reverse_nibble[x >> 4]; } static void init_shader(pl_shader sh, const struct pl_shader_params *params) { if (params) { sh->info->info.params = *params; // To avoid collisions for shaders with very high number of // identifiers, pack the shader ID into the highest bits (MSB -> LSB) pl_static_assert(sizeof(sh->prefix) > sizeof(params->id)); const int shift = 8 * (sizeof(sh->prefix) - sizeof(params->id)); sh->prefix = reverse_bits(params->id) << shift; } sh->name = sh_fresh(sh, "main"); } pl_shader pl_shader_alloc(pl_log log, const struct pl_shader_params *params) { static const int glsl_ver_req = 130; if (params && params->glsl.version && params->glsl.version < 130) { pl_err(log, "Requested GLSL version %d too low (required: %d)", params->glsl.version, glsl_ver_req); return NULL; } pl_shader sh = pl_alloc_ptr(NULL, sh); *sh = (struct pl_shader_t) { .log = log, .tmp = pl_tmp(sh), .info = sh_info_alloc(NULL), .mutable = true, }; for (int i = 0; i < PL_ARRAY_SIZE(sh->buffers); i++) sh->buffers[i] = pl_str_builder_alloc(sh); init_shader(sh, params); return sh; } static void sh_obj_deref(pl_shader_obj obj); void sh_deref(pl_shader sh) { pl_free_children(sh->tmp); for (int i = 0; i < sh->obj.num; i++) sh_obj_deref(sh->obj.elem[i]); sh->obj.num = 0; } void pl_shader_free(pl_shader *psh) { pl_shader sh = *psh; if (!sh) return; sh_deref(sh); pl_shader_info_deref((pl_shader_info *) &sh->info); pl_free_ptr(psh); } void pl_shader_reset(pl_shader sh, const struct pl_shader_params *params) { sh_deref(sh); struct pl_shader_t new = { .log = sh->log, .tmp = sh->tmp, .info = sh_info_recycle(sh->info), .data.buf = sh->data.buf, .mutable = true, // Preserve array allocations .obj.elem = sh->obj.elem, .vas.elem = sh->vas.elem, .vars.elem = sh->vars.elem, .descs.elem = sh->descs.elem, .consts.elem = sh->consts.elem, }; // Preserve buffer allocations memcpy(new.buffers, sh->buffers, sizeof(new.buffers)); for (int i = 0; i < PL_ARRAY_SIZE(new.buffers); i++) pl_str_builder_reset(new.buffers[i]); *sh = new; init_shader(sh, params); } static void *sh_alloc(pl_shader sh, size_t size, size_t align) { const size_t offset = PL_ALIGN2(sh->data.len, align); const size_t req_size = offset + size; if (req_size <= pl_get_size(sh->data.buf)) { sh->data.len = offset + size; return sh->data.buf + offset; } // We can't realloc this buffer because various pointers will be left // 
dangling, so just reparent it onto `sh->tmp` (so it will be cleaned // up when the shader is next reset) and allocate a new, larger buffer // in its place const size_t new_size = PL_MAX(req_size << 1, 256); pl_steal(sh->tmp, sh->data.buf); sh->data.buf = pl_alloc(sh, new_size); sh->data.len = size; return sh->data.buf; } static void *sh_memdup(pl_shader sh, const void *data, size_t size, size_t align) { if (!size) return NULL; void *dst = sh_alloc(sh, size, align); assert(data); memcpy(dst, data, size); return dst; } bool pl_shader_is_failed(const pl_shader sh) { return sh->failed; } struct pl_glsl_version sh_glsl(const pl_shader sh) { if (SH_PARAMS(sh).glsl.version) return SH_PARAMS(sh).glsl; if (SH_GPU(sh)) return SH_GPU(sh)->glsl; return (struct pl_glsl_version) { .version = 130 }; } bool sh_try_compute(pl_shader sh, int bw, int bh, bool flex, size_t mem) { pl_assert(bw && bh); int *sh_bw = &sh->group_size[0]; int *sh_bh = &sh->group_size[1]; struct pl_glsl_version glsl = sh_glsl(sh); if (!glsl.compute) { PL_TRACE(sh, "Disabling compute shader due to missing `compute` support"); return false; } if (sh->shmem + mem > glsl.max_shmem_size) { PL_TRACE(sh, "Disabling compute shader due to insufficient shmem"); return false; } if (sh->type == SH_FRAGMENT) { PL_TRACE(sh, "Disabling compute shader because shader is already marked " "as fragment shader"); return false; } if (bw > glsl.max_group_size[0] || bh > glsl.max_group_size[1] || (bw * bh) > glsl.max_group_threads) { if (!flex) { PL_TRACE(sh, "Disabling compute shader due to exceeded group " "thread count."); return false; } else { // Pick better group sizes bw = PL_MIN(bw, glsl.max_group_size[0]); bh = glsl.max_group_threads / bw; } } sh->shmem += mem; // If the current shader is either not a compute shader, or we have no // choice but to override the metadata, always do so if (sh->type != SH_COMPUTE || (sh->flexible_work_groups && !flex)) { *sh_bw = bw; *sh_bh = bh; sh->type = SH_COMPUTE; sh->flexible_work_groups = flex; return true; } // If both shaders are flexible, pick the larger of the two if (sh->flexible_work_groups && flex) { *sh_bw = PL_MAX(*sh_bw, bw); *sh_bh = PL_MAX(*sh_bh, bh); pl_assert(*sh_bw * *sh_bh <= glsl.max_group_threads); return true; } // At this point we're looking only at a non-flexible compute shader pl_assert(sh->type == SH_COMPUTE && !sh->flexible_work_groups); if (!flex) { // Ensure parameters match if (bw != *sh_bw || bh != *sh_bh) { PL_TRACE(sh, "Disabling compute shader due to incompatible group " "sizes %dx%d and %dx%d", *sh_bw, *sh_bh, bw, bh); sh->shmem -= mem; return false; } } return true; } bool pl_shader_is_compute(const pl_shader sh) { return sh->type == SH_COMPUTE; } bool pl_shader_output_size(const pl_shader sh, int *w, int *h) { if (!sh->output_w || !sh->output_h) return false; *w = sh->transpose ? sh->output_h : sh->output_w; *h = sh->transpose ? 
sh->output_w : sh->output_h; return true; } ident_t sh_fresh(pl_shader sh, const char *name) { unsigned short id = ++sh->fresh; assert(!(sh->prefix & id)); id |= sh->prefix; assert(name); return sh_mkident(id, name); } static inline ident_t sh_fresh_name(pl_shader sh, const char **pname) { ident_t id = sh_fresh(sh, *pname); *pname = sh_ident_pack(id); return id; } ident_t sh_var(pl_shader sh, struct pl_shader_var sv) { ident_t id = sh_fresh_name(sh, &sv.var.name); struct pl_var_layout layout = pl_var_host_layout(0, &sv.var); sv.data = sh_memdup(sh, sv.data, layout.size, layout.stride); PL_ARRAY_APPEND(sh, sh->vars, sv); return id; } ident_t sh_var_int(pl_shader sh, const char *name, int val, bool dynamic) { return sh_var(sh, (struct pl_shader_var) { .var = pl_var_int(name), .data = &val, .dynamic = dynamic, }); } ident_t sh_var_uint(pl_shader sh, const char *name, unsigned int val, bool dynamic) { return sh_var(sh, (struct pl_shader_var) { .var = pl_var_uint(name), .data = &val, .dynamic = dynamic, }); } ident_t sh_var_float(pl_shader sh, const char *name, float val, bool dynamic) { return sh_var(sh, (struct pl_shader_var) { .var = pl_var_float(name), .data = &val, .dynamic = dynamic, }); } ident_t sh_var_mat3(pl_shader sh, const char *name, pl_matrix3x3 val) { return sh_var(sh, (struct pl_shader_var) { .var = pl_var_mat3(name), .data = PL_TRANSPOSE_3X3(val.m), }); } ident_t sh_desc(pl_shader sh, struct pl_shader_desc sd) { switch (sd.desc.type) { case PL_DESC_BUF_UNIFORM: case PL_DESC_BUF_STORAGE: for (int i = 0; i < sh->descs.num; i++) // ensure uniqueness pl_assert(sh->descs.elem[i].binding.object != sd.binding.object); size_t bsize = sizeof(sd.buffer_vars[0]) * sd.num_buffer_vars; sd.buffer_vars = sh_memdup(sh, sd.buffer_vars, bsize, alignof(struct pl_buffer_var)); for (int i = 0; i < sd.num_buffer_vars; i++) { struct pl_var *bv = &sd.buffer_vars[i].var; const char *name = bv->name; GLSLP("#define %s "$"\n", name, sh_fresh_name(sh, &bv->name)); } break; case PL_DESC_BUF_TEXEL_UNIFORM: case PL_DESC_BUF_TEXEL_STORAGE: case PL_DESC_SAMPLED_TEX: case PL_DESC_STORAGE_IMG: pl_assert(!sd.num_buffer_vars); break; case PL_DESC_INVALID: case PL_DESC_TYPE_COUNT: pl_unreachable(); } ident_t id = sh_fresh_name(sh, &sd.desc.name); PL_ARRAY_APPEND(sh, sh->descs, sd); return id; } ident_t sh_const(pl_shader sh, struct pl_shader_const sc) { if (SH_PARAMS(sh).dynamic_constants && !sc.compile_time) { return sh_var(sh, (struct pl_shader_var) { .var = { .name = sc.name, .type = sc.type, .dim_v = 1, .dim_m = 1, .dim_a = 1, }, .data = sc.data, }); } ident_t id = sh_fresh_name(sh, &sc.name); pl_gpu gpu = SH_GPU(sh); if (gpu && gpu->limits.max_constants) { if (!sc.compile_time || gpu->limits.array_size_constants) { size_t size = pl_var_type_size(sc.type); sc.data = sh_memdup(sh, sc.data, size, size); PL_ARRAY_APPEND(sh, sh->consts, sc); return id; } } // Fallback for GPUs without specialization constants switch (sc.type) { case PL_VAR_SINT: GLSLH("const int "$" = %d; \n", id, *(int *) sc.data); return id; case PL_VAR_UINT: GLSLH("const uint "$" = uint(%u); \n", id, *(unsigned int *) sc.data); return id; case PL_VAR_FLOAT: GLSLH("const float "$" = float(%f); \n", id, *(float *) sc.data); return id; case PL_VAR_INVALID: case PL_VAR_TYPE_COUNT: break; } pl_unreachable(); } ident_t sh_const_int(pl_shader sh, const char *name, int val) { return sh_const(sh, (struct pl_shader_const) { .type = PL_VAR_SINT, .name = name, .data = &val, }); } ident_t sh_const_uint(pl_shader sh, const char *name, unsigned int val) { 
return sh_const(sh, (struct pl_shader_const) { .type = PL_VAR_UINT, .name = name, .data = &val, }); } ident_t sh_const_float(pl_shader sh, const char *name, float val) { return sh_const(sh, (struct pl_shader_const) { .type = PL_VAR_FLOAT, .name = name, .data = &val, }); } ident_t sh_attr(pl_shader sh, struct pl_shader_va sva) { const size_t vsize = sva.attr.fmt->texel_size; uint8_t *data = sh_alloc(sh, vsize * 4, vsize); for (int i = 0; i < 4; i++) { memcpy(data, sva.data[i], vsize); sva.data[i] = data; data += vsize; } ident_t id = sh_fresh_name(sh, &sva.attr.name); PL_ARRAY_APPEND(sh, sh->vas, sva); return id; } ident_t sh_attr_vec2(pl_shader sh, const char *name, const pl_rect2df *rc) { pl_gpu gpu = SH_GPU(sh); if (!gpu) { SH_FAIL(sh, "Failed adding vertex attr '%s': No GPU available!", name); return NULL_IDENT; } pl_fmt fmt = pl_find_vertex_fmt(gpu, PL_FMT_FLOAT, 2); if (!fmt) { SH_FAIL(sh, "Failed adding vertex attr '%s': no vertex fmt!", name); return NULL_IDENT; } float verts[4][2] = { { rc->x0, rc->y0 }, { rc->x1, rc->y0 }, { rc->x0, rc->y1 }, { rc->x1, rc->y1 }, }; return sh_attr(sh, (struct pl_shader_va) { .attr = { .name = name, .fmt = fmt, }, .data = { verts[0], verts[1], verts[2], verts[3] }, }); } ident_t sh_bind(pl_shader sh, pl_tex tex, enum pl_tex_address_mode address_mode, enum pl_tex_sample_mode sample_mode, const char *name, const pl_rect2df *rect, ident_t *out_pos, ident_t *out_pt) { if (pl_tex_params_dimension(tex->params) != 2) { SH_FAIL(sh, "Failed binding texture '%s': not a 2D texture!", name); return NULL_IDENT; } if (!tex->params.sampleable) { SH_FAIL(sh, "Failed binding texture '%s': texture not sampleable!", name); return NULL_IDENT; } ident_t itex = sh_desc(sh, (struct pl_shader_desc) { .desc = { .name = name, .type = PL_DESC_SAMPLED_TEX, }, .binding = { .object = tex, .address_mode = address_mode, .sample_mode = sample_mode, }, }); float sx, sy; if (tex->sampler_type == PL_SAMPLER_RECT) { sx = 1.0; sy = 1.0; } else { sx = 1.0 / tex->params.w; sy = 1.0 / tex->params.h; } if (out_pos) { pl_rect2df full = { .x1 = tex->params.w, .y1 = tex->params.h, }; rect = PL_DEF(rect, &full); *out_pos = sh_attr_vec2(sh, "tex_coord", &(pl_rect2df) { .x0 = sx * rect->x0, .y0 = sy * rect->y0, .x1 = sx * rect->x1, .y1 = sy * rect->y1, }); } if (out_pt) { *out_pt = sh_var(sh, (struct pl_shader_var) { .var = pl_var_vec2("tex_pt"), .data = &(float[2]) {sx, sy}, }); } return itex; } bool sh_buf_desc_append(void *alloc, pl_gpu gpu, struct pl_shader_desc *buf_desc, struct pl_var_layout *out_layout, const struct pl_var new_var) { struct pl_buffer_var bv = { .var = new_var }; size_t cur_size = sh_buf_desc_size(buf_desc); switch (buf_desc->desc.type) { case PL_DESC_BUF_UNIFORM: bv.layout = pl_std140_layout(cur_size, &new_var); if (bv.layout.offset + bv.layout.size > gpu->limits.max_ubo_size) return false; break; case PL_DESC_BUF_STORAGE: bv.layout = pl_std430_layout(cur_size, &new_var); if (bv.layout.offset + bv.layout.size > gpu->limits.max_ssbo_size) return false; break; case PL_DESC_INVALID: case PL_DESC_SAMPLED_TEX: case PL_DESC_STORAGE_IMG: case PL_DESC_BUF_TEXEL_UNIFORM: case PL_DESC_BUF_TEXEL_STORAGE: case PL_DESC_TYPE_COUNT: pl_unreachable(); } if (out_layout) *out_layout = bv.layout; PL_ARRAY_APPEND_RAW(alloc, buf_desc->buffer_vars, buf_desc->num_buffer_vars, bv); return true; } size_t sh_buf_desc_size(const struct pl_shader_desc *buf_desc) { if (!buf_desc->num_buffer_vars) return 0; const struct pl_buffer_var *last; last = &buf_desc->buffer_vars[buf_desc->num_buffer_vars - 1]; 
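    // The effective buffer size is simply where the last appended member ends:
    // its (std140/std430) layout offset plus its size.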
return last->layout.offset + last->layout.size; } void sh_describef(pl_shader sh, const char *fmt, ...) { va_list ap; va_start(ap, fmt); sh_describe(sh, pl_vasprintf(sh->info->tmp, fmt, ap)); va_end(ap); } static const char *insigs[] = { [PL_SHADER_SIG_NONE] = "", [PL_SHADER_SIG_COLOR] = "vec4 color", }; static const char *outsigs[] = { [PL_SHADER_SIG_NONE] = "void", [PL_SHADER_SIG_COLOR] = "vec4", }; static const char *retvals[] = { [PL_SHADER_SIG_NONE] = "", [PL_SHADER_SIG_COLOR] = "return color;", }; // libplacebo currently only allows 2D samplers for shader signatures static const char *samplers2D[] = { [PL_SAMPLER_NORMAL] = "sampler2D", [PL_SAMPLER_RECT] = "sampler2DRect", [PL_SAMPLER_EXTERNAL] = "samplerExternalOES", }; ident_t sh_subpass(pl_shader sh, pl_shader sub) { pl_assert(sh->mutable); if (sh->prefix == sub->prefix) { PL_TRACE(sh, "Can't merge shaders: conflicting identifiers!"); return NULL_IDENT; } // Check for shader compatibility int res_w = PL_DEF(sh->output_w, sub->output_w), res_h = PL_DEF(sh->output_h, sub->output_h); if ((sub->output_w && res_w != sub->output_w) || (sub->output_h && res_h != sub->output_h)) { PL_TRACE(sh, "Can't merge shaders: incompatible sizes: %dx%d and %dx%d", sh->output_w, sh->output_h, sub->output_w, sub->output_h); return NULL_IDENT; } if (sub->type == SH_COMPUTE) { int subw = sub->group_size[0], subh = sub->group_size[1]; bool flex = sub->flexible_work_groups; if (!sh_try_compute(sh, subw, subh, flex, sub->shmem)) { PL_TRACE(sh, "Can't merge shaders: incompatible block sizes or " "exceeded shared memory resource capabilities"); return NULL_IDENT; } } sh->output_w = res_w; sh->output_h = res_h; // Append the prelude and header pl_str_builder_concat(sh->buffers[SH_BUF_PRELUDE], sub->buffers[SH_BUF_PRELUDE]); pl_str_builder_concat(sh->buffers[SH_BUF_HEADER], sub->buffers[SH_BUF_HEADER]); // Append the body as a new header function if (sub->input == PL_SHADER_SIG_SAMPLER) { pl_assert(sub->sampler_prefix); GLSLH("%s "$"(%c%s src_tex, vec2 tex_coord) {\n", outsigs[sub->output], sub->name, sub->sampler_prefix, samplers2D[sub->sampler_type]); } else { GLSLH("%s "$"(%s) {\n", outsigs[sub->output], sub->name, insigs[sub->input]); } pl_str_builder_concat(sh->buffers[SH_BUF_HEADER], sub->buffers[SH_BUF_BODY]); GLSLH("%s\n}\n\n", retvals[sub->output]); // Steal all inputs and objects from the subpass #define ARRAY_STEAL(arr) do \ { \ PL_ARRAY_CONCAT(sh, sh->arr, sub->arr); \ sub->arr.num = 0; \ } while (0) ARRAY_STEAL(obj); ARRAY_STEAL(vas); ARRAY_STEAL(vars); ARRAY_STEAL(descs); ARRAY_STEAL(consts); #undef ARRAY_STEAL // Steal the scratch buffer (if it holds data) if (sub->data.len) { pl_steal(sh->tmp, sub->data.buf); sub->data = (pl_str) {0}; } // Steal all temporary allocations and mark the child as unusable pl_steal(sh->tmp, sub->tmp); sub->tmp = pl_tmp(sub); sub->failed = true; // Steal the shader steps array (and allocations) pl_assert(pl_rc_count(&sub->info->rc) == 1); PL_ARRAY_CONCAT(sh->info, sh->info->steps, sub->info->steps); pl_steal(sh->info->tmp, sub->info->tmp); sub->info->tmp = pl_tmp(sub->info); sub->info->steps.num = 0; // sanity return sub->name; } pl_str_builder sh_finalize_internal(pl_shader sh) { pl_assert(sh->mutable); // this function should only ever be called once if (sh->failed) return NULL; // Padding for readability GLSLP("\n"); // Concatenate everything onto the prelude to form the final output pl_str_builder_concat(sh->buffers[SH_BUF_PRELUDE], sh->buffers[SH_BUF_HEADER]); if (sh->input == PL_SHADER_SIG_SAMPLER) { 
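    // Sampler-input shaders take the source texture and coordinate as explicit
    // parameters, producing roughly `vec4 <name>(sampler2D src_tex, vec2 tex_coord)`
    // (sketch only; the actual sampler type and precision prefix follow
    // sh->sampler_type and sh->sampler_prefix).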
pl_assert(sh->sampler_prefix); GLSLP("%s "$"(%c%s src_tex, vec2 tex_coord) {\n", outsigs[sh->output], sh->name, sh->sampler_prefix, samplers2D[sh->sampler_type]); } else { GLSLP("%s "$"(%s) {\n", outsigs[sh->output], sh->name, insigs[sh->input]); } pl_str_builder_concat(sh->buffers[SH_BUF_PRELUDE], sh->buffers[SH_BUF_BODY]); pl_str_builder_concat(sh->buffers[SH_BUF_PRELUDE], sh->buffers[SH_BUF_FOOTER]); GLSLP("%s\n}\n\n", retvals[sh->output]); // Generate the shader info struct sh_info *info = sh->info; info->info.steps = info->steps.elem; info->info.num_steps = info->steps.num; info->info.description = "(unknown shader)"; // Generate pretty description for (int i = 0; i < info->steps.num; i++) { const char *step = info->steps.elem[i]; // Prevent duplicates. We're okay using a weak equality check here // because most pass descriptions are static strings. for (int j = 0; j < i; j++) { if (info->steps.elem[j] == step) goto next_step; } int count = 1; for (int j = i+1; j < info->steps.num; j++) { if (info->steps.elem[j] == step) count++; } const char *prefix = i > 0 ? ", " : ""; if (count > 1) { pl_str_append_asprintf(info, &info->desc, "%s%s x%d", prefix, step, count); } else { pl_str_append_asprintf(info, &info->desc, "%s%s", prefix, step); } next_step: ; } if (info->desc.len) info->info.description = (char *) info->desc.buf; sh->mutable = false; return sh->buffers[SH_BUF_PRELUDE]; } const struct pl_shader_res *pl_shader_finalize(pl_shader sh) { if (sh->failed) { return NULL; } else if (!sh->mutable) { return &sh->result; } pl_shader_info info = &sh->info->info; pl_str_builder glsl = sh_finalize_internal(sh); // Turn ident_t into friendly strings before passing it to users #define FIX_IDENT(name) \ name = sh_ident_tostr(sh_ident_unpack(name)) for (int i = 0; i < sh->vas.num; i++) FIX_IDENT(sh->vas.elem[i].attr.name); for (int i = 0; i < sh->vars.num; i++) FIX_IDENT(sh->vars.elem[i].var.name); for (int i = 0; i < sh->consts.num; i++) FIX_IDENT(sh->consts.elem[i].name); for (int i = 0; i < sh->descs.num; i++) { struct pl_shader_desc *sd = &sh->descs.elem[i]; FIX_IDENT(sd->desc.name); for (int j = 0; j < sd->num_buffer_vars; sd++) FIX_IDENT(sd->buffer_vars[j].var.name); } #undef FIX_IDENT sh->result = (struct pl_shader_res) { .info = info, .glsl = (char *) pl_str_builder_exec(glsl).buf, .name = sh_ident_tostr(sh->name), .input = sh->input, .output = sh->output, .compute_group_size = { sh->group_size[0], sh->group_size[1] }, .compute_shmem = sh->shmem, .vertex_attribs = sh->vas.elem, .num_vertex_attribs = sh->vas.num, .variables = sh->vars.elem, .num_variables = sh->vars.num, .descriptors = sh->descs.elem, .num_descriptors = sh->descs.num, .constants = sh->consts.elem, .num_constants = sh->consts.num, // deprecated fields .params = info->params, .steps = info->steps, .num_steps = info->num_steps, .description = info->description, }; return &sh->result; } bool sh_require(pl_shader sh, enum pl_shader_sig insig, int w, int h) { if (sh->failed) { SH_FAIL(sh, "Attempting to modify a failed shader!"); return false; } if (!sh->mutable) { SH_FAIL(sh, "Attempted to modify an immutable shader!"); return false; } if ((w && sh->output_w && sh->output_w != w) || (h && sh->output_h && sh->output_h != h)) { SH_FAIL(sh, "Illegal sequence of shader operations: Incompatible " "output size requirements %dx%d and %dx%d", sh->output_w, sh->output_h, w, h); return false; } static const char *names[] = { [PL_SHADER_SIG_NONE] = "PL_SHADER_SIG_NONE", [PL_SHADER_SIG_COLOR] = "PL_SHADER_SIG_COLOR", }; // If we require an 
input, but there is none available - just get it from // the user by turning it into an explicit input signature. if (!sh->output && insig) { pl_assert(!sh->input); sh->input = insig; } else if (sh->output != insig) { SH_FAIL(sh, "Illegal sequence of shader operations! Current output " "signature is '%s', but called operation expects '%s'!", names[sh->output], names[insig]); return false; } // All of our shaders end up returning a vec4 color sh->output = PL_SHADER_SIG_COLOR; sh->output_w = PL_DEF(sh->output_w, w); sh->output_h = PL_DEF(sh->output_h, h); return true; } static void sh_obj_deref(pl_shader_obj obj) { if (!pl_rc_deref(&obj->rc)) return; if (obj->uninit) obj->uninit(obj->gpu, obj->priv); pl_free(obj); } void pl_shader_obj_destroy(pl_shader_obj *ptr) { pl_shader_obj obj = *ptr; if (!obj) return; sh_obj_deref(obj); *ptr = NULL; } void *sh_require_obj(pl_shader sh, pl_shader_obj *ptr, enum pl_shader_obj_type type, size_t priv_size, void (*uninit)(pl_gpu gpu, void *priv)) { if (!ptr) return NULL; pl_shader_obj obj = *ptr; if (obj && obj->gpu != SH_GPU(sh)) { SH_FAIL(sh, "Passed pl_shader_obj belongs to different GPU!"); return NULL; } if (obj && obj->type != type) { SH_FAIL(sh, "Passed pl_shader_obj of wrong type! Shader objects must " "always be used with the same type of shader."); return NULL; } if (!obj) { obj = pl_zalloc_ptr(NULL, obj); pl_rc_init(&obj->rc); obj->gpu = SH_GPU(sh); obj->type = type; obj->priv = pl_zalloc(obj, priv_size); obj->uninit = uninit; } PL_ARRAY_APPEND(sh, sh->obj, obj); pl_rc_ref(&obj->rc); *ptr = obj; return obj->priv; } ident_t sh_prng(pl_shader sh, bool temporal, ident_t *p_state) { ident_t randfun = sh_fresh(sh, "rand"), state = sh_fresh(sh, "state"); // Based on pcg3d (http://jcgt.org/published/0009/03/02/) GLSLP("#define prng_t uvec3\n"); GLSLH("vec3 "$"(inout uvec3 s) { \n" " s = 1664525u * s + uvec3(1013904223u); \n" " s.x += s.y * s.z; \n" " s.y += s.z * s.x; \n" " s.z += s.x * s.y; \n" " s ^= s >> 16u; \n" " s.x += s.y * s.z; \n" " s.y += s.z * s.x; \n" " s.z += s.x * s.y; \n" " return vec3(s) * 1.0/float(0xFFFFFFFFu); \n" "} \n", randfun); if (temporal) { GLSL("uvec3 "$" = uvec3(gl_FragCoord.xy, "$"); \n", state, SH_UINT_DYN(SH_PARAMS(sh).index)); } else { GLSL("uvec3 "$" = uvec3(gl_FragCoord.xy, 0.0); \n", state); } if (p_state) *p_state = state; ident_t res = sh_fresh(sh, "RAND"); GLSLH("#define "$" ("$"("$"))\n", res, randfun, state); return res; } libplacebo-v7.349.0/src/shaders.h000066400000000000000000000331741463457750100165750ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #pragma once #include #include #include "common.h" #include "cache.h" #include "log.h" #include "gpu.h" #include // This represents an identifier (e.g. name of function, uniform etc.) for // a shader resource. Not human-readable. 
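// Internally, an ident_t packs the shader's per-instance prefix together with a
// fresh counter into a single 16-bit value (see sh_fresh()), which keeps
// identifiers unique even across shaders merged via sh_subpass().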
typedef unsigned short ident_t; #define $ "_%hx" #define NULL_IDENT 0u #define sh_mkident(id, name) ((ident_t) id) #define sh_ident_tostr(id) pl_asprintf(sh->tmp, $, id) enum { IDENT_BITS = 8 * sizeof(ident_t), IDENT_MASK = (uintptr_t) USHRT_MAX, IDENT_SENTINEL = (uintptr_t) 0x20230319 << IDENT_BITS, }; // Functions to pack/unpack an identifier into a `const char *` name field. // Used to defer string templating of friendly names until actually necessary static inline const char *sh_ident_pack(ident_t id) { return (const char *)(uintptr_t) (IDENT_SENTINEL | id); } static inline ident_t sh_ident_unpack(const char *name) { uintptr_t uname = (uintptr_t) name; assert((uname & ~IDENT_MASK) == IDENT_SENTINEL); return uname & IDENT_MASK; } enum pl_shader_buf { SH_BUF_PRELUDE, // extra #defines etc. SH_BUF_HEADER, // previous passes, helper function definitions, etc. SH_BUF_BODY, // partial contents of the "current" function SH_BUF_FOOTER, // will be appended to the end of the current function SH_BUF_COUNT, }; enum pl_shader_type { SH_AUTO, SH_COMPUTE, SH_FRAGMENT }; struct sh_info { // public-facing struct struct pl_shader_info_t info; // internal fields void *tmp; pl_rc_t rc; pl_str desc; PL_ARRAY(const char *) steps; }; struct pl_shader_t { pl_log log; void *tmp; // temporary allocations (freed on pl_shader_reset) struct sh_info *info; pl_str data; // pooled/recycled scratch buffer for small allocations PL_ARRAY(pl_shader_obj) obj; bool failed; bool mutable; ident_t name; enum pl_shader_sig input, output; int output_w; int output_h; bool transpose; pl_str_builder buffers[SH_BUF_COUNT]; enum pl_shader_type type; bool flexible_work_groups; int group_size[2]; size_t shmem; enum pl_sampler_type sampler_type; char sampler_prefix; unsigned short prefix; // pre-processed version of res.params.id unsigned short fresh; // Note: internally, these `pl_shader_va` etc. use raw ident_t fields // instead of `const char *` wherever a name is required! These are // translated to legal strings either in `pl_shader_finalize`, or inside // the `pl_dispatch` shader compilation step. PL_ARRAY(struct pl_shader_va) vas; PL_ARRAY(struct pl_shader_var) vars; PL_ARRAY(struct pl_shader_desc) descs; PL_ARRAY(struct pl_shader_const) consts; // cached result of `pl_shader_finalize` struct pl_shader_res result; }; // Free temporary resources associated with a shader. Normally called by // pl_shader_reset(), but used internally to reduce memory waste. void sh_deref(pl_shader sh); // Same as `pl_shader_finalize` but doesn't generate `sh->res`, instead returns // the string builder to be used to finalize the shader. Assumes the caller // will access the shader's internal fields directly. pl_str_builder sh_finalize_internal(pl_shader sh); // Helper functions for convenience #define SH_PARAMS(sh) ((sh)->info->info.params) #define SH_GPU(sh) (SH_PARAMS(sh).gpu) #define SH_CACHE(sh) pl_gpu_cache(SH_GPU(sh)) // Returns the GLSL version, defaulting to desktop 130. struct pl_glsl_version sh_glsl(const pl_shader sh); #define SH_FAIL(sh, ...) do { \ sh->failed = true; \ PL_ERR(sh, __VA_ARGS__); \ } while (0) // Attempt enabling compute shaders for this pass, if possible bool sh_try_compute(pl_shader sh, int bw, int bh, bool flex, size_t mem); // Attempt merging a secondary shader into the current shader. Returns NULL if // merging fails (e.g. incompatible signatures); otherwise returns an identifier // corresponding to the generated subpass function. 
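// (Illustrative use only: for a color-signature subpass, the caller can invoke the
// returned function from its own body, e.g. `GLSL("color = "$"(color);\n", sub_fn);`
// where `sub_fn` is the identifier returned by this function.)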
// // If successful, the subpass shader is set to an undefined failure state and // must be explicitly reset/aborted before being re-used. ident_t sh_subpass(pl_shader sh, pl_shader sub); // Helpers for adding new variables/descriptors/etc. with fresh, unique // identifier names. These will never conflict with other identifiers, even // if the shaders are merged together. ident_t sh_fresh(pl_shader sh, const char *name); // Add a new shader var and return its identifier ident_t sh_var(pl_shader sh, struct pl_shader_var sv); // Helper functions for `sh_var` ident_t sh_var_int(pl_shader sh, const char *name, int val, bool dynamic); ident_t sh_var_uint(pl_shader sh, const char *name, unsigned int val, bool dynamic); ident_t sh_var_float(pl_shader sh, const char *name, float val, bool dynamic); ident_t sh_var_mat3(pl_shader sh, const char *name, pl_matrix3x3 val); #define SH_INT_DYN(val) sh_var_int(sh, "const", val, true) #define SH_UINT_DYN(val) sh_var_uint(sh, "const", val, true) #define SH_FLOAT_DYN(val) sh_var_float(sh, "const", val, true) #define SH_MAT3(val) sh_var_mat3(sh, "mat", val) // Add a new shader desc and return its identifier. ident_t sh_desc(pl_shader sh, struct pl_shader_desc sd); // Add a new shader constant and return its identifier. ident_t sh_const(pl_shader sh, struct pl_shader_const sc); // Helper functions for `sh_const` ident_t sh_const_int(pl_shader sh, const char *name, int val); ident_t sh_const_uint(pl_shader sh, const char *name, unsigned int val); ident_t sh_const_float(pl_shader sh, const char *name, float val); #define SH_INT(val) sh_const_int(sh, "const", val) #define SH_UINT(val) sh_const_uint(sh, "const", val) #define SH_FLOAT(val) sh_const_float(sh, "const", val) // Add a new shader va and return its identifier ident_t sh_attr(pl_shader sh, struct pl_shader_va sva); // Helper to add a a vec2 VA from a pl_rect2df. Returns NULL_IDENT on failure. ident_t sh_attr_vec2(pl_shader sh, const char *name, const pl_rect2df *rc); // Bind a texture under a given transformation and make its attributes // available as well. If an output pointer for one of the attributes is left // as NULL, that attribute will not be added. Returns NULL on failure. `rect` // is optional, and defaults to the full texture if left as NULL. // // Note that for e.g. compute shaders, the vec2 out_pos might be a macro that // expands to an expensive computation, and should be cached by the user. ident_t sh_bind(pl_shader sh, pl_tex tex, enum pl_tex_address_mode address_mode, enum pl_tex_sample_mode sample_mode, const char *name, const pl_rect2df *rect, ident_t *out_pos, ident_t *out_pt); // Incrementally build up a buffer by adding new variable elements to the // buffer, resizing buf.buffer_vars if necessary. Returns whether or not the // variable could be successfully added (which may fail if you try exceeding // the size limits of the buffer type). If successful, the layout is stored // in *out_layout (may be NULL). bool sh_buf_desc_append(void *alloc, pl_gpu gpu, struct pl_shader_desc *buf_desc, struct pl_var_layout *out_layout, const struct pl_var new_var); size_t sh_buf_desc_size(const struct pl_shader_desc *buf_desc); // Underlying function for appending text to a shader #define sh_append(sh, buf, ...) \ pl_str_builder_addf((sh)->buffers[buf], __VA_ARGS__) #define sh_append_str(sh, buf, str) \ pl_str_builder_str((sh)->buffers[buf], str) #define GLSLP(...) sh_append(sh, SH_BUF_PRELUDE, __VA_ARGS__) #define GLSLH(...) sh_append(sh, SH_BUF_HEADER, __VA_ARGS__) #define GLSL(...) 
sh_append(sh, SH_BUF_BODY, __VA_ARGS__) #define GLSLF(...) sh_append(sh, SH_BUF_FOOTER, __VA_ARGS__) // Attach a description to a shader void sh_describef(pl_shader sh, const char *fmt, ...) PL_PRINTF(2, 3); static inline void sh_describe(pl_shader sh, const char *desc) { PL_ARRAY_APPEND(sh->info, sh->info->steps, desc); }; // Requires that the share is mutable, has an output signature compatible // with the given input signature, as well as an output size compatible with // the given size requirements. Errors and returns false otherwise. bool sh_require(pl_shader sh, enum pl_shader_sig insig, int w, int h); // Shader resources enum pl_shader_obj_type { PL_SHADER_OBJ_INVALID = 0, PL_SHADER_OBJ_COLOR_MAP, PL_SHADER_OBJ_SAMPLER, PL_SHADER_OBJ_DITHER, PL_SHADER_OBJ_LUT, PL_SHADER_OBJ_AV1_GRAIN, PL_SHADER_OBJ_FILM_GRAIN, PL_SHADER_OBJ_RESHAPE, }; struct pl_shader_obj_t { enum pl_shader_obj_type type; pl_rc_t rc; pl_gpu gpu; void (*uninit)(pl_gpu gpu, void *priv); void *priv; }; // Returns (*ptr)->priv, or NULL on failure void *sh_require_obj(pl_shader sh, pl_shader_obj *ptr, enum pl_shader_obj_type type, size_t priv_size, void (*uninit)(pl_gpu gpu, void *priv)); #define SH_OBJ(sh, ptr, type, t, uninit) \ ((t*) sh_require_obj(sh, ptr, type, sizeof(t), uninit)) // Initializes a PRNG. The resulting string will directly evaluate to a // pseudorandom, uniformly distributed vec3 from [0.0,1.0]. Since this // algorithm works by mutating a state variable, if the user wants to use the // resulting PRNG inside a subfunction, they must add an extra `inout prng_t %s` // with the contents of `state` to the signature. (Optional) // // If `temporal` is set, the PRNG will vary across frames. ident_t sh_prng(pl_shader sh, bool temporal, ident_t *state); // Backing memory type enum sh_lut_type { SH_LUT_AUTO = 0, // pick whatever makes the most sense SH_LUT_TEXTURE, // upload as texture SH_LUT_UNIFORM, // uniform array SH_LUT_LITERAL, // constant / literal array in shader source (fallback) }; // Interpolation method enum sh_lut_method { SH_LUT_NONE = 0, // no interpolation, integer indices SH_LUT_LINEAR, // linear interpolation, vecN indices in range [0,1] SH_LUT_CUBIC, // (bi/tri)cubic interpolation SH_LUT_TETRAHEDRAL, // tetrahedral interpolation for vec3, equivalent to // SH_LUT_LINEAR for lower dimensions }; struct sh_lut_params { pl_shader_obj *object; // Type of the LUT we intend to generate. // // Note: If `var_type` is PL_VAR_*INT, `method` must be SH_LUT_NONE. enum pl_var_type var_type; enum sh_lut_type lut_type; enum sh_lut_method method; // For SH_LUT_TEXTURE, this can be used to override the texture's internal // format, in which case it takes precedence over the default for `type`. pl_fmt fmt; // LUT dimensions. Unused dimensions may be left as 0. int width; int height; int depth; int comps; // If true, the LUT will always be regenerated, even if the dimensions have // not changed. bool update; // Alternate way of triggering shader invalidations. If the signature // does not match the LUT's signature, it will be regenerated. uint64_t signature; // If set to true, shader objects will be preserved and updated in-place // rather than being treated as read-only. bool dynamic; // If set , generated shader objects are automatically cached in this // cache. Requires `signature` to be set (and uniquely identify the LUT). 
pl_cache cache; // Will be called with a zero-initialized buffer whenever the data needs to // be computed, which happens whenever the size is changed, the shader // object is invalidated, or `update` is set to true. // // Note: Interpretation of `data` is according to `type` and `fmt`. void (*fill)(void *data, const struct sh_lut_params *params); void *priv; // Debug tag to track LUT source pl_debug_tag debug_tag; }; #define sh_lut_params(...) (&(struct sh_lut_params) { \ .debug_tag = PL_DEBUG_TAG, \ __VA_ARGS__ \ }) // Makes a table of values available as a shader variable, using an a given // method (falling back if needed). The resulting identifier can be sampled // directly as %s(pos), where pos is a vector with the right number of // dimensions. `pos` must be an integer vector within the bounds of the array, // unless the method is `SH_LUT_LINEAR`, in which case it's a float vector that // gets interpolated and clamped as needed. Returns NULL on error. ident_t sh_lut(pl_shader sh, const struct sh_lut_params *params); static inline uint8_t sh_num_comps(uint8_t mask) { pl_assert((mask & 0xF) == mask); return __builtin_popcount(mask); } static inline const char *sh_float_type(uint8_t mask) { switch (sh_num_comps(mask)) { case 1: return "float"; case 2: return "vec2"; case 3: return "vec3"; case 4: return "vec4"; } pl_unreachable(); } static inline const char *sh_swizzle(uint8_t mask) { static const char * const swizzles[0x10] = { NULL, "r", "g", "rg", "b", "rb", "gb", "rgb", "a", "ra", "ga", "rga", "ba", "rba", "gba", "rgba", }; pl_assert(mask <= PL_ARRAY_SIZE(swizzles)); return swizzles[mask]; } libplacebo-v7.349.0/src/shaders/000077500000000000000000000000001463457750100164145ustar00rootroot00000000000000libplacebo-v7.349.0/src/shaders/colorspace.c000066400000000000000000002606571463457750100207320ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . 
*/ #include #include "cache.h" #include "colorspace.h" #include "shaders.h" #include void pl_shader_set_alpha(pl_shader sh, struct pl_color_repr *repr, enum pl_alpha_mode mode) { bool src_has_alpha = repr->alpha == PL_ALPHA_INDEPENDENT || repr->alpha == PL_ALPHA_PREMULTIPLIED; bool dst_not_premul = mode == PL_ALPHA_INDEPENDENT || mode == PL_ALPHA_NONE; if (repr->alpha == PL_ALPHA_PREMULTIPLIED && dst_not_premul) { GLSL("if (color.a > 1e-6) \n" " color.rgb /= vec3(color.a); \n"); repr->alpha = PL_ALPHA_INDEPENDENT; } if (repr->alpha == PL_ALPHA_INDEPENDENT && mode == PL_ALPHA_PREMULTIPLIED) { GLSL("color.rgb *= vec3(color.a); \n"); repr->alpha = PL_ALPHA_PREMULTIPLIED; } if (src_has_alpha && mode == PL_ALPHA_NONE) { GLSL("color.a = 1.0; \n"); repr->alpha = PL_ALPHA_NONE; } } #ifdef PL_HAVE_DOVI static inline void reshape_mmr(pl_shader sh, ident_t mmr, bool single, int min_order, int max_order) { if (single) { GLSL("const uint mmr_idx = 0u; \n"); } else { GLSL("uint mmr_idx = uint(coeffs.y); \n"); } assert(min_order <= max_order); if (min_order < max_order) GLSL("uint order = uint(coeffs.w); \n"); GLSL("vec4 sigX; \n" "s = coeffs.x; \n" "sigX.xyz = sig.xxy * sig.yzz; \n" "sigX.w = sigX.x * sig.z; \n" "s += dot("$"[mmr_idx + 0].xyz, sig); \n" "s += dot("$"[mmr_idx + 1], sigX); \n", mmr, mmr); if (max_order >= 2) { if (min_order < 2) GLSL("if (order >= 2) { \n"); GLSL("vec3 sig2 = sig * sig; \n" "vec4 sigX2 = sigX * sigX; \n" "s += dot("$"[mmr_idx + 2].xyz, sig2); \n" "s += dot("$"[mmr_idx + 3], sigX2); \n", mmr, mmr); if (max_order == 3) { if (min_order < 3) GLSL("if (order >= 3 { \n"); GLSL("s += dot("$"[mmr_idx + 4].xyz, sig2 * sig); \n" "s += dot("$"[mmr_idx + 5], sigX2 * sigX); \n", mmr, mmr); if (min_order < 3) GLSL("} \n"); } if (min_order < 2) GLSL("} \n"); } } static inline void reshape_poly(pl_shader sh) { GLSL("s = (coeffs.z * s + coeffs.y) * s + coeffs.x; \n"); } #endif void pl_shader_dovi_reshape(pl_shader sh, const struct pl_dovi_metadata *data) { #ifdef PL_HAVE_DOVI if (!sh_require(sh, PL_SHADER_SIG_COLOR, 0, 0) || !data) return; sh_describe(sh, "reshaping"); GLSL("// pl_shader_reshape \n" "{ \n" "vec3 sig; \n" "vec4 coeffs; \n" "float s; \n" "sig = clamp(color.rgb, 0.0, 1.0); \n"); float coeffs_data[8][4]; float mmr_packed_data[8*6][4]; for (int c = 0; c < 3; c++) { const struct pl_reshape_data *comp = &data->comp[c]; if (!comp->num_pivots) continue; pl_assert(comp->num_pivots >= 2 && comp->num_pivots <= 9); GLSL("s = sig[%d]; \n", c); // Prepare coefficients for GPU bool has_poly = false, has_mmr = false, mmr_single = true; int mmr_idx = 0, min_order = 3, max_order = 1; memset(coeffs_data, 0, sizeof(coeffs_data)); for (int i = 0; i < comp->num_pivots - 1; i++) { switch (comp->method[i]) { case 0: // polynomial has_poly = true; coeffs_data[i][3] = 0.0; // order=0 signals polynomial for (int k = 0; k < 3; k++) coeffs_data[i][k] = comp->poly_coeffs[i][k]; break; case 1: min_order = PL_MIN(min_order, comp->mmr_order[i]); max_order = PL_MAX(max_order, comp->mmr_order[i]); mmr_single = !has_mmr; has_mmr = true; coeffs_data[i][3] = (float) comp->mmr_order[i]; coeffs_data[i][0] = comp->mmr_constant[i]; coeffs_data[i][1] = (float) mmr_idx; for (int j = 0; j < comp->mmr_order[i]; j++) { // store weights per order as two packed vec4s float *mmr = &mmr_packed_data[mmr_idx][0]; mmr[0] = comp->mmr_coeffs[i][j][0]; mmr[1] = comp->mmr_coeffs[i][j][1]; mmr[2] = comp->mmr_coeffs[i][j][2]; mmr[3] = 0.0; // unused mmr[4] = comp->mmr_coeffs[i][j][3]; mmr[5] = comp->mmr_coeffs[i][j][4]; mmr[6] = 
comp->mmr_coeffs[i][j][5]; mmr[7] = comp->mmr_coeffs[i][j][6]; mmr_idx += 2; } break; default: pl_unreachable(); } } if (comp->num_pivots > 2) { // Skip the (irrelevant) lower and upper bounds float pivots_data[7]; memcpy(pivots_data, comp->pivots + 1, (comp->num_pivots - 2) * sizeof(pivots_data[0])); // Fill the remainder with a quasi-infinite sentinel pivot for (int i = comp->num_pivots - 2; i < PL_ARRAY_SIZE(pivots_data); i++) pivots_data[i] = 1e9f; ident_t pivots = sh_var(sh, (struct pl_shader_var) { .data = pivots_data, .var = { .name = "pivots", .type = PL_VAR_FLOAT, .dim_v = 1, .dim_m = 1, .dim_a = PL_ARRAY_SIZE(pivots_data), }, }); ident_t coeffs = sh_var(sh, (struct pl_shader_var) { .data = coeffs_data, .var = { .name = "coeffs", .type = PL_VAR_FLOAT, .dim_v = 4, .dim_m = 1, .dim_a = PL_ARRAY_SIZE(coeffs_data), }, }); // Efficiently branch into the correct set of coefficients GLSL("#define test(i) bvec4(s >= "$"[i]) \n" "#define coef(i) "$"[i] \n" "coeffs = mix(mix(mix(coef(0), coef(1), test(0)), \n" " mix(coef(2), coef(3), test(2)), \n" " test(1)), \n" " mix(mix(coef(4), coef(5), test(4)), \n" " mix(coef(6), coef(7), test(6)), \n" " test(5)), \n" " test(3)); \n" "#undef test \n" "#undef coef \n", pivots, coeffs); } else { // No need for a single pivot, just set the coeffs directly GLSL("coeffs = "$"; \n", sh_var(sh, (struct pl_shader_var) { .var = pl_var_vec4("coeffs"), .data = coeffs_data, })); } ident_t mmr = NULL_IDENT; if (has_mmr) { mmr = sh_var(sh, (struct pl_shader_var) { .data = mmr_packed_data, .var = { .name = "mmr", .type = PL_VAR_FLOAT, .dim_v = 4, .dim_m = 1, .dim_a = mmr_idx, }, }); } if (has_mmr && has_poly) { GLSL("if (coeffs.w == 0.0) { \n"); reshape_poly(sh); GLSL("} else { \n"); reshape_mmr(sh, mmr, mmr_single, min_order, max_order); GLSL("} \n"); } else if (has_poly) { reshape_poly(sh); } else { assert(has_mmr); GLSL("{ \n"); reshape_mmr(sh, mmr, mmr_single, min_order, max_order); GLSL("} \n"); } ident_t lo = sh_var(sh, (struct pl_shader_var) { .var = pl_var_float("lo"), .data = &comp->pivots[0], }); ident_t hi = sh_var(sh, (struct pl_shader_var) { .var = pl_var_float("hi"), .data = &comp->pivots[comp->num_pivots - 1], }); GLSL("color[%d] = clamp(s, "$", "$"); \n", c, lo, hi); } GLSL("} \n"); #else SH_FAIL(sh, "libplacebo was compiled without support for dolbyvision reshaping"); #endif } void pl_shader_decode_color(pl_shader sh, struct pl_color_repr *repr, const struct pl_color_adjustment *params) { if (!sh_require(sh, PL_SHADER_SIG_COLOR, 0, 0)) return; sh_describe(sh, "color decoding"); GLSL("// pl_shader_decode_color \n" "{ \n"); // Do this first because the following operations are potentially nonlinear pl_shader_set_alpha(sh, repr, PL_ALPHA_INDEPENDENT); if (repr->sys == PL_COLOR_SYSTEM_XYZ || repr->sys == PL_COLOR_SYSTEM_DOLBYVISION) { ident_t scale = SH_FLOAT(pl_color_repr_normalize(repr)); GLSL("color.rgb *= vec3("$"); \n", scale); } if (repr->sys == PL_COLOR_SYSTEM_XYZ) { pl_shader_linearize(sh, &(struct pl_color_space) { .transfer = PL_COLOR_TRC_ST428, }); } if (repr->sys == PL_COLOR_SYSTEM_DOLBYVISION) pl_shader_dovi_reshape(sh, repr->dovi); enum pl_color_system orig_sys = repr->sys; pl_transform3x3 tr = pl_color_repr_decode(repr, params); if (memcmp(&tr, &pl_transform3x3_identity, sizeof(tr))) { ident_t cmat = sh_var(sh, (struct pl_shader_var) { .var = pl_var_mat3("cmat"), .data = PL_TRANSPOSE_3X3(tr.mat.m), }); ident_t cmat_c = sh_var(sh, (struct pl_shader_var) { .var = pl_var_vec3("cmat_c"), .data = tr.c, }); GLSL("color.rgb = "$" * color.rgb + 
"$"; \n", cmat, cmat_c); } switch (orig_sys) { case PL_COLOR_SYSTEM_BT_2020_C: // Conversion for C'rcY'cC'bc via the BT.2020 CL system: // C'bc = (B'-Y'c) / 1.9404 | C'bc <= 0 // = (B'-Y'c) / 1.5816 | C'bc > 0 // // C'rc = (R'-Y'c) / 1.7184 | C'rc <= 0 // = (R'-Y'c) / 0.9936 | C'rc > 0 // // as per the BT.2020 specification, table 4. This is a non-linear // transformation because (constant) luminance receives non-equal // contributions from the three different channels. GLSL("// constant luminance conversion \n" "color.br = color.br * mix(vec2(1.5816, 0.9936), \n" " vec2(1.9404, 1.7184), \n" " lessThanEqual(color.br, vec2(0.0))) \n" " + color.gg; \n"); // Expand channels to camera-linear light. This shader currently just // assumes everything uses the BT.2020 12-bit gamma function, since the // difference between 10 and 12-bit is negligible for anything other // than 12-bit content. GLSL("vec3 lin = mix(color.rgb * vec3(1.0/4.5), \n" " pow((color.rgb + vec3(0.0993))*vec3(1.0/1.0993), \n" " vec3(1.0/0.45)), \n" " lessThanEqual(vec3(0.08145), color.rgb)); \n"); // Calculate the green channel from the expanded RYcB, and recompress to G' // The BT.2020 specification says Yc = 0.2627*R + 0.6780*G + 0.0593*B GLSL("color.g = (lin.g - 0.2627*lin.r - 0.0593*lin.b)*1.0/0.6780; \n" "color.g = mix(color.g * 4.5, \n" " 1.0993 * pow(color.g, 0.45) - 0.0993, \n" " 0.0181 <= color.g); \n"); break; case PL_COLOR_SYSTEM_BT_2100_PQ:; // Conversion process from the spec: // // 1. L'M'S' = cmat * ICtCp // 2. LMS = linearize(L'M'S') (EOTF for PQ, inverse OETF for HLG) // 3. RGB = lms2rgb * LMS // // After this we need to invert step 2 to arrive at non-linear RGB. // (It's important we keep the transfer function conversion separate // from the color system decoding, so we have to partially undo our // work here even though we will end up linearizing later on anyway) GLSL(// PQ EOTF "color.rgb = pow(max(color.rgb, 0.0), vec3(1.0/%f)); \n" "color.rgb = max(color.rgb - vec3(%f), 0.0) \n" " / (vec3(%f) - vec3(%f) * color.rgb); \n" "color.rgb = pow(color.rgb, vec3(1.0/%f)); \n" // LMS matrix "color.rgb = mat3( 3.43661, -0.79133, -0.0259499, \n" " -2.50645, 1.98360, -0.0989137, \n" " 0.06984, -0.192271, 1.12486) * color.rgb; \n" // PQ OETF "color.rgb = pow(max(color.rgb, 0.0), vec3(%f)); \n" "color.rgb = (vec3(%f) + vec3(%f) * color.rgb) \n" " / (vec3(1.0) + vec3(%f) * color.rgb); \n" "color.rgb = pow(color.rgb, vec3(%f)); \n", PQ_M2, PQ_C1, PQ_C2, PQ_C3, PQ_M1, PQ_M1, PQ_C1, PQ_C2, PQ_C3, PQ_M2); break; case PL_COLOR_SYSTEM_BT_2100_HLG: GLSL(// HLG OETF^-1 "color.rgb = mix(vec3(4.0) * color.rgb * color.rgb, \n" " exp((color.rgb - vec3(%f)) * vec3(1.0/%f)) \n" " + vec3(%f), \n" " lessThan(vec3(0.5), color.rgb)); \n" // LMS matrix "color.rgb = mat3( 3.43661, -0.79133, -0.0259499, \n" " -2.50645, 1.98360, -0.0989137, \n" " 0.06984, -0.192271, 1.12486) * color.rgb; \n" // HLG OETF "color.rgb = mix(vec3(0.5) * sqrt(color.rgb), \n" " vec3(%f) * log(color.rgb - vec3(%f)) + vec3(%f), \n" " lessThan(vec3(1.0), color.rgb)); \n", HLG_C, HLG_A, HLG_B, HLG_A, HLG_B, HLG_C); break; case PL_COLOR_SYSTEM_DOLBYVISION:; #ifdef PL_HAVE_DOVI // Dolby Vision always outputs BT.2020-referred HPE LMS, so hard-code // the inverse LMS->RGB matrix corresponding to this color space. 
pl_matrix3x3 dovi_lms2rgb = {{ { 3.06441879, -2.16597676, 0.10155818}, {-0.65612108, 1.78554118, -0.12943749}, { 0.01736321, -0.04725154, 1.03004253}, }}; pl_matrix3x3_mul(&dovi_lms2rgb, &repr->dovi->linear); ident_t mat = sh_var(sh, (struct pl_shader_var) { .var = pl_var_mat3("lms2rgb"), .data = PL_TRANSPOSE_3X3(dovi_lms2rgb.m), }); // PQ EOTF GLSL("color.rgb = pow(max(color.rgb, 0.0), vec3(1.0/%f)); \n" "color.rgb = max(color.rgb - vec3(%f), 0.0) \n" " / (vec3(%f) - vec3(%f) * color.rgb); \n" "color.rgb = pow(color.rgb, vec3(1.0/%f)); \n", PQ_M2, PQ_C1, PQ_C2, PQ_C3, PQ_M1); // LMS matrix GLSL("color.rgb = "$" * color.rgb; \n", mat); // PQ OETF GLSL("color.rgb = pow(max(color.rgb, 0.0), vec3(%f)); \n" "color.rgb = (vec3(%f) + vec3(%f) * color.rgb) \n" " / (vec3(1.0) + vec3(%f) * color.rgb); \n" "color.rgb = pow(color.rgb, vec3(%f)); \n", PQ_M1, PQ_C1, PQ_C2, PQ_C3, PQ_M2); break; #else SH_FAIL(sh, "libplacebo was compiled without support for dolbyvision reshaping"); return; #endif case PL_COLOR_SYSTEM_UNKNOWN: case PL_COLOR_SYSTEM_RGB: case PL_COLOR_SYSTEM_XYZ: case PL_COLOR_SYSTEM_BT_601: case PL_COLOR_SYSTEM_BT_709: case PL_COLOR_SYSTEM_SMPTE_240M: case PL_COLOR_SYSTEM_BT_2020_NC: case PL_COLOR_SYSTEM_YCGCO: break; // no special post-processing needed case PL_COLOR_SYSTEM_COUNT: pl_unreachable(); } // Gamma adjustment. Doing this here (in non-linear light) is technically // somewhat wrong, but this is just an aesthetic parameter and not really // meant for colorimetric precision, so we don't care too much. if (params && params->gamma == 0) { // Avoid division by zero GLSL("color.rgb = vec3(0.0); \n"); } else if (params && params->gamma != 1) { ident_t gamma = sh_var(sh, (struct pl_shader_var) { .var = pl_var_float("gamma"), .data = &(float){ 1 / params->gamma }, }); GLSL("color.rgb = pow(max(color.rgb, vec3(0.0)), vec3("$")); \n", gamma); } GLSL("}\n"); } void pl_shader_encode_color(pl_shader sh, const struct pl_color_repr *repr) { if (!sh_require(sh, PL_SHADER_SIG_COLOR, 0, 0)) return; sh_describe(sh, "color encoding"); GLSL("// pl_shader_encode_color \n" "{ \n"); switch (repr->sys) { case PL_COLOR_SYSTEM_BT_2020_C: // Expand R'G'B' to RGB GLSL("vec3 lin = mix(color.rgb * vec3(1.0/4.5), \n" " pow((color.rgb + vec3(0.0993))*vec3(1.0/1.0993), \n" " vec3(1.0/0.45)), \n" " lessThanEqual(vec3(0.08145), color.rgb)); \n"); // Compute Yc from RGB and compress to R'Y'cB' GLSL("color.g = dot(vec3(0.2627, 0.6780, 0.0593), lin); \n" "color.g = mix(color.g * 4.5, \n" " 1.0993 * pow(color.g, 0.45) - 0.0993, \n" " 0.0181 <= color.g); \n"); // Compute C'bc and C'rc into color.br GLSL("color.br = color.br - color.gg; \n" "color.br *= mix(vec2(1.0/1.5816, 1.0/0.9936), \n" " vec2(1.0/1.9404, 1.0/1.7184), \n" " lessThanEqual(color.br, vec2(0.0))); \n"); break; case PL_COLOR_SYSTEM_BT_2100_PQ:; GLSL("color.rgb = pow(max(color.rgb, 0.0), vec3(1.0/%f)); \n" "color.rgb = max(color.rgb - vec3(%f), 0.0) \n" " / (vec3(%f) - vec3(%f) * color.rgb); \n" "color.rgb = pow(color.rgb, vec3(1.0/%f)); \n" "color.rgb = mat3(0.412109, 0.166748, 0.024170, \n" " 0.523925, 0.720459, 0.075440, \n" " 0.063965, 0.112793, 0.900394) * color.rgb; \n" "color.rgb = pow(color.rgb, vec3(%f)); \n" "color.rgb = (vec3(%f) + vec3(%f) * color.rgb) \n" " / (vec3(1.0) + vec3(%f) * color.rgb); \n" "color.rgb = pow(color.rgb, vec3(%f)); \n", PQ_M2, PQ_C1, PQ_C2, PQ_C3, PQ_M1, PQ_M1, PQ_C1, PQ_C2, PQ_C3, PQ_M2); break; case PL_COLOR_SYSTEM_BT_2100_HLG: GLSL("color.rgb = mix(vec3(4.0) * color.rgb * color.rgb, \n" " exp((color.rgb - vec3(%f)) * 
vec3(1.0/%f)) \n" " + vec3(%f), \n" " lessThan(vec3(0.5), color.rgb)); \n" "color.rgb = mat3(0.412109, 0.166748, 0.024170, \n" " 0.523925, 0.720459, 0.075440, \n" " 0.063965, 0.112793, 0.900394) * color.rgb; \n" "color.rgb = mix(vec3(0.5) * sqrt(color.rgb), \n" " vec3(%f) * log(color.rgb - vec3(%f)) + vec3(%f), \n" " lessThan(vec3(1.0), color.rgb)); \n", HLG_C, HLG_A, HLG_B, HLG_A, HLG_B, HLG_C); break; case PL_COLOR_SYSTEM_DOLBYVISION: SH_FAIL(sh, "Cannot un-apply dolbyvision yet (no inverse reshaping)!"); return; case PL_COLOR_SYSTEM_UNKNOWN: case PL_COLOR_SYSTEM_RGB: case PL_COLOR_SYSTEM_XYZ: case PL_COLOR_SYSTEM_BT_601: case PL_COLOR_SYSTEM_BT_709: case PL_COLOR_SYSTEM_SMPTE_240M: case PL_COLOR_SYSTEM_BT_2020_NC: case PL_COLOR_SYSTEM_YCGCO: break; // no special pre-processing needed case PL_COLOR_SYSTEM_COUNT: pl_unreachable(); } // Since this is a relatively rare operation, bypass it as much as possible bool skip = true; skip &= PL_DEF(repr->sys, PL_COLOR_SYSTEM_RGB) == PL_COLOR_SYSTEM_RGB; skip &= PL_DEF(repr->levels, PL_COLOR_LEVELS_FULL) == PL_COLOR_LEVELS_FULL; skip &= !repr->bits.sample_depth || !repr->bits.color_depth || repr->bits.sample_depth == repr->bits.color_depth; skip &= !repr->bits.bit_shift; if (!skip) { struct pl_color_repr copy = *repr; ident_t xyzscale = NULL_IDENT; if (repr->sys == PL_COLOR_SYSTEM_XYZ) xyzscale = SH_FLOAT(1.0 / pl_color_repr_normalize(©)); pl_transform3x3 tr = pl_color_repr_decode(©, NULL); pl_transform3x3_invert(&tr); ident_t cmat = sh_var(sh, (struct pl_shader_var) { .var = pl_var_mat3("cmat"), .data = PL_TRANSPOSE_3X3(tr.mat.m), }); ident_t cmat_c = sh_var(sh, (struct pl_shader_var) { .var = pl_var_vec3("cmat_c"), .data = tr.c, }); GLSL("color.rgb = "$" * color.rgb + "$"; \n", cmat, cmat_c); if (repr->sys == PL_COLOR_SYSTEM_XYZ) { pl_shader_delinearize(sh, &(struct pl_color_space) { .transfer = PL_COLOR_TRC_ST428, }); GLSL("color.rgb *= vec3("$"); \n", xyzscale); } } if (repr->alpha == PL_ALPHA_PREMULTIPLIED) GLSL("color.rgb *= vec3(color.a); \n"); GLSL("}\n"); } static ident_t sh_luma_coeffs(pl_shader sh, const struct pl_color_space *csp) { pl_matrix3x3 rgb2xyz; rgb2xyz = pl_get_rgb2xyz_matrix(pl_raw_primaries_get(csp->primaries)); // FIXME: Cannot use `const vec3` due to glslang bug #2025 ident_t coeffs = sh_fresh(sh, "luma_coeffs"); GLSLH("#define "$" vec3("$", "$", "$") \n", coeffs, SH_FLOAT(rgb2xyz.m[1][0]), // RGB->Y vector SH_FLOAT(rgb2xyz.m[1][1]), SH_FLOAT(rgb2xyz.m[1][2])); return coeffs; } void pl_shader_linearize(pl_shader sh, const struct pl_color_space *csp) { if (!sh_require(sh, PL_SHADER_SIG_COLOR, 0, 0)) return; if (csp->transfer == PL_COLOR_TRC_LINEAR) return; float csp_min, csp_max; pl_color_space_nominal_luma_ex(pl_nominal_luma_params( .color = csp, .metadata = PL_HDR_METADATA_HDR10, .scaling = PL_HDR_NORM, .out_min = &csp_min, .out_max = &csp_max, )); // Note that this clamp may technically violate the definition of // ITU-R BT.2100, which allows for sub-blacks and super-whites to be // displayed on the display where such would be possible. That said, the // problem is that not all gamma curves are well-defined on the values // outside this range, so we ignore it and just clamp anyway for sanity. 
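    // All cases below produce linear light relative to reference white, i.e.
    // 1.0 == PL_COLOR_SDR_WHITE (cf. the explicit rescaling in the PQ branch).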
GLSL("// pl_shader_linearize \n" "color.rgb = max(color.rgb, 0.0); \n"); switch (csp->transfer) { case PL_COLOR_TRC_SRGB: GLSL("color.rgb = mix(color.rgb * vec3(1.0/12.92), \n" " pow((color.rgb + vec3(0.055))/vec3(1.055), \n" " vec3(2.4)), \n" " lessThan(vec3(0.04045), color.rgb)); \n"); goto scale_out; case PL_COLOR_TRC_BT_1886: { const float lb = powf(csp_min, 1/2.4f); const float lw = powf(csp_max, 1/2.4f); const float a = powf(lw - lb, 2.4f); const float b = lb / (lw - lb); GLSL("color.rgb = "$" * pow(color.rgb + vec3("$"), vec3(2.4)); \n", SH_FLOAT(a), SH_FLOAT(b)); return; } case PL_COLOR_TRC_GAMMA18: GLSL("color.rgb = pow(color.rgb, vec3(1.8));\n"); goto scale_out; case PL_COLOR_TRC_GAMMA20: GLSL("color.rgb = pow(color.rgb, vec3(2.0));\n"); goto scale_out; case PL_COLOR_TRC_UNKNOWN: case PL_COLOR_TRC_GAMMA22: GLSL("color.rgb = pow(color.rgb, vec3(2.2));\n"); goto scale_out; case PL_COLOR_TRC_GAMMA24: GLSL("color.rgb = pow(color.rgb, vec3(2.4));\n"); goto scale_out; case PL_COLOR_TRC_GAMMA26: GLSL("color.rgb = pow(color.rgb, vec3(2.6));\n"); goto scale_out; case PL_COLOR_TRC_GAMMA28: GLSL("color.rgb = pow(color.rgb, vec3(2.8));\n"); goto scale_out; case PL_COLOR_TRC_PRO_PHOTO: GLSL("color.rgb = mix(color.rgb * vec3(1.0/16.0), \n" " pow(color.rgb, vec3(1.8)), \n" " lessThan(vec3(0.03125), color.rgb)); \n"); goto scale_out; case PL_COLOR_TRC_ST428: GLSL("color.rgb = vec3(52.37/48.0) * pow(color.rgb, vec3(2.6));\n"); goto scale_out; case PL_COLOR_TRC_PQ: GLSL("color.rgb = pow(color.rgb, vec3(1.0/%f)); \n" "color.rgb = max(color.rgb - vec3(%f), 0.0) \n" " / (vec3(%f) - vec3(%f) * color.rgb); \n" "color.rgb = pow(color.rgb, vec3(1.0/%f)); \n" // PQ's output range is 0-10000, but we need it to be relative to // to PL_COLOR_SDR_WHITE instead, so rescale "color.rgb *= vec3(%f); \n", PQ_M2, PQ_C1, PQ_C2, PQ_C3, PQ_M1, 10000.0 / PL_COLOR_SDR_WHITE); return; case PL_COLOR_TRC_HLG: { const float y = fmaxf(1.2f + 0.42f * log10f(csp_max / HLG_REF), 1); const float b = sqrtf(3 * powf(csp_min / csp_max, 1 / y)); // OETF^-1 GLSL("color.rgb = "$" * color.rgb + vec3("$"); \n" "color.rgb = mix(vec3(4.0) * color.rgb * color.rgb, \n" " exp((color.rgb - vec3(%f)) * vec3(1.0/%f))\n" " + vec3(%f), \n" " lessThan(vec3(0.5), color.rgb)); \n", SH_FLOAT(1 - b), SH_FLOAT(b), HLG_C, HLG_A, HLG_B); // OOTF GLSL("color.rgb *= 1.0 / 12.0; \n" "color.rgb *= "$" * pow(max(dot("$", color.rgb), 0.0), "$"); \n", SH_FLOAT(csp_max), sh_luma_coeffs(sh, csp), SH_FLOAT(y - 1)); return; } case PL_COLOR_TRC_V_LOG: GLSL("color.rgb = mix((color.rgb - vec3(0.125)) * vec3(1.0/5.6), \n" " pow(vec3(10.0), (color.rgb - vec3(%f)) * vec3(1.0/%f)) \n" " - vec3(%f), \n" " lessThanEqual(vec3(0.181), color.rgb)); \n", VLOG_D, VLOG_C, VLOG_B); return; case PL_COLOR_TRC_S_LOG1: GLSL("color.rgb = pow(vec3(10.0), (color.rgb - vec3(%f)) * vec3(1.0/%f)) \n" " - vec3(%f); \n", SLOG_C, SLOG_A, SLOG_B); return; case PL_COLOR_TRC_S_LOG2: GLSL("color.rgb = mix((color.rgb - vec3(%f)) * vec3(1.0/%f), \n" " (pow(vec3(10.0), (color.rgb - vec3(%f)) * vec3(1.0/%f)) \n" " - vec3(%f)) * vec3(1.0/%f), \n" " lessThanEqual(vec3(%f), color.rgb)); \n", SLOG_Q, SLOG_P, SLOG_C, SLOG_A, SLOG_B, SLOG_K2, SLOG_Q); return; case PL_COLOR_TRC_LINEAR: case PL_COLOR_TRC_COUNT: break; } pl_unreachable(); scale_out: if (csp_max != 1 || csp_min != 0) { GLSL("color.rgb = "$" * color.rgb + vec3("$"); \n", SH_FLOAT(csp_max - csp_min), SH_FLOAT(csp_min)); } } void pl_shader_delinearize(pl_shader sh, const struct pl_color_space *csp) { if (!sh_require(sh, PL_SHADER_SIG_COLOR, 0, 0)) 
return; if (csp->transfer == PL_COLOR_TRC_LINEAR) return; float csp_min, csp_max; pl_color_space_nominal_luma_ex(pl_nominal_luma_params( .color = csp, .metadata = PL_HDR_METADATA_HDR10, .scaling = PL_HDR_NORM, .out_min = &csp_min, .out_max = &csp_max, )); GLSL("// pl_shader_delinearize \n"); if (pl_color_space_is_black_scaled(csp) && csp->transfer != PL_COLOR_TRC_HLG && (csp_max != 1 || csp_min != 0)) { GLSL("color.rgb = "$" * color.rgb + vec3("$"); \n", SH_FLOAT(1 / (csp_max - csp_min)), SH_FLOAT(-csp_min / (csp_max - csp_min))); } GLSL("color.rgb = max(color.rgb, 0.0); \n"); switch (csp->transfer) { case PL_COLOR_TRC_SRGB: GLSL("color.rgb = mix(color.rgb * vec3(12.92), \n" " vec3(1.055) * pow(color.rgb, vec3(1.0/2.4)) \n" " - vec3(0.055), \n" " lessThanEqual(vec3(0.0031308), color.rgb)); \n"); return; case PL_COLOR_TRC_BT_1886: { const float lb = powf(csp_min, 1/2.4f); const float lw = powf(csp_max, 1/2.4f); const float a = powf(lw - lb, 2.4f); const float b = lb / (lw - lb); GLSL("color.rgb = pow("$" * color.rgb, vec3(1.0/2.4)) - vec3("$"); \n", SH_FLOAT(1.0 / a), SH_FLOAT(b)); return; } case PL_COLOR_TRC_GAMMA18: GLSL("color.rgb = pow(color.rgb, vec3(1.0/1.8));\n"); return; case PL_COLOR_TRC_GAMMA20: GLSL("color.rgb = pow(color.rgb, vec3(1.0/2.0));\n"); return; case PL_COLOR_TRC_UNKNOWN: case PL_COLOR_TRC_GAMMA22: GLSL("color.rgb = pow(color.rgb, vec3(1.0/2.2));\n"); return; case PL_COLOR_TRC_GAMMA24: GLSL("color.rgb = pow(color.rgb, vec3(1.0/2.4));\n"); return; case PL_COLOR_TRC_GAMMA26: GLSL("color.rgb = pow(color.rgb, vec3(1.0/2.6));\n"); return; case PL_COLOR_TRC_GAMMA28: GLSL("color.rgb = pow(color.rgb, vec3(1.0/2.8));\n"); return; case PL_COLOR_TRC_ST428: GLSL("color.rgb = pow(color.rgb * vec3(48.0/52.37), vec3(1.0/2.6));\n"); return; case PL_COLOR_TRC_PRO_PHOTO: GLSL("color.rgb = mix(color.rgb * vec3(16.0), \n" " pow(color.rgb, vec3(1.0/1.8)), \n" " lessThanEqual(vec3(0.001953), color.rgb)); \n"); return; case PL_COLOR_TRC_PQ: GLSL("color.rgb *= vec3(1.0/%f); \n" "color.rgb = pow(color.rgb, vec3(%f)); \n" "color.rgb = (vec3(%f) + vec3(%f) * color.rgb) \n" " / (vec3(1.0) + vec3(%f) * color.rgb); \n" "color.rgb = pow(color.rgb, vec3(%f)); \n", 10000 / PL_COLOR_SDR_WHITE, PQ_M1, PQ_C1, PQ_C2, PQ_C3, PQ_M2); return; case PL_COLOR_TRC_HLG: { const float y = fmaxf(1.2f + 0.42f * log10f(csp_max / HLG_REF), 1); const float b = sqrtf(3 * powf(csp_min / csp_max, 1 / y)); // OOTF^-1 GLSL("color.rgb *= 1.0 / "$"; \n" "color.rgb *= 12.0 * max(1e-6, pow(dot("$", color.rgb), "$")); \n", SH_FLOAT(csp_max), sh_luma_coeffs(sh, csp), SH_FLOAT((1 - y) / y)); // OETF GLSL("color.rgb = mix(vec3(0.5) * sqrt(color.rgb), \n" " vec3(%f) * log(color.rgb - vec3(%f)) + vec3(%f), \n" " lessThan(vec3(1.0), color.rgb)); \n" "color.rgb = "$" * color.rgb + vec3("$"); \n", HLG_A, HLG_B, HLG_C, SH_FLOAT(1 / (1 - b)), SH_FLOAT(-b / (1 - b))); return; } case PL_COLOR_TRC_V_LOG: GLSL("color.rgb = mix(vec3(5.6) * color.rgb + vec3(0.125), \n" " vec3(%f) * log(color.rgb + vec3(%f)) \n" " + vec3(%f), \n" " lessThanEqual(vec3(0.01), color.rgb)); \n", VLOG_C / M_LN10, VLOG_B, VLOG_D); return; case PL_COLOR_TRC_S_LOG1: GLSL("color.rgb = vec3(%f) * log(color.rgb + vec3(%f)) + vec3(%f);\n", SLOG_A / M_LN10, SLOG_B, SLOG_C); return; case PL_COLOR_TRC_S_LOG2: GLSL("color.rgb = mix(vec3(%f) * color.rgb + vec3(%f), \n" " vec3(%f) * log(vec3(%f) * color.rgb + vec3(%f)) \n" " + vec3(%f), \n" " lessThanEqual(vec3(0.0), color.rgb)); \n", SLOG_P, SLOG_Q, SLOG_A / M_LN10, SLOG_K2, SLOG_B, SLOG_C); return; case PL_COLOR_TRC_LINEAR: case 
PL_COLOR_TRC_COUNT: break; } pl_unreachable(); } const struct pl_sigmoid_params pl_sigmoid_default_params = { PL_SIGMOID_DEFAULTS }; void pl_shader_sigmoidize(pl_shader sh, const struct pl_sigmoid_params *params) { if (!sh_require(sh, PL_SHADER_SIG_COLOR, 0, 0)) return; params = PL_DEF(params, &pl_sigmoid_default_params); float center = PL_DEF(params->center, pl_sigmoid_default_params.center); float slope = PL_DEF(params->slope, pl_sigmoid_default_params.slope); // This function needs to go through (0,0) and (1,1), so we compute the // values at 1 and 0, and then scale/shift them, respectively. float offset = 1.0 / (1 + expf(slope * center)); float scale = 1.0 / (1 + expf(slope * (center - 1))) - offset; GLSL("// pl_shader_sigmoidize \n" "color = clamp(color, 0.0, 1.0); \n" "color = vec4("$") - vec4("$") * \n" " log(vec4(1.0) / (color * vec4("$") + vec4("$")) \n" " - vec4(1.0)); \n", SH_FLOAT(center), SH_FLOAT(1.0 / slope), SH_FLOAT(scale), SH_FLOAT(offset)); } void pl_shader_unsigmoidize(pl_shader sh, const struct pl_sigmoid_params *params) { if (!sh_require(sh, PL_SHADER_SIG_COLOR, 0, 0)) return; // See: pl_shader_sigmoidize params = PL_DEF(params, &pl_sigmoid_default_params); float center = PL_DEF(params->center, pl_sigmoid_default_params.center); float slope = PL_DEF(params->slope, pl_sigmoid_default_params.slope); float offset = 1.0 / (1 + expf(slope * center)); float scale = 1.0 / (1 + expf(slope * (center - 1))) - offset; GLSL("// pl_shader_unsigmoidize \n" "color = clamp(color, 0.0, 1.0); \n" "color = vec4("$") / \n" " (vec4(1.0) + exp(vec4("$") * (vec4("$") - color))) \n" " - vec4("$"); \n", SH_FLOAT(1.0 / scale), SH_FLOAT(slope), SH_FLOAT(center), SH_FLOAT(offset / scale)); } const struct pl_peak_detect_params pl_peak_detect_default_params = { PL_PEAK_DETECT_DEFAULTS }; const struct pl_peak_detect_params pl_peak_detect_high_quality_params = { PL_PEAK_DETECT_HQ_DEFAULTS }; static bool peak_detect_params_eq(const struct pl_peak_detect_params *a, const struct pl_peak_detect_params *b) { return a->smoothing_period == b->smoothing_period && a->scene_threshold_low == b->scene_threshold_low && a->scene_threshold_high == b->scene_threshold_high && a->percentile == b->percentile; // don't compare `allow_delayed` because it doesn't change measurement } enum { // Split the peak buffer into several independent slices to reduce pressure // on global atomics SLICES = 12, // How many bits to use for storing PQ data. Be careful when setting this // too high, as it may overflow `unsigned int` on large video sources. // // The value chosen is enough to guarantee no overflow for an 8K x 4K frame // consisting entirely of 100% 10k nits PQ values, with 16x16 workgroups. PQ_BITS = 14, PQ_MAX = (1 << PQ_BITS) - 1, // How many bits to use for the histogram. We bias the histogram down // by half the PQ range (~90 nits), effectively clumping the SDR part // of the image into a single histogram bin. 
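    // (Worked example for the values below: HIST_BIAS = 1 << 6 = 64,
    // HIST_BINS = 128 - 64 = 64, each bin spans 1 << (PQ_BITS - HIST_BITS) = 128
    // PQ codes, and HIST_PQ(0) = 64 << 7 = 8192, i.e. half of PQ_MAX.)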
HIST_BITS = 7, HIST_BIAS = 1 << (HIST_BITS - 1), HIST_BINS = (1 << HIST_BITS) - HIST_BIAS, // Convert from histogram bin to (starting) PQ value #define HIST_PQ(bin) (((bin) + HIST_BIAS) << (PQ_BITS - HIST_BITS)) }; pl_static_assert(PQ_BITS >= HIST_BITS); struct peak_buf_data { unsigned frame_wg_count[SLICES]; // number of work groups processed unsigned frame_wg_active[SLICES];// number of active (nonzero) work groups unsigned frame_sum_pq[SLICES]; // sum of PQ Y values over all WGs (PQ_BITS) unsigned frame_max_pq[SLICES]; // maximum PQ Y value among these WGs (PQ_BITS) unsigned frame_hist[SLICES][HIST_BINS]; // always allocated, conditionally used }; static const struct pl_buffer_var peak_buf_vars[] = { #define VAR(field) { \ .var = { \ .name = #field, \ .type = PL_VAR_UINT, \ .dim_v = 1, \ .dim_m = 1, \ .dim_a = sizeof(((struct peak_buf_data *) NULL)->field) / \ sizeof(unsigned), \ }, \ .layout = { \ .offset = offsetof(struct peak_buf_data, field), \ .size = sizeof(((struct peak_buf_data *) NULL)->field), \ .stride = sizeof(unsigned), \ }, \ } VAR(frame_wg_count), VAR(frame_wg_active), VAR(frame_sum_pq), VAR(frame_max_pq), VAR(frame_hist), #undef VAR }; struct sh_color_map_obj { // Tone map state struct { struct pl_tone_map_params params; pl_shader_obj lut; } tone; // Gamut map state struct { pl_shader_obj lut; } gamut; // Peak detection state struct { struct pl_peak_detect_params params; // currently active parameters pl_buf buf; // pending peak detection buffer pl_buf readback; // readback buffer (fallback) float avg_pq; // current (smoothed) values float max_pq; } peak; }; // Excluding size, since this is checked by sh_lut static uint64_t gamut_map_signature(const struct pl_gamut_map_params *par) { uint64_t sig = CACHE_KEY_GAMUT_LUT; pl_hash_merge(&sig, pl_str0_hash(par->function->name)); pl_hash_merge(&sig, pl_var_hash(par->input_gamut)); pl_hash_merge(&sig, pl_var_hash(par->output_gamut)); pl_hash_merge(&sig, pl_var_hash(par->min_luma)); pl_hash_merge(&sig, pl_var_hash(par->max_luma)); pl_hash_merge(&sig, pl_var_hash(par->constants)); return sig; } static void sh_color_map_uninit(pl_gpu gpu, void *ptr) { struct sh_color_map_obj *obj = ptr; pl_shader_obj_destroy(&obj->tone.lut); pl_shader_obj_destroy(&obj->gamut.lut); pl_buf_destroy(gpu, &obj->peak.buf); pl_buf_destroy(gpu, &obj->peak.readback); memset(obj, 0, sizeof(*obj)); } static inline float iir_coeff(float rate) { if (!rate) return 1.0f; return 1.0f - expf(-1.0f / rate); } static float measure_peak(const struct peak_buf_data *data, float percentile) { unsigned frame_max_pq = data->frame_max_pq[0]; for (int k = 1; k < SLICES; k++) frame_max_pq = PL_MAX(frame_max_pq, data->frame_max_pq[k]); const float frame_max = (float) frame_max_pq / PQ_MAX; if (percentile <= 0 || percentile >= 100) return frame_max; unsigned total_pixels = 0; for (int k = 0; k < SLICES; k++) { for (int i = 0; i < HIST_BINS; i++) total_pixels += data->frame_hist[k][i]; } if (!total_pixels) // no histogram data available? 
return frame_max; const unsigned target_pixel = ceilf(percentile / 100.0f * total_pixels); if (target_pixel >= total_pixels) return frame_max; unsigned sum = 0; for (int i = 0; i < HIST_BINS; i++) { unsigned next = sum; for (int k = 0; k < SLICES; k++) next += data->frame_hist[k][i]; if (next < target_pixel) { sum = next; continue; } // Upper and lower frequency boundaries of the matching histogram bin const unsigned count_low = sum; // last pixel of previous bin const unsigned count_high = next + 1; // first pixel of next bin pl_assert(count_low < target_pixel && target_pixel < count_high); // PQ luminance associated with count_low/high respectively const float pq_low = (float) HIST_PQ(i) / PQ_MAX; float pq_high = (float) HIST_PQ(i + 1) / PQ_MAX; if (count_high > total_pixels) // special case for last histogram bin pq_high = frame_max; // Position of `target_pixel` inside this bin, assumes pixels are // equidistributed inside a histogram bin const float ratio = (float) (target_pixel - count_low) / (count_high - count_low); return PL_MIX(pq_low, pq_high, ratio); } pl_unreachable(); } // if `force` is true, ensures the buffer is read, even if `allow_delayed` static void update_peak_buf(pl_gpu gpu, struct sh_color_map_obj *obj, bool force) { const struct pl_peak_detect_params *params = &obj->peak.params; if (!obj->peak.buf) return; if (!force && params->allow_delayed && pl_buf_poll(gpu, obj->peak.buf, 0)) return; // buffer not ready yet bool ok; struct peak_buf_data data = {0}; if (obj->peak.readback) { pl_buf_copy(gpu, obj->peak.readback, 0, obj->peak.buf, 0, sizeof(data)); ok = pl_buf_read(gpu, obj->peak.readback, 0, &data, sizeof(data)); } else { ok = pl_buf_read(gpu, obj->peak.buf, 0, &data, sizeof(data)); } if (ok && data.frame_wg_count[0] > 0) { // Peak detection completed successfully pl_buf_destroy(gpu, &obj->peak.buf); } else { // No data read? Possibly this peak obj has not been executed yet if (!ok) { PL_ERR(gpu, "Failed reading peak detection buffer!"); } else if (params->allow_delayed) { PL_TRACE(gpu, "Peak detection buffer not yet ready, ignoring.."); } else { PL_WARN(gpu, "Peak detection usage error: attempted detecting peak " "and using detected peak in the same shader program, " "but `params->allow_delayed` is false! 
Ignoring, but " "expect incorrect output."); } if (force || !ok) pl_buf_destroy(gpu, &obj->peak.buf); return; } uint64_t frame_sum_pq = 0u, frame_wg_count = 0u, frame_wg_active = 0u; for (int k = 0; k < SLICES; k++) { frame_sum_pq += data.frame_sum_pq[k]; frame_wg_count += data.frame_wg_count[k]; frame_wg_active += data.frame_wg_active[k]; } float avg_pq, max_pq; if (frame_wg_active) { avg_pq = (float) frame_sum_pq / (frame_wg_active * PQ_MAX); max_pq = measure_peak(&data, params->percentile); } else { // Solid black frame avg_pq = max_pq = PL_COLOR_HDR_BLACK; } if (!obj->peak.avg_pq) { // Set the initial value accordingly if it contains no data obj->peak.avg_pq = avg_pq; obj->peak.max_pq = max_pq; } else { // Ignore small deviations from existing peak (rounding error) static const float epsilon = 1.0f / PQ_MAX; if (fabsf(avg_pq - obj->peak.avg_pq) < epsilon) avg_pq = obj->peak.avg_pq; if (fabsf(max_pq - obj->peak.max_pq) < epsilon) max_pq = obj->peak.max_pq; } // Use an IIR low-pass filter to smooth out the detected values const float coeff = iir_coeff(params->smoothing_period); obj->peak.avg_pq += coeff * (avg_pq - obj->peak.avg_pq); obj->peak.max_pq += coeff * (max_pq - obj->peak.max_pq); // Scene change hysteresis if (params->scene_threshold_low > 0 && params->scene_threshold_high > 0) { const float log10_pq = 1e-2f; // experimentally determined approximate const float thresh_low = params->scene_threshold_low * log10_pq; const float thresh_high = params->scene_threshold_high * log10_pq; const float bias = (float) frame_wg_active / frame_wg_count; const float delta = bias * fabsf(avg_pq - obj->peak.avg_pq); const float mix_coeff = pl_smoothstep(thresh_low, thresh_high, delta); obj->peak.avg_pq = PL_MIX(obj->peak.avg_pq, avg_pq, mix_coeff); obj->peak.max_pq = PL_MIX(obj->peak.max_pq, max_pq, mix_coeff); } } bool pl_shader_detect_peak(pl_shader sh, struct pl_color_space csp, pl_shader_obj *state, const struct pl_peak_detect_params *params) { params = PL_DEF(params, &pl_peak_detect_default_params); if (!sh_require(sh, PL_SHADER_SIG_COLOR, 0, 0)) return false; pl_gpu gpu = SH_GPU(sh); if (!gpu || gpu->limits.max_ssbo_size < sizeof(struct peak_buf_data)) { PL_ERR(sh, "HDR peak detection requires a GPU with support for at " "least %zu bytes of SSBO data (supported: %zu)", sizeof(struct peak_buf_data), gpu ? gpu->limits.max_ssbo_size : 0); return false; } const bool use_histogram = params->percentile > 0 && params->percentile < 100; size_t shmem_req = 3 * sizeof(uint32_t); if (use_histogram) shmem_req += sizeof(uint32_t[HIST_BINS]); if (!sh_try_compute(sh, 16, 16, true, shmem_req)) { PL_ERR(sh, "HDR peak detection requires compute shaders with support " "for at least %zu bytes of shared memory! 
(avail: %zu)", shmem_req, sh_glsl(sh).max_shmem_size); return false; } struct sh_color_map_obj *obj; obj = SH_OBJ(sh, state, PL_SHADER_OBJ_COLOR_MAP, struct sh_color_map_obj, sh_color_map_uninit); if (!obj) return false; if (peak_detect_params_eq(&obj->peak.params, params)) { update_peak_buf(gpu, obj, true); // prevent over-writing previous frame } else { pl_reset_detected_peak(*state); } pl_assert(!obj->peak.buf); static const struct peak_buf_data zero = {0}; retry_ssbo: if (obj->peak.readback) { obj->peak.buf = pl_buf_create(gpu, pl_buf_params( .size = sizeof(struct peak_buf_data), .storable = true, .initial_data = &zero, )); } else { obj->peak.buf = pl_buf_create(gpu, pl_buf_params( .size = sizeof(struct peak_buf_data), .memory_type = PL_BUF_MEM_DEVICE, .host_readable = true, .storable = true, .initial_data = &zero, )); } if (!obj->peak.buf && !obj->peak.readback) { PL_WARN(sh, "Failed creating host-readable peak detection SSBO, " "retrying with fallback buffer"); obj->peak.readback = pl_buf_create(gpu, pl_buf_params( .size = sizeof(struct peak_buf_data), .host_readable = true, )); if (obj->peak.readback) goto retry_ssbo; } if (!obj->peak.buf) { SH_FAIL(sh, "Failed creating peak detection SSBO!"); return false; } obj->peak.params = *params; sh_desc(sh, (struct pl_shader_desc) { .desc = { .name = "PeakBuf", .type = PL_DESC_BUF_STORAGE, .access = PL_DESC_ACCESS_READWRITE, }, .binding.object = obj->peak.buf, .buffer_vars = (struct pl_buffer_var *) peak_buf_vars, .num_buffer_vars = PL_ARRAY_SIZE(peak_buf_vars), }); // For performance, we want to do as few atomic operations on global // memory as possible, so use an atomic in shmem for the work group. ident_t wg_sum = sh_fresh(sh, "wg_sum"), wg_max = sh_fresh(sh, "wg_max"), wg_black = sh_fresh(sh, "wg_black"), wg_hist = NULL_IDENT; GLSLH("shared uint "$", "$", "$"; \n", wg_sum, wg_max, wg_black); if (use_histogram) { wg_hist = sh_fresh(sh, "wg_hist"); GLSLH("shared uint "$"[%u]; \n", wg_hist, HIST_BINS); } sh_describe(sh, "peak detection"); #pragma GLSL /* pl_shader_detect_peak */ \ { \ const uint wg_size = gl_WorkGroupSize.x * gl_WorkGroupSize.y; \ const uint wg_idx = gl_WorkGroupID.y * gl_NumWorkGroups.x + gl_WorkGroupID.x;\ const uint local_idx = gl_LocalInvocationIndex; \ const uint slice = wg_idx % ${const uint: SLICES}; \ const uint hist_base = slice * ${const uint: HIST_BINS}; \ const vec4 color_orig = color; \ $wg_sum = $wg_max = $wg_black = 0u; \ @if (use_histogram) { \ for (uint i = local_idx; i < ${const uint: HIST_BINS}; i += wg_size) \ $wg_hist[i] = 0u; \ @} \ barrier(); // Decode color into linear light representation pl_color_space_infer(&csp); pl_shader_linearize(sh, &csp); bool has_subgroups = sh_glsl(sh).subgroup_size > 0; const float cutoff = fmaxf(params->black_cutoff, 0.0f) * 1e-2f; #pragma GLSL /* Measure luminance as N-bit PQ */ \ float luma = dot(${sh_luma_coeffs(sh, &csp)}, color.rgb); \ luma *= ${const float: PL_COLOR_SDR_WHITE / 10000.0}; \ luma = pow(clamp(luma, 0.0, 1.0), ${const float: PQ_M1}); \ luma = (${const float: PQ_C1} + ${const float: PQ_C2} * luma) / \ (1.0 + ${const float: PQ_C3} * luma); \ luma = pow(luma, ${const float: PQ_M2}); \ @if (cutoff) \ luma *= smoothstep(0.0, ${float: cutoff}, luma); \ uint y_pq = uint(${const float: PQ_MAX} * luma); \ \ /* Update the work group's shared atomics */ \ @if (use_histogram) { \ int bin = int(y_pq) >> ${const int: PQ_BITS - HIST_BITS}; \ bin -= ${const int: HIST_BIAS}; \ bin = clamp(bin, 0, ${const int: HIST_BINS - 1}); \ @if (has_subgroups) { \ /* Optimize for 
the very common case of identical histogram bins */ \ if (subgroupAllEqual(bin)) { \ if (subgroupElect()) \ atomicAdd($wg_hist[bin], gl_SubgroupSize); \ } else { \ atomicAdd($wg_hist[bin], 1u); \ } \ @} else { \ atomicAdd($wg_hist[bin], 1u); \ @} \ @} \ \ @if (has_subgroups) { \ uint group_sum = subgroupAdd(y_pq); \ uint group_max = subgroupMax(y_pq); \ @if (cutoff) \ uvec4 b = subgroupBallot(y_pq == 0u); \ if (subgroupElect()) { \ atomicAdd($wg_sum, group_sum); \ atomicMax($wg_max, group_max); \ @if (cutoff) \ atomicAdd($wg_black, subgroupBallotBitCount(b)); \ } \ @} else { \ atomicAdd($wg_sum, y_pq); \ atomicMax($wg_max, y_pq); \ @if (cutoff) { \ if (y_pq == 0u) \ atomicAdd($wg_black, 1u); \ @} \ @} \ barrier(); \ \ @if (use_histogram) { \ @if (cutoff) { \ if (gl_LocalInvocationIndex == 0u) \ $wg_hist[0] -= $wg_black; \ @} \ /* Update the histogram with a cooperative loop */ \ for (uint i = local_idx; i < ${const uint: HIST_BINS}; i += wg_size) \ atomicAdd(frame_hist[hist_base + i], $wg_hist[i]); \ @} \ \ /* Have one thread per work group update the global atomics */ \ if (gl_LocalInvocationIndex == 0u) { \ uint num = wg_size - $wg_black; \ atomicAdd(frame_wg_count[slice], 1u); \ atomicAdd(frame_wg_active[slice], min(num, 1u)); \ if (num > 0u) { \ atomicAdd(frame_sum_pq[slice], $wg_sum / num); \ atomicMax(frame_max_pq[slice], $wg_max); \ } \ } \ color = color_orig; \ } return true; } bool pl_get_detected_hdr_metadata(const pl_shader_obj state, struct pl_hdr_metadata *out) { if (!state || state->type != PL_SHADER_OBJ_COLOR_MAP) return false; struct sh_color_map_obj *obj = state->priv; update_peak_buf(state->gpu, obj, false); if (!obj->peak.avg_pq) return false; out->max_pq_y = obj->peak.max_pq; out->avg_pq_y = obj->peak.avg_pq; return true; } void pl_reset_detected_peak(pl_shader_obj state) { if (!state || state->type != PL_SHADER_OBJ_COLOR_MAP) return; struct sh_color_map_obj *obj = state->priv; pl_buf readback = obj->peak.readback; pl_buf_destroy(state->gpu, &obj->peak.buf); memset(&obj->peak, 0, sizeof(obj->peak)); obj->peak.readback = readback; } void pl_shader_extract_features(pl_shader sh, struct pl_color_space csp) { if (!sh_require(sh, PL_SHADER_SIG_COLOR, 0, 0)) return; sh_describe(sh, "feature extraction"); pl_shader_linearize(sh, &csp); GLSL("// pl_shader_extract_features \n" "{ \n" "vec3 lms = %f * "$" * color.rgb; \n" "lms = pow(max(lms, 0.0), vec3(%f)); \n" "lms = (vec3(%f) + %f * lms) \n" " / (vec3(1.0) + %f * lms); \n" "lms = pow(lms, vec3(%f)); \n" "float I = dot(vec3(%f, %f, %f), lms); \n" "color = vec4(I, 0.0, 0.0, 1.0); \n" "} \n", PL_COLOR_SDR_WHITE / 10000, SH_MAT3(pl_ipt_rgb2lms(pl_raw_primaries_get(csp.primaries))), PQ_M1, PQ_C1, PQ_C2, PQ_C3, PQ_M2, pl_ipt_lms2ipt.m[0][0], pl_ipt_lms2ipt.m[0][1], pl_ipt_lms2ipt.m[0][2]); } const struct pl_color_map_params pl_color_map_default_params = { PL_COLOR_MAP_DEFAULTS }; const struct pl_color_map_params pl_color_map_high_quality_params = { PL_COLOR_MAP_HQ_DEFAULTS }; static ident_t rect_pos(pl_shader sh, pl_rect2df rc) { if (!rc.x0 && !rc.x1) rc.x1 = 1.0f; if (!rc.y0 && !rc.y1) rc.y1 = 1.0f; return sh_attr_vec2(sh, "tone_map_coords", &(pl_rect2df) { .x0 = -rc.x0 / (rc.x1 - rc.x0), .x1 = (1.0f - rc.x0) / (rc.x1 - rc.x0), .y0 = -rc.y1 / (rc.y0 - rc.y1), .y1 = (1.0f - rc.y1) / (rc.y0 - rc.y1), }); } static void visualize_tone_map(pl_shader sh, pl_rect2df rc, float alpha, const struct pl_tone_map_params *params) { pl_assert(params->input_scaling == PL_HDR_PQ); pl_assert(params->output_scaling == PL_HDR_PQ); GLSL("// Visualize 
tone mapping \n" "{ \n" "vec2 pos = "$"; \n" "if (min(pos.x, pos.y) >= 0.0 && \n" // visualizer rect " max(pos.x, pos.y) <= 1.0) \n" "{ \n" "float xmin = "$"; \n" "float xmax = "$"; \n" "float xavg = "$"; \n" "float ymin = "$"; \n" "float ymax = "$"; \n" "float alpha = 0.8 * "$"; \n" "vec3 viz = color.rgb; \n" "float vv = tone_map(pos.x); \n" // Color based on region "if (pos.x < xmin || pos.x > xmax) { \n" // outside source "} else if (pos.y < ymin || pos.y > ymax) {\n" // outside target " if (pos.y < xmin || pos.y > xmax) { \n" // and also source " viz = vec3(0.1, 0.1, 0.5); \n" " } else { \n" " viz = vec3(0.2, 0.05, 0.05); \n" // but inside source " } \n" "} else { \n" // inside domain " if (abs(pos.x - pos.y) < 1e-3) { \n" // main diagonal " viz = vec3(0.2); \n" " } else if (pos.y < vv) { \n" // inside function " alpha *= 0.6; \n" " viz = vec3(0.05); \n" " if (vv > pos.x && pos.y > pos.x) \n" // output brighter than input " viz.rg = vec2(0.5, 0.7); \n" " } else { \n" // outside function " if (vv < pos.x && pos.y < pos.x) \n" // output darker than input " viz = vec3(0.0, 0.1, 0.2); \n" " } \n" " if (pos.y > xmax) { \n" // inverse tone-mapping region " vec3 hi = vec3(0.2, 0.5, 0.8); \n" " viz = mix(viz, hi, 0.5); \n" " } else if (pos.y < xmin) { \n" // black point region " viz = mix(viz, vec3(0.0), 0.3); \n" " } \n" " if (xavg > 0.0 && abs(pos.x - xavg) < 1e-3)\n" // source avg brightness " viz = vec3(0.5); \n" "} \n" "color.rgb = mix(color.rgb, viz, alpha); \n" "} \n" "} \n", rect_pos(sh, rc), SH_FLOAT_DYN(params->input_min), SH_FLOAT_DYN(params->input_max), SH_FLOAT_DYN(params->input_avg), SH_FLOAT(params->output_min), SH_FLOAT_DYN(params->output_max), SH_FLOAT_DYN(alpha)); } static void visualize_gamut_map(pl_shader sh, pl_rect2df rc, ident_t lut, float hue, float theta, const struct pl_gamut_map_params *params) { ident_t ipt2lms = SH_MAT3(pl_ipt_ipt2lms); ident_t lms2rgb_src = SH_MAT3(pl_ipt_lms2rgb(¶ms->input_gamut)); ident_t lms2rgb_dst = SH_MAT3(pl_ipt_lms2rgb(¶ms->output_gamut)); GLSL("// Visualize gamut mapping \n" "vec2 pos = "$"; \n" "float pqmin = "$"; \n" "float pqmax = "$"; \n" "float rgbmin = "$"; \n" "float rgbmax = "$"; \n" "vec3 orig = ipt; \n" "if (min(pos.x, pos.y) >= 0.0 && \n" " max(pos.x, pos.y) <= 1.0) \n" "{ \n" // Source color to visualize "float mid = mix(pqmin, pqmax, 0.6); \n" "vec3 base = vec3(0.5, 0.0, 0.0); \n" "float hue = "$", theta = "$"; \n" "base.x = mix(base.x, mid, sin(theta)); \n" "mat3 rot1 = mat3(1.0, 0.0, 0.0, \n" " 0.0, cos(hue), sin(hue), \n" " 0.0, -sin(hue), cos(hue)); \n" "mat3 rot2 = mat3( cos(theta), 0.0, sin(theta), \n" " 0.0, 1.0, 0.0, \n" " -sin(theta), 0.0, cos(theta)); \n" "vec3 dir = vec3(pos.yx - vec2(0.5), 0.0); \n" "ipt = base + rot1 * rot2 * dir; \n" // Convert back to RGB (for gamut boundary testing) "lmspq = "$" * ipt; \n" "lms = pow(max(lmspq, 0.0), vec3(1.0/%f)); \n" "lms = max(lms - vec3(%f), 0.0) \n" " / (vec3(%f) - %f * lms); \n" "lms = pow(lms, vec3(1.0/%f)); \n" "lms *= %f; \n" // Check against src/dst gamut boundaries "vec3 rgbsrc = "$" * lms; \n" "vec3 rgbdst = "$" * lms; \n" "bool insrc, indst; \n" "insrc = all(lessThan(rgbsrc, vec3(rgbmax))) && \n" " all(greaterThan(rgbsrc, vec3(rgbmin))); \n" "indst = all(lessThan(rgbdst, vec3(rgbmax))) && \n" " all(greaterThan(rgbdst, vec3(rgbmin))); \n" // Sample from gamut mapping 3DLUT "idx.x = (ipt.x - pqmin) / (pqmax - pqmin); \n" "idx.y = 2.0 * length(ipt.yz); \n" "idx.z = %f * atan(ipt.z, ipt.y) + 0.5; \n" "vec3 mapped = "$"(idx).xyz; \n" "mapped.yz -= 
vec2(32768.0/65535.0); \n" "float mappedhue = atan(mapped.z, mapped.y); \n" "float mappedchroma = length(mapped.yz); \n" "ipt = mapped; \n" // Visualize gamuts "if (!insrc && !indst) { \n" " ipt = orig; \n" "} else if (insrc && !indst) { \n" " ipt.x -= 0.1; \n" "} else if (indst && !insrc) { \n" " ipt.x += 0.1; \n" "} \n" // Visualize iso-luminance and iso-hue lines "vec3 line; \n" "if (insrc && fract(50.0 * mapped.x) < 1e-1) { \n" " float k = smoothstep(0.1, 0.0, abs(sin(theta))); \n" " line.x = mix(mapped.x, 0.3, 0.5); \n" " line.yz = sqrt(length(mapped.yz)) * \n" " normalize(mapped.yz); \n" " ipt = mix(ipt, line, k); \n" "} \n" "if (insrc && fract(10.0 * (mappedhue - hue)) < 1e-1) {\n" " float k = smoothstep(0.3, 0.0, abs(cos(theta))); \n" " line.x = mapped.x - 0.05; \n" " line.yz = 1.2 * mapped.yz; \n" " ipt = mix(ipt, line, k); \n" "} \n" "if (insrc && fract(100.0 * mappedchroma) < 1e-1) { \n" " line.x = mapped.x + 0.1; \n" " line.yz = 0.4 * mapped.yz; \n" " ipt = mix(ipt, line, 0.5); \n" "} \n" "} \n", rect_pos(sh, rc), SH_FLOAT(params->min_luma), SH_FLOAT(params->max_luma), SH_FLOAT(pl_hdr_rescale(PL_HDR_PQ, PL_HDR_NORM, params->min_luma)), SH_FLOAT(pl_hdr_rescale(PL_HDR_PQ, PL_HDR_NORM, params->max_luma)), SH_FLOAT_DYN(hue), SH_FLOAT_DYN(theta), ipt2lms, PQ_M2, PQ_C1, PQ_C2, PQ_C3, PQ_M1, 10000 / PL_COLOR_SDR_WHITE, lms2rgb_src, lms2rgb_dst, 0.5f / M_PI, lut); } static void fill_tone_lut(void *data, const struct sh_lut_params *params) { const struct pl_tone_map_params *lut_params = params->priv; pl_tone_map_generate(data, lut_params); } static void fill_gamut_lut(void *data, const struct sh_lut_params *params) { const struct pl_gamut_map_params *lut_params = params->priv; const int lut_size = params->width * params->height * params->depth; void *tmp = pl_alloc(NULL, lut_size * sizeof(float) * lut_params->lut_stride); pl_gamut_map_generate(tmp, lut_params); // Convert to 16-bit unsigned integer for GPU texture const float *in = tmp; uint16_t *out = data; pl_assert(lut_params->lut_stride == 3); pl_assert(params->comps == 4); for (int i = 0; i < lut_size; i++) { out[0] = roundf(in[0] * UINT16_MAX); out[1] = roundf(in[1] * UINT16_MAX + (UINT16_MAX >> 1)); out[2] = roundf(in[2] * UINT16_MAX + (UINT16_MAX >> 1)); in += 3; out += 4; } pl_free(tmp); } void pl_shader_color_map_ex(pl_shader sh, const struct pl_color_map_params *params, const struct pl_color_map_args *args) { if (!sh_require(sh, PL_SHADER_SIG_COLOR, 0, 0)) return; struct pl_color_space src = args->src, dst = args->dst; struct sh_color_map_obj *obj = NULL; if (args->state) { pl_get_detected_hdr_metadata(*args->state, &src.hdr); obj = SH_OBJ(sh, args->state, PL_SHADER_OBJ_COLOR_MAP, struct sh_color_map_obj, sh_color_map_uninit); if (!obj) return; } pl_color_space_infer_map(&src, &dst); if (pl_color_space_equal(&src, &dst)) { if (args->prelinearized) pl_shader_delinearize(sh, &dst); return; } params = PL_DEF(params, &pl_color_map_default_params); GLSL("// pl_shader_color_map \n" "{ \n"); struct pl_tone_map_params tone = { .function = PL_DEF(params->tone_mapping_function, &pl_tone_map_clip), .constants = params->tone_constants, .param = params->tone_mapping_param, .input_scaling = PL_HDR_PQ, .output_scaling = PL_HDR_PQ, .lut_size = PL_DEF(params->lut_size, pl_color_map_default_params.lut_size), .hdr = src.hdr, }; pl_color_space_nominal_luma_ex(pl_nominal_luma_params( .color = &src, .metadata = params->metadata, .scaling = tone.input_scaling, .out_min = &tone.input_min, .out_max = &tone.input_max, .out_avg = &tone.input_avg, )); 
pl_color_space_nominal_luma_ex(pl_nominal_luma_params( .color = &dst, .metadata = PL_HDR_METADATA_HDR10, .scaling = tone.output_scaling, .out_min = &tone.output_min, .out_max = &tone.output_max, )); pl_tone_map_params_infer(&tone); // Round sufficiently similar values if (fabs(tone.input_max - tone.output_max) < 1e-6) tone.output_max = tone.input_max; if (fabs(tone.input_min - tone.output_min) < 1e-6) tone.output_min = tone.input_min; if (!params->inverse_tone_mapping) { // Never exceed the source unless requested, but still allow // black point adaptation tone.output_max = PL_MIN(tone.output_max, tone.input_max); } const int *lut3d_size_def = pl_color_map_default_params.lut3d_size; struct pl_gamut_map_params gamut = { .function = PL_DEF(params->gamut_mapping, &pl_gamut_map_clip), .constants = params->gamut_constants, .input_gamut = src.hdr.prim, .output_gamut = dst.hdr.prim, .lut_size_I = PL_DEF(params->lut3d_size[0], lut3d_size_def[0]), .lut_size_C = PL_DEF(params->lut3d_size[1], lut3d_size_def[1]), .lut_size_h = PL_DEF(params->lut3d_size[2], lut3d_size_def[2]), .lut_stride = 3, }; float src_peak_static; pl_color_space_nominal_luma_ex(pl_nominal_luma_params( .color = &src, .metadata = PL_HDR_METADATA_HDR10, .scaling = PL_HDR_PQ, .out_max = &src_peak_static, )); pl_color_space_nominal_luma_ex(pl_nominal_luma_params( .color = &dst, .metadata = PL_HDR_METADATA_HDR10, .scaling = PL_HDR_PQ, .out_min = &gamut.min_luma, .out_max = &gamut.max_luma, )); // Clip the gamut mapping output to the input gamut if disabled if (!params->gamut_expansion && gamut.function->bidirectional) { if (pl_primaries_compatible(&gamut.input_gamut, &gamut.output_gamut)) { gamut.output_gamut = pl_primaries_clip(&gamut.output_gamut, &gamut.input_gamut); } } // Backwards compatibility with older API switch (params->gamut_mode) { case PL_GAMUT_CLIP: switch (params->intent) { case PL_INTENT_AUTO: case PL_INTENT_PERCEPTUAL: case PL_INTENT_RELATIVE_COLORIMETRIC: break; // leave default case PL_INTENT_SATURATION: gamut.function = &pl_gamut_map_saturation; break; case PL_INTENT_ABSOLUTE_COLORIMETRIC: gamut.function = &pl_gamut_map_absolute; break; } break; case PL_GAMUT_DARKEN: gamut.function = &pl_gamut_map_darken; break; case PL_GAMUT_WARN: gamut.function = &pl_gamut_map_highlight; break; case PL_GAMUT_DESATURATE: gamut.function = &pl_gamut_map_desaturate; break; case PL_GAMUT_MODE_COUNT: pl_unreachable(); } bool can_fast = !params->force_tone_mapping_lut; if (!args->state) { // No state object provided, forcibly disable advanced methods can_fast = true; if (tone.function != &pl_tone_map_clip) tone.function = &pl_tone_map_linear; if (gamut.function != &pl_gamut_map_clip) gamut.function = &pl_gamut_map_saturation; } pl_fmt gamut_fmt = pl_find_fmt(SH_GPU(sh), PL_FMT_UNORM, 4, 16, 16, PL_FMT_CAP_LINEAR); if (!gamut_fmt) { gamut.function = &pl_gamut_map_saturation; can_fast = true; } bool need_tone_map = !pl_tone_map_params_noop(&tone); bool need_gamut_map = !pl_gamut_map_params_noop(&gamut); if (!args->prelinearized) pl_shader_linearize(sh, &src); pl_matrix3x3 rgb2lms = pl_ipt_rgb2lms(pl_raw_primaries_get(src.primaries)); pl_matrix3x3 lms2rgb = pl_ipt_lms2rgb(pl_raw_primaries_get(dst.primaries)); ident_t lms2ipt = SH_MAT3(pl_ipt_lms2ipt); ident_t ipt2lms = SH_MAT3(pl_ipt_ipt2lms); if (need_gamut_map && gamut.function == &pl_gamut_map_saturation && can_fast) { const pl_matrix3x3 lms2src = pl_ipt_lms2rgb(&gamut.input_gamut); const pl_matrix3x3 dst2lms = pl_ipt_rgb2lms(&gamut.output_gamut); sh_describe(sh, "gamut map 
(saturation)"); pl_matrix3x3_mul(&lms2rgb, &dst2lms); pl_matrix3x3_mul(&lms2rgb, &lms2src); need_gamut_map = false; } // Fast path: simply convert between primaries (if needed) if (!need_tone_map && !need_gamut_map) { if (src.primaries != dst.primaries) { sh_describe(sh, "colorspace conversion"); pl_matrix3x3_mul(&lms2rgb, &rgb2lms); GLSL("color.rgb = "$" * color.rgb; \n", SH_MAT3(lms2rgb)); } goto done; } // Full path: convert input from normalized RGB to IPT GLSL("vec3 lms = "$" * color.rgb; \n" "vec3 lmspq = %f * lms; \n" "lmspq = pow(max(lmspq, 0.0), vec3(%f)); \n" "lmspq = (vec3(%f) + %f * lmspq) \n" " / (vec3(1.0) + %f * lmspq); \n" "lmspq = pow(lmspq, vec3(%f)); \n" "vec3 ipt = "$" * lmspq; \n" "float i_orig = ipt.x; \n", SH_MAT3(rgb2lms), PL_COLOR_SDR_WHITE / 10000, PQ_M1, PQ_C1, PQ_C2, PQ_C3, PQ_M2, lms2ipt); if (params->show_clipping) { const float eps = 1e-6f; GLSL("bool clip_hi, clip_lo; \n" "clip_hi = any(greaterThan(color.rgb, vec3("$"))); \n" "clip_lo = any(lessThan(color.rgb, vec3("$"))); \n" "clip_hi = clip_hi || ipt.x > "$"; \n" "clip_lo = clip_lo || ipt.x < "$"; \n", SH_FLOAT_DYN(pl_hdr_rescale(PL_HDR_PQ, PL_HDR_NORM, tone.input_max) + eps), SH_FLOAT(pl_hdr_rescale(PL_HDR_PQ, PL_HDR_NORM, tone.input_min) - eps), SH_FLOAT_DYN(tone.input_max + eps), SH_FLOAT(tone.input_min - eps)); } if (need_tone_map) { const struct pl_tone_map_function *fun = tone.function; sh_describef(sh, "%s tone map (%.0f -> %.0f)", fun->name, pl_hdr_rescale(PL_HDR_PQ, PL_HDR_NITS, tone.input_max), pl_hdr_rescale(PL_HDR_PQ, PL_HDR_NITS, tone.output_max)); if (fun == &pl_tone_map_clip && can_fast) { GLSL("#define tone_map(x) clamp((x), "$", "$") \n", SH_FLOAT(tone.input_min), SH_FLOAT_DYN(tone.input_max)); } else if (fun == &pl_tone_map_linear && can_fast) { const float gain = tone.constants.exposure; const float scale = tone.input_max - tone.input_min; ident_t linfun = sh_fresh(sh, "linear_pq"); GLSLH("float "$"(float x) { \n" // Stretch the input range (while clipping) " x = "$" * x + "$"; \n" " x = clamp(x, 0.0, 1.0); \n" " x = "$" * x + "$"; \n" " return x; \n" "} \n", linfun, SH_FLOAT_DYN(gain / scale), SH_FLOAT_DYN(-gain / scale * tone.input_min), SH_FLOAT_DYN(tone.output_max - tone.output_min), SH_FLOAT(tone.output_min)); GLSL("#define tone_map(x) ("$"(x)) \n", linfun); } else { pl_assert(obj); ident_t lut = sh_lut(sh, sh_lut_params( .object = &obj->tone.lut, .var_type = PL_VAR_FLOAT, .lut_type = SH_LUT_AUTO, .method = SH_LUT_LINEAR, .width = tone.lut_size, .comps = 1, .update = !pl_tone_map_params_equal(&tone, &obj->tone.params), .dynamic = tone.input_avg > 0, // dynamic metadata .fill = fill_tone_lut, .priv = &tone, )); obj->tone.params = tone; if (!lut) { SH_FAIL(sh, "Failed generating tone-mapping LUT!"); return; } const float lut_range = tone.input_max - tone.input_min; GLSL("#define tone_map(x) ("$"("$" * (x) + "$")) \n", lut, SH_FLOAT_DYN(1.0f / lut_range), SH_FLOAT_DYN(-tone.input_min / lut_range)); } bool need_recovery = tone.input_max >= tone.output_max; if (need_recovery && params->contrast_recovery && args->feature_map) { ident_t pos, pt; ident_t lowres = sh_bind(sh, args->feature_map, PL_TEX_ADDRESS_CLAMP, PL_TEX_SAMPLE_LINEAR, "feature_map", NULL, &pos, &pt); // Obtain HF detail map from bicubic interpolation of LF features GLSL("vec2 lpos = "$"; \n" "vec2 lpt = "$"; \n" "vec2 lsize = vec2(textureSize("$", 0)); \n" "vec2 frac = fract(lpos * lsize + vec2(0.5)); \n" "vec2 frac2 = frac * frac; \n" "vec2 inv = vec2(1.0) - frac; \n" "vec2 inv2 = inv * inv; \n" "vec2 w0 = 1.0/6.0 * inv2 
* inv; \n" "vec2 w1 = 2.0/3.0 - 0.5 * frac2 * (2.0 - frac); \n" "vec2 w2 = 2.0/3.0 - 0.5 * inv2 * (2.0 - inv); \n" "vec2 w3 = 1.0/6.0 * frac2 * frac; \n" "vec4 g = vec4(w0 + w1, w2 + w3); \n" "vec4 h = vec4(w1, w3) / g + inv.xyxy; \n" "h.xy -= vec2(2.0); \n" "vec4 p = lpos.xyxy + lpt.xyxy * h; \n" "float l00 = textureLod("$", p.xy, 0.0).r; \n" "float l01 = textureLod("$", p.xw, 0.0).r; \n" "float l0 = mix(l01, l00, g.y); \n" "float l10 = textureLod("$", p.zy, 0.0).r; \n" "float l11 = textureLod("$", p.zw, 0.0).r; \n" "float l1 = mix(l11, l10, g.y); \n" "float luma = mix(l1, l0, g.x); \n" // Mix low-resolution tone mapped image with high-resolution // tone mapped image according to desired strength. "float highres = clamp(ipt.x, 0.0, 1.0); \n" "float lowres = clamp(luma, 0.0, 1.0); \n" "float detail = highres - lowres; \n" "float base = tone_map(highres); \n" "float sharp = tone_map(lowres) + detail; \n" "ipt.x = clamp(mix(base, sharp, "$"), "$", "$"); \n", pos, pt, lowres, lowres, lowres, lowres, lowres, SH_FLOAT(params->contrast_recovery), SH_FLOAT(tone.output_min), SH_FLOAT_DYN(tone.output_max)); } else { GLSL("ipt.x = tone_map(ipt.x); \n"); } // Avoid raising saturation excessively when raising brightness, and // also desaturate when reducing brightness greatly to account for the // reduction in gamut volume. GLSL("vec2 hull = vec2(i_orig, ipt.x); \n" "hull = ((hull - 6.0) * hull + 9.0) * hull; \n" "ipt.yz *= min(i_orig / ipt.x, hull.y / hull.x); \n"); } if (need_gamut_map) { const struct pl_gamut_map_function *fun = gamut.function; sh_describef(sh, "gamut map (%s)", fun->name); pl_assert(obj); ident_t lut = sh_lut(sh, sh_lut_params( .object = &obj->gamut.lut, .var_type = PL_VAR_FLOAT, .lut_type = SH_LUT_TEXTURE, .fmt = gamut_fmt, .method = params->lut3d_tricubic ? 
SH_LUT_CUBIC : SH_LUT_LINEAR, .width = gamut.lut_size_I, .height = gamut.lut_size_C, .depth = gamut.lut_size_h, .comps = 4, .signature = gamut_map_signature(&gamut), .cache = SH_CACHE(sh), .fill = fill_gamut_lut, .priv = &gamut, )); if (!lut) { SH_FAIL(sh, "Failed generating gamut-mapping LUT!"); return; } // 3D LUT lookup (in ICh space) const float lut_range = gamut.max_luma - gamut.min_luma; GLSL("vec3 idx; \n" "idx.x = "$" * ipt.x + "$"; \n" "idx.y = 2.0 * length(ipt.yz); \n" "idx.z = %f * atan(ipt.z, ipt.y) + 0.5;\n" "ipt = "$"(idx).xyz; \n" "ipt.yz -= vec2(32768.0/65535.0); \n", SH_FLOAT(1.0f / lut_range), SH_FLOAT(-gamut.min_luma / lut_range), 0.5f / M_PI, lut); if (params->show_clipping) { GLSL("clip_lo = clip_lo || any(lessThan(idx, vec3(0.0))); \n" "clip_hi = clip_hi || any(greaterThan(idx, vec3(1.0))); \n"); } if (params->visualize_lut) { visualize_gamut_map(sh, params->visualize_rect, lut, params->visualize_hue, params->visualize_theta, &gamut); } } // Convert IPT back to linear RGB GLSL("lmspq = "$" * ipt; \n" "lms = pow(max(lmspq, 0.0), vec3(1.0/%f)); \n" "lms = max(lms - vec3(%f), 0.0) \n" " / (vec3(%f) - %f * lms); \n" "lms = pow(lms, vec3(1.0/%f)); \n" "lms *= %f; \n" "color.rgb = "$" * lms; \n", ipt2lms, PQ_M2, PQ_C1, PQ_C2, PQ_C3, PQ_M1, 10000 / PL_COLOR_SDR_WHITE, SH_MAT3(lms2rgb)); if (params->show_clipping) { GLSL("if (clip_hi) { \n" " float k = dot(color.rgb, vec3(2.0 / 3.0)); \n" " color.rgb = clamp(vec3(k) - color.rgb, 0.0, 1.0); \n" " float cmin = min(min(color.r, color.g), color.b); \n" " float cmax = max(max(color.r, color.g), color.b); \n" " float delta = cmax - cmin; \n" " vec3 sat = smoothstep(cmin - 1e-6, cmax, color.rgb); \n" " const vec3 red = vec3(1.0, 0.0, 0.0); \n" " color.rgb = mix(red, sat, smoothstep(0.0, 0.3, delta)); \n" "} else if (clip_lo) { \n" " vec3 hi = vec3(0.0, 0.3, 0.3); \n" " color.rgb = mix(color.rgb, hi, 0.5); \n" "} \n"); } if (need_tone_map) { if (params->visualize_lut) { float alpha = need_gamut_map ? powf(cosf(params->visualize_theta), 5.0f) : 1.0f; visualize_tone_map(sh, params->visualize_rect, alpha, &tone); } GLSL("#undef tone_map \n"); } done: pl_shader_delinearize(sh, &dst); GLSL("}\n"); } // Backwards compatibility wrapper around `pl_shader_color_map_ex` void pl_shader_color_map(pl_shader sh, const struct pl_color_map_params *params, struct pl_color_space src, struct pl_color_space dst, pl_shader_obj *state, bool prelinearized) { pl_shader_color_map_ex(sh, params, pl_color_map_args( .src = src, .dst = dst, .prelinearized = prelinearized, .state = state, .feature_map = NULL )); } void pl_shader_cone_distort(pl_shader sh, struct pl_color_space csp, const struct pl_cone_params *params) { if (!sh_require(sh, PL_SHADER_SIG_COLOR, 0, 0)) return; if (!params || !params->cones) return; sh_describe(sh, "cone distortion"); GLSL("// pl_shader_cone_distort\n"); GLSL("{\n"); pl_color_space_infer(&csp); pl_shader_linearize(sh, &csp); pl_matrix3x3 cone_mat; cone_mat = pl_get_cone_matrix(params, pl_raw_primaries_get(csp.primaries)); GLSL("color.rgb = "$" * color.rgb; \n", sh_var(sh, (struct pl_shader_var) { .var = pl_var_mat3("cone_mat"), .data = PL_TRANSPOSE_3X3(cone_mat.m), })); pl_shader_delinearize(sh, &csp); GLSL("}\n"); } libplacebo-v7.349.0/src/shaders/custom.c000066400000000000000000000057161463457750100201030ustar00rootroot00000000000000/* * This file is part of libplacebo. 
* * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #include "shaders.h" #include bool pl_shader_custom(pl_shader sh, const struct pl_custom_shader *params) { if (params->compute) { int bw = PL_DEF(params->compute_group_size[0], 16); int bh = PL_DEF(params->compute_group_size[1], 16); bool flex = !params->compute_group_size[0] || !params->compute_group_size[1]; if (!sh_try_compute(sh, bw, bh, flex, params->compute_shmem)) return false; } if (!sh_require(sh, params->input, params->output_w, params->output_h)) return false; sh->output = params->output; for (int i = 0; i < params->num_variables; i++) { struct pl_shader_var sv = params->variables[i]; GLSLP("#define %s "$"\n", sv.var.name, sh_var(sh, sv)); } for (int i = 0; i < params->num_descriptors; i++) { struct pl_shader_desc sd = params->descriptors[i]; GLSLP("#define %s "$"\n", sd.desc.name, sh_desc(sh, sd)); } for (int i = 0; i < params->num_vertex_attribs; i++) { struct pl_shader_va sva = params->vertex_attribs[i]; GLSLP("#define %s "$"\n", sva.attr.name, sh_attr(sh, sva)); } for (int i = 0; i < params->num_constants; i++) { struct pl_shader_const sc = params->constants[i]; GLSLP("#define %s "$"\n", sc.name, sh_const(sh, sc)); } if (params->prelude) GLSLP("// pl_shader_custom prelude: \n%s\n", params->prelude); if (params->header) GLSLH("// pl_shader_custom header: \n%s\n", params->header); if (params->description) sh_describef(sh, "%s", params->description); if (params->body) { const char *output_decl = ""; if (params->output != params->input) { switch (params->output) { case PL_SHADER_SIG_NONE: break; case PL_SHADER_SIG_COLOR: output_decl = "vec4 color = vec4(0.0);"; break; case PL_SHADER_SIG_SAMPLER: pl_unreachable(); } } GLSL("// pl_shader_custom \n" "%s \n" "{ \n" "%s \n" "} \n", output_decl, params->body); } return true; } libplacebo-v7.349.0/src/shaders/custom_mpv.c000066400000000000000000001631711463457750100207650ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . 
*/ #include #include #include "gpu.h" #include "shaders.h" #include #include // Hard-coded size limits, mainly for convenience (to avoid dynamic memory) #define SHADER_MAX_HOOKS 16 #define SHADER_MAX_BINDS 16 #define MAX_SHEXP_SIZE 32 enum shexp_op { SHEXP_OP_ADD, SHEXP_OP_SUB, SHEXP_OP_MUL, SHEXP_OP_DIV, SHEXP_OP_MOD, SHEXP_OP_NOT, SHEXP_OP_GT, SHEXP_OP_LT, SHEXP_OP_EQ, }; enum shexp_tag { SHEXP_END = 0, // End of an RPN expression SHEXP_CONST, // Push a constant value onto the stack SHEXP_TEX_W, // Get the width/height of a named texture (variable) SHEXP_TEX_H, SHEXP_OP2, // Pop two elements and push the result of a dyadic operation SHEXP_OP1, // Pop one element and push the result of a monadic operation SHEXP_VAR, // Arbitrary variable (e.g. shader parameters) }; struct shexp { enum shexp_tag tag; union { float cval; pl_str varname; enum shexp_op op; } val; }; struct custom_shader_hook { // Variable/literal names of textures pl_str pass_desc; pl_str hook_tex[SHADER_MAX_HOOKS]; pl_str bind_tex[SHADER_MAX_BINDS]; pl_str save_tex; // Shader body itself + metadata pl_str pass_body; float offset[2]; bool offset_align; int comps; // Special expressions governing the output size and execution conditions struct shexp width[MAX_SHEXP_SIZE]; struct shexp height[MAX_SHEXP_SIZE]; struct shexp cond[MAX_SHEXP_SIZE]; // Special metadata for compute shaders bool is_compute; int block_w, block_h; // Block size (each block corresponds to one WG) int threads_w, threads_h; // How many threads form a WG }; static bool parse_rpn_shexpr(pl_str line, struct shexp out[MAX_SHEXP_SIZE]) { int pos = 0; while (line.len > 0) { pl_str word = pl_str_split_char(line, ' ', &line); if (word.len == 0) continue; if (pos >= MAX_SHEXP_SIZE) return false; struct shexp *exp = &out[pos++]; if (pl_str_eatend0(&word, ".w") || pl_str_eatend0(&word, ".width")) { exp->tag = SHEXP_TEX_W; exp->val.varname = word; continue; } if (pl_str_eatend0(&word, ".h") || pl_str_eatend0(&word, ".height")) { exp->tag = SHEXP_TEX_H; exp->val.varname = word; continue; } switch (word.buf[0]) { case '+': exp->tag = SHEXP_OP2; exp->val.op = SHEXP_OP_ADD; continue; case '-': exp->tag = SHEXP_OP2; exp->val.op = SHEXP_OP_SUB; continue; case '*': exp->tag = SHEXP_OP2; exp->val.op = SHEXP_OP_MUL; continue; case '/': exp->tag = SHEXP_OP2; exp->val.op = SHEXP_OP_DIV; continue; case '%': exp->tag = SHEXP_OP2; exp->val.op = SHEXP_OP_MOD; continue; case '!': exp->tag = SHEXP_OP1; exp->val.op = SHEXP_OP_NOT; continue; case '>': exp->tag = SHEXP_OP2; exp->val.op = SHEXP_OP_GT; continue; case '<': exp->tag = SHEXP_OP2; exp->val.op = SHEXP_OP_LT; continue; case '=': exp->tag = SHEXP_OP2; exp->val.op = SHEXP_OP_EQ; continue; } if (word.buf[0] >= '0' && word.buf[0] <= '9') { exp->tag = SHEXP_CONST; if (!pl_str_parse_float(word, &exp->val.cval)) return false; continue; } // Treat as generic variable exp->tag = SHEXP_VAR; exp->val.varname = word; } return true; } static inline pl_str split_magic(pl_str *body) { pl_str ret = pl_str_split_str0(*body, "//!", body); if (body->len) { // Make sure the separator is included in the remainder body->buf -= 3; body->len += 3; } return ret; } static bool parse_hook(pl_log log, pl_str *body, struct custom_shader_hook *out) { *out = (struct custom_shader_hook){ .pass_desc = pl_str0("unknown user shader"), .width = {{ SHEXP_TEX_W, { .varname = pl_str0("HOOKED") }}}, .height = {{ SHEXP_TEX_H, { .varname = pl_str0("HOOKED") }}}, .cond = {{ SHEXP_CONST, { .cval = 1.0 }}}, }; int hook_idx = 0; int bind_idx = 0; // Parse all headers while 
(true) { pl_str rest; pl_str line = pl_str_strip(pl_str_getline(*body, &rest)); // Check for the presence of the magic line beginning if (!pl_str_eatstart0(&line, "//!")) break; *body = rest; // Parse the supported commands if (pl_str_eatstart0(&line, "HOOK")) { if (hook_idx == SHADER_MAX_HOOKS) { pl_err(log, "Passes may only hook up to %d textures!", SHADER_MAX_HOOKS); return false; } out->hook_tex[hook_idx++] = pl_str_strip(line); continue; } if (pl_str_eatstart0(&line, "BIND")) { if (bind_idx == SHADER_MAX_BINDS) { pl_err(log, "Passes may only bind up to %d textures!", SHADER_MAX_BINDS); return false; } out->bind_tex[bind_idx++] = pl_str_strip(line); continue; } if (pl_str_eatstart0(&line, "SAVE")) { pl_str save_tex = pl_str_strip(line); if (pl_str_equals0(save_tex, "HOOKED")) { // This is a special name that means "overwrite existing" // texture, which we just signal by not having any `save_tex` // name set. out->save_tex = (pl_str) {0}; } else if (pl_str_equals0(save_tex, "MAIN")) { // Compatibility alias out->save_tex = pl_str0("MAINPRESUB"); } else { out->save_tex = save_tex; }; continue; } if (pl_str_eatstart0(&line, "DESC")) { out->pass_desc = pl_str_strip(line); continue; } if (pl_str_eatstart0(&line, "OFFSET")) { line = pl_str_strip(line); if (pl_str_equals0(line, "ALIGN")) { out->offset_align = true; } else { if (!pl_str_parse_float(pl_str_split_char(line, ' ', &line), &out->offset[0]) || !pl_str_parse_float(pl_str_split_char(line, ' ', &line), &out->offset[1]) || line.len) { pl_err(log, "Error while parsing OFFSET!"); return false; } } continue; } if (pl_str_eatstart0(&line, "WIDTH")) { if (!parse_rpn_shexpr(line, out->width)) { pl_err(log, "Error while parsing WIDTH!"); return false; } continue; } if (pl_str_eatstart0(&line, "HEIGHT")) { if (!parse_rpn_shexpr(line, out->height)) { pl_err(log, "Error while parsing HEIGHT!"); return false; } continue; } if (pl_str_eatstart0(&line, "WHEN")) { if (!parse_rpn_shexpr(line, out->cond)) { pl_err(log, "Error while parsing WHEN!"); return false; } continue; } if (pl_str_eatstart0(&line, "COMPONENTS")) { if (!pl_str_parse_int(pl_str_strip(line), &out->comps)) { pl_err(log, "Error parsing COMPONENTS: '%.*s'", PL_STR_FMT(line)); return false; } continue; } if (pl_str_eatstart0(&line, "COMPUTE")) { line = pl_str_strip(line); bool ok = pl_str_parse_int(pl_str_split_char(line, ' ', &line), &out->block_w) && pl_str_parse_int(pl_str_split_char(line, ' ', &line), &out->block_h); line = pl_str_strip(line); if (ok && line.len) { ok = pl_str_parse_int(pl_str_split_char(line, ' ', &line), &out->threads_w) && pl_str_parse_int(pl_str_split_char(line, ' ', &line), &out->threads_h) && !line.len; } else { out->threads_w = out->block_w; out->threads_h = out->block_h; } if (!ok) { pl_err(log, "Error while parsing COMPUTE!"); return false; } out->is_compute = true; continue; } // Unknown command type pl_err(log, "Unrecognized command '%.*s'!", PL_STR_FMT(line)); return false; } // The rest of the file up until the next magic line beginning (if any) // shall be the shader body out->pass_body = split_magic(body); // Sanity checking if (hook_idx == 0) pl_warn(log, "Pass has no hooked textures (will be ignored)!"); return true; } static bool parse_tex(pl_gpu gpu, void *alloc, pl_str *body, struct pl_shader_desc *out) { *out = (struct pl_shader_desc) { .desc = { .name = "USER_TEX", .type = PL_DESC_SAMPLED_TEX, }, }; struct pl_tex_params params = { .w = 1, .h = 1, .d = 0, .sampleable = true, .debug_tag = PL_DEBUG_TAG, }; while (true) { pl_str rest; pl_str line = 
pl_str_strip(pl_str_getline(*body, &rest)); if (!pl_str_eatstart0(&line, "//!")) break; *body = rest; if (pl_str_eatstart0(&line, "TEXTURE")) { out->desc.name = pl_strdup0(alloc, pl_str_strip(line)); continue; } if (pl_str_eatstart0(&line, "SIZE")) { line = pl_str_strip(line); int dims = 0; int dim[4]; // extra space to catch invalid extra entries while (line.len && dims < PL_ARRAY_SIZE(dim)) { if (!pl_str_parse_int(pl_str_split_char(line, ' ', &line), &dim[dims++])) { PL_ERR(gpu, "Error while parsing SIZE!"); return false; } } uint32_t lim = dims == 1 ? gpu->limits.max_tex_1d_dim : dims == 2 ? gpu->limits.max_tex_2d_dim : dims == 3 ? gpu->limits.max_tex_3d_dim : 0; // Sanity check against GPU size limits switch (dims) { case 3: params.d = dim[2]; if (params.d < 1 || params.d > lim) { PL_ERR(gpu, "SIZE %d exceeds GPU's texture size limits (%d)!", params.d, lim); return false; } // fall through case 2: params.h = dim[1]; if (params.h < 1 || params.h > lim) { PL_ERR(gpu, "SIZE %d exceeds GPU's texture size limits (%d)!", params.h, lim); return false; } // fall through case 1: params.w = dim[0]; if (params.w < 1 || params.w > lim) { PL_ERR(gpu, "SIZE %d exceeds GPU's texture size limits (%d)!", params.w, lim); return false; } break; default: PL_ERR(gpu, "Invalid number of texture dimensions!"); return false; }; // Clear out the superfluous components if (dims < 3) params.d = 0; if (dims < 2) params.h = 0; continue; } if (pl_str_eatstart0(&line, "FORMAT")) { line = pl_str_strip(line); params.format = NULL; for (int n = 0; n < gpu->num_formats; n++) { pl_fmt fmt = gpu->formats[n]; if (pl_str_equals0(line, fmt->name)) { params.format = fmt; break; } } if (!params.format || params.format->opaque) { PL_ERR(gpu, "Unrecognized/unavailable FORMAT name: '%.*s'!", PL_STR_FMT(line)); return false; } if (!(params.format->caps & PL_FMT_CAP_SAMPLEABLE)) { PL_ERR(gpu, "Chosen FORMAT '%.*s' is not sampleable!", PL_STR_FMT(line)); return false; } continue; } if (pl_str_eatstart0(&line, "FILTER")) { line = pl_str_strip(line); if (pl_str_equals0(line, "LINEAR")) { out->binding.sample_mode = PL_TEX_SAMPLE_LINEAR; } else if (pl_str_equals0(line, "NEAREST")) { out->binding.sample_mode = PL_TEX_SAMPLE_NEAREST; } else { PL_ERR(gpu, "Unrecognized FILTER: '%.*s'!", PL_STR_FMT(line)); return false; } continue; } if (pl_str_eatstart0(&line, "BORDER")) { line = pl_str_strip(line); if (pl_str_equals0(line, "CLAMP")) { out->binding.address_mode = PL_TEX_ADDRESS_CLAMP; } else if (pl_str_equals0(line, "REPEAT")) { out->binding.address_mode = PL_TEX_ADDRESS_REPEAT; } else if (pl_str_equals0(line, "MIRROR")) { out->binding.address_mode = PL_TEX_ADDRESS_MIRROR; } else { PL_ERR(gpu, "Unrecognized BORDER: '%.*s'!", PL_STR_FMT(line)); return false; } continue; } if (pl_str_eatstart0(&line, "STORAGE")) { params.storable = true; out->desc.type = PL_DESC_STORAGE_IMG; out->desc.access = PL_DESC_ACCESS_READWRITE; out->memory = PL_MEMORY_COHERENT; continue; } PL_ERR(gpu, "Unrecognized command '%.*s'!", PL_STR_FMT(line)); return false; } if (!params.format) { PL_ERR(gpu, "No FORMAT specified!"); return false; } int caps = params.format->caps; if (out->binding.sample_mode == PL_TEX_SAMPLE_LINEAR && !(caps & PL_FMT_CAP_LINEAR)) { PL_ERR(gpu, "The specified texture format cannot be linear filtered!"); return false; } // Decode the rest of the section (up to the next //! 
marker) as raw hex // data for the texture pl_str tex, hexdata = split_magic(body); if (!pl_str_decode_hex(NULL, pl_str_strip(hexdata), &tex)) { PL_ERR(gpu, "Error while parsing TEXTURE body: must be a valid " "hexadecimal sequence!"); return false; } int texels = params.w * PL_DEF(params.h, 1) * PL_DEF(params.d, 1); size_t expected_len = texels * params.format->texel_size; if (tex.len == 0 && params.storable) { // In this case, it's okay that the texture has no initial data pl_free_ptr(&tex.buf); } else if (tex.len != expected_len) { PL_ERR(gpu, "Shader TEXTURE size mismatch: got %zu bytes, expected %zu!", tex.len, expected_len); pl_free(tex.buf); return false; } params.initial_data = tex.buf; out->binding.object = pl_tex_create(gpu, ¶ms); pl_free(tex.buf); if (!out->binding.object) { PL_ERR(gpu, "Failed creating custom texture!"); return false; } return true; } static bool parse_buf(pl_gpu gpu, void *alloc, pl_str *body, struct pl_shader_desc *out) { *out = (struct pl_shader_desc) { .desc = { .name = "USER_BUF", .type = PL_DESC_BUF_UNIFORM, }, }; // Temporary, to allow deferring variable placement until all headers // have been processed (in order to e.g. determine buffer type) void *tmp = pl_tmp(alloc); // will be freed automatically on failure PL_ARRAY(struct pl_var) vars = {0}; while (true) { pl_str rest; pl_str line = pl_str_strip(pl_str_getline(*body, &rest)); if (!pl_str_eatstart0(&line, "//!")) break; *body = rest; if (pl_str_eatstart0(&line, "BUFFER")) { out->desc.name = pl_strdup0(alloc, pl_str_strip(line)); continue; } if (pl_str_eatstart0(&line, "STORAGE")) { out->desc.type = PL_DESC_BUF_STORAGE; out->desc.access = PL_DESC_ACCESS_READWRITE; out->memory = PL_MEMORY_COHERENT; continue; } if (pl_str_eatstart0(&line, "VAR")) { pl_str type_name = pl_str_split_char(pl_str_strip(line), ' ', &line); struct pl_var var = {0}; for (const struct pl_named_var *nv = pl_var_glsl_types; nv->glsl_name; nv++) { if (pl_str_equals0(type_name, nv->glsl_name)) { var = nv->var; break; } } if (!var.type) { // No type found PL_ERR(gpu, "Unrecognized GLSL type '%.*s'!", PL_STR_FMT(type_name)); return false; } pl_str var_name = pl_str_split_char(line, '[', &line); if (line.len > 0) { // Parse array dimension if (!pl_str_parse_int(pl_str_split_char(line, ']', NULL), &var.dim_a)) { PL_ERR(gpu, "Failed parsing array dimension from [%.*s!", PL_STR_FMT(line)); return false; } if (var.dim_a < 1) { PL_ERR(gpu, "Invalid array dimension %d!", var.dim_a); return false; } } var.name = pl_strdup0(alloc, pl_str_strip(var_name)); PL_ARRAY_APPEND(tmp, vars, var); continue; } PL_ERR(gpu, "Unrecognized command '%.*s'!", PL_STR_FMT(line)); return false; } // Try placing all of the buffer variables for (int i = 0; i < vars.num; i++) { if (!sh_buf_desc_append(alloc, gpu, out, NULL, vars.elem[i])) { PL_ERR(gpu, "Custom buffer exceeds GPU limitations!"); return false; } } // Decode the rest of the section (up to the next //! 
marker) as raw hex // data for the buffer pl_str data, hexdata = split_magic(body); if (!pl_str_decode_hex(tmp, pl_str_strip(hexdata), &data)) { PL_ERR(gpu, "Error while parsing BUFFER body: must be a valid " "hexadecimal sequence!"); return false; } size_t buf_size = sh_buf_desc_size(out); if (data.len == 0 && out->desc.type == PL_DESC_BUF_STORAGE) { // In this case, it's okay that the buffer has no initial data } else if (data.len != buf_size) { PL_ERR(gpu, "Shader BUFFER size mismatch: got %zu bytes, expected %zu!", data.len, buf_size); return false; } out->binding.object = pl_buf_create(gpu, pl_buf_params( .size = buf_size, .uniform = out->desc.type == PL_DESC_BUF_UNIFORM, .storable = out->desc.type == PL_DESC_BUF_STORAGE, .initial_data = data.len ? data.buf : NULL, )); if (!out->binding.object) { PL_ERR(gpu, "Failed creating custom buffer!"); return false; } pl_free(tmp); return true; } static bool parse_var(pl_log log, pl_str str, enum pl_var_type type, pl_var_data *out) { if (!str.len) return true; pl_str buf = str; bool ok = false; switch (type) { case PL_VAR_SINT: ok = pl_str_parse_int(pl_str_split_char(buf, ' ', &buf), &out->i); break; case PL_VAR_UINT: ok = pl_str_parse_uint(pl_str_split_char(buf, ' ', &buf), &out->u); break; case PL_VAR_FLOAT: ok = pl_str_parse_float(pl_str_split_char(buf, ' ', &buf), &out->f); break; case PL_VAR_INVALID: case PL_VAR_TYPE_COUNT: pl_unreachable(); } if (pl_str_strip(buf).len > 0) ok = false; // left-over garbage if (!ok) { pl_err(log, "Failed parsing variable data: %.*s", PL_STR_FMT(str)); return false; } return true; } static bool check_bounds(pl_log log, enum pl_var_type type, const pl_var_data data, const pl_var_data minimum, const pl_var_data maximum) { #define CHECK_BOUNDS(v, fmt) do \ { \ if (data.v < minimum.v) { \ pl_err(log, "Initial value "fmt" below declared minimum "fmt"!", \ data.v, minimum.v); \ return false; \ } \ if (data.v > maximum.v) { \ pl_err(log, "Initial value "fmt" above declared maximum "fmt"!", \ data.v, maximum.v); \ return false; \ } \ } while (0) switch (type) { case PL_VAR_SINT: CHECK_BOUNDS(i, "%d"); break; case PL_VAR_UINT: CHECK_BOUNDS(u, "%u"); break; case PL_VAR_FLOAT: CHECK_BOUNDS(f, "%f"); break; case PL_VAR_INVALID: case PL_VAR_TYPE_COUNT: pl_unreachable(); } #undef CHECK_BOUNDS return true; } static bool parse_param(pl_log log, void *alloc, pl_str *body, struct pl_hook_par *out) { *out = (struct pl_hook_par) {0}; pl_str minimum = {0}; pl_str maximum = {0}; bool is_enum = false; while (true) { pl_str rest; pl_str line = pl_str_strip(pl_str_getline(*body, &rest)); if (!pl_str_eatstart0(&line, "//!")) break; *body = rest; if (pl_str_eatstart0(&line, "PARAM")) { out->name = pl_strdup0(alloc, pl_str_strip(line)); continue; } if (pl_str_eatstart0(&line, "DESC")) { out->description = pl_strdup0(alloc, pl_str_strip(line)); continue; } if (pl_str_eatstart0(&line, "MINIMUM")) { minimum = pl_str_strip(line); continue; } if (pl_str_eatstart0(&line, "MAXIMUM")) { maximum = pl_str_strip(line); continue; } if (pl_str_eatstart0(&line, "TYPE")) { line = pl_str_strip(line); is_enum = pl_str_eatstart0(&line, "ENUM"); line = pl_str_strip(line); if (pl_str_eatstart0(&line, "DYNAMIC")) { out->mode = PL_HOOK_PAR_DYNAMIC; } else if (pl_str_eatstart0(&line, "CONSTANT")) { out->mode = PL_HOOK_PAR_CONSTANT; } else if (pl_str_eatstart0(&line, "DEFINE")) { out->mode = PL_HOOK_PAR_DEFINE; out->type = PL_VAR_SINT; if (pl_str_strip(line).len > 0) { pl_err(log, "TYPE DEFINE does not take any extra arguments, " "unexpected: '%.*s'", 
PL_STR_FMT(line)); return false; } continue; } else { out->mode = PL_HOOK_PAR_VARIABLE; } line = pl_str_strip(line); for (const struct pl_named_var *nv = pl_var_glsl_types; nv->glsl_name; nv++) { if (pl_str_equals0(line, nv->glsl_name)) { if (nv->var.dim_v > 1 || nv->var.dim_m > 1) { pl_err(log, "GLSL type '%s' is incompatible with " "shader parameters, must be scalar type!", nv->glsl_name); return false; } out->type = nv->var.type; if (is_enum && out->type != PL_VAR_SINT) { pl_err(log, "ENUM is only compatible with type int/DEFINE!"); return false; } goto next; } } pl_err(log, "Unrecognized GLSL type '%.*s'!", PL_STR_FMT(line)); return false; } pl_err(log, "Unrecognized command '%.*s'!", PL_STR_FMT(line)); return false; next: ; } switch (out->type) { case PL_VAR_INVALID: pl_err(log, "Missing variable type!"); return false; case PL_VAR_SINT: out->minimum.i = INT_MIN; out->maximum.i = INT_MAX; break; case PL_VAR_UINT: out->minimum.u = 0; out->maximum.u = UINT_MAX; break; case PL_VAR_FLOAT: out->minimum.f = -INFINITY; out->maximum.f = INFINITY; break; case PL_VAR_TYPE_COUNT: pl_unreachable(); } pl_str initial = pl_str_strip(split_magic(body)); if (!initial.len) { pl_err(log, "Missing initial parameter value!"); return false; } if (is_enum) { PL_ARRAY(const char *) names = {0}; pl_assert(out->type == PL_VAR_SINT); do { pl_str line = pl_str_strip(pl_str_getline(initial, &initial)); if (!line.len) continue; PL_ARRAY_APPEND(alloc, names, pl_strdup0(alloc, line)); } while (initial.len); pl_assert(names.num >= 1); out->initial.i = 0; out->minimum.i = 0; out->maximum.i = names.num - 1; out->names = names.elem; } else { if (!parse_var(log, initial, out->type, &out->initial)) return false; if (!parse_var(log, minimum, out->type, &out->minimum)) return false; if (!parse_var(log, maximum, out->type, &out->maximum)) return false; if (!check_bounds(log, out->type, out->initial, out->minimum, out->maximum)) return false; } out->data = pl_memdup(alloc, &out->initial, sizeof(out->initial)); return true; } static enum pl_hook_stage mp_stage_to_pl(pl_str stage) { if (pl_str_equals0(stage, "RGB")) return PL_HOOK_RGB_INPUT; if (pl_str_equals0(stage, "LUMA")) return PL_HOOK_LUMA_INPUT; if (pl_str_equals0(stage, "CHROMA")) return PL_HOOK_CHROMA_INPUT; if (pl_str_equals0(stage, "ALPHA")) return PL_HOOK_ALPHA_INPUT; if (pl_str_equals0(stage, "XYZ")) return PL_HOOK_XYZ_INPUT; if (pl_str_equals0(stage, "CHROMA_SCALED")) return PL_HOOK_CHROMA_SCALED; if (pl_str_equals0(stage, "ALPHA_SCALED")) return PL_HOOK_ALPHA_SCALED; if (pl_str_equals0(stage, "NATIVE")) return PL_HOOK_NATIVE; if (pl_str_equals0(stage, "MAINPRESUB")) return PL_HOOK_RGB; if (pl_str_equals0(stage, "MAIN")) return PL_HOOK_RGB; // Note: conflicts with above! 
if (pl_str_equals0(stage, "LINEAR")) return PL_HOOK_LINEAR; if (pl_str_equals0(stage, "SIGMOID")) return PL_HOOK_SIGMOID; if (pl_str_equals0(stage, "PREKERNEL")) return PL_HOOK_PRE_KERNEL; if (pl_str_equals0(stage, "POSTKERNEL")) return PL_HOOK_POST_KERNEL; if (pl_str_equals0(stage, "SCALED")) return PL_HOOK_SCALED; if (pl_str_equals0(stage, "PREOUTPUT")) return PL_HOOK_PRE_OUTPUT; if (pl_str_equals0(stage, "OUTPUT")) return PL_HOOK_OUTPUT; return 0; } static pl_str pl_stage_to_mp(enum pl_hook_stage stage) { switch (stage) { case PL_HOOK_RGB_INPUT: return pl_str0("RGB"); case PL_HOOK_LUMA_INPUT: return pl_str0("LUMA"); case PL_HOOK_CHROMA_INPUT: return pl_str0("CHROMA"); case PL_HOOK_ALPHA_INPUT: return pl_str0("ALPHA"); case PL_HOOK_XYZ_INPUT: return pl_str0("XYZ"); case PL_HOOK_CHROMA_SCALED: return pl_str0("CHROMA_SCALED"); case PL_HOOK_ALPHA_SCALED: return pl_str0("ALPHA_SCALED"); case PL_HOOK_NATIVE: return pl_str0("NATIVE"); case PL_HOOK_RGB: return pl_str0("MAINPRESUB"); case PL_HOOK_LINEAR: return pl_str0("LINEAR"); case PL_HOOK_SIGMOID: return pl_str0("SIGMOID"); case PL_HOOK_PRE_KERNEL: return pl_str0("PREKERNEL"); case PL_HOOK_POST_KERNEL: return pl_str0("POSTKERNEL"); case PL_HOOK_SCALED: return pl_str0("SCALED"); case PL_HOOK_PRE_OUTPUT: return pl_str0("PREOUTPUT"); case PL_HOOK_OUTPUT: return pl_str0("OUTPUT"); }; pl_unreachable(); } struct hook_pass { enum pl_hook_stage exec_stages; struct custom_shader_hook hook; }; struct pass_tex { pl_str name; pl_tex tex; // Metadata pl_rect2df rect; struct pl_color_repr repr; struct pl_color_space color; int comps; }; struct hook_priv { pl_log log; pl_gpu gpu; void *alloc; PL_ARRAY(struct hook_pass) hook_passes; PL_ARRAY(struct pl_hook_par) hook_params; // Fixed (for shader-local resources) PL_ARRAY(struct pl_shader_desc) descriptors; // Dynamic per pass enum pl_hook_stage save_stages; PL_ARRAY(struct pass_tex) pass_textures; pl_shader trc_helper; // State for PRNG/frame count int frame_count; uint64_t prng_state[4]; }; static void hook_reset(void *priv) { struct hook_priv *p = priv; p->pass_textures.num = 0; } // Context during execution of a hook struct hook_ctx { struct hook_priv *priv; const struct pl_hook_params *params; struct pass_tex hooked; }; static bool lookup_tex(struct hook_ctx *ctx, pl_str var, float size[2]) { struct hook_priv *p = ctx->priv; const struct pl_hook_params *params = ctx->params; if (pl_str_equals0(var, "HOOKED")) { pl_assert(ctx->hooked.tex); size[0] = ctx->hooked.tex->params.w; size[1] = ctx->hooked.tex->params.h; return true; } if (pl_str_equals0(var, "NATIVE_CROPPED")) { size[0] = fabs(pl_rect_w(params->src_rect)); size[1] = fabs(pl_rect_h(params->src_rect)); return true; } if (pl_str_equals0(var, "OUTPUT")) { size[0] = abs(pl_rect_w(params->dst_rect)); size[1] = abs(pl_rect_h(params->dst_rect)); return true; } if (pl_str_equals0(var, "MAIN")) var = pl_str0("MAINPRESUB"); for (int i = 0; i < p->pass_textures.num; i++) { if (pl_str_equals(var, p->pass_textures.elem[i].name)) { pl_tex tex = p->pass_textures.elem[i].tex; size[0] = tex->params.w; size[1] = tex->params.h; return true; } } return false; } static bool lookup_var(struct hook_ctx *ctx, pl_str var, float *val) { struct hook_priv *p = ctx->priv; for (int i = 0; i < p->hook_params.num; i++) { const struct pl_hook_par *hp = &p->hook_params.elem[i]; if (pl_str_equals0(var, hp->name)) { switch (hp->type) { case PL_VAR_SINT: *val = hp->data->i; return true; case PL_VAR_UINT: *val = hp->data->u; return true; case PL_VAR_FLOAT: *val = hp->data->f; return 
true; case PL_VAR_INVALID: case PL_VAR_TYPE_COUNT: break; } pl_unreachable(); } if (hp->names) { for (int j = hp->minimum.i; j <= hp->maximum.i; j++) { if (pl_str_equals0(var, hp->names[j])) { *val = j; return true; } } } } PL_WARN(p, "Variable '%.*s' not found in RPN expression!", PL_STR_FMT(var)); return false; } // Returns whether successful. 'result' is left untouched on failure static bool eval_shexpr(struct hook_ctx *ctx, const struct shexp expr[MAX_SHEXP_SIZE], float *result) { struct hook_priv *p = ctx->priv; float stack[MAX_SHEXP_SIZE] = {0}; int idx = 0; // points to next element to push for (int i = 0; i < MAX_SHEXP_SIZE; i++) { switch (expr[i].tag) { case SHEXP_END: goto done; case SHEXP_CONST: // Since our SHEXPs are bound by MAX_SHEXP_SIZE, it should be // impossible to overflow the stack assert(idx < MAX_SHEXP_SIZE); stack[idx++] = expr[i].val.cval; continue; case SHEXP_OP1: if (idx < 1) { PL_WARN(p, "Stack underflow in RPN expression!"); return false; } switch (expr[i].val.op) { case SHEXP_OP_NOT: stack[idx-1] = !stack[idx-1]; break; default: pl_unreachable(); } continue; case SHEXP_OP2: if (idx < 2) { PL_WARN(p, "Stack underflow in RPN expression!"); return false; } // Pop the operands in reverse order float op2 = stack[--idx]; float op1 = stack[--idx]; float res = 0.0; switch (expr[i].val.op) { case SHEXP_OP_ADD: res = op1 + op2; break; case SHEXP_OP_SUB: res = op1 - op2; break; case SHEXP_OP_MUL: res = op1 * op2; break; case SHEXP_OP_DIV: res = op1 / op2; break; case SHEXP_OP_MOD: res = fmodf(op1, op2); break; case SHEXP_OP_GT: res = op1 > op2; break; case SHEXP_OP_LT: res = op1 < op2; break; case SHEXP_OP_EQ: res = fabsf(op1 - op2) <= 1e-6 * fmaxf(op1, op2); break; case SHEXP_OP_NOT: pl_unreachable(); } if (!isfinite(res)) { PL_WARN(p, "Illegal operation in RPN expression!"); return false; } stack[idx++] = res; continue; case SHEXP_TEX_W: case SHEXP_TEX_H: { pl_str name = expr[i].val.varname; float size[2]; if (!lookup_tex(ctx, name, size)) { PL_WARN(p, "Variable '%.*s' not found in RPN expression!", PL_STR_FMT(name)); return false; } stack[idx++] = (expr[i].tag == SHEXP_TEX_W) ? 
size[0] : size[1]; continue; } case SHEXP_VAR: { pl_str name = expr[i].val.varname; float val; if (!lookup_var(ctx, name, &val)) return false; stack[idx++] = val; continue; } } } done: // Return the single stack element if (idx != 1) { PL_WARN(p, "Malformed stack after RPN expression!"); return false; } *result = stack[0]; return true; } static double prng_step(uint64_t s[4]) { const uint64_t result = s[0] + s[3]; const uint64_t t = s[1] << 17; s[2] ^= s[0]; s[3] ^= s[1]; s[1] ^= s[2]; s[0] ^= s[3]; s[2] ^= t; s[3] = (s[3] << 45) | (s[3] >> (64 - 45)); return (result >> 11) * 0x1.0p-53; } static bool bind_pass_tex(pl_shader sh, pl_str name, const struct pass_tex *ptex, const pl_rect2df *rect, bool hooked, bool mainpresub) { ident_t id, pos, pt; // Compatibility with mpv texture binding semantics id = sh_bind(sh, ptex->tex, PL_TEX_ADDRESS_CLAMP, PL_TEX_SAMPLE_LINEAR, "hook_tex", rect, &pos, &pt); if (!id) return false; GLSLH("#define %.*s_raw "$" \n", PL_STR_FMT(name), id); GLSLH("#define %.*s_pos "$" \n", PL_STR_FMT(name), pos); GLSLH("#define %.*s_map "$"_map \n", PL_STR_FMT(name), pos); GLSLH("#define %.*s_size vec2(textureSize("$", 0)) \n", PL_STR_FMT(name), id); GLSLH("#define %.*s_pt "$" \n", PL_STR_FMT(name), pt); float off[2] = { ptex->rect.x0, ptex->rect.y0 }; GLSLH("#define %.*s_off "$" \n", PL_STR_FMT(name), sh_var(sh, (struct pl_shader_var) { .var = pl_var_vec2("offset"), .data = off, })); struct pl_color_repr repr = ptex->repr; ident_t scale = SH_FLOAT(pl_color_repr_normalize(&repr)); GLSLH("#define %.*s_mul "$" \n", PL_STR_FMT(name), scale); // Compatibility with mpv GLSLH("#define %.*s_rot mat2(1.0, 0.0, 0.0, 1.0) \n", PL_STR_FMT(name)); // Sampling function boilerplate GLSLH("#define %.*s_tex(pos) ("$" * vec4(textureLod("$", pos, 0.0))) \n", PL_STR_FMT(name), scale, id); GLSLH("#define %.*s_texOff(off) (%.*s_tex("$" + "$" * vec2(off))) \n", PL_STR_FMT(name), PL_STR_FMT(name), pos, pt); bool can_gather = ptex->tex->params.format->gatherable; if (can_gather) { GLSLH("#define %.*s_gather(pos, c) ("$" * vec4(textureGather("$", pos, c))) \n", PL_STR_FMT(name), scale, id); } if (hooked) { GLSLH("#define HOOKED_raw %.*s_raw \n", PL_STR_FMT(name)); GLSLH("#define HOOKED_pos %.*s_pos \n", PL_STR_FMT(name)); GLSLH("#define HOOKED_size %.*s_size \n", PL_STR_FMT(name)); GLSLH("#define HOOKED_rot %.*s_rot \n", PL_STR_FMT(name)); GLSLH("#define HOOKED_off %.*s_off \n", PL_STR_FMT(name)); GLSLH("#define HOOKED_pt %.*s_pt \n", PL_STR_FMT(name)); GLSLH("#define HOOKED_map %.*s_map \n", PL_STR_FMT(name)); GLSLH("#define HOOKED_mul %.*s_mul \n", PL_STR_FMT(name)); GLSLH("#define HOOKED_tex %.*s_tex \n", PL_STR_FMT(name)); GLSLH("#define HOOKED_texOff %.*s_texOff \n", PL_STR_FMT(name)); if (can_gather) GLSLH("#define HOOKED_gather %.*s_gather \n", PL_STR_FMT(name)); } if (mainpresub) { GLSLH("#define MAIN_raw MAINPRESUB_raw \n"); GLSLH("#define MAIN_pos MAINPRESUB_pos \n"); GLSLH("#define MAIN_size MAINPRESUB_size \n"); GLSLH("#define MAIN_rot MAINPRESUB_rot \n"); GLSLH("#define MAIN_off MAINPRESUB_off \n"); GLSLH("#define MAIN_pt MAINPRESUB_pt \n"); GLSLH("#define MAIN_map MAINPRESUB_map \n"); GLSLH("#define MAIN_mul MAINPRESUB_mul \n"); GLSLH("#define MAIN_tex MAINPRESUB_tex \n"); GLSLH("#define MAIN_texOff MAINPRESUB_texOff \n"); if (can_gather) GLSLH("#define MAIN_gather MAINPRESUB_gather \n"); } return true; } static void save_pass_tex(struct hook_priv *p, struct pass_tex ptex) { for (int i = 0; i < p->pass_textures.num; i++) { if (!pl_str_equals(p->pass_textures.elem[i].name, ptex.name)) 
continue; p->pass_textures.elem[i] = ptex; return; } // No texture with this name yet, append new one PL_ARRAY_APPEND(p->alloc, p->pass_textures, ptex); } static struct pl_hook_res hook_hook(void *priv, const struct pl_hook_params *params) { struct hook_priv *p = priv; pl_str stage = pl_stage_to_mp(params->stage); struct pl_hook_res res = {0}; pl_shader sh = NULL; struct hook_ctx ctx = { .priv = p, .params = params, .hooked = { .name = stage, .tex = params->tex, .rect = params->rect, .repr = params->repr, .color = params->color, .comps = params->components, }, }; // Save the input texture if needed if (p->save_stages & params->stage) { PL_TRACE(p, "Saving input texture '%.*s' for binding", PL_STR_FMT(ctx.hooked.name)); save_pass_tex(p, ctx.hooked); } for (int n = 0; n < p->hook_passes.num; n++) { const struct hook_pass *pass = &p->hook_passes.elem[n]; if (!(pass->exec_stages & params->stage)) continue; const struct custom_shader_hook *hook = &pass->hook; PL_TRACE(p, "Executing hook pass %d on stage '%.*s': %.*s", n, PL_STR_FMT(stage), PL_STR_FMT(hook->pass_desc)); // Test for execution condition float run = 0; if (!eval_shexpr(&ctx, hook->cond, &run)) goto error; if (!run) { PL_TRACE(p, "Skipping hook due to condition"); continue; } // Generate a new shader object sh = pl_dispatch_begin(params->dispatch); // Bind all necessary input textures for (int i = 0; i < PL_ARRAY_SIZE(hook->bind_tex); i++) { pl_str texname = hook->bind_tex[i]; if (!texname.len) break; // Convenience alias, to allow writing shaders that are oblivious // of the exact stage they hooked. This simply translates to // whatever stage actually fired the hook. bool hooked = false, mainpresub = false; if (pl_str_equals0(texname, "HOOKED")) { // Continue with binding this, under the new name texname = stage; hooked = true; } // Compatibility alias, because MAIN and MAINPRESUB mean the same // thing to libplacebo, but user shaders are still written as // though they can be different concepts. if (pl_str_equals0(texname, "MAIN") || pl_str_equals0(texname, "MAINPRESUB")) { texname = pl_str0("MAINPRESUB"); mainpresub = true; } for (int j = 0; j < p->descriptors.num; j++) { if (pl_str_equals0(texname, p->descriptors.elem[j].desc.name)) { // Directly bind this, no need to bother with all the // `bind_pass_tex` boilerplate ident_t id = sh_desc(sh, p->descriptors.elem[j]); GLSLH("#define %.*s "$" \n", PL_STR_FMT(texname), id); if (p->descriptors.elem[j].desc.type == PL_DESC_SAMPLED_TEX) { GLSLH("#define %.*s_tex(pos) (textureLod("$", pos, 0.0)) \n", PL_STR_FMT(texname), id); } goto next_bind; } } for (int j = 0; j < p->pass_textures.num; j++) { if (pl_str_equals(texname, p->pass_textures.elem[j].name)) { // Note: We bind the whole texture, rather than // hooked.rect, because user shaders in general are not // designed to handle cropped input textures. 
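                    // (The rect below therefore spans the full texture; if the
                    // hook requests offset alignment, it is shifted further
                    // down to line up with the reference src_rect instead.)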
const struct pass_tex *ptex = &p->pass_textures.elem[j]; pl_rect2df rect = { 0, 0, ptex->tex->params.w, ptex->tex->params.h, }; if (hook->offset_align && pl_str_equals(texname, stage)) { float sx = pl_rect_w(ctx.hooked.rect) / pl_rect_w(params->src_rect), sy = pl_rect_h(ctx.hooked.rect) / pl_rect_h(params->src_rect), ox = ctx.hooked.rect.x0 - sx * params->src_rect.x0, oy = ctx.hooked.rect.y0 - sy * params->src_rect.y0; PL_TRACE(p, "Aligning plane with ref: %f %f", ox, oy); pl_rect2df_offset(&rect, ox, oy); } if (!bind_pass_tex(sh, texname, &p->pass_textures.elem[j], &rect, hooked, mainpresub)) { goto error; } goto next_bind; } } // If none of the above matched, this is an unknown texture name, // so silently ignore this pass to match the mpv behavior PL_TRACE(p, "Skipping hook due to no texture named '%.*s'.", PL_STR_FMT(texname)); pl_dispatch_abort(params->dispatch, &sh); goto next_pass; next_bind: ; // outer 'continue' } // Set up the input variables p->frame_count++; GLSLH("#define frame "$" \n", sh_var(sh, (struct pl_shader_var) { .var = pl_var_int("frame"), .data = &p->frame_count, .dynamic = true, })); float random = prng_step(p->prng_state); GLSLH("#define random "$" \n", sh_var(sh, (struct pl_shader_var) { .var = pl_var_float("random"), .data = &random, .dynamic = true, })); float src_size[2] = { pl_rect_w(params->src_rect), pl_rect_h(params->src_rect) }; GLSLH("#define input_size "$" \n", sh_var(sh, (struct pl_shader_var) { .var = pl_var_vec2("input_size"), .data = src_size, })); float dst_size[2] = { pl_rect_w(params->dst_rect), pl_rect_h(params->dst_rect) }; GLSLH("#define target_size "$" \n", sh_var(sh, (struct pl_shader_var) { .var = pl_var_vec2("target_size"), .data = dst_size, })); float tex_off[2] = { params->src_rect.x0, params->src_rect.y0 }; GLSLH("#define tex_offset "$" \n", sh_var(sh, (struct pl_shader_var) { .var = pl_var_vec2("tex_offset"), .data = tex_off, })); // Custom parameters for (int i = 0; i < p->hook_params.num; i++) { const struct pl_hook_par *hp = &p->hook_params.elem[i]; switch (hp->mode) { case PL_HOOK_PAR_VARIABLE: case PL_HOOK_PAR_DYNAMIC: GLSLH("#define %s "$" \n", hp->name, sh_var(sh, (struct pl_shader_var) { .var = { .name = hp->name, .type = hp->type, .dim_v = 1, .dim_m = 1, .dim_a = 1, }, .data = hp->data, .dynamic = hp->mode == PL_HOOK_PAR_DYNAMIC, })); break; case PL_HOOK_PAR_CONSTANT: GLSLH("#define %s "$" \n", hp->name, sh_const(sh, (struct pl_shader_const) { .name = hp->name, .type = hp->type, .data = hp->data, .compile_time = true, })); break; case PL_HOOK_PAR_DEFINE: GLSLH("#define %s %d \n", hp->name, hp->data->i); break; case PL_HOOK_PAR_MODE_COUNT: pl_unreachable(); } if (hp->names) { for (int j = hp->minimum.i; j <= hp->maximum.i; j++) GLSLH("#define %s %d \n", hp->names[j], j); } } // Helper sub-shaders uint64_t sh_id = SH_PARAMS(sh).id; pl_shader_reset(p->trc_helper, pl_shader_params( .id = ++sh_id, .gpu = p->gpu, )); pl_shader_linearize(p->trc_helper, params->orig_color); GLSLH("#define linearize "$" \n", sh_subpass(sh, p->trc_helper)); pl_shader_reset(p->trc_helper, pl_shader_params( .id = ++sh_id, .gpu = p->gpu, )); pl_shader_delinearize(p->trc_helper, params->orig_color); GLSLH("#define delinearize "$" \n", sh_subpass(sh, p->trc_helper)); // Load and run the user shader itself sh_append_str(sh, SH_BUF_HEADER, hook->pass_body); sh_describef(sh, "%.*s", PL_STR_FMT(hook->pass_desc)); // Resolve output size and create framebuffer float out_size[2] = {0}; if (!eval_shexpr(&ctx, hook->width, &out_size[0]) || !eval_shexpr(&ctx, 
hook->height, &out_size[1])) { goto error; } int out_w = roundf(out_size[0]), out_h = roundf(out_size[1]); if (!sh_require(sh, PL_SHADER_SIG_NONE, out_w, out_h)) goto error; // Generate a new texture to store the render result pl_tex fbo; fbo = params->get_tex(params->priv, out_w, out_h); if (!fbo) { PL_ERR(p, "Failed dispatching hook: `get_tex` callback failed?"); goto error; } bool ok; if (hook->is_compute) { if (!sh_try_compute(sh, hook->threads_w, hook->threads_h, false, 0) || !fbo->params.storable) { PL_ERR(p, "Failed dispatching COMPUTE shader"); goto error; } GLSLP("#define out_image "$" \n", sh_desc(sh, (struct pl_shader_desc) { .binding.object = fbo, .desc = { .name = "out_image", .type = PL_DESC_STORAGE_IMG, .access = PL_DESC_ACCESS_WRITEONLY, }, })); sh->output = PL_SHADER_SIG_NONE; GLSL("hook(); \n"); ok = pl_dispatch_compute(params->dispatch, pl_dispatch_compute_params( .shader = &sh, .dispatch_size = { // Round up as many blocks as are needed to cover the image PL_DIV_UP(out_w, hook->block_w), PL_DIV_UP(out_h, hook->block_h), 1, }, .width = out_w, .height = out_h, )); } else { // Default non-COMPUTE shaders to explicitly use fragment shaders // only, to avoid breaking things like fwidth() sh->type = PL_DEF(sh->type, SH_FRAGMENT); GLSL("vec4 color = hook(); \n"); ok = pl_dispatch_finish(params->dispatch, pl_dispatch_params( .shader = &sh, .target = fbo, )); } if (!ok) goto error; float sx = (float) out_w / ctx.hooked.tex->params.w, sy = (float) out_h / ctx.hooked.tex->params.h, x0 = sx * ctx.hooked.rect.x0 + hook->offset[0], y0 = sy * ctx.hooked.rect.y0 + hook->offset[1]; pl_rect2df new_rect = { x0, y0, x0 + sx * pl_rect_w(ctx.hooked.rect), y0 + sy * pl_rect_h(ctx.hooked.rect), }; if (hook->offset_align) { float rx = pl_rect_w(new_rect) / pl_rect_w(params->src_rect), ry = pl_rect_h(new_rect) / pl_rect_h(params->src_rect), ox = rx * params->src_rect.x0 - sx * ctx.hooked.rect.x0, oy = ry * params->src_rect.y0 - sy * ctx.hooked.rect.y0; pl_rect2df_offset(&new_rect, ox, oy); } // Save the result of this shader invocation struct pass_tex ptex = { .name = hook->save_tex.len ? 
hook->save_tex : stage, .tex = fbo, .repr = ctx.hooked.repr, .color = ctx.hooked.color, .comps = PL_DEF(hook->comps, ctx.hooked.comps), .rect = new_rect, }; // It's assumed that users will correctly normalize the input pl_color_repr_normalize(&ptex.repr); PL_TRACE(p, "Saving output texture '%.*s' from hook execution on '%.*s'", PL_STR_FMT(ptex.name), PL_STR_FMT(stage)); save_pass_tex(p, ptex); // Update the result object, unless we saved to a different name if (pl_str_equals(ptex.name, stage)) { ctx.hooked = ptex; res = (struct pl_hook_res) { .output = PL_HOOK_SIG_TEX, .tex = fbo, .repr = ptex.repr, .color = ptex.color, .components = ptex.comps, .rect = new_rect, }; } next_pass: ; } return res; error: pl_dispatch_abort(params->dispatch, &sh); return (struct pl_hook_res) { .failed = true }; } const struct pl_hook *pl_mpv_user_shader_parse(pl_gpu gpu, const char *shader_text, size_t shader_len) { if (!shader_len) return NULL; pl_str shader = { (uint8_t *) shader_text, shader_len }; struct pl_hook *hook = pl_zalloc_obj(NULL, hook, struct hook_priv); struct hook_priv *p = PL_PRIV(hook); *hook = (struct pl_hook) { .input = PL_HOOK_SIG_TEX, .priv = p, .reset = hook_reset, .hook = hook_hook, .signature = pl_str_hash(shader), }; *p = (struct hook_priv) { .log = gpu->log, .gpu = gpu, .alloc = hook, .trc_helper = pl_shader_alloc(gpu->log, NULL), .prng_state = { // Determined by fair die roll 0xb76d71f9443c228allu, 0x93a02092fc4807e8llu, 0x06d81748f838bd07llu, 0x9381ee129dddce6cllu, }, }; shader = pl_strdup(hook, shader); // Skip all garbage (e.g. comments) before the first header int pos = pl_str_find(shader, pl_str0("//!")); if (pos < 0) { PL_ERR(gpu, "Shader appears to contain no headers?"); goto error; } shader = pl_str_drop(shader, pos); // Loop over the file while (shader.len > 0) { // Peek at the first header to dispatch the right type if (pl_str_startswith0(shader, "//!TEXTURE")) { struct pl_shader_desc sd; if (!parse_tex(gpu, hook, &shader, &sd)) goto error; PL_INFO(gpu, "Registering named texture '%s'", sd.desc.name); PL_ARRAY_APPEND(hook, p->descriptors, sd); continue; } if (pl_str_startswith0(shader, "//!BUFFER")) { struct pl_shader_desc sd; if (!parse_buf(gpu, hook, &shader, &sd)) goto error; PL_INFO(gpu, "Registering named buffer '%s'", sd.desc.name); PL_ARRAY_APPEND(hook, p->descriptors, sd); continue; } if (pl_str_startswith0(shader, "//!PARAM")) { struct pl_hook_par hp; if (!parse_param(gpu->log, hook, &shader, &hp)) goto error; PL_INFO(gpu, "Registering named parameter '%s'", hp.name); PL_ARRAY_APPEND(hook, p->hook_params, hp); continue; } struct custom_shader_hook h; if (!parse_hook(gpu->log, &shader, &h)) goto error; struct hook_pass pass = { .exec_stages = 0, .hook = h, }; for (int i = 0; i < PL_ARRAY_SIZE(h.hook_tex); i++) pass.exec_stages |= mp_stage_to_pl(h.hook_tex[i]); for (int i = 0; i < PL_ARRAY_SIZE(h.bind_tex); i++) { p->save_stages |= mp_stage_to_pl(h.bind_tex[i]); if (pl_str_equals0(h.bind_tex[i], "HOOKED")) p->save_stages |= pass.exec_stages; } // As an extra precaution, this avoids errors when trying to run // conditions against planes that were never hooked. As a sole // exception, OUTPUT is special because it's hard-coded to return the // dst_rect even before it was hooked. 
(This is an apparently // undocumented mpv quirk, but shaders rely on it in practice) enum pl_hook_stage rpn_stages = 0; for (int i = 0; i < PL_ARRAY_SIZE(h.width); i++) { if (h.width[i].tag == SHEXP_TEX_W || h.width[i].tag == SHEXP_TEX_H) rpn_stages |= mp_stage_to_pl(h.width[i].val.varname); } for (int i = 0; i < PL_ARRAY_SIZE(h.height); i++) { if (h.height[i].tag == SHEXP_TEX_W || h.height[i].tag == SHEXP_TEX_H) rpn_stages |= mp_stage_to_pl(h.height[i].val.varname); } for (int i = 0; i < PL_ARRAY_SIZE(h.cond); i++) { if (h.cond[i].tag == SHEXP_TEX_W || h.cond[i].tag == SHEXP_TEX_H) rpn_stages |= mp_stage_to_pl(h.cond[i].val.varname); } p->save_stages |= rpn_stages & ~PL_HOOK_OUTPUT; PL_INFO(gpu, "Registering hook pass: %.*s", PL_STR_FMT(h.pass_desc)); PL_ARRAY_APPEND(hook, p->hook_passes, pass); } // We need to hook on both the exec and save stages, so that we can keep // track of any textures we might need hook->stages |= p->save_stages; for (int i = 0; i < p->hook_passes.num; i++) hook->stages |= p->hook_passes.elem[i].exec_stages; hook->parameters = p->hook_params.elem; hook->num_parameters = p->hook_params.num; PL_MSG(gpu, PL_LOG_DEBUG, "Loaded user shader:"); pl_msg_source(gpu->log, PL_LOG_DEBUG, shader_text); return hook; error: pl_mpv_user_shader_destroy((const struct pl_hook **) &hook); PL_MSG(gpu, PL_LOG_ERR, "Failed to parse user shader:"); pl_msg_source(gpu->log, PL_LOG_ERR, shader_text); pl_log_stack_trace(gpu->log, PL_LOG_ERR); return NULL; } void pl_mpv_user_shader_destroy(const struct pl_hook **hookp) { const struct pl_hook *hook = *hookp; if (!hook) return; struct hook_priv *p = PL_PRIV(hook); for (int i = 0; i < p->descriptors.num; i++) { switch (p->descriptors.elem[i].desc.type) { case PL_DESC_BUF_UNIFORM: case PL_DESC_BUF_STORAGE: case PL_DESC_BUF_TEXEL_UNIFORM: case PL_DESC_BUF_TEXEL_STORAGE: { pl_buf buf = p->descriptors.elem[i].binding.object; pl_buf_destroy(p->gpu, &buf); break; } case PL_DESC_SAMPLED_TEX: case PL_DESC_STORAGE_IMG: { pl_tex tex = p->descriptors.elem[i].binding.object; pl_tex_destroy(p->gpu, &tex); break; case PL_DESC_INVALID: case PL_DESC_TYPE_COUNT: pl_unreachable(); } } } pl_shader_free(&p->trc_helper); pl_free((void *) hook); *hookp = NULL; } libplacebo-v7.349.0/src/shaders/deinterlacing.c000066400000000000000000000304421463457750100213730ustar00rootroot00000000000000/* * This file is part of libplacebo, but also based on vf_yadif_cuda.cu: * Copyright (C) 2018 Philip Langdale * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . 
*/ #include "shaders.h" #include const struct pl_deinterlace_params pl_deinterlace_default_params = { PL_DEINTERLACE_DEFAULTS }; void pl_shader_deinterlace(pl_shader sh, const struct pl_deinterlace_source *src, const struct pl_deinterlace_params *params) { params = PL_DEF(params, &pl_deinterlace_default_params); const struct pl_tex_params *texparams = &src->cur.top->params; if (!sh_require(sh, PL_SHADER_SIG_NONE, texparams->w, texparams->h)) return; sh_describe(sh, "deinterlacing"); GLSL("vec4 color = vec4(0,0,0,1); \n" "// pl_shader_deinterlace \n" "{ \n"); uint8_t comp_mask = PL_DEF(src->component_mask, 0xFu); comp_mask &= (1u << texparams->format->num_components) - 1u; if (!comp_mask) { SH_FAIL(sh, "pl_shader_deinterlace: empty component mask?"); return; } const uint8_t num_comps = sh_num_comps(comp_mask); const char *swiz = sh_swizzle(comp_mask); GLSL("#define T %s \n", sh_float_type(comp_mask)); ident_t pos, pt; ident_t cur = sh_bind(sh, src->cur.top, PL_TEX_ADDRESS_MIRROR, PL_TEX_SAMPLE_NEAREST, "cur", NULL, &pos, &pt); if (!cur) return; GLSL("#define GET(TEX, X, Y) \\\n" " (textureLod(TEX, pos + pt * vec2(X, Y), 0.0).%s) \n" "vec2 pos = "$"; \n" "vec2 pt = "$"; \n" "T res; \n", swiz, pos, pt); if (src->field == PL_FIELD_NONE) { GLSL("res = GET("$", 0, 0); \n", cur); goto done; } // Don't modify the primary field GLSL("int yh = textureSize("$", 0).y; \n" "int yo = int("$".y * float(yh)); \n" "if (yo %% 2 == %d) { \n" " res = GET("$", 0, 0); \n" "} else { \n", cur, pos, src->field == PL_FIELD_TOP ? 0 : 1, cur); switch (params->algo) { case PL_DEINTERLACE_WEAVE: GLSL("res = GET("$", 0, 0); \n", cur); break; case PL_DEINTERLACE_BOB: GLSL("res = GET("$", 0, %d); \n", cur, src->field == PL_FIELD_TOP ? -1 : 1); break; case PL_DEINTERLACE_YADIF: { // Try using a compute shader for this, for the sole reason of // optimizing for thread group synchronicity. Otherwise, because we // alternate between lines output as-is and lines output deinterlaced, // half of our thread group will be mostly idle at any point in time. const int bw = PL_DEF(sh_glsl(sh).subgroup_size, 32); sh_try_compute(sh, bw, 1, true, 0); // This magic constant is hard-coded in the original implementation as // '1' on an 8-bit scale. Since we work with arbitrary bit depth // floating point textures, we have to convert this somehow. Hard-code // it as 1/255 under the assumption that the original intent was to be // roughly 1 unit of brightness increment on an 8-bit source. This may // or may not produce suboptimal results on higher-bit-depth content. 
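        // (Note: on the normalized [0,1] scale used here this is a fixed bias
        // of 1/255 regardless of the source bit depth, i.e. roughly 4 code
        // values on a 10-bit scale rather than 1.)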
static const float spatial_bias = 1 / 255.0f; // Calculate spatial prediction ident_t spatial_pred = sh_fresh(sh, "spatial_predictor"); GLSLH("float "$"(float a, float b, float c, float d, float e, float f, float g, \n" " float h, float i, float j, float k, float l, float m, float n) \n" "{ \n" " float spatial_pred = (d + k) / 2.0; \n" " float spatial_score = abs(c - j) + abs(d - k) + abs(e - l) - %f; \n" " float score = abs(b - k) + abs(c - l) + abs(d - m); \n" " if (score < spatial_score) { \n" " spatial_pred = (c + l) / 2.0; \n" " spatial_score = score; \n" " score = abs(a - l) + abs(b - m) + abs(c - n); \n" " if (score < spatial_score) { \n" " spatial_pred = (b + m) / 2.0; \n" " spatial_score = score; \n" " } \n" " } \n" " score = abs(d - i) + abs(e - j) + abs(f - k); \n" " if (score < spatial_score) { \n" " spatial_pred = (e + j) / 2.0; \n" " spatial_score = score; \n" " score = abs(e - h) + abs(f - i) + abs(g - j); \n" " if (score < spatial_score) { \n" " spatial_pred = (f + i) / 2.0; \n" " spatial_score = score; \n" " } \n" " } \n" " return spatial_pred; \n" "} \n", spatial_pred, spatial_bias); GLSL("T a = GET("$", -3, -1); \n" "T b = GET("$", -2, -1); \n" "T c = GET("$", -1, -1); \n" "T d = GET("$", 0, -1); \n" "T e = GET("$", +1, -1); \n" "T f = GET("$", +2, -1); \n" "T g = GET("$", +3, -1); \n" "T h = GET("$", -3, +1); \n" "T i = GET("$", -2, +1); \n" "T j = GET("$", -1, +1); \n" "T k = GET("$", 0, +1); \n" "T l = GET("$", +1, +1); \n" "T m = GET("$", +2, +1); \n" "T n = GET("$", +3, +1); \n", cur, cur, cur, cur, cur, cur, cur, cur, cur, cur, cur, cur, cur, cur); if (num_comps == 1) { GLSL("res = "$"(a, b, c, d, e, f, g, h, i, j, k, l, m, n); \n", spatial_pred); } else { for (uint8_t i = 0; i < num_comps; i++) { char c = "xyzw"[i]; GLSL("res.%c = "$"(a.%c, b.%c, c.%c, d.%c, e.%c, f.%c, g.%c, \n" " h.%c, i.%c, j.%c, k.%c, l.%c, m.%c, n.%c); \n", c, spatial_pred, c, c, c, c, c, c, c, c, c, c, c, c, c, c); } } // Calculate temporal prediction ident_t temporal_pred = sh_fresh(sh, "temporal_predictor"); GLSLH("float "$"(float A, float B, float C, float D, float E, float F, \n" " float G, float H, float I, float J, float K, float L, \n" " float spatial_pred) \n" "{ \n" " float p0 = (C + H) / 2.0; \n" " float p1 = F; \n" " float p2 = (D + I) / 2.0; \n" " float p3 = G; \n" " float p4 = (E + J) / 2.0; \n" " float tdiff0 = abs(D - I) / 2.0; \n" " float tdiff1 = (abs(A - F) + abs(B - G)) / 2.0; \n" " float tdiff2 = (abs(K - F) + abs(G - L)) / 2.0; \n" " float diff = max(tdiff0, max(tdiff1, tdiff2)); \n", temporal_pred); if (!params->skip_spatial_check) { GLSLH("float maxi = max(p2 - min(p3, p1), min(p0 - p1, p4 - p3)); \n" "float mini = min(p2 - max(p3, p1), max(p0 - p1, p4 - p3)); \n" "diff = max(diff, max(mini, -maxi)); \n"); } GLSLH(" if (spatial_pred > p2 + diff) \n" " spatial_pred = p2 + diff; \n" " if (spatial_pred < p2 - diff) \n" " spatial_pred = p2 - diff; \n" " return spatial_pred; \n" "} \n"); ident_t prev2 = cur, next2 = cur; if (src->prev.top && src->prev.top != src->cur.top) { pl_assert(src->prev.top->params.w == texparams->w); pl_assert(src->prev.top->params.h == texparams->h); prev2 = sh_bind(sh, src->prev.top, PL_TEX_ADDRESS_MIRROR, PL_TEX_SAMPLE_NEAREST, "prev", NULL, NULL, NULL); if (!prev2) return; } if (src->next.top && src->next.top != src->cur.top) { pl_assert(src->next.top->params.w == texparams->w); pl_assert(src->next.top->params.h == texparams->h); next2 = sh_bind(sh, src->next.top, PL_TEX_ADDRESS_MIRROR, PL_TEX_SAMPLE_NEAREST, "next", NULL, NULL, NULL); if 
(!next2) return; } enum pl_field first_field = PL_DEF(src->first_field, PL_FIELD_TOP); ident_t prev1 = src->field == first_field ? prev2 : cur; ident_t next1 = src->field == first_field ? cur : next2; GLSL("T A = GET("$", 0, -1); \n" "T B = GET("$", 0, 1); \n" "T C = GET("$", 0, -2); \n" "T D = GET("$", 0, 0); \n" "T E = GET("$", 0, +2); \n" "T F = GET("$", 0, -1); \n" "T G = GET("$", 0, +1); \n" "T H = GET("$", 0, -2); \n" "T I = GET("$", 0, 0); \n" "T J = GET("$", 0, +2); \n" "T K = GET("$", 0, -1); \n" "T L = GET("$", 0, +1); \n", prev2, prev2, prev1, prev1, prev1, cur, cur, next1, next1, next1, next2, next2); if (num_comps == 1) { GLSL("res = "$"(A, B, C, D, E, F, G, H, I, J, K, L, res); \n", temporal_pred); } else { for (uint8_t i = 0; i < num_comps; i++) { char c = "xyzw"[i]; GLSL("res.%c = "$"(A.%c, B.%c, C.%c, D.%c, E.%c, F.%c, \n" " G.%c, H.%c, I.%c, J.%c, K.%c, L.%c, \n" " res.%c); \n", c, temporal_pred, c, c, c, c, c, c, c, c, c, c, c, c, c); } } break; } case PL_DEINTERLACE_ALGORITHM_COUNT: pl_unreachable(); } GLSL("}\n"); // End of primary/secondary field branch done: GLSL("color.%s = res; \n" "#undef T \n" "#undef GET \n" "} \n", swiz); } libplacebo-v7.349.0/src/shaders/dithering.c000066400000000000000000000505231463457750100205420ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . 
*/ #include #include "shaders.h" #include const struct pl_dither_params pl_dither_default_params = { PL_DITHER_DEFAULTS }; struct sh_dither_obj { pl_shader_obj lut; }; static void sh_dither_uninit(pl_gpu gpu, void *ptr) { struct sh_dither_obj *obj = ptr; pl_shader_obj_destroy(&obj->lut); *obj = (struct sh_dither_obj) {0}; } static void fill_dither_matrix(void *data, const struct sh_lut_params *params) { pl_assert(params->width > 0 && params->height > 0 && params->comps == 1); const struct pl_dither_params *dpar = params->priv; switch (dpar->method) { case PL_DITHER_ORDERED_LUT: pl_assert(params->width == params->height); pl_generate_bayer_matrix(data, params->width); return; case PL_DITHER_BLUE_NOISE: pl_assert(params->width == params->height); pl_generate_blue_noise(data, params->width); return; case PL_DITHER_ORDERED_FIXED: case PL_DITHER_WHITE_NOISE: case PL_DITHER_METHOD_COUNT: return; } pl_unreachable(); } static bool dither_method_is_lut(enum pl_dither_method method) { switch (method) { case PL_DITHER_BLUE_NOISE: case PL_DITHER_ORDERED_LUT: return true; case PL_DITHER_ORDERED_FIXED: case PL_DITHER_WHITE_NOISE: return false; case PL_DITHER_METHOD_COUNT: break; } pl_unreachable(); } static inline float approx_gamma(enum pl_color_transfer trc) { switch (trc) { case PL_COLOR_TRC_UNKNOWN: return 1.0f; case PL_COLOR_TRC_LINEAR: return 1.0f; case PL_COLOR_TRC_PRO_PHOTO:return 1.8f; case PL_COLOR_TRC_GAMMA18: return 1.8f; case PL_COLOR_TRC_GAMMA20: return 2.0f; case PL_COLOR_TRC_GAMMA24: return 2.4f; case PL_COLOR_TRC_GAMMA26: return 2.6f; case PL_COLOR_TRC_ST428: return 2.6f; case PL_COLOR_TRC_GAMMA28: return 2.8f; case PL_COLOR_TRC_SRGB: case PL_COLOR_TRC_BT_1886: case PL_COLOR_TRC_GAMMA22: return 2.2f; case PL_COLOR_TRC_PQ: case PL_COLOR_TRC_HLG: case PL_COLOR_TRC_V_LOG: case PL_COLOR_TRC_S_LOG1: case PL_COLOR_TRC_S_LOG2: return 2.0f; // TODO: handle this better case PL_COLOR_TRC_COUNT: break; } pl_unreachable(); } void pl_shader_dither(pl_shader sh, int new_depth, pl_shader_obj *dither_state, const struct pl_dither_params *params) { if (!sh_require(sh, PL_SHADER_SIG_COLOR, 0, 0)) return; if (new_depth <= 0 || new_depth > 256) { PL_WARN(sh, "Invalid dither depth: %d.. ignoring", new_depth); return; } sh_describef(sh, "dithering (%d bits)", new_depth); GLSL("// pl_shader_dither \n" "{ \n" "float bias; \n"); params = PL_DEF(params, &pl_dither_default_params); if (params->lut_size < 0 || params->lut_size > 8) { SH_FAIL(sh, "Invalid `lut_size` specified: %d", params->lut_size); return; } enum pl_dither_method method = params->method; ident_t lut = NULL_IDENT; int lut_size = 0; if (dither_method_is_lut(method)) { if (!dither_state) { PL_WARN(sh, "LUT-based dither method specified but no dither state " "object given, falling back to non-LUT based methods."); goto fallback; } struct sh_dither_obj *obj; obj = SH_OBJ(sh, dither_state, PL_SHADER_OBJ_DITHER, struct sh_dither_obj, sh_dither_uninit); if (!obj) goto fallback; bool cache = method == PL_DITHER_BLUE_NOISE; lut_size = 1 << PL_DEF(params->lut_size, pl_dither_default_params.lut_size); lut = sh_lut(sh, sh_lut_params( .object = &obj->lut, .var_type = PL_VAR_FLOAT, .width = lut_size, .height = lut_size, .comps = 1, .fill = fill_dither_matrix, .signature = (CACHE_KEY_DITHER ^ method) * lut_size, .cache = cache ? 
SH_CACHE(sh) : NULL, .priv = (void *) params, )); if (!lut) goto fallback; } goto done; fallback: method = PL_DITHER_ORDERED_FIXED; // fall through done: ; int size = 0; if (lut) { size = lut_size; } else if (method == PL_DITHER_ORDERED_FIXED) { size = 16; // hard-coded size } if (size) { // Transform the screen position to the cyclic range [0,1) GLSL("vec2 pos = fract(gl_FragCoord.xy * 1.0/"$"); \n", SH_FLOAT(size)); if (params->temporal) { int phase = SH_PARAMS(sh).index % 8; float r = phase * (M_PI / 2); // rotate float m = phase < 4 ? 1 : -1; // mirror float mat[2][2] = { {cos(r), -sin(r) }, {sin(r) * m, cos(r) * m}, }; ident_t rot = sh_var(sh, (struct pl_shader_var) { .var = pl_var_mat2("dither_rot"), .data = &mat[0][0], .dynamic = true, }); GLSL("pos = fract("$" * pos + vec2(1.0));\n", rot); } } switch (method) { case PL_DITHER_WHITE_NOISE: { ident_t prng = sh_prng(sh, params->temporal, NULL); GLSL("bias = "$".x;\n", prng); break; } case PL_DITHER_ORDERED_FIXED: // Bitwise ordered dither using only 32-bit uints GLSL("uvec2 xy = uvec2(pos * 16.0) %% 16u; \n" // Bitwise merge (morton number) "xy.x = xy.x ^ xy.y; \n" "xy = (xy | xy << 2) & uvec2(0x33333333); \n" "xy = (xy | xy << 1) & uvec2(0x55555555); \n" // Bitwise inversion "uint b = xy.x + (xy.y << 1); \n" "b = (b * 0x0802u & 0x22110u) | \n" " (b * 0x8020u & 0x88440u); \n" "b = 0x10101u * b; \n" "b = (b >> 16) & 0xFFu; \n" // Generate bias value "bias = float(b) * 1.0/256.0; \n"); break; case PL_DITHER_BLUE_NOISE: case PL_DITHER_ORDERED_LUT: pl_assert(lut); GLSL("bias = "$"(ivec2(pos * "$"));\n", lut, SH_FLOAT(lut_size)); break; case PL_DITHER_METHOD_COUNT: pl_unreachable(); } // Scale factor for dither rounding GLSL("const float scale = %llu.0; \n", (1LLU << new_depth) - 1); const float gamma = approx_gamma(params->transfer); if (gamma != 1.0f && new_depth <= 4) { GLSL("const float gamma = "$"; \n" "vec4 color_lin = pow(color, vec4(gamma)); \n", SH_FLOAT(gamma)); if (new_depth == 1) { // Special case for bit depth 1 dithering, in this case we can just // ignore the low/high rounding because we know we are always // dithering between 0.0 and 1.0. GLSL("const vec4 low = vec4(0.0); \n" "const vec4 high = vec4(1.0); \n" "vec4 offset = color_lin; \n"); } else { // Linearize the low, high and current color values GLSL("vec4 low = floor(color * scale) / scale; \n" "vec4 high = ceil(color * scale) / scale; \n" "vec4 low_lin = pow(low, vec4(gamma)); \n" "vec4 high_lin = pow(high, vec4(gamma)); \n" "vec4 range = high_lin - low_lin; \n" "vec4 offset = (color_lin - low_lin) / \n" " max(range, 1e-6); \n"); } // Mix in the correct ratio corresponding to the offset and bias GLSL("color = mix(low, high, greaterThan(offset, vec4(bias))); \n"); } else { // Approximate each gamma segment as a straight line, this simplifies // the process of dithering down to a single scale and (biased) round. GLSL("color = scale * color + vec4(bias); \n" "color = floor(color) * (1.0 / scale); \n"); } GLSL("} \n"); } /* Error diffusion code is taken from mpv, original copyright (c) 2019 Bin Jin * * mpv is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * mpv is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with mpv. If not, see . */ // After a (y, x) -> (y, x + y * shift) mapping, find the right most column that // will be affected by the current column. static int compute_rightmost_shifted_column(const struct pl_error_diffusion_kernel *k) { int ret = 0; for (int y = 0; y <= PL_EDF_MAX_DY; y++) { for (int x = PL_EDF_MIN_DX; x <= PL_EDF_MAX_DX; x++) { if (k->pattern[y][x - PL_EDF_MIN_DX] != 0) { int shifted_x = x + y * k->shift; // The shift mapping guarantees current column (or left of it) // won't be affected by error diffusion. assert(shifted_x > 0); ret = PL_MAX(ret, shifted_x); } } } return ret; } size_t pl_error_diffusion_shmem_req(const struct pl_error_diffusion_kernel *kernel, int height) { // We add PL_EDF_MAX_DY empty lines on the bottom to handle errors // propagated out from bottom side. int rows = height + PL_EDF_MAX_DY; int shifted_columns = compute_rightmost_shifted_column(kernel) + 1; // The shared memory is an array of size rows*shifted_columns. Each element // is a single uint for three RGB component. return rows * shifted_columns * sizeof(uint32_t); } bool pl_shader_error_diffusion(pl_shader sh, const struct pl_error_diffusion_params *params) { const int width = params->input_tex->params.w, height = params->input_tex->params.h; const struct pl_glsl_version glsl = sh_glsl(sh); const struct pl_error_diffusion_kernel *kernel = PL_DEF(params->kernel, &pl_error_diffusion_sierra_lite); pl_assert(params->output_tex->params.w == width); pl_assert(params->output_tex->params.h == height); if (!sh_require(sh, PL_SHADER_SIG_NONE, width, height)) return false; if (params->new_depth <= 0 || params->new_depth > 256) { PL_WARN(sh, "Invalid dither depth: %d.. ignoring", params->new_depth); return false; } // The parallel error diffusion works by applying the shift mapping first. // Taking the Floyd and Steinberg algorithm for example. After applying // the (y, x) -> (y, x + y * shift) mapping (with shift=2), all errors are // propagated into the next few columns, which makes parallel processing on // the same column possible. // // X 7/16 X 7/16 // 3/16 5/16 1/16 ==> 0 0 3/16 5/16 1/16 // Figuring out the size of rectangle containing all shifted pixels. // The rectangle height is not changed. int shifted_width = width + (height - 1) * kernel->shift; // We process all pixels from the shifted rectangles column by column, with // a single global work group of size |block_size|. // Figuring out how many block are required to process all pixels. We need // this explicitly to make the number of barrier() calls match. int block_size = PL_MIN(glsl.max_group_threads, height); int blocks = PL_DIV_UP(height * shifted_width, block_size); // If we figure out how many of the next columns will be affected while the // current columns is being processed. We can store errors of only a few // columns in the shared memory. Using a ring buffer will further save the // cost while iterating to next column. // int ring_buffer_rows = height + PL_EDF_MAX_DY; int ring_buffer_columns = compute_rightmost_shifted_column(kernel) + 1; ident_t ring_buffer_size = sh_const(sh, (struct pl_shader_const) { .type = PL_VAR_UINT, .name = "ring_buffer_size", .data = &(unsigned) { ring_buffer_rows * ring_buffer_columns }, .compile_time = true, }); // Compute shared memory requirements and try enabling compute shader. 
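    // (This is the same quantity returned by pl_error_diffusion_shmem_req()
    // above, so callers can check the requirement against their GPU's limits
    // before attempting to dispatch this shader.)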
size_t shmem_req = ring_buffer_rows * ring_buffer_columns * sizeof(uint32_t); if (!sh_try_compute(sh, block_size, 1, false, shmem_req)) { PL_ERR(sh, "Cannot execute error diffusion kernel: too old GPU or " "insufficient compute shader memory!"); return false; } ident_t in_tex = sh_desc(sh, (struct pl_shader_desc) { .binding.object = params->input_tex, .desc = { .name = "input_tex", .type = PL_DESC_SAMPLED_TEX, }, }); ident_t out_img = sh_desc(sh, (struct pl_shader_desc) { .binding.object = params->output_tex, .desc = { .name = "output_tex", .type = PL_DESC_STORAGE_IMG, .access = PL_DESC_ACCESS_WRITEONLY, }, }); sh->output = PL_SHADER_SIG_NONE; sh_describef(sh, "error diffusion (%s, %d bits)", kernel->name, params->new_depth); // Defines the ring buffer in shared memory. GLSLH("shared uint err_rgb8["$"]; \n", ring_buffer_size); GLSL("// pl_shader_error_diffusion \n" // Safeguard against accidental over-execution "if (gl_WorkGroupID != uvec3(0)) \n" " return; \n" // Initialize the ring buffer. "for (uint i = gl_LocalInvocationIndex; i < "$"; i+=gl_WorkGroupSize.x)\n" " err_rgb8[i] = 0u; \n" // Main block loop, add barrier here to have previous block all // processed before starting the processing of the next. "for (uint block_id = 0; block_id < "$"; block_id++) { \n" "barrier(); \n" // Compute the coordinate of the pixel we are currently processing, // both before and after the shift mapping. "uint id = block_id * gl_WorkGroupSize.x + gl_LocalInvocationIndex; \n" "const uint height = "$"; \n" "int y = int(id %% height), x_shifted = int(id / height); \n" "int x = x_shifted - y * %d; \n" // Proceed only if we are processing a valid pixel. "if (x >= 0 && x < "$") { \n" // The index that the current pixel have on the ring buffer. "uint idx = uint(x_shifted * "$" + y) %% "$"; \n" // Fetch the current pixel. "vec4 pix_orig = texelFetch("$", ivec2(x, y), 0); \n" "vec3 pix = pix_orig.rgb; \n", ring_buffer_size, SH_UINT(blocks), SH_UINT(height), kernel->shift, SH_INT(width), SH_INT(ring_buffer_rows), ring_buffer_size, in_tex); // The dithering will quantize pixel value into multiples of 1/dither_quant. int dither_quant = (1 << params->new_depth) - 1; // We encode errors in RGB components into a single 32-bit unsigned integer. // The error we propagate from the current pixel is in range of // [-0.5 / dither_quant, 0.5 / dither_quant]. While not quite obvious, the // sum of all errors been propagated into a pixel is also in the same range. // It's possible to map errors in this range into [-127, 127], and use an // unsigned 8-bit integer to store it (using standard two's complement). // The three 8-bit unsigned integers can then be encoded into a single // 32-bit unsigned integer, with two 4-bit padding to prevent addition // operation overflows affecting other component. There are at most 12 // addition operations on each pixel, so 4-bit padding should be enough. // The overflow from R component will be discarded. // // The following figure is how the encoding looks like. // // +------------------------------------+ // |RRRRRRRR|0000|GGGGGGGG|0000|BBBBBBBB| // +------------------------------------+ // // The bitshift position for R and G component. const int bitshift_r = 24, bitshift_g = 12; // The multiplier we use to map [-0.5, 0.5] to [-127, 127]. const int uint8_mul = 127 * 2; GLSL(// Add the error previously propagated into current pixel, and clear // it in the ring buffer. 
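         // (The constant below adds a +128 bias to every byte lane, so the
         // accumulated two's-complement error bytes can be decoded with a
         // plain unsigned read followed by subtracting 128.)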
"uint err_u32 = err_rgb8[idx] + %uu; \n" "pix = pix * %d.0 + vec3(int((err_u32 >> %d) & 0xFFu) - 128, \n" " int((err_u32 >> %d) & 0xFFu) - 128, \n" " int( err_u32 & 0xFFu) - 128) / %d.0; \n" "err_rgb8[idx] = 0u; \n" // Write the dithered pixel. "vec3 dithered = round(pix); \n" "imageStore("$", ivec2(x, y), vec4(dithered / %d.0, pix_orig.a)); \n" // Prepare for error propagation pass "vec3 err_divided = (pix - dithered) * %d.0 / %d.0; \n" "ivec3 tmp; \n", (128u << bitshift_r) | (128u << bitshift_g) | 128u, dither_quant, bitshift_r, bitshift_g, uint8_mul, out_img, dither_quant, uint8_mul, kernel->divisor); // Group error propagation with same weight factor together, in order to // reduce the number of annoying error encoding. for (int dividend = 1; dividend <= kernel->divisor; dividend++) { bool err_assigned = false; for (int y = 0; y <= PL_EDF_MAX_DY; y++) { for (int x = PL_EDF_MIN_DX; x <= PL_EDF_MAX_DX; x++) { if (kernel->pattern[y][x - PL_EDF_MIN_DX] != dividend) continue; if (!err_assigned) { err_assigned = true; GLSL("tmp = ivec3(round(err_divided * %d.0)); \n" "err_u32 = (uint(tmp.r & 0xFF) << %d) | \n" " (uint(tmp.g & 0xFF) << %d) | \n" " uint(tmp.b & 0xFF); \n", dividend, bitshift_r, bitshift_g); } int shifted_x = x + y * kernel->shift; // Unlike the right border, errors propagated out from left // border will remain in the ring buffer. This will produce // visible artifacts near the left border, especially for // shift=3 kernels. if (x < 0) GLSL("if (x >= %d) \n", -x); // Calculate the new position in the ring buffer to propagate // the error into. int ring_buffer_delta = shifted_x * ring_buffer_rows + y; GLSL("atomicAdd(err_rgb8[(idx + %du) %% "$"], err_u32); \n", ring_buffer_delta, ring_buffer_size); } } } GLSL("}} \n"); // end of main loop + valid pixel conditional return true; } libplacebo-v7.349.0/src/shaders/film_grain.c000066400000000000000000000042101463457750100206640ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . 
*/ #include "shaders.h" #include "shaders/film_grain.h" bool pl_needs_film_grain(const struct pl_film_grain_params *params) { switch (params->data.type) { case PL_FILM_GRAIN_NONE: return false; case PL_FILM_GRAIN_AV1: return pl_needs_fg_av1(params); case PL_FILM_GRAIN_H274: return pl_needs_fg_h274(params); default: pl_unreachable(); } } struct sh_grain_obj { pl_shader_obj av1; pl_shader_obj h274; }; static void sh_grain_uninit(pl_gpu gpu, void *ptr) { struct sh_grain_obj *obj = ptr; pl_shader_obj_destroy(&obj->av1); pl_shader_obj_destroy(&obj->h274); } bool pl_shader_film_grain(pl_shader sh, pl_shader_obj *grain_state, const struct pl_film_grain_params *params) { if (!pl_needs_film_grain(params)) { // FIXME: Instead of erroring, sample directly SH_FAIL(sh, "pl_shader_film_grain called but no film grain needs to be " "applied, test with `pl_needs_film_grain` first!"); return false; } struct sh_grain_obj *obj; obj = SH_OBJ(sh, grain_state, PL_SHADER_OBJ_FILM_GRAIN, struct sh_grain_obj, sh_grain_uninit); if (!obj) return false; switch (params->data.type) { case PL_FILM_GRAIN_NONE: return false; case PL_FILM_GRAIN_AV1: return pl_shader_fg_av1(sh, &obj->av1, params); case PL_FILM_GRAIN_H274: return pl_shader_fg_h274(sh, &obj->h274, params); default: pl_unreachable(); } } libplacebo-v7.349.0/src/shaders/film_grain.h000066400000000000000000000044221463457750100206760ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . 
*/ #pragma once #include "common.h" #include bool pl_needs_fg_av1(const struct pl_film_grain_params *); bool pl_needs_fg_h274(const struct pl_film_grain_params *); bool pl_shader_fg_av1(pl_shader, pl_shader_obj *, const struct pl_film_grain_params *); bool pl_shader_fg_h274(pl_shader, pl_shader_obj *, const struct pl_film_grain_params *); // Common helper function static inline enum pl_channel channel_map(int i, const struct pl_film_grain_params *params) { static const enum pl_channel map_rgb[3] = { [PL_CHANNEL_G] = PL_CHANNEL_Y, [PL_CHANNEL_B] = PL_CHANNEL_CB, [PL_CHANNEL_R] = PL_CHANNEL_CR, }; static const enum pl_channel map_xyz[3] = { [1] = PL_CHANNEL_Y, // Y [2] = PL_CHANNEL_CB, // Z [0] = PL_CHANNEL_CR, // X }; if (i >= params->components) return PL_CHANNEL_NONE; int comp = params->component_mapping[i]; if (comp < 0 || comp > 2) return PL_CHANNEL_NONE; switch (params->repr->sys) { case PL_COLOR_SYSTEM_UNKNOWN: case PL_COLOR_SYSTEM_RGB: return map_rgb[comp]; case PL_COLOR_SYSTEM_XYZ: return map_xyz[comp]; case PL_COLOR_SYSTEM_BT_601: case PL_COLOR_SYSTEM_BT_709: case PL_COLOR_SYSTEM_SMPTE_240M: case PL_COLOR_SYSTEM_BT_2020_NC: case PL_COLOR_SYSTEM_BT_2020_C: case PL_COLOR_SYSTEM_BT_2100_PQ: case PL_COLOR_SYSTEM_BT_2100_HLG: case PL_COLOR_SYSTEM_DOLBYVISION: case PL_COLOR_SYSTEM_YCGCO: return comp; case PL_COLOR_SYSTEM_COUNT: break; } pl_unreachable(); } libplacebo-v7.349.0/src/shaders/film_grain_av1.c000066400000000000000000001263251463457750100214470ustar00rootroot00000000000000/* * This file is part of libplacebo, which is normally licensed under the terms * of the LGPL v2.1+. However, this file (film_grain_av1.c) is also available * under the terms of the more permissive MIT license: * * Copyright (c) 2018-2019 Niklas Haas * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #include "shaders.h" #include "shaders/film_grain.h" // Taken from the spec. 
Range is [-2048, 2047], mean is 0 and stddev is 512 static const int16_t gaussian_sequence[2048] = { 56, 568, -180, 172, 124, -84, 172, -64, -900, 24, 820, 224, 1248, 996, 272, -8, -916, -388, -732, -104, -188, 800, 112, -652, -320, -376, 140, -252, 492, -168, 44, -788, 588, -584, 500, -228, 12, 680, 272, -476, 972, -100, 652, 368, 432, -196, -720, -192, 1000, -332, 652, -136, -552, -604, -4, 192, -220, -136, 1000, -52, 372, -96, -624, 124, -24, 396, 540, -12, -104, 640, 464, 244, -208, -84, 368, -528, -740, 248, -968, -848, 608, 376, -60, -292, -40, -156, 252, -292, 248, 224, -280, 400, -244, 244, -60, 76, -80, 212, 532, 340, 128, -36, 824, -352, -60, -264, -96, -612, 416, -704, 220, -204, 640, -160, 1220, -408, 900, 336, 20, -336, -96, -792, 304, 48, -28, -1232, -1172, -448, 104, -292, -520, 244, 60, -948, 0, -708, 268, 108, 356, -548, 488, -344, -136, 488, -196, -224, 656, -236, -1128, 60, 4, 140, 276, -676, -376, 168, -108, 464, 8, 564, 64, 240, 308, -300, -400, -456, -136, 56, 120, -408, -116, 436, 504, -232, 328, 844, -164, -84, 784, -168, 232, -224, 348, -376, 128, 568, 96, -1244, -288, 276, 848, 832, -360, 656, 464, -384, -332, -356, 728, -388, 160, -192, 468, 296, 224, 140, -776, -100, 280, 4, 196, 44, -36, -648, 932, 16, 1428, 28, 528, 808, 772, 20, 268, 88, -332, -284, 124, -384, -448, 208, -228, -1044, -328, 660, 380, -148, -300, 588, 240, 540, 28, 136, -88, -436, 256, 296, -1000, 1400, 0, -48, 1056, -136, 264, -528, -1108, 632, -484, -592, -344, 796, 124, -668, -768, 388, 1296, -232, -188, -200, -288, -4, 308, 100, -168, 256, -500, 204, -508, 648, -136, 372, -272, -120, -1004, -552, -548, -384, 548, -296, 428, -108, -8, -912, -324, -224, -88, -112, -220, -100, 996, -796, 548, 360, -216, 180, 428, -200, -212, 148, 96, 148, 284, 216, -412, -320, 120, -300, -384, -604, -572, -332, -8, -180, -176, 696, 116, -88, 628, 76, 44, -516, 240, -208, -40, 100, -592, 344, -308, -452, -228, 20, 916, -1752, -136, -340, -804, 140, 40, 512, 340, 248, 184, -492, 896, -156, 932, -628, 328, -688, -448, -616, -752, -100, 560, -1020, 180, -800, -64, 76, 576, 1068, 396, 660, 552, -108, -28, 320, -628, 312, -92, -92, -472, 268, 16, 560, 516, -672, -52, 492, -100, 260, 384, 284, 292, 304, -148, 88, -152, 1012, 1064, -228, 164, -376, -684, 592, -392, 156, 196, -524, -64, -884, 160, -176, 636, 648, 404, -396, -436, 864, 424, -728, 988, -604, 904, -592, 296, -224, 536, -176, -920, 436, -48, 1176, -884, 416, -776, -824, -884, 524, -548, -564, -68, -164, -96, 692, 364, -692, -1012, -68, 260, -480, 876, -1116, 452, -332, -352, 892, -1088, 1220, -676, 12, -292, 244, 496, 372, -32, 280, 200, 112, -440, -96, 24, -644, -184, 56, -432, 224, -980, 272, -260, 144, -436, 420, 356, 364, -528, 76, 172, -744, -368, 404, -752, -416, 684, -688, 72, 540, 416, 92, 444, 480, -72, -1416, 164, -1172, -68, 24, 424, 264, 1040, 128, -912, -524, -356, 64, 876, -12, 4, -88, 532, 272, -524, 320, 276, -508, 940, 24, -400, -120, 756, 60, 236, -412, 100, 376, -484, 400, -100, -740, -108, -260, 328, -268, 224, -200, -416, 184, -604, -564, -20, 296, 60, 892, -888, 60, 164, 68, -760, 216, -296, 904, -336, -28, 404, -356, -568, -208, -1480, -512, 296, 328, -360, -164, -1560, -776, 1156, -428, 164, -504, -112, 120, -216, -148, -264, 308, 32, 64, -72, 72, 116, 176, -64, -272, 460, -536, -784, -280, 348, 108, -752, -132, 524, -540, -776, 116, -296, -1196, -288, -560, 1040, -472, 116, -848, -1116, 116, 636, 696, 284, -176, 1016, 204, -864, -648, -248, 356, 972, -584, -204, 264, 880, 528, -24, -184, 116, 448, -144, 828, 524, 212, -212, 52, 12, 
200, 268, -488, -404, -880, 824, -672, -40, 908, -248, 500, 716, -576, 492, -576, 16, 720, -108, 384, 124, 344, 280, 576, -500, 252, 104, -308, 196, -188, -8, 1268, 296, 1032, -1196, 436, 316, 372, -432, -200, -660, 704, -224, 596, -132, 268, 32, -452, 884, 104, -1008, 424, -1348, -280, 4, -1168, 368, 476, 696, 300, -8, 24, 180, -592, -196, 388, 304, 500, 724, -160, 244, -84, 272, -256, -420, 320, 208, -144, -156, 156, 364, 452, 28, 540, 316, 220, -644, -248, 464, 72, 360, 32, -388, 496, -680, -48, 208, -116, -408, 60, -604, -392, 548, -840, 784, -460, 656, -544, -388, -264, 908, -800, -628, -612, -568, 572, -220, 164, 288, -16, -308, 308, -112, -636, -760, 280, -668, 432, 364, 240, -196, 604, 340, 384, 196, 592, -44, -500, 432, -580, -132, 636, -76, 392, 4, -412, 540, 508, 328, -356, -36, 16, -220, -64, -248, -60, 24, -192, 368, 1040, 92, -24, -1044, -32, 40, 104, 148, 192, -136, -520, 56, -816, -224, 732, 392, 356, 212, -80, -424, -1008, -324, 588, -1496, 576, 460, -816, -848, 56, -580, -92, -1372, -112, -496, 200, 364, 52, -140, 48, -48, -60, 84, 72, 40, 132, -356, -268, -104, -284, -404, 732, -520, 164, -304, -540, 120, 328, -76, -460, 756, 388, 588, 236, -436, -72, -176, -404, -316, -148, 716, -604, 404, -72, -88, -888, -68, 944, 88, -220, -344, 960, 472, 460, -232, 704, 120, 832, -228, 692, -508, 132, -476, 844, -748, -364, -44, 1116, -1104, -1056, 76, 428, 552, -692, 60, 356, 96, -384, -188, -612, -576, 736, 508, 892, 352, -1132, 504, -24, -352, 324, 332, -600, -312, 292, 508, -144, -8, 484, 48, 284, -260, -240, 256, -100, -292, -204, -44, 472, -204, 908, -188, -1000, -256, 92, 1164, -392, 564, 356, 652, -28, -884, 256, 484, -192, 760, -176, 376, -524, -452, -436, 860, -736, 212, 124, 504, -476, 468, 76, -472, 552, -692, -944, -620, 740, -240, 400, 132, 20, 192, -196, 264, -668, -1012, -60, 296, -316, -828, 76, -156, 284, -768, -448, -832, 148, 248, 652, 616, 1236, 288, -328, -400, -124, 588, 220, 520, -696, 1032, 768, -740, -92, -272, 296, 448, -464, 412, -200, 392, 440, -200, 264, -152, -260, 320, 1032, 216, 320, -8, -64, 156, -1016, 1084, 1172, 536, 484, -432, 132, 372, -52, -256, 84, 116, -352, 48, 116, 304, -384, 412, 924, -300, 528, 628, 180, 648, 44, -980, -220, 1320, 48, 332, 748, 524, -268, -720, 540, -276, 564, -344, -208, -196, 436, 896, 88, -392, 132, 80, -964, -288, 568, 56, -48, -456, 888, 8, 552, -156, -292, 948, 288, 128, -716, -292, 1192, -152, 876, 352, -600, -260, -812, -468, -28, -120, -32, -44, 1284, 496, 192, 464, 312, -76, -516, -380, -456, -1012, -48, 308, -156, 36, 492, -156, -808, 188, 1652, 68, -120, -116, 316, 160, -140, 352, 808, -416, 592, 316, -480, 56, 528, -204, -568, 372, -232, 752, -344, 744, -4, 324, -416, -600, 768, 268, -248, -88, -132, -420, -432, 80, -288, 404, -316, -1216, -588, 520, -108, 92, -320, 368, -480, -216, -92, 1688, -300, 180, 1020, -176, 820, -68, -228, -260, 436, -904, 20, 40, -508, 440, -736, 312, 332, 204, 760, -372, 728, 96, -20, -632, -520, -560, 336, 1076, -64, -532, 776, 584, 192, 396, -728, -520, 276, -188, 80, -52, -612, -252, -48, 648, 212, -688, 228, -52, -260, 428, -412, -272, -404, 180, 816, -796, 48, 152, 484, -88, -216, 988, 696, 188, -528, 648, -116, -180, 316, 476, 12, -564, 96, 476, -252, -364, -376, -392, 556, -256, -576, 260, -352, 120, -16, -136, -260, -492, 72, 556, 660, 580, 616, 772, 436, 424, -32, -324, -1268, 416, -324, -80, 920, 160, 228, 724, 32, -516, 64, 384, 68, -128, 136, 240, 248, -204, -68, 252, -932, -120, -480, -628, -84, 192, 852, -404, -288, -132, 204, 100, 168, -68, -196, -868, 460, 1080, 
380, -80, 244, 0, 484, -888, 64, 184, 352, 600, 460, 164, 604, -196, 320, -64, 588, -184, 228, 12, 372, 48, -848, -344, 224, 208, -200, 484, 128, -20, 272, -468, -840, 384, 256, -720, -520, -464, -580, 112, -120, 644, -356, -208, -608, -528, 704, 560, -424, 392, 828, 40, 84, 200, -152, 0, -144, 584, 280, -120, 80, -556, -972, -196, -472, 724, 80, 168, -32, 88, 160, -688, 0, 160, 356, 372, -776, 740, -128, 676, -248, -480, 4, -364, 96, 544, 232, -1032, 956, 236, 356, 20, -40, 300, 24, -676, -596, 132, 1120, -104, 532, -1096, 568, 648, 444, 508, 380, 188, -376, -604, 1488, 424, 24, 756, -220, -192, 716, 120, 920, 688, 168, 44, -460, 568, 284, 1144, 1160, 600, 424, 888, 656, -356, -320, 220, 316, -176, -724, -188, -816, -628, -348, -228, -380, 1012, -452, -660, 736, 928, 404, -696, -72, -268, -892, 128, 184, -344, -780, 360, 336, 400, 344, 428, 548, -112, 136, -228, -216, -820, -516, 340, 92, -136, 116, -300, 376, -244, 100, -316, -520, -284, -12, 824, 164, -548, -180, -128, 116, -924, -828, 268, -368, -580, 620, 192, 160, 0, -1676, 1068, 424, -56, -360, 468, -156, 720, 288, -528, 556, -364, 548, -148, 504, 316, 152, -648, -620, -684, -24, -376, -384, -108, -920, -1032, 768, 180, -264, -508, -1268, -260, -60, 300, -240, 988, 724, -376, -576, -212, -736, 556, 192, 1092, -620, -880, 376, -56, -4, -216, -32, 836, 268, 396, 1332, 864, -600, 100, 56, -412, -92, 356, 180, 884, -468, -436, 292, -388, -804, -704, -840, 368, -348, 140, -724, 1536, 940, 372, 112, -372, 436, -480, 1136, 296, -32, -228, 132, -48, -220, 868, -1016, -60, -1044, -464, 328, 916, 244, 12, -736, -296, 360, 468, -376, -108, -92, 788, 368, -56, 544, 400, -672, -420, 728, 16, 320, 44, -284, -380, -796, 488, 132, 204, -596, -372, 88, -152, -908, -636, -572, -624, -116, -692, -200, -56, 276, -88, 484, -324, 948, 864, 1000, -456, -184, -276, 292, -296, 156, 676, 320, 160, 908, -84, -1236, -288, -116, 260, -372, -644, 732, -756, -96, 84, 344, -520, 348, -688, 240, -84, 216, -1044, -136, -676, -396, -1500, 960, -40, 176, 168, 1516, 420, -504, -344, -364, -360, 1216, -940, -380, -212, 252, -660, -708, 484, -444, -152, 928, -120, 1112, 476, -260, 560, -148, -344, 108, -196, 228, -288, 504, 560, -328, -88, 288, -1008, 460, -228, 468, -836, -196, 76, 388, 232, 412, -1168, -716, -644, 756, -172, -356, -504, 116, 432, 528, 48, 476, -168, -608, 448, 160, -532, -272, 28, -676, -12, 828, 980, 456, 520, 104, -104, 256, -344, -4, -28, -368, -52, -524, -572, -556, -200, 768, 1124, -208, -512, 176, 232, 248, -148, -888, 604, -600, -304, 804, -156, -212, 488, -192, -804, -256, 368, -360, -916, -328, 228, -240, -448, -472, 856, -556, -364, 572, -12, -156, -368, -340, 432, 252, -752, -152, 288, 268, -580, -848, -592, 108, -76, 244, 312, -716, 592, -80, 436, 360, 4, -248, 160, 516, 584, 732, 44, -468, -280, -292, -156, -588, 28, 308, 912, 24, 124, 156, 180, -252, 944, -924, -772, -520, -428, -624, 300, -212, -1144, 32, -724, 800, -1128, -212, -1288, -848, 180, -416, 440, 192, -576, -792, -76, -1080, 80, -532, -352, -132, 380, -820, 148, 1112, 128, 164, 456, 700, -924, 144, -668, -384, 648, -832, 508, 552, -52, -100, -656, 208, -568, 748, -88, 680, 232, 300, 192, -408, -1012, -152, -252, -268, 272, -876, -664, -648, -332, -136, 16, 12, 1152, -28, 332, -536, 320, -672, -460, -316, 532, -260, 228, -40, 1052, -816, 180, 88, -496, -556, -672, -368, 428, 92, 356, 404, -408, 252, 196, -176, -556, 792, 268, 32, 372, 40, 96, -332, 328, 120, 372, -900, -40, 472, -264, -592, 952, 128, 656, 112, 664, -232, 420, 4, -344, -464, 556, 244, -416, -32, 252, 0, -412, 
188, -696, 508, -476, 324, -1096, 656, -312, 560, 264, -136, 304, 160, -64, -580, 248, 336, -720, 560, -348, -288, -276, -196, -500, 852, -544, -236, -1128, -992, -776, 116, 56, 52, 860, 884, 212, -12, 168, 1020, 512, -552, 924, -148, 716, 188, 164, -340, -520, -184, 880, -152, -680, -208, -1156, -300, -528, -472, 364, 100, -744, -1056, -32, 540, 280, 144, -676, -32, -232, -280, -224, 96, 568, -76, 172, 148, 148, 104, 32, -296, -32, 788, -80, 32, -16, 280, 288, 944, 428, -484 }; static inline int get_random_number(int bits, uint16_t *state) { int r = *state; uint16_t bit = ((r >> 0) ^ (r >> 1) ^ (r >> 3) ^ (r >> 12)) & 1; *state = (r >> 1) | (bit << 15); return (*state >> (16 - bits)) & ((1 << bits) - 1); } static inline int round2(int x, int shift) { if (!shift) return x; return (x + (1 << (shift - 1))) >> shift; } enum { BLOCK_SIZE = 32, SCALING_LUT_SIZE = 256, GRAIN_WIDTH = 82, GRAIN_HEIGHT = 73, // On the GPU we only need a subsection of this GRAIN_WIDTH_LUT = 64, GRAIN_HEIGHT_LUT = 64, GRAIN_PAD_LUT = 9, // For subsampled grain textures SUB_GRAIN_WIDTH = 44, SUB_GRAIN_HEIGHT = 38, SUB_GRAIN_WIDTH_LUT = GRAIN_WIDTH_LUT >> 1, SUB_GRAIN_HEIGHT_LUT = GRAIN_HEIGHT_LUT >> 1, SUB_GRAIN_PAD_LUT = 6, }; // Contains the shift by which the offsets are indexed enum offset { OFFSET_TL = 24, OFFSET_T = 16, OFFSET_L = 8, OFFSET_N = 0, }; // Helper function to compute some common constants struct grain_scale { int grain_center; int grain_min; int grain_max; float texture_scale; float grain_scale; }; static inline int bit_depth(const struct pl_color_repr *repr) { int depth = PL_DEF(repr->bits.color_depth, PL_DEF(repr->bits.sample_depth, 8)); pl_assert(depth >= 8); return PL_MIN(depth, 12); } static struct grain_scale get_grain_scale(const struct pl_film_grain_params *params) { int bits = bit_depth(params->repr); struct grain_scale ret = { .grain_center = 128 << (bits - 8), }; ret.grain_min = -ret.grain_center; ret.grain_max = (256 << (bits - 8)) - 1 - ret.grain_center; struct pl_color_repr repr = *params->repr; ret.texture_scale = pl_color_repr_normalize(&repr); // Since our color samples are normalized to the range [0, 1], we need to // scale down grain values from the scale [0, 2^b - 1] to this range. ret.grain_scale = 1.0 / ((1 << bits) - 1); return ret; } // Generates the basic grain table (LumaGrain in the spec). 
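// Illustrative numbers, assuming a 10-bit stream with grain_scale_shift == 0
// (worked out from the helpers above, not quoted from the spec):
// get_grain_scale() gives grain_center = 128 << 2 = 512, grain_min = -512,
// grain_max = (256 << 2) - 1 - 512 = 511 and grain_scale = 1.0 / 1023, so a
// raw grain value of 511 ends up as roughly +0.5 in the normalized [0,1]
// range, and the initial noise below is round2(value, 12 - 10), i.e. the
// gaussian sample divided by 4 (with rounding).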
static void generate_grain_y(float out[GRAIN_HEIGHT_LUT][GRAIN_WIDTH_LUT], int16_t buf[GRAIN_HEIGHT][GRAIN_WIDTH], const struct pl_film_grain_params *params) { const struct pl_av1_grain_data *data = ¶ms->data.params.av1; struct grain_scale scale = get_grain_scale(params); uint16_t seed = (uint16_t) params->data.seed; int bits = bit_depth(params->repr); int shift = 12 - bits + data->grain_scale_shift; pl_assert(shift >= 0); for (int y = 0; y < GRAIN_HEIGHT; y++) { for (int x = 0; x < GRAIN_WIDTH; x++) { int16_t value = gaussian_sequence[ get_random_number(11, &seed) ]; buf[y][x] = round2(value, shift); } } const int ar_pad = 3; int ar_lag = data->ar_coeff_lag; for (int y = ar_pad; y < GRAIN_HEIGHT; y++) { for (int x = ar_pad; x < GRAIN_WIDTH - ar_pad; x++) { const int8_t *coeff = data->ar_coeffs_y; int sum = 0; for (int dy = -ar_lag; dy <= 0; dy++) { for (int dx = -ar_lag; dx <= ar_lag; dx++) { if (!dx && !dy) break; sum += *(coeff++) * buf[y + dy][x + dx]; } } int16_t grain = buf[y][x] + round2(sum, data->ar_coeff_shift); grain = PL_CLAMP(grain, scale.grain_min, scale.grain_max); buf[y][x] = grain; } } for (int y = 0; y < GRAIN_HEIGHT_LUT; y++) { for (int x = 0; x < GRAIN_WIDTH_LUT; x++) { int16_t grain = buf[y + GRAIN_PAD_LUT][x + GRAIN_PAD_LUT]; out[y][x] = grain * scale.grain_scale; } } } static void generate_grain_uv(float *out, int16_t buf[GRAIN_HEIGHT][GRAIN_WIDTH], const int16_t buf_y[GRAIN_HEIGHT][GRAIN_WIDTH], enum pl_channel channel, int sub_x, int sub_y, const struct pl_film_grain_params *params) { const struct pl_av1_grain_data *data = ¶ms->data.params.av1; struct grain_scale scale = get_grain_scale(params); int bits = bit_depth(params->repr); int shift = 12 - bits + data->grain_scale_shift; pl_assert(shift >= 0); uint16_t seed = params->data.seed; if (channel == PL_CHANNEL_CB) { seed ^= 0xb524; } else if (channel == PL_CHANNEL_CR) { seed ^= 0x49d8; } int chromaW = sub_x ? SUB_GRAIN_WIDTH : GRAIN_WIDTH; int chromaH = sub_y ? SUB_GRAIN_HEIGHT : GRAIN_HEIGHT; const int8_t *coeffs[] = { [PL_CHANNEL_CB] = data->ar_coeffs_uv[0], [PL_CHANNEL_CR] = data->ar_coeffs_uv[1], }; for (int y = 0; y < chromaH; y++) { for (int x = 0; x < chromaW; x++) { int16_t value = gaussian_sequence[ get_random_number(11, &seed) ]; buf[y][x] = round2(value, shift); } } const int ar_pad = 3; int ar_lag = data->ar_coeff_lag; for (int y = ar_pad; y < chromaH; y++) { for (int x = ar_pad; x < chromaW - ar_pad; x++) { const int8_t *coeff = coeffs[channel]; pl_assert(coeff); int sum = 0; for (int dy = -ar_lag; dy <= 0; dy++) { for (int dx = -ar_lag; dx <= ar_lag; dx++) { // For the final (current) pixel, we need to add in the // contribution from the luma grain texture if (!dx && !dy) { if (!data->num_points_y) break; int luma = 0; int lumaX = ((x - ar_pad) << sub_x) + ar_pad; int lumaY = ((y - ar_pad) << sub_y) + ar_pad; for (int i = 0; i <= sub_y; i++) { for (int j = 0; j <= sub_x; j++) { luma += buf_y[lumaY + i][lumaX + j]; } } luma = round2(luma, sub_x + sub_y); sum += luma * (*coeff); break; } sum += *(coeff++) * buf[y + dy][x + dx]; } } int16_t grain = buf[y][x] + round2(sum, data->ar_coeff_shift); grain = PL_CLAMP(grain, scale.grain_min, scale.grain_max); buf[y][x] = grain; } } int lutW = GRAIN_WIDTH_LUT >> sub_x; int lutH = GRAIN_HEIGHT_LUT >> sub_y; int padX = sub_x ? SUB_GRAIN_PAD_LUT : GRAIN_PAD_LUT; int padY = sub_y ? 
SUB_GRAIN_PAD_LUT : GRAIN_PAD_LUT; for (int y = 0; y < lutH; y++) { for (int x = 0; x < lutW; x++) { int16_t grain = buf[y + padY][x + padX]; out[y * lutW + x] = grain * scale.grain_scale; } } } static void generate_offsets(void *pbuf, const struct sh_lut_params *params) { const struct pl_film_grain_data *data = params->priv; unsigned int *buf = pbuf; pl_static_assert(sizeof(unsigned int) >= sizeof(uint32_t)); for (int y = 0; y < params->height; y++) { uint16_t state = data->seed; state ^= ((y * 37 + 178) & 0xFF) << 8; state ^= ((y * 173 + 105) & 0xFF); for (int x = 0; x < params->width; x++) { unsigned int *offsets = &buf[y * params->width + x]; uint8_t val = get_random_number(8, &state); uint8_t val_l = x ? (offsets - 1)[0] : 0; uint8_t val_t = y ? (offsets - params->width)[0] : 0; uint8_t val_tl = x && y ? (offsets - params->width - 1)[0] : 0; // Encode four offsets into a single 32-bit integer for the // convenience of the GPU. That way only one LUT fetch is // required for the entire block. *offsets = ((uint32_t) val_tl << OFFSET_TL) | ((uint32_t) val_t << OFFSET_T) | ((uint32_t) val_l << OFFSET_L) | ((uint32_t) val << OFFSET_N); } } } static void generate_scaling(void *pdata, const struct sh_lut_params *params) { assert(params->width == SCALING_LUT_SIZE && params->comps == 1); float *data = pdata; struct { int num; uint8_t (*points)[2]; const struct pl_av1_grain_data *data; } *ctx = params->priv; float range = 1 << ctx->data->scaling_shift; // Fill up the preceding entries with the initial value for (int i = 0; i < ctx->points[0][0]; i++) data[i] = ctx->points[0][1] / range; // Linearly interpolate the values in the middle for (int i = 0; i < ctx->num - 1; i++) { int bx = ctx->points[i][0]; int by = ctx->points[i][1]; int dx = ctx->points[i + 1][0] - bx; int dy = ctx->points[i + 1][1] - by; int delta = dy * ((0x10000 + (dx >> 1)) / dx); for (int x = 0; x < dx; x++) { int v = by + ((x * delta + 0x8000) >> 16); data[bx + x] = v / range; } } // Fill up the remaining entries with the final value for (int i = ctx->points[ctx->num - 1][0]; i < SCALING_LUT_SIZE; i++) data[i] = ctx->points[ctx->num - 1][1] / range; } static void sample(pl_shader sh, enum offset off, ident_t lut, int idx, int sub_x, int sub_y) { int dx = (off & OFFSET_L) ? 1 : 0, dy = (off & OFFSET_T) ? 1 : 0; static const char *index_strs[] = { [0] = ".x", [1] = ".y", }; GLSL("offset = uvec2(%du, %du) * uvec2((data >> %d) & 0xFu, \n" " (data >> %d) & 0xFu);\n" "pos = offset + local_id.xy + uvec2(%d, %d); \n" "val = "$"(pos)%s; \n", sub_x ? 1 : 2, sub_y ? 1 : 2, off + 4, off, (BLOCK_SIZE >> sub_x) * dx, (BLOCK_SIZE >> sub_y) * dy, lut, idx >= 0 ? 
index_strs[idx] : ""); } struct grain_obj_av1 { // LUT objects for the offsets, grain and scaling luts pl_shader_obj lut_offsets; pl_shader_obj lut_grain[2]; pl_shader_obj lut_scaling[3]; // Previous parameters used to check reusability struct pl_film_grain_data data; struct pl_color_repr repr; bool fg_has_y; bool fg_has_u; bool fg_has_v; // Space to store the temporary arrays, reused uint32_t *offsets; float grain[2][GRAIN_HEIGHT_LUT][GRAIN_WIDTH_LUT]; int16_t grain_tmp_y[GRAIN_HEIGHT][GRAIN_WIDTH]; int16_t grain_tmp_uv[GRAIN_HEIGHT][GRAIN_WIDTH]; }; static void av1_grain_uninit(pl_gpu gpu, void *ptr) { struct grain_obj_av1 *obj = ptr; pl_shader_obj_destroy(&obj->lut_offsets); for (int i = 0; i < PL_ARRAY_SIZE(obj->lut_grain); i++) pl_shader_obj_destroy(&obj->lut_grain[i]); for (int i = 0; i < PL_ARRAY_SIZE(obj->lut_scaling); i++) pl_shader_obj_destroy(&obj->lut_scaling[i]); *obj = (struct grain_obj_av1) {0}; } bool pl_needs_fg_av1(const struct pl_film_grain_params *params) { const struct pl_av1_grain_data *data = ¶ms->data.params.av1; bool has_y = data->num_points_y > 0; bool has_u = data->num_points_uv[0] > 0 || data->chroma_scaling_from_luma; bool has_v = data->num_points_uv[1] > 0 || data->chroma_scaling_from_luma; for (int i = 0; i < 3; i++) { enum pl_channel channel = channel_map(i, params); if (channel == PL_CHANNEL_Y && has_y) return true; if (channel == PL_CHANNEL_CB && has_u) return true; if (channel == PL_CHANNEL_CR && has_v) return true; } return false; } static inline bool av1_grain_data_eq(const struct pl_film_grain_data *da, const struct pl_film_grain_data *db) { const struct pl_av1_grain_data *a = &da->params.av1, *b = &db->params.av1; // Only check the fields that are relevant for grain LUT generation return da->seed == db->seed && a->chroma_scaling_from_luma == b->chroma_scaling_from_luma && a->scaling_shift == b->scaling_shift && a->ar_coeff_lag == b->ar_coeff_lag && a->ar_coeff_shift == b->ar_coeff_shift && a->grain_scale_shift == b->grain_scale_shift && !memcmp(a->ar_coeffs_y, b->ar_coeffs_y, sizeof(a->ar_coeffs_y)) && !memcmp(a->ar_coeffs_uv, b->ar_coeffs_uv, sizeof(a->ar_coeffs_uv)); } static void fill_grain_lut(void *data, const struct sh_lut_params *params) { struct grain_obj_av1 *obj = params->priv; size_t entries = params->width * params->height * params->comps; memcpy(data, obj->grain, entries * sizeof(float)); } bool pl_shader_fg_av1(pl_shader sh, pl_shader_obj *grain_state, const struct pl_film_grain_params *params) { int sub_x = 0, sub_y = 0; int tex_w = params->tex->params.w, tex_h = params->tex->params.h; if (params->luma_tex) { sub_x = params->luma_tex->params.w > tex_w; sub_y = params->luma_tex->params.h > tex_h; } const struct pl_av1_grain_data *data = ¶ms->data.params.av1; bool fg_has_y = data->num_points_y > 0; bool fg_has_u = data->num_points_uv[0] > 0 || data->chroma_scaling_from_luma; bool fg_has_v = data->num_points_uv[1] > 0 || data->chroma_scaling_from_luma; bool tex_is_y = false, tex_is_cb = false, tex_is_cr = false; for (int i = 0; i < 3; i++) { switch (channel_map(i, params)) { case PL_CHANNEL_Y: tex_is_y = true; break; case PL_CHANNEL_CB: tex_is_cb = true; break; case PL_CHANNEL_CR: tex_is_cr = true; break; default: break; }; } if (tex_is_y && (sub_x || sub_y)) { PL_WARN(sh, "pl_film_grain_params.channels includes PL_CHANNEL_Y but " "plane is subsampled, this makes no sense. 
Continuing anyway " "but output is likely incorrect."); } if (!sh_require(sh, PL_SHADER_SIG_NONE, tex_w, tex_h)) return false; pl_gpu gpu = SH_GPU(sh); if (!gpu) { PL_ERR(sh, "AV1 film grain synthesis requires a non-NULL pl_gpu!"); return false; } // Disable generation for unneeded component types fg_has_y &= tex_is_y; fg_has_u &= tex_is_cb; fg_has_v &= tex_is_cr; int bw = BLOCK_SIZE >> sub_x; int bh = BLOCK_SIZE >> sub_y; bool is_compute = sh_try_compute(sh, bw, bh, false, sizeof(uint32_t)); struct grain_obj_av1 *obj; obj = SH_OBJ(sh, grain_state, PL_SHADER_OBJ_AV1_GRAIN, struct grain_obj_av1, av1_grain_uninit); if (!obj) return false; // Note: In theory we could check only the parameters related to luma or // only related to chroma and skip updating for changes to irrelevant // parts, but this is probably not worth it since the seed is expected to // change per frame anyway. bool needs_update = !av1_grain_data_eq(¶ms->data, &obj->data) || !pl_color_repr_equal(params->repr, &obj->repr) || fg_has_y != obj->fg_has_y || fg_has_u != obj->fg_has_u || fg_has_v != obj->fg_has_v; if (needs_update) { // This is needed even for chroma, so statically generate it generate_grain_y(obj->grain[0], obj->grain_tmp_y, params); } ident_t lut[3]; int idx[3] = {-1}; if (fg_has_y) { lut[0] = sh_lut(sh, sh_lut_params( .object = &obj->lut_grain[0], .var_type = PL_VAR_FLOAT, .lut_type = SH_LUT_TEXTURE, .width = GRAIN_WIDTH_LUT, .height = GRAIN_HEIGHT_LUT, .comps = 1, .update = needs_update, .dynamic = true, .fill = fill_grain_lut, .priv = obj, )); if (!lut[0]) { SH_FAIL(sh, "Failed generating/uploading luma grain LUT!"); return false; } } // Try merging the chroma LUTs into a single texture int chroma_comps = 0; if (fg_has_u) { generate_grain_uv(&obj->grain[chroma_comps][0][0], obj->grain_tmp_uv, obj->grain_tmp_y, PL_CHANNEL_CB, sub_x, sub_y, params); idx[1] = chroma_comps++; } if (fg_has_v) { generate_grain_uv(&obj->grain[chroma_comps][0][0], obj->grain_tmp_uv, obj->grain_tmp_y, PL_CHANNEL_CR, sub_x, sub_y, params); idx[2] = chroma_comps++; } if (chroma_comps > 0) { lut[1] = lut[2] = sh_lut(sh, sh_lut_params( .object = &obj->lut_grain[1], .var_type = PL_VAR_FLOAT, .lut_type = SH_LUT_TEXTURE, .width = GRAIN_WIDTH_LUT >> sub_x, .height = GRAIN_HEIGHT_LUT >> sub_y, .comps = chroma_comps, .update = needs_update, .dynamic = true, .fill = fill_grain_lut, .priv = obj, )); if (!lut[1]) { SH_FAIL(sh, "Failed generating/uploading chroma grain LUT!"); return false; } if (chroma_comps == 1) idx[1] = idx[2] = -1; } ident_t offsets = sh_lut(sh, sh_lut_params( .object = &obj->lut_offsets, .var_type = PL_VAR_UINT, .lut_type = SH_LUT_AUTO, .width = PL_ALIGN2(tex_w << sub_x, 128) / 32, .height = PL_ALIGN2(tex_h << sub_y, 128) / 32, .comps = 1, .update = needs_update, .dynamic = true, .fill = generate_offsets, .priv = (void *) ¶ms->data, )); if (!offsets) { SH_FAIL(sh, "Failed generating/uploading block offsets LUT!"); return false; } // For the scaling LUTs, we assume they'll be relatively constant // throughout the video so doing some extra work to avoid reinitializing // them constantly is probably worth it. Probably. 
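// As a concrete example of what generate_scaling() above produces (made-up
// illustrative values, not taken from any real stream): with
// scaling_shift == 8 and the two points (0, 64) and (128, 192), the
// fixed-point slope is delta = 128 * ((0x10000 + 64) / 128) = 65536, so the
// midpoint entry becomes
// data[64] = (64 + ((64 * 65536 + 0x8000) >> 16)) / 256.0 = 128 / 256 = 0.5,
// i.e. exact linear interpolation between the two control points.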
const struct pl_av1_grain_data *obj_data = &obj->data.params.av1; bool scaling_changed = false; if (fg_has_y || data->chroma_scaling_from_luma) { scaling_changed |= data->num_points_y != obj_data->num_points_y; scaling_changed |= memcmp(data->points_y, obj_data->points_y, sizeof(data->points_y)); } if (fg_has_u && !data->chroma_scaling_from_luma) { scaling_changed |= data->num_points_uv[0] != obj_data->num_points_uv[0]; scaling_changed |= memcmp(data->points_uv[0], obj_data->points_uv[0], sizeof(data->points_uv[0])); } if (fg_has_v && !data->chroma_scaling_from_luma) { scaling_changed |= data->num_points_uv[1] != obj_data->num_points_uv[1]; scaling_changed |= memcmp(data->points_uv[1], obj_data->points_uv[1], sizeof(data->points_uv[1])); } ident_t scaling[3] = {0}; for (int i = 0; i < 3; i++) { struct { int num; const uint8_t (*points)[2]; const struct pl_av1_grain_data *data; } priv; priv.data = data; if (i == 0 || data->chroma_scaling_from_luma) { priv.num = data->num_points_y; priv.points = &data->points_y[0]; } else { priv.num = data->num_points_uv[i - 1]; priv.points = &data->points_uv[i - 1][0]; } // Skip scaling for unneeded channels bool has_c[3] = { fg_has_y, fg_has_u, fg_has_v }; if (has_c[i] && priv.num > 0) { scaling[i] = sh_lut(sh, sh_lut_params( .object = &obj->lut_scaling[i], .var_type = PL_VAR_FLOAT, .method = SH_LUT_LINEAR, .width = SCALING_LUT_SIZE, .comps = 1, .update = scaling_changed, .dynamic = true, .fill = generate_scaling, .priv = &priv, )); if (!scaling[i]) { SH_FAIL(sh, "Failed generating/uploading scaling LUTs!"); return false; } } } // Done updating LUTs obj->data = params->data; obj->repr = *params->repr; obj->fg_has_y = fg_has_y; obj->fg_has_u = fg_has_u; obj->fg_has_v = fg_has_v; sh_describe(sh, "AV1 film grain"); GLSL("vec4 color; \n" "// pl_shader_film_grain (AV1) \n" "{ \n" "uvec2 offset; \n" "uvec2 pos; \n" "float val; \n" "float grain; \n"); if (is_compute) { GLSL("uvec2 block_id = gl_WorkGroupID.xy; \n" "uvec2 local_id = gl_LocalInvocationID.xy; \n" "uvec2 global_id = gl_GlobalInvocationID.xy; \n"); } else { GLSL("uvec2 global_id = uvec2(gl_FragCoord); \n" "uvec2 block_id = global_id / uvec2(%d, %d); \n" "uvec2 local_id = global_id - uvec2(%d, %d) * block_id; \n", bw, bh, bw, bh); } // Load the data vector which holds the offsets if (is_compute) { ident_t id = sh_fresh(sh, "data"); GLSLH("shared uint "$"; \n", id); GLSL("if (gl_LocalInvocationIndex == 0u) \n" " "$" = uint("$"(block_id)); \n" "barrier(); \n" "uint data = "$"; \n", id, offsets, id); } else { GLSL("uint data = uint("$"(block_id)); \n", offsets); } struct grain_scale scale = get_grain_scale(params); pl_color_repr_normalize(params->repr); int bits = PL_DEF(params->repr->bits.color_depth, 8); pl_assert(bits >= 8); ident_t minValue, maxLuma, maxChroma; if (pl_color_levels_guess(params->repr) == PL_COLOR_LEVELS_LIMITED) { float out_scale = (1 << bits) / ((1 << bits) - 1.0); minValue = SH_FLOAT(16 / 256.0 * out_scale); maxLuma = SH_FLOAT(235 / 256.0 * out_scale); maxChroma = SH_FLOAT(240 / 256.0 * out_scale); if (!pl_color_system_is_ycbcr_like(params->repr->sys)) maxChroma = maxLuma; } else { minValue = SH_FLOAT(0.0); maxLuma = SH_FLOAT(1.0); maxChroma = SH_FLOAT(1.0); } // Load the color value of the tex itself ident_t tex = sh_desc(sh, (struct pl_shader_desc) { .binding.object = params->tex, .desc = (struct pl_desc) { .name = "tex", .type = PL_DESC_SAMPLED_TEX, }, }); ident_t tex_scale = SH_FLOAT(scale.texture_scale); GLSL("color = vec4("$") * texelFetch("$", ivec2(global_id), 0); \n", 
tex_scale, tex); // If we need access to the external luma plane, load it now if (tex_is_cb || tex_is_cr) { GLSL("float averageLuma; \n"); if (tex_is_y) { // We already have the luma channel as part of the pre-sampled color for (int i = 0; i < 3; i++) { if (channel_map(i, params) == PL_CHANNEL_Y) { GLSL("averageLuma = color["$"]; \n", SH_INT(i)); break; } } } else { // Luma channel not present in image, attach it separately pl_assert(params->luma_tex); ident_t luma = sh_desc(sh, (struct pl_shader_desc) { .binding.object = params->luma_tex, .desc = (struct pl_desc) { .name = "luma", .type = PL_DESC_SAMPLED_TEX, }, }); GLSL("pos = global_id * uvec2(%du, %du); \n" "averageLuma = texelFetch("$", ivec2(pos), 0)["$"]; \n" "averageLuma *= "$"; \n", 1 << sub_x, 1 << sub_y, luma, SH_INT(params->luma_comp), tex_scale); } } ident_t grain_min = SH_FLOAT(scale.grain_min * scale.grain_scale); ident_t grain_max = SH_FLOAT(scale.grain_max * scale.grain_scale); for (int i = 0; i < params->components; i++) { enum pl_channel c = channel_map(i, params); if (c == PL_CHANNEL_NONE) continue; if (!scaling[c]) continue; sample(sh, OFFSET_N, lut[c], idx[c], sub_x, sub_y); GLSL("grain = val; \n"); if (data->overlap) { const char *weights[] = { "vec2(27.0, 17.0)", "vec2(23.0, 22.0)" }; // X-direction overlapping GLSL("if (block_id.x > 0u && local_id.x < %du) { \n" "vec2 w = %s / 32.0; \n" "if (local_id.x == 1u) w.xy = w.yx; \n", 2 >> sub_x, weights[sub_x]); sample(sh, OFFSET_L, lut[c], idx[c], sub_x, sub_y); GLSL("grain = dot(vec2(val, grain), w); \n" "} \n"); // Y-direction overlapping GLSL("if (block_id.y > 0u && local_id.y < %du) { \n" "vec2 w = %s / 32.0; \n" "if (local_id.y == 1u) w.xy = w.yx; \n", 2 >> sub_y, weights[sub_y]); // We need to special-case the top left pixels since these need to // pre-blend the top-left offset block before blending vertically GLSL(" if (block_id.x > 0u && local_id.x < %du) {\n" " vec2 w2 = %s / 32.0; \n" " if (local_id.x == 1u) w2.xy = w2.yx; \n", 2 >> sub_x, weights[sub_x]); sample(sh, OFFSET_TL, lut[c], idx[c], sub_x, sub_y); GLSL(" float tmp = val; \n"); sample(sh, OFFSET_T, lut[c], idx[c], sub_x, sub_y); GLSL(" val = dot(vec2(tmp, val), w2); \n" " } else { \n"); sample(sh, OFFSET_T, lut[c], idx[c], sub_x, sub_y); GLSL(" } \n" "grain = dot(vec2(val, grain), w); \n" "} \n"); // Correctly clip the interpolated grain GLSL("grain = clamp(grain, "$", "$"); \n", grain_min, grain_max); } if (c == PL_CHANNEL_Y) { GLSL("color[%d] += "$"(color[%d]) * grain; \n" "color[%d] = clamp(color[%d], "$", "$"); \n", i, scaling[c], i, i, i, minValue, maxLuma); } else { GLSL("val = averageLuma; \n"); if (!data->chroma_scaling_from_luma) { // We need to load some extra variables for the mixing. Do this // using sh_var instead of hard-coding them to avoid shader // recompilation when these values change. 
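// Spelled out for readability (this simply mirrors the GLSL emitted below):
// in normalized [0,1] units the blended scaling index is
//   val = (uv_mult_luma * averageLuma + uv_mult * chroma) / 64.0
//       + (uv_offset << (bits - 8)) * grain_scale,
// which is then fed through the chroma scaling LUT.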
ident_t mult = sh_var(sh, (struct pl_shader_var) { .var = pl_var_vec2("mult"), .data = &(float[2]){ data->uv_mult_luma[c - 1] / 64.0, data->uv_mult[c - 1] / 64.0, }, }); int c_offset = (unsigned) data->uv_offset[c - 1] << (bits - 8); ident_t offset = sh_var(sh, (struct pl_shader_var) { .var = pl_var_float("offset"), .data = &(float) { c_offset * scale.grain_scale }, }); GLSL("val = dot(vec2(val, color[%d]), "$"); \n" "val += "$"; \n", i, mult, offset); } GLSL("color[%d] += "$"(val) * grain; \n" "color[%d] = clamp(color[%d], "$", "$"); \n", i, scaling[c], i, i, minValue, maxChroma); } } GLSL("} \n"); return true; } libplacebo-v7.349.0/src/shaders/film_grain_h274.c000066400000000000000000001347431463457750100214470ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #include "shaders.h" #include "shaders/film_grain.h" static const int8_t Gaussian_LUT[2048+4]; static const uint32_t Seed_LUT[256]; static const int8_t R64T[64][64]; static void prng_shift(uint32_t *state) { // Primitive polynomial x^31 + x^3 + 1 (modulo 2) uint32_t x = *state; uint8_t feedback = 1u ^ (x >> 2) ^ (x >> 30); *state = (x << 1) | (feedback & 1u); } static void generate_slice(float *out, size_t out_width, uint8_t h, uint8_t v, int8_t grain[64][64], int16_t tmp[64][64]) { const uint8_t freq_h = ((h + 3) << 2) - 1; const uint8_t freq_v = ((v + 3) << 2) - 1; uint32_t seed = Seed_LUT[h + v * 13]; // Initialize with random gaussian values, using the output array as a // temporary buffer for these intermediate values. 
// // Note: To make the subsequent matrix multiplication cache friendlier, we // store each *column* of the starting image in a *row* of `grain` for (int y = 0; y <= freq_v; y++) { for (int x = 0; x <= freq_h; x += 4) { uint16_t offset = seed % 2048; grain[x + 0][y] = Gaussian_LUT[offset + 0]; grain[x + 1][y] = Gaussian_LUT[offset + 1]; grain[x + 2][y] = Gaussian_LUT[offset + 2]; grain[x + 3][y] = Gaussian_LUT[offset + 3]; prng_shift(&seed); } } grain[0][0] = 0; // 64x64 inverse integer transform for (int y = 0; y < 64; y++) { for (int x = 0; x <= freq_h; x++) { int32_t sum = 0; for (int p = 0; p <= freq_v; p++) sum += R64T[y][p] * grain[x][p]; tmp[y][x] = (sum + 128) >> 8; } } for (int y = 0; y < 64; y++) { for (int x = 0; x < 64; x++) { int32_t sum = 0; for (int p = 0; p <= freq_h; p++) sum += tmp[y][p] * R64T[x][p]; // R64T^T = R64 sum = (sum + 128) >> 8; grain[y][x] = PL_CLAMP(sum, -127, 127); } } static const uint8_t deblock_factors[13] = { 64, 71, 77, 84, 90, 96, 103, 109, 116, 122, 128, 128, 128 }; // Deblock horizontal edges by simple attentuation of values const uint8_t deblock_coeff = deblock_factors[v]; for (int y = 0; y < 64; y++) { switch (y % 8) { case 0: case 7: // Deblock for (int x = 0; x < 64; x++) out[x] = ((grain[y][x] * deblock_coeff) >> 7) / 255.0; break; case 1: case 2: case 3: case 4: case 5: case 6: // No deblock for (int x = 0; x < 64; x++) out[x] = grain[y][x] / 255.0; break; default: pl_unreachable(); } out += out_width; } } static void fill_grain_lut(void *data, const struct sh_lut_params *params) { struct { int8_t grain[64][64]; int16_t tmp[64][64]; } *tmp = pl_alloc_ptr(NULL, tmp); float *out = data; assert(params->var_type == PL_VAR_FLOAT); for (int h = 0; h < 13; h++) { for (int v = 0; v < 13; v++) { float *slice = out + (h * 64) * params->width + (v * 64); generate_slice(slice, params->width, h, v, tmp->grain, tmp->tmp); } } pl_free(tmp); } bool pl_needs_fg_h274(const struct pl_film_grain_params *params) { const struct pl_h274_grain_data *data = ¶ms->data.params.h274; if (data->model_id != 0) return false; for (int i = 0; i < 3; i++) { enum pl_channel channel = channel_map(i, params); if (channel < 0 || channel >= 3) continue; if (data->component_model_present[channel]) return true; } return false; } bool pl_shader_fg_h274(pl_shader sh, pl_shader_obj *grain_state, const struct pl_film_grain_params *params) { if (!sh_require(sh, PL_SHADER_SIG_NONE, params->tex->params.w, params->tex->params.h)) return false; size_t shmem_req = 0; ident_t group_sum = NULL_IDENT; const struct pl_glsl_version glsl = sh_glsl(sh); if (glsl.subgroup_size < 8*8) { group_sum = sh_fresh(sh, "group_sum"); shmem_req += sizeof(int); GLSLH("shared int "$"; \n", group_sum); GLSL($" = 0; barrier(); \n", group_sum); } if (!sh_try_compute(sh, 8, 8, false, shmem_req)) { SH_FAIL(sh, "H.274 film grain synthesis requires compute shaders!"); return false; } ident_t db = sh_lut(sh, sh_lut_params( .object = grain_state, .var_type = PL_VAR_FLOAT, .lut_type = SH_LUT_TEXTURE, .width = 13 * 64, .height = 13 * 64, .comps = 1, .fill = fill_grain_lut, .signature = CACHE_KEY_H274, // doesn't depend on anything .cache = SH_CACHE(sh), )); sh_describe(sh, "H.274 film grain"); GLSL("vec4 color; \n" "// pl_shader_film_grain (H.274) \n" "{ \n"); // Load the color value of the tex itself ident_t tex = sh_desc(sh, (struct pl_shader_desc) { .binding.object = params->tex, .desc = (struct pl_desc) { .name = "tex", .type = PL_DESC_SAMPLED_TEX, }, }); GLSL("ivec2 pos = ivec2(gl_GlobalInvocationID); \n" "color = 
vec4("$") * texelFetch("$", pos, 0); \n", SH_FLOAT(pl_color_repr_normalize(params->repr)), tex); const struct pl_h274_grain_data *data = ¶ms->data.params.h274; ident_t scale_factor = sh_var(sh, (struct pl_shader_var) { .var = pl_var_float("scale_factor"), .data = &(float){ 1.0 / (1 << (data->log2_scale_factor + 6)) }, }); // pcg3d (http://www.jcgt.org/published/0009/03/02/) GLSL("uvec3 pcg = uvec3("$", gl_WorkGroupID.xy / 2u); \n" "pcg = pcg * 1664525u + 1013904223u; \n" "pcg.x += pcg.y * pcg.z; \n" "pcg.y += pcg.z * pcg.x; \n" "pcg.z += pcg.x * pcg.y; \n" "pcg ^= pcg >> 16u; \n" "pcg.x += pcg.y * pcg.z; \n" "pcg.y += pcg.z * pcg.x; \n" "pcg.z += pcg.x * pcg.y; \n", sh_var(sh, (struct pl_shader_var) { .var = pl_var_uint("seed"), .data = &(unsigned int){ params->data.seed }, })); for (int idx = 0; idx < params->components; idx++) { enum pl_channel c = channel_map(idx, params); if (c == PL_CHANNEL_NONE) continue; if (!data->component_model_present[c]) continue; GLSL("// component %d\n{\n", c); // Compute the local 8x8 average GLSL("float avg = color[%d] / 64.0; \n", c); const int precision = 10000000; if (glsl.subgroup_size) { GLSL("avg = subgroupAdd(avg); \n"); if (glsl.subgroup_size < 8*8) { GLSL("if (subgroupElect()) \n" " atomicAdd("$", int(avg * %d.0)); \n" "barrier(); \n" "avg = float("$") / %d.0; \n", group_sum, precision, group_sum, precision); } } else { GLSL("atomicAdd("$", int(avg * %d.0)); \n" "barrier(); \n" "avg = float("$") / %d.0; \n", group_sum, precision, group_sum, precision); } // Hard-coded unrolled loop, to avoid having to load a dynamically // sized array into the shader - and to optimize for the very common // case of there only being a single intensity interval GLSL("uint val; \n"); for (int i = 0; i < data->num_intensity_intervals[c]; i++) { ident_t bounds = sh_var(sh, (struct pl_shader_var) { .var = pl_var_vec2("bounds"), .data = &(float[2]) { data->intensity_interval_lower_bound[c][i] / 255.0, data->intensity_interval_upper_bound[c][i] / 255.0, }, }); const uint8_t num_values = data->num_model_values[c]; uint8_t h = num_values > 1 ? data->comp_model_value[c][i][1] : 8; uint8_t v = num_values > 2 ? data->comp_model_value[c][i][2] : h; h = PL_CLAMP(h, 2, 14) - 2; v = PL_CLAMP(v, 2, 14) - 2; // FIXME: double h/v for subsampled planes! // Reduce scale for chroma planes int16_t scale = data->comp_model_value[c][i][0]; if (c > 0 && pl_color_system_is_ycbcr_like(params->repr->sys)) scale >>= 1; pl_static_assert(sizeof(unsigned int) >= sizeof(uint32_t)); ident_t values = sh_var(sh, (struct pl_shader_var) { .var = pl_var_uint("comp_model_value"), .data = &(unsigned int) { (uint16_t) scale << 16 | h << 8 | v, }, }); GLSL("if (avg >= "$".x && avg <= "$".y) \n" " val = "$"; else \n", bounds, bounds, values); } GLSL(" val = 0u; \n"); // Extract the grain parameters from comp_model_value GLSL("uvec2 offset = uvec2((val & 0xFF00u) >> 2, \n" " (val & 0xFFu) << 6); \n" "float scale = "$" * float(int(val >> 16)); \n" // Add randomness "uint rand = pcg[%d]; \n" "offset.x += (rand >> 16u) %% 52u; \n" "offset.y += (rand & 0xFFFFu) %% 56u; \n" "offset.x &= 0xFFFCu; \n" "offset.y &= 0xFFF8u; \n" "if ((rand & 1u) == 1u) scale = -scale; \n" // Add local offset and compute grain "offset += 8u * (gl_WorkGroupID.xy %% 2u); \n" "offset += gl_LocalInvocationID.xy; \n" "float grain = "$"(offset); \n" "color[%d] += scale * grain; \n", scale_factor, c, db, c); // TODO: Deblocking? 
GLSL("}\n"); } GLSL("} \n"); return true; } // These tables are all taken from the SMPTE RDD 5-2006 specification static const int8_t Gaussian_LUT[2048+4] = { -11, 12, 103, -11, 42, -35, 12, 59, 77, 98, -87, 3, 65, -78, 45, 56, -51, 21, 13, -11, -20, -19, 33, -127, 17, -6, -105, 18, 19, 71, 48, -10, -38, 42, -2, 75, -67, 52, -90, 33, -47, 21, -3, -56, 49, 1, -57, -42, -1, 120, -127, -108, -49, 9, 14, 127, 122, 109, 52, 127, 2, 7, 114, 19, 30, 12, 77, 112, 82, -61, -127, 111, -52, -29, 2, -49, -24, 58, -29, -73, 12, 112, 67, 79, -3, -114, -87, -6, -5, 40, 58, -81, 49, -27, -31, -34, -105, 50, 16, -24, -35, -14, -15, -127, -55, -22, -55, -127, -112, 5, -26, -72, 127, 127, -2, 41, 87, -65, -16, 55, 19, 91, -81, -65, -64, 35, -7, -54, 99, -7, 88, 125, -26, 91, 0, 63, 60, -14, -23, 113, -33, 116, 14, 26, 51, -16, 107, -8, 53, 38, -34, 17, -7, 4, -91, 6, 63, 63, -15, 39, -36, 19, 55, 17, -51, 40, 33, -37, 126, -39, -118, 17, -30, 0, 19, 98, 60, 101, -12, -73, -17, -52, 98, 3, 3, 60, 33, -3, -2, 10, -42, -106, -38, 14, 127, 16, -127, -31, -86, -39, -56, 46, -41, 75, 23, -19, -22, -70, 74, -54, -2, 32, -45, 17, -92, 59, -64, -67, 56, -102, -29, -87, -34, -92, 68, 5, -74, -61, 93, -43, 14, -26, -38, -126, -17, 16, -127, 64, 34, 31, 93, 17, -51, -59, 71, 77, 81, 127, 127, 61, 33, -106, -93, 0, 0, 75, -69, 71, 127, -19, -111, 30, 23, 15, 2, 39, 92, 5, 42, 2, -6, 38, 15, 114, -30, -37, 50, 44, 106, 27, 119, 7, -80, 25, -68, -21, 92, -11, -1, 18, 41, -50, 79, -127, -43, 127, 18, 11, -21, 32, -52, 27, -88, -90, -39, -19, -10, 24, -118, 72, -24, -44, 2, 12, 86, -107, 39, -33, -127, 47, 51, -24, -22, 46, 0, 15, -35, -69, -2, -74, 24, -6, 0, 29, -3, 45, 32, -32, 117, -45, 79, -24, -17, -109, -10, -70, 88, -48, 24, -91, 120, -37, 50, -127, 58, 32, -82, -10, -17, -7, 46, -127, -15, 89, 127, 17, 98, -39, -33, 37, 42, -40, -32, -21, 105, -19, 19, 19, -59, -9, 30, 0, -127, 34, 127, -84, 75, 24, -40, -49, -127, -107, -14, 45, -75, 1, 30, -20, 41, -68, -40, 12, 127, -3, 5, 20, -73, -59, -127, -3, -3, -53, -6, -119, 93, 120, -80, -50, 0, 20, -46, 67, 78, -12, -22, -127, 36, -41, 56, 119, -5, -116, -22, 68, -14, -90, 24, -82, -44, -127, 107, -25, -37, 40, -7, -7, -82, 5, -87, 44, -34, 9, -127, 39, 70, 49, -63, 74, -49, 109, -27, -89, -47, -39, 44, 49, -4, 60, -42, 80, 9, -127, -9, -56, -49, 125, -66, 47, 36, 117, 15, -11, -96, 109, 94, -17, -56, 70, 8, -14, -5, 50, 37, -45, 120, -30, -76, 40, -46, 6, 3, 69, 17, -78, 1, -79, 6, 127, 43, 26, 127, -127, 28, -55, -26, 55, 112, 48, 107, -1, -77, -1, 53, -9, -22, -43, 123, 108, 127, 102, 68, 46, 5, 1, 123, -13, -55, -34, -49, 89, 65, -105, -5, 94, -53, 62, 45, 30, 46, 18, -35, 15, 41, 47, -98, -24, 94, -75, 127, -114, 127, -68, 1, -17, 51, -95, 47, 12, 34, -45, -75, 89, -107, -9, -58, -29, -109, -24, 127, -61, -13, 77, -45, 17, 19, 83, -24, 9, 127, -66, 54, 4, 26, 13, 111, 43, -113, -22, 10, -24, 83, 67, -14, 75, -123, 59, 127, -12, 99, -19, 64, -38, 54, 9, 7, 61, -56, 3, -57, 113, -104, -59, 3, -9, -47, 74, 85, -55, -34, 12, 118, 28, 93, -72, 13, -99, -72, -20, 30, 72, -94, 19, -54, 64, -12, -63, -25, 65, 72, -10, 127, 0, -127, 103, -20, -73, -112, -103, -6, 28, -42, -21, -59, -29, -26, 19, -4, -51, 94, -58, -95, -37, 35, 20, -69, 127, -19, -127, -22, -120, -53, 37, 74, -127, -1, -12, -119, -53, -28, 38, 69, 17, 16, -114, 89, 62, 24, 37, -23, 49, -101, -32, -9, -95, -53, 5, 93, -23, -49, -8, 51, 3, -75, -90, -10, -39, 127, -86, -22, 20, 20, 113, 75, 52, -31, 92, -63, 7, -12, 46, 36, 101, -43, -17, -53, -7, -38, -76, -31, -21, 62, 31, 62, 20, -127, 31, 64, 36, 102, -85, 
-10, 77, 80, 58, -79, -8, 35, 8, 80, -24, -9, 3, -17, 72, 127, 83, -87, 55, 18, -119, -123, 36, 10, 127, 56, -55, 113, 13, 26, 32, -13, -48, 22, -13, 5, 58, 27, 24, 26, -11, -36, 37, -92, 78, 81, 9, 51, 14, 67, -13, 0, 32, 45, -76, 32, -39, -22, -49, -127, -27, 31, -9, 36, 14, 71, 13, 57, 12, -53, -86, 53, -44, -35, 2, 127, 12, -66, -44, 46, -115, 3, 10, 56, -35, 119, -19, -61, 52, -59, -127, -49, -23, 4, -5, 17, -82, -6, 127, 25, 79, 67, 64, -25, 14, -64, -37, -127, -28, 21, -63, 66, -53, -41, 109, -62, 15, -22, 13, 29, -63, 20, 27, 95, -44, -59, -116, -10, 79, -49, 22, -43, -16, 46, -47, -120, -36, -29, -52, -44, 29, 127, -13, 49, -9, -127, 75, -28, -23, 88, 59, 11, -95, 81, -59, 58, 60, -26, 40, -92, -3, -22, -58, -45, -59, -22, -53, 71, -29, 66, -32, -23, 14, -17, -66, -24, -28, -62, 47, 38, 17, 16, -37, -24, -11, 8, -27, -19, 59, 45, -49, -47, -4, -22, -81, 30, -67, -127, 74, 102, 5, -18, 98, 34, -66, 42, -52, 7, -59, 24, -58, -19, -24, -118, -73, 91, 15, -16, 79, -32, -79, -127, -36, 41, 77, -83, 2, 56, 22, -75, 127, -16, -21, 12, 31, 56, -113, -127, 90, 55, 61, 12, 55, -14, -113, -14, 32, 49, -67, -17, 91, -10, 1, 21, 69, -70, 99, -19, -112, 66, -90, -10, -9, -71, 127, 50, -81, -49, 24, 61, -61, -111, 7, -41, 127, 88, -66, 108, -127, -6, 36, -14, 41, -50, 14, 14, 73, -101, -28, 77, 127, -8, -100, 88, 38, 121, 88, -125, -60, 13, -94, -115, 20, -67, -87, -94, -119, 44, -28, -30, 18, 5, -53, -61, 20, -43, 11, -77, -60, 13, 29, 3, 6, -72, 38, -60, -11, 108, -53, 41, 66, -12, -127, -127, -49, 24, 29, 46, 36, 91, 34, -33, 116, -51, -34, -52, 91, 7, -83, 73, -26, -103, 24, -10, 76, 84, 5, 68, -80, -13, -17, -32, -48, 20, 50, 26, 10, 63, -104, -14, 37, 127, 114, 97, 35, 1, -33, -55, 127, -124, -33, 61, -7, 119, -32, -127, -53, -42, 63, 3, -5, -26, 70, -58, -33, -44, -43, 34, -56, -127, 127, 25, -35, -11, 16, -81, 29, -58, 40, -127, -127, 20, -47, -11, -36, -63, -52, -32, -82, 78, -76, -73, 8, 27, -72, -9, -74, -85, -86, -57, 25, 78, -10, -97, 35, -65, 8, -59, 14, 1, -42, 32, -88, -44, 17, -3, -9, 59, 40, 12, -108, -40, 24, 34, 18, -28, 2, 51, -110, -4, 100, 1, 65, 22, 0, 127, 61, 45, 25, -31, 6, 9, -7, -48, 99, 16, 44, -2, -40, 32, -39, -52, 10, -110, -19, 56, -127, 69, 26, 51, 92, 40, 61, -52, 45, -38, 13, 85, 122, 27, 66, 45, -111, -83, -3, 31, 37, 19, -36, 58, 71, 39, -78, -47, 58, -78, 8, -62, -36, -14, 61, 42, -127, 71, -4, 24, -54, 52, -127, 67, -4, -42, 30, -63, 59, -3, -1, -18, -46, -92, -81, -96, -14, -53, -10, -11, -77, 13, 1, 8, -67, -127, 127, -28, 26, -14, 18, -13, -26, 2, 10, -46, -32, -15, 27, -31, -59, 59, 77, -121, 28, 40, -54, -62, -31, -21, -37, -32, -6, -127, -25, -60, 70, -127, 112, -127, 127, 88, -7, 116, 110, 53, 87, -127, 3, 16, 23, 74, -106, -51, 3, 74, -82, -112, -74, 65, 81, 25, 53, 127, -45, -50, -103, -41, -65, -29, 79, -67, 64, -33, -30, -8, 127, 0, -13, -51, 67, -14, 5, -92, 29, -35, -8, -90, -57, -3, 36, 43, 44, -31, -69, -7, 36, 39, -51, 43, -81, 58, 6, 127, 12, 57, 66, 46, 59, -43, -42, 41, -15, -120, 24, 3, -11, 19, -13, 51, 28, 3, 55, -48, -12, -1, 2, 97, -19, 29, 42, 13, 43, 78, -44, 56, -108, -43, -19, 127, 15, -11, -18, -81, 83, -37, 77, -109, 15, 65, -50, 43, 12, 13, 27, 28, 61, 57, 30, 26, 106, -18, 56, 13, 97, 4, -8, -62, -103, 94, 108, -44, 52, 27, -47, -9, 105, -53, 46, 89, 103, -33, 38, -34, 55, 51, 70, -94, -35, -87, -107, -19, -31, 9, -19, 79, -14, 77, 5, -19, -107, 85, 21, -45, -39, -42, 9, -29, 74, 47, -75, 60, -127, 120, -112, -57, -32, 41, 7, 79, 76, 66, 57, 41, -25, 31, 37, -47, -36, 43, -73, -37, 63, 127, -69, -52, 90, -33, -61, 60, -55, 
44, 15, 4, -67, 13, -92, 64, 29, -39, -3, 83, -2, -38, -85, -86, 58, 35, -69, -61, 29, -37, -95, -78, 4, 30, -4, -32, -80, -22, -9, -77, 46, 7, -93, -71, 65, 9, -50, 127, -70, 26, -12, -39, -114, 63, -127, -100, 4, -32, 111, 22, -60, 65, -101, 26, -42, 21, -59, -27, -74, 2, -94, 6, 126, 5, 76, -88, -9, -43, -101, 127, 1, 125, 92, -63, 52, 56, 4, 81, -127, 127, 80, 127, -29, 30, 116, -74, -17, -57, 105, 48, 45, 25, -72, 48, -38, -108, 31, -34, 4, -11, 41, -127, 52, -104, -43, -37, 52, 2, 47, 87, -9, 77, 27, -41, -25, 90, 86, -56, 75, 10, 33, 78, 58, 127, 127, -7, -73, 49, -33, -106, -35, 38, 57, 53, -17, -4, 83, 52, -108, 54, -125, 28, 23, 56, -43, -88, -17, -6, 47, 23, -9, 0, -13, 111, 75, 27, -52, -38, -34, 39, 30, 66, 39, 38, -64, 38, 3, 21, -32, -51, -28, 54, -38, -87, 20, 52, 115, 18, -81, -70, 0, -14, -46, -46, -3, 125, 16, -14, 23, -82, -84, -69, -20, -65, -127, 9, 81, -49, 61, 7, -36, -45, -42, 57, -26, 47, 20, -85, 46, -13, 41, -37, -75, -60, 86, -78, -127, 12, 50, 2, -3, 13, 47, 5, 19, -78, -55, -27, 65, -71, 12, -108, 20, -16, 11, -31, 63, -55, 37, 75, -17, 127, -73, -33, -28, -120, 105, 68, 106, -103, -106, 71, 61, 2, 23, -3, 33, -5, -15, -67, -15, -23, -54, 15, -63, 76, 58, -110, 1, 83, -27, 22, 75, -39, -17, -11, 64, -17, -127, -54, -66, 31, 96, 116, 3, -114, -7, -108, -63, 97, 9, 50, 8, 75, -28, 72, 112, -36, -112, 95, -50, 23, -13, -19, 55, 21, 23, 92, 91, 22, -49, 16, -75, 23, 9, -49, -97, -37, 49, -36, 36, -127, -86, 43, 127, -24, -24, 84, 83, -35, -34, -12, 109, 102, -38, 51, -68, 34, 19, -22, 49, -32, 127, 40, 24, -93, -4, -3, 105, 3, -58, -18, 8, 127, -18, 125, 68, 69, -62, 30, -36, 54, -57, -24, 17, 43, -36, -27, -57, -67, -21, -10, -49, 68, 12, 65, 4, 48, 55, 127, -75, 44, 89, -66, -13, -78, -82, -91, 22, 30, 33, -40, -87, -34, 96, -91, 39, 10, -64, -3, -12, 127, -50, -37, -56, 23, -35, -36, -54, 90, -91, 2, 50, 77, -6, -127, 16, 46, -5, -73, 0, -56, -18, -72, 28, 93, 60, 49, 20, 18, 111, -111, 32, -83, 47, 47, -10, 35, -88, 43, 57, -98, 127, -17, 0, 1, -39, -127, -2, 0, 63, 93, 0, 36, -66, -61, -19, 39, -127, 58, 50, -17, 127, 88, -43, -108, -51, -16, 7, -36, 68, 46, -14, 107, 40, 57, 7, 19, 8, 3, 88, -90, -92, -18, -21, -24, 13, 7, -4, -78, -91, -4, 8, -35, -5, 19, 2, -111, 4, -66, -81, 122, -20, -34, -37, -84, 127, 68, 46, 17, 47, // Repeat the beginning of the array to allow wrapping reads -11, 12, 103, -11, }; static const uint32_t Seed_LUT[256] = { 747538460, 1088979410, 1744950180, 1767011913, 1403382928, 521866116, 1060417601, 2110622736, 1557184770, 105289385, 585624216, 1827676546, 1191843873, 1018104344, 1123590530, 663361569, 2023850500, 76561770, 1226763489, 80325252, 1992581442, 502705249, 740409860, 516219202, 557974537, 1883843076, 720112066, 1640137737, 1820967556, 40667586, 155354121, 1820967557, 1115949072, 1631803309, 98284748, 287433856, 2119719977, 988742797, 1827432592, 579378475, 1017745956, 1309377032, 1316535465, 2074315269, 1923385360, 209722667, 1546228260, 168102420, 135274561, 355958469, 248291472, 2127839491, 146920100, 585982612, 1611702337, 696506029, 1386498192, 1258072451, 1212240548, 1043171860, 1217404993, 1090770605, 1386498193, 169093201, 541098240, 1468005469, 456510673, 1578687785, 1838217424, 2010752065, 2089828354, 1362717428, 970073673, 854129835, 714793201, 1266069081, 1047060864, 1991471829, 1098097741, 913883585, 1669598224, 1337918685, 1219264706, 1799741108, 1834116681, 683417731, 1120274457, 1073098457, 1648396544, 176642749, 31171789, 718317889, 1266977808, 1400892508, 549749008, 1808010512, 67112961, 1005669825, 
903663673, 1771104465, 1277749632, 1229754427, 950632997, 1979371465, 2074373264, 305357524, 1049387408, 1171033360, 1686114305, 2147468765, 1941195985, 117709841, 809550080, 991480851, 1816248997, 1561503561, 329575568, 780651196, 1659144592, 1910793616, 604016641, 1665084765, 1530186961, 1870928913, 809550081, 2079346113, 71307521, 876663040, 1073807360, 832356664, 1573927377, 204073344, 2026918147, 1702476788, 2043881033, 57949587, 2001393952, 1197426649, 1186508931, 332056865, 950043140, 890043474, 349099312, 148914948, 236204097, 2022643605, 1441981517, 498130129, 1443421481, 924216797, 1817491777, 1913146664, 1411989632, 929068432, 495735097, 1684636033, 1284520017, 432816184, 1344884865, 210843729, 676364544, 234449232, 12112337, 1350619139, 1753272996, 2037118872, 1408560528, 533334916, 1043640385, 357326099, 201376421, 110375493, 541106497, 416159637, 242512193, 777294080, 1614872576, 1535546636, 870600145, 910810409, 1821440209, 1605432464, 1145147393, 951695441, 1758494976, 1506656568, 1557150160, 608221521, 1073840384, 217672017, 684818688, 1750138880, 16777217, 677990609, 953274371, 1770050213, 1359128393, 1797602707, 1984616737, 1865815816, 2120835200, 2051677060, 1772234061, 1579794881, 1652821009, 1742099468, 1887260865, 46468113, 1011925248, 1134107920, 881643832, 1354774993, 472508800, 1892499769, 1752793472, 1962502272, 687898625, 883538000, 1354355153, 1761673473, 944820481, 2020102353, 22020353, 961597696, 1342242816, 964808962, 1355809701, 17016649, 1386540177, 647682692, 1849012289, 751668241, 1557184768, 127374604, 1927564752, 1045744913, 1614921984, 43588881, 1016185088, 1544617984, 1090519041, 136122424, 215038417, 1563027841, 2026918145, 1688778833, 701530369, 1372639488, 1342242817, 2036945104, 953274369, 1750192384, 16842753, 964808960, 1359020032, 1358954497 }; // Note: This is pre-transposed, i.e. 
stored column-major order static const int8_t R64T[64][64] = { { 32, 45, 45, 45, 45, 45, 45, 45, 44, 44, 44, 44, 43, 43, 43, 42, 42, 41, 41, 40, 40, 39, 39, 38, 38, 37, 36, 36, 35, 34, 34, 33, 32, 31, 30, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 4, 3, 2, 1, }, { 32, 45, 45, 44, 43, 42, 41, 39, 38, 36, 34, 31, 29, 26, 23, 20, 17, 14, 11, 8, 4, 1, -2, -6, -9, -12, -15, -18, -21, -24, -27, -30, -32, -34, -36, -38, -40, -41, -43, -44, -44, -45, -45, -45, -45, -45, -44, -43, -42, -40, -39, -37, -35, -33, -30, -28, -25, -22, -19, -16, -13, -10, -7, -3, }, { 32, 45, 44, 42, 40, 37, 34, 30, 25, 20, 15, 10, 4, -1, -7, -12, -17, -22, -27, -31, -35, -38, -41, -43, -44, -45, -45, -45, -43, -41, -39, -36, -32, -28, -23, -18, -13, -8, -2, 3, 9, 14, 19, 24, 29, 33, 36, 39, 42, 44, 45, 45, 45, 44, 43, 40, 38, 34, 30, 26, 21, 16, 11, 6, }, { 32, 45, 43, 39, 35, 30, 23, 16, 9, 1, -7, -14, -21, -28, -34, -38, -42, -44, -45, -45, -43, -40, -36, -31, -25, -18, -11, -3, 4, 12, 19, 26, 32, 37, 41, 44, 45, 45, 44, 41, 38, 33, 27, 20, 13, 6, -2, -10, -17, -24, -30, -36, -40, -43, -45, -45, -44, -42, -39, -34, -29, -22, -15, -8, }, { 32, 44, 41, 36, 29, 20, 11, 1, -9, -18, -27, -34, -40, -44, -45, -45, -42, -37, -30, -22, -13, -3, 7, 16, 25, 33, 39, 43, 45, 45, 43, 38, 32, 24, 15, 6, -4, -14, -23, -31, -38, -42, -45, -45, -43, -39, -34, -26, -17, -8, 2, 12, 21, 30, 36, 41, 44, 45, 44, 40, 35, 28, 19, 10, }, { 32, 44, 39, 31, 21, 10, -2, -14, -25, -34, -41, -45, -45, -42, -36, -28, -17, -6, 7, 18, 29, 37, 43, 45, 44, 40, 34, 24, 13, 1, -11, -22, -32, -39, -44, -45, -43, -38, -30, -20, -9, 3, 15, 26, 35, 41, 45, 45, 42, 36, 27, 16, 4, -8, -19, -30, -38, -43, -45, -44, -40, -33, -23, -12, }, { 32, 43, 36, 26, 13, -1, -15, -28, -38, -44, -45, -42, -35, -24, -11, 3, 17, 30, 39, 44, 45, 41, 34, 22, 9, -6, -19, -31, -40, -45, -45, -40, -32, -20, -7, 8, 21, 33, 41, 45, 44, 39, 30, 18, 4, -10, -23, -34, -42, -45, -44, -38, -29, -16, -2, 12, 25, 36, 43, 45, 43, 37, 27, 14, }, { 32, 42, 34, 20, 4, -12, -27, -38, -44, -45, -39, -28, -13, 3, 19, 33, 42, 45, 43, 34, 21, 6, -11, -26, -38, -44, -45, -39, -29, -14, 2, 18, 32, 41, 45, 43, 35, 22, 7, -10, -25, -37, -44, -45, -40, -30, -15, 1, 17, 31, 41, 45, 43, 36, 23, 8, -9, -24, -36, -44, -45, -40, -30, -16, }, { 32, 41, 30, 14, -4, -22, -36, -44, -44, -37, -23, -6, 13, 30, 41, 45, 42, 31, 15, -3, -21, -36, -44, -45, -38, -24, -7, 12, 29, 40, 45, 42, 32, 16, -2, -20, -35, -44, -45, -38, -25, -8, 11, 28, 40, 45, 43, 33, 17, -1, -19, -34, -43, -45, -39, -26, -9, 10, 27, 39, 45, 43, 34, 18, }, { 32, 40, 27, 8, -13, -31, -43, -45, -38, -22, -2, 18, 35, 44, 44, 34, 17, -3, -23, -38, -45, -42, -30, -12, 9, 28, 41, 45, 40, 26, 7, -14, -32, -43, -45, -37, -21, -1, 19, 36, 44, 44, 34, 16, -4, -24, -39, -45, -42, -30, -11, 10, 29, 41, 45, 39, 25, 6, -15, -33, -43, -45, -36, -20, }, { 32, 39, 23, 1, -21, -38, -45, -40, -25, -3, 19, 37, 45, 41, 27, 6, -17, -36, -45, -42, -29, -8, 15, 34, 44, 43, 30, 10, -13, -33, -44, -44, -32, -12, 11, 31, 43, 44, 34, 14, -9, -30, -43, -45, -35, -16, 7, 28, 42, 45, 36, 18, -4, -26, -41, -45, -38, -20, 2, 24, 40, 45, 39, 22, }, { 32, 38, 19, -6, -29, -43, -44, -31, -9, 16, 36, 45, 40, 22, -2, -26, -42, -45, -34, -12, 13, 34, 45, 41, 25, 1, -23, -40, -45, -36, -15, 10, 32, 44, 43, 28, 4, -20, -39, -45, -38, -18, 7, 30, 43, 44, 30, 8, -17, -37, -45, -39, -21, 3, 27, 42, 44, 33, 11, -14, -35, -45, -41, -24, }, { 32, 37, 15, -12, -35, -45, -39, -18, 9, 33, 45, 40, 21, -6, -30, -44, -42, -24, 2, 28, 43, 43, 
27, 1, -25, -42, -44, -30, -4, 22, 41, 45, 32, 8, -19, -39, -45, -34, -11, 16, 38, 45, 36, 14, -13, -36, -45, -38, -17, 10, 34, 45, 40, 20, -7, -31, -44, -41, -23, 3, 29, 44, 43, 26, }, { 32, 36, 11, -18, -40, -45, -30, -3, 25, 43, 43, 24, -4, -31, -45, -39, -17, 12, 36, 45, 35, 10, -19, -40, -44, -30, -2, 26, 43, 42, 23, -6, -32, -45, -39, -16, 13, 37, 45, 34, 9, -20, -41, -44, -29, -1, 27, 44, 42, 22, -7, -33, -45, -38, -15, 14, 38, 45, 34, 8, -21, -41, -44, -28, }, { 32, 34, 7, -24, -43, -41, -19, 12, 38, 45, 30, 1, -29, -45, -39, -14, 17, 40, 44, 26, -4, -33, -45, -36, -9, 22, 43, 42, 21, -10, -36, -45, -32, -3, 27, 44, 40, 16, -15, -39, -44, -28, 2, 31, 45, 37, 11, -20, -42, -43, -23, 8, 35, 45, 34, 6, -25, -44, -41, -18, 13, 38, 45, 30, }, { 32, 33, 2, -30, -45, -36, -7, 26, 44, 38, 11, -22, -43, -40, -15, 18, 42, 42, 19, -14, -40, -44, -23, 10, 38, 45, 27, -6, -35, -45, -30, 1, 32, 45, 34, 3, -29, -45, -36, -8, 25, 44, 39, 12, -21, -43, -41, -16, 17, 41, 43, 20, -13, -39, -44, -24, 9, 37, 45, 28, -4, -34, -45, -31, }, { 32, 31, -2, -34, -45, -28, 7, 37, 44, 24, -11, -39, -43, -20, 15, 41, 42, 16, -19, -43, -40, -12, 23, 44, 38, 8, -27, -45, -35, -3, 30, 45, 32, -1, -34, -45, -29, 6, 36, 45, 25, -10, -39, -44, -21, 14, 41, 42, 17, -18, -43, -40, -13, 22, 44, 38, 9, -26, -45, -36, -4, 30, 45, 33, }, { 32, 30, -7, -38, -43, -18, 19, 44, 38, 6, -30, -45, -29, 8, 39, 43, 17, -20, -44, -37, -4, 31, 45, 28, -9, -39, -43, -16, 21, 44, 36, 3, -32, -45, -27, 10, 40, 42, 15, -22, -44, -36, -2, 33, 45, 26, -11, -40, -42, -14, 23, 45, 35, 1, -34, -45, -25, 12, 41, 41, 13, -24, -45, -34, }, { 32, 28, -11, -41, -40, -8, 30, 45, 25, -14, -43, -38, -4, 33, 45, 22, -17, -44, -36, -1, 35, 44, 19, -20, -44, -34, 2, 37, 43, 16, -23, -45, -32, 6, 39, 42, 13, -26, -45, -30, 9, 40, 41, 10, -29, -45, -27, 12, 42, 39, 7, -31, -45, -24, 15, 43, 38, 3, -34, -45, -21, 18, 44, 36, }, { 32, 26, -15, -44, -35, 3, 39, 41, 9, -31, -45, -20, 21, 45, 30, -10, -42, -38, -2, 36, 43, 14, -27, -45, -25, 16, 44, 34, -4, -39, -41, -8, 32, 45, 19, -22, -45, -30, 11, 42, 38, 1, -36, -43, -13, 28, 45, 24, -17, -44, -34, 6, 40, 40, 7, -33, -44, -18, 23, 45, 29, -12, -43, -37, }, { 32, 24, -19, -45, -29, 14, 44, 33, -9, -42, -36, 3, 40, 39, 2, -37, -42, -8, 34, 44, 13, -30, -45, -18, 25, 45, 23, -20, -45, -28, 15, 44, 32, -10, -43, -36, 4, 40, 39, 1, -38, -41, -7, 34, 43, 12, -30, -45, -17, 26, 45, 22, -21, -45, -27, 16, 44, 31, -11, -43, -35, 6, 41, 38, }, { 32, 22, -23, -45, -21, 24, 45, 20, -25, -45, -19, 26, 45, 18, -27, -45, -17, 28, 45, 16, -29, -45, -15, 30, 44, 14, -30, -44, -13, 31, 44, 12, -32, -44, -11, 33, 43, 10, -34, -43, -9, 34, 43, 8, -35, -42, -7, 36, 42, 6, -36, -41, -4, 37, 41, 3, -38, -40, -2, 38, 40, 1, -39, -39, }, { 32, 20, -27, -45, -13, 33, 43, 6, -38, -39, 2, 41, 35, -10, -44, -30, 17, 45, 23, -24, -45, -16, 30, 44, 9, -36, -41, -1, 40, 37, -7, -43, -32, 14, 45, 26, -21, -45, -19, 28, 44, 12, -34, -42, -4, 38, 39, -3, -42, -34, 11, 44, 29, -18, -45, -22, 25, 45, 15, -31, -43, -8, 36, 40, }, { 32, 18, -30, -43, -4, 39, 36, -10, -44, -26, 23, 45, 13, -34, -41, 1, 42, 33, -15, -45, -21, 28, 44, 8, -38, -38, 7, 44, 29, -20, -45, -16, 32, 42, 2, -40, -35, 12, 45, 24, -25, -45, -11, 36, 40, -3, -43, -31, 17, 45, 19, -30, -43, -6, 39, 37, -9, -44, -27, 22, 45, 14, -34, -41, }, { 32, 16, -34, -40, 4, 44, 27, -24, -44, -8, 39, 36, -13, -45, -19, 31, 42, -1, -43, -30, 21, 45, 11, -37, -38, 10, 45, 22, -29, -43, -2, 41, 32, -18, -45, -14, 35, 39, -7, -44, -25, 26, 44, 6, -40, -34, 15, 45, 17, -33, -41, 3, 43, 28, 
-23, -45, -9, 38, 36, -12, -45, -20, 30, 42, }, { 32, 14, -36, -37, 13, 45, 15, -36, -38, 12, 45, 16, -35, -38, 11, 45, 17, -34, -39, 10, 45, 18, -34, -39, 9, 45, 19, -33, -40, 8, 45, 20, -32, -40, 7, 45, 21, -31, -41, 6, 44, 22, -30, -41, 4, 44, 23, -30, -42, 3, 44, 24, -29, -42, 2, 44, 25, -28, -43, 1, 43, 26, -27, -43, }, { 32, 12, -39, -33, 21, 44, 2, -43, -25, 30, 41, -8, -45, -16, 36, 36, -17, -45, -7, 41, 29, -26, -43, 3, 44, 20, -34, -38, 13, 45, 11, -39, -32, 22, 44, 1, -43, -24, 30, 40, -9, -45, -15, 37, 35, -18, -45, -6, 42, 28, -27, -42, 4, 45, 19, -34, -38, 14, 45, 10, -40, -31, 23, 44, }, { 32, 10, -41, -28, 29, 40, -11, -45, -9, 41, 27, -30, -40, 12, 45, 8, -42, -26, 30, 39, -13, -45, -7, 42, 25, -31, -39, 14, 45, 6, -43, -24, 32, 38, -15, -45, -4, 43, 23, -33, -38, 16, 45, 3, -43, -22, 34, 37, -17, -45, -2, 44, 21, -34, -36, 18, 44, 1, -44, -20, 35, 36, -19, -44, }, { 32, 8, -43, -22, 35, 34, -23, -42, 9, 45, 7, -43, -21, 36, 34, -24, -42, 10, 45, 6, -43, -20, 36, 33, -25, -41, 11, 45, 4, -44, -19, 37, 32, -26, -41, 12, 45, 3, -44, -18, 38, 31, -27, -40, 13, 45, 2, -44, -17, 38, 30, -28, -40, 14, 45, 1, -44, -16, 39, 30, -29, -39, 15, 45, }, { 32, 6, -44, -16, 40, 26, -34, -34, 25, 40, -15, -44, 4, 45, 7, -44, -17, 39, 27, -33, -35, 24, 41, -14, -44, 3, 45, 8, -43, -18, 39, 28, -32, -36, 23, 41, -13, -45, 2, 45, 9, -43, -19, 38, 29, -31, -36, 22, 42, -12, -45, 1, 45, 10, -43, -20, 38, 30, -30, -37, 21, 42, -11, -45, }, { 32, 3, -45, -10, 43, 16, -41, -22, 38, 28, -34, -33, 29, 37, -23, -40, 17, 43, -11, -45, 4, 45, 2, -45, -9, 44, 15, -41, -21, 38, 27, -34, -32, 30, 36, -24, -40, 18, 43, -12, -44, 6, 45, 1, -45, -8, 44, 14, -42, -20, 39, 26, -35, -31, 30, 36, -25, -39, 19, 42, -13, -44, 7, 45, }, { 32, 1, -45, -3, 45, 6, -45, -8, 44, 10, -44, -12, 43, 14, -43, -16, 42, 18, -41, -20, 40, 22, -39, -24, 38, 26, -36, -28, 35, 30, -34, -31, 32, 33, -30, -34, 29, 36, -27, -37, 25, 38, -23, -39, 21, 40, -19, -41, 17, 42, -15, -43, 13, 44, -11, -44, 9, 45, -7, -45, 4, 45, -2, -45, }, { 32, -1, -45, 3, 45, -6, -45, 8, 44, -10, -44, 12, 43, -14, -43, 16, 42, -18, -41, 20, 40, -22, -39, 24, 38, -26, -36, 28, 35, -30, -34, 31, 32, -33, -30, 34, 29, -36, -27, 37, 25, -38, -23, 39, 21, -40, -19, 41, 17, -42, -15, 43, 13, -44, -11, 44, 9, -45, -7, 45, 4, -45, -2, 45, }, { 32, -3, -45, 10, 43, -16, -41, 22, 38, -28, -34, 33, 29, -37, -23, 40, 17, -43, -11, 45, 4, -45, 2, 45, -9, -44, 15, 41, -21, -38, 27, 34, -32, -30, 36, 24, -40, -18, 43, 12, -44, -6, 45, -1, -45, 8, 44, -14, -42, 20, 39, -26, -35, 31, 30, -36, -25, 39, 19, -42, -13, 44, 7, -45, }, { 32, -6, -44, 16, 40, -26, -34, 34, 25, -40, -15, 44, 4, -45, 7, 44, -17, -39, 27, 33, -35, -24, 41, 14, -44, -3, 45, -8, -43, 18, 39, -28, -32, 36, 23, -41, -13, 45, 2, -45, 9, 43, -19, -38, 29, 31, -36, -22, 42, 12, -45, -1, 45, -10, -43, 20, 38, -30, -30, 37, 21, -42, -11, 45, }, { 32, -8, -43, 22, 35, -34, -23, 42, 9, -45, 7, 43, -21, -36, 34, 24, -42, -10, 45, -6, -43, 20, 36, -33, -25, 41, 11, -45, 4, 44, -19, -37, 32, 26, -41, -12, 45, -3, -44, 18, 38, -31, -27, 40, 13, -45, 2, 44, -17, -38, 30, 28, -40, -14, 45, -1, -44, 16, 39, -30, -29, 39, 15, -45, }, { 32, -10, -41, 28, 29, -40, -11, 45, -9, -41, 27, 30, -40, -12, 45, -8, -42, 26, 30, -39, -13, 45, -7, -42, 25, 31, -39, -14, 45, -6, -43, 24, 32, -38, -15, 45, -4, -43, 23, 33, -38, -16, 45, -3, -43, 22, 34, -37, -17, 45, -2, -44, 21, 34, -36, -18, 44, -1, -44, 20, 35, -36, -19, 44, }, { 32, -12, -39, 33, 21, -44, 2, 43, -25, -30, 41, 8, -45, 16, 36, -36, -17, 45, -7, -41, 29, 26, 
-43, -3, 44, -20, -34, 38, 13, -45, 11, 39, -32, -22, 44, -1, -43, 24, 30, -40, -9, 45, -15, -37, 35, 18, -45, 6, 42, -28, -27, 42, 4, -45, 19, 34, -38, -14, 45, -10, -40, 31, 23, -44, }, { 32, -14, -36, 37, 13, -45, 15, 36, -38, -12, 45, -16, -35, 38, 11, -45, 17, 34, -39, -10, 45, -18, -34, 39, 9, -45, 19, 33, -40, -8, 45, -20, -32, 40, 7, -45, 21, 31, -41, -6, 44, -22, -30, 41, 4, -44, 23, 30, -42, -3, 44, -24, -29, 42, 2, -44, 25, 28, -43, -1, 43, -26, -27, 43, }, { 32, -16, -34, 40, 4, -44, 27, 24, -44, 8, 39, -36, -13, 45, -19, -31, 42, 1, -43, 30, 21, -45, 11, 37, -38, -10, 45, -22, -29, 43, -2, -41, 32, 18, -45, 14, 35, -39, -7, 44, -25, -26, 44, -6, -40, 34, 15, -45, 17, 33, -41, -3, 43, -28, -23, 45, -9, -38, 36, 12, -45, 20, 30, -42, }, { 32, -18, -30, 43, -4, -39, 36, 10, -44, 26, 23, -45, 13, 34, -41, -1, 42, -33, -15, 45, -21, -28, 44, -8, -38, 38, 7, -44, 29, 20, -45, 16, 32, -42, 2, 40, -35, -12, 45, -24, -25, 45, -11, -36, 40, 3, -43, 31, 17, -45, 19, 30, -43, 6, 39, -37, -9, 44, -27, -22, 45, -14, -34, 41, }, { 32, -20, -27, 45, -13, -33, 43, -6, -38, 39, 2, -41, 35, 10, -44, 30, 17, -45, 23, 24, -45, 16, 30, -44, 9, 36, -41, 1, 40, -37, -7, 43, -32, -14, 45, -26, -21, 45, -19, -28, 44, -12, -34, 42, -4, -38, 39, 3, -42, 34, 11, -44, 29, 18, -45, 22, 25, -45, 15, 31, -43, 8, 36, -40, }, { 32, -22, -23, 45, -21, -24, 45, -20, -25, 45, -19, -26, 45, -18, -27, 45, -17, -28, 45, -16, -29, 45, -15, -30, 44, -14, -30, 44, -13, -31, 44, -12, -32, 44, -11, -33, 43, -10, -34, 43, -9, -34, 43, -8, -35, 42, -7, -36, 42, -6, -36, 41, -4, -37, 41, -3, -38, 40, -2, -38, 40, -1, -39, 39, }, { 32, -24, -19, 45, -29, -14, 44, -33, -9, 42, -36, -3, 40, -39, 2, 37, -42, 8, 34, -44, 13, 30, -45, 18, 25, -45, 23, 20, -45, 28, 15, -44, 32, 10, -43, 36, 4, -40, 39, -1, -38, 41, -7, -34, 43, -12, -30, 45, -17, -26, 45, -22, -21, 45, -27, -16, 44, -31, -11, 43, -35, -6, 41, -38, }, { 32, -26, -15, 44, -35, -3, 39, -41, 9, 31, -45, 20, 21, -45, 30, 10, -42, 38, -2, -36, 43, -14, -27, 45, -25, -16, 44, -34, -4, 39, -41, 8, 32, -45, 19, 22, -45, 30, 11, -42, 38, -1, -36, 43, -13, -28, 45, -24, -17, 44, -34, -6, 40, -40, 7, 33, -44, 18, 23, -45, 29, 12, -43, 37, }, { 32, -28, -11, 41, -40, 8, 30, -45, 25, 14, -43, 38, -4, -33, 45, -22, -17, 44, -36, 1, 35, -44, 19, 20, -44, 34, 2, -37, 43, -16, -23, 45, -32, -6, 39, -42, 13, 26, -45, 30, 9, -40, 41, -10, -29, 45, -27, -12, 42, -39, 7, 31, -45, 24, 15, -43, 38, -3, -34, 45, -21, -18, 44, -36, }, { 32, -30, -7, 38, -43, 18, 19, -44, 38, -6, -30, 45, -29, -8, 39, -43, 17, 20, -44, 37, -4, -31, 45, -28, -9, 39, -43, 16, 21, -44, 36, -3, -32, 45, -27, -10, 40, -42, 15, 22, -44, 36, -2, -33, 45, -26, -11, 40, -42, 14, 23, -45, 35, -1, -34, 45, -25, -12, 41, -41, 13, 24, -45, 34, }, { 32, -31, -2, 34, -45, 28, 7, -37, 44, -24, -11, 39, -43, 20, 15, -41, 42, -16, -19, 43, -40, 12, 23, -44, 38, -8, -27, 45, -35, 3, 30, -45, 32, 1, -34, 45, -29, -6, 36, -45, 25, 10, -39, 44, -21, -14, 41, -42, 17, 18, -43, 40, -13, -22, 44, -38, 9, 26, -45, 36, -4, -30, 45, -33, }, { 32, -33, 2, 30, -45, 36, -7, -26, 44, -38, 11, 22, -43, 40, -15, -18, 42, -42, 19, 14, -40, 44, -23, -10, 38, -45, 27, 6, -35, 45, -30, -1, 32, -45, 34, -3, -29, 45, -36, 8, 25, -44, 39, -12, -21, 43, -41, 16, 17, -41, 43, -20, -13, 39, -44, 24, 9, -37, 45, -28, -4, 34, -45, 31, }, { 32, -34, 7, 24, -43, 41, -19, -12, 38, -45, 30, -1, -29, 45, -39, 14, 17, -40, 44, -26, -4, 33, -45, 36, -9, -22, 43, -42, 21, 10, -36, 45, -32, 3, 27, -44, 40, -16, -15, 39, -44, 28, 2, -31, 45, -37, 11, 20, -42, 43, 
-23, -8, 35, -45, 34, -6, -25, 44, -41, 18, 13, -38, 45, -30, }, { 32, -36, 11, 18, -40, 45, -30, 3, 25, -43, 43, -24, -4, 31, -45, 39, -17, -12, 36, -45, 35, -10, -19, 40, -44, 30, -2, -26, 43, -42, 23, 6, -32, 45, -39, 16, 13, -37, 45, -34, 9, 20, -41, 44, -29, 1, 27, -44, 42, -22, -7, 33, -45, 38, -15, -14, 38, -45, 34, -8, -21, 41, -44, 28, }, { 32, -37, 15, 12, -35, 45, -39, 18, 9, -33, 45, -40, 21, 6, -30, 44, -42, 24, 2, -28, 43, -43, 27, -1, -25, 42, -44, 30, -4, -22, 41, -45, 32, -8, -19, 39, -45, 34, -11, -16, 38, -45, 36, -14, -13, 36, -45, 38, -17, -10, 34, -45, 40, -20, -7, 31, -44, 41, -23, -3, 29, -44, 43, -26, }, { 32, -38, 19, 6, -29, 43, -44, 31, -9, -16, 36, -45, 40, -22, -2, 26, -42, 45, -34, 12, 13, -34, 45, -41, 25, -1, -23, 40, -45, 36, -15, -10, 32, -44, 43, -28, 4, 20, -39, 45, -38, 18, 7, -30, 43, -44, 30, -8, -17, 37, -45, 39, -21, -3, 27, -42, 44, -33, 11, 14, -35, 45, -41, 24, }, { 32, -39, 23, -1, -21, 38, -45, 40, -25, 3, 19, -37, 45, -41, 27, -6, -17, 36, -45, 42, -29, 8, 15, -34, 44, -43, 30, -10, -13, 33, -44, 44, -32, 12, 11, -31, 43, -44, 34, -14, -9, 30, -43, 45, -35, 16, 7, -28, 42, -45, 36, -18, -4, 26, -41, 45, -38, 20, 2, -24, 40, -45, 39, -22, }, { 32, -40, 27, -8, -13, 31, -43, 45, -38, 22, -2, -18, 35, -44, 44, -34, 17, 3, -23, 38, -45, 42, -30, 12, 9, -28, 41, -45, 40, -26, 7, 14, -32, 43, -45, 37, -21, 1, 19, -36, 44, -44, 34, -16, -4, 24, -39, 45, -42, 30, -11, -10, 29, -41, 45, -39, 25, -6, -15, 33, -43, 45, -36, 20, }, { 32, -41, 30, -14, -4, 22, -36, 44, -44, 37, -23, 6, 13, -30, 41, -45, 42, -31, 15, 3, -21, 36, -44, 45, -38, 24, -7, -12, 29, -40, 45, -42, 32, -16, -2, 20, -35, 44, -45, 38, -25, 8, 11, -28, 40, -45, 43, -33, 17, 1, -19, 34, -43, 45, -39, 26, -9, -10, 27, -39, 45, -43, 34, -18, }, { 32, -42, 34, -20, 4, 12, -27, 38, -44, 45, -39, 28, -13, -3, 19, -33, 42, -45, 43, -34, 21, -6, -11, 26, -38, 44, -45, 39, -29, 14, 2, -18, 32, -41, 45, -43, 35, -22, 7, 10, -25, 37, -44, 45, -40, 30, -15, -1, 17, -31, 41, -45, 43, -36, 23, -8, -9, 24, -36, 44, -45, 40, -30, 16, }, { 32, -43, 36, -26, 13, 1, -15, 28, -38, 44, -45, 42, -35, 24, -11, -3, 17, -30, 39, -44, 45, -41, 34, -22, 9, 6, -19, 31, -40, 45, -45, 40, -32, 20, -7, -8, 21, -33, 41, -45, 44, -39, 30, -18, 4, 10, -23, 34, -42, 45, -44, 38, -29, 16, -2, -12, 25, -36, 43, -45, 43, -37, 27, -14, }, { 32, -44, 39, -31, 21, -10, -2, 14, -25, 34, -41, 45, -45, 42, -36, 28, -17, 6, 7, -18, 29, -37, 43, -45, 44, -40, 34, -24, 13, -1, -11, 22, -32, 39, -44, 45, -43, 38, -30, 20, -9, -3, 15, -26, 35, -41, 45, -45, 42, -36, 27, -16, 4, 8, -19, 30, -38, 43, -45, 44, -40, 33, -23, 12, }, { 32, -44, 41, -36, 29, -20, 11, -1, -9, 18, -27, 34, -40, 44, -45, 45, -42, 37, -30, 22, -13, 3, 7, -16, 25, -33, 39, -43, 45, -45, 43, -38, 32, -24, 15, -6, -4, 14, -23, 31, -38, 42, -45, 45, -43, 39, -34, 26, -17, 8, 2, -12, 21, -30, 36, -41, 44, -45, 44, -40, 35, -28, 19, -10, }, { 32, -45, 43, -39, 35, -30, 23, -16, 9, -1, -7, 14, -21, 28, -34, 38, -42, 44, -45, 45, -43, 40, -36, 31, -25, 18, -11, 3, 4, -12, 19, -26, 32, -37, 41, -44, 45, -45, 44, -41, 38, -33, 27, -20, 13, -6, -2, 10, -17, 24, -30, 36, -40, 43, -45, 45, -44, 42, -39, 34, -29, 22, -15, 8, }, { 32, -45, 44, -42, 40, -37, 34, -30, 25, -20, 15, -10, 4, 1, -7, 12, -17, 22, -27, 31, -35, 38, -41, 43, -44, 45, -45, 45, -43, 41, -39, 36, -32, 28, -23, 18, -13, 8, -2, -3, 9, -14, 19, -24, 29, -33, 36, -39, 42, -44, 45, -45, 45, -44, 43, -40, 38, -34, 30, -26, 21, -16, 11, -6, }, { 32, -45, 45, -44, 43, -42, 41, -39, 38, -36, 34, -31, 29, -26, 
23, -20, 17, -14, 11, -8, 4, -1, -2, 6, -9, 12, -15, 18, -21, 24, -27, 30, -32, 34, -36, 38, -40, 41, -43, 44, -44, 45, -45, 45, -45, 45, -44, 43, -42, 40, -39, 37, -35, 33, -30, 28, -25, 22, -19, 16, -13, 10, -7, 3, }, { 32, -45, 45, -45, 45, -45, 45, -45, 44, -44, 44, -44, 43, -43, 43, -42, 42, -41, 41, -40, 40, -39, 39, -38, 38, -37, 36, -36, 35, -34, 34, -33, 32, -31, 30, -30, 29, -28, 27, -26, 25, -24, 23, -22, 21, -20, 19, -18, 17, -16, 15, -14, 13, -12, 11, -10, 9, -8, 7, -6, 4, -3, 2, -1, } }; libplacebo-v7.349.0/src/shaders/icc.c000066400000000000000000000727161463457750100173330ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #include #include "shaders.h" #include #include const struct pl_icc_params pl_icc_default_params = { PL_ICC_DEFAULTS }; #ifdef PL_HAVE_LCMS #include #include struct icc_priv { pl_log log; pl_cache cache; // for backwards compatibility cmsContext cms; cmsHPROFILE profile; cmsHPROFILE approx; // approximation profile float a, b, scale; // approxmation tone curve parameters and scaling cmsCIEXYZ *white, black; float gamma_stddev; uint64_t lut_sig; }; static void error_callback(cmsContext cms, cmsUInt32Number code, const char *msg) { pl_log log = cmsGetContextUserData(cms); pl_err(log, "lcms2: [%d] %s", (int) code, msg); } static void set_callback(void *priv, pl_cache_obj obj) { pl_icc_object icc = priv; icc->params.cache_save(icc->params.cache_priv, obj.key, obj.data, obj.size); } static pl_cache_obj get_callback(void *priv, uint64_t key) { pl_icc_object icc = priv; int s_r = icc->params.size_r, s_g = icc->params.size_g, s_b = icc->params.size_b; size_t data_size = s_r * s_g * s_b * sizeof(uint16_t[4]); void *data = pl_alloc(NULL, data_size); bool ok = icc->params.cache_load(icc->params.cache_priv, key, data, data_size); if (!ok) { pl_free(data); return (pl_cache_obj) {0}; } return (pl_cache_obj) { .key = key, .data = data, .size = data_size, .free = pl_free, }; } void pl_icc_close(pl_icc_object *picc) { pl_icc_object icc = *picc; if (!icc) return; struct icc_priv *p = PL_PRIV(icc); cmsCloseProfile(p->approx); cmsCloseProfile(p->profile); cmsDeleteContext(p->cms); pl_cache_destroy(&p->cache); pl_free_ptr((void **) picc); } static bool detect_csp(struct pl_icc_object_t *icc) { struct icc_priv *p = PL_PRIV(icc); cmsHTRANSFORM tf; cmsHPROFILE xyz = cmsCreateXYZProfileTHR(p->cms); if (!xyz) return false; // We need to use an unadapted observer to get the raw values cmsFloat64Number prev_adapt = cmsSetAdaptationStateTHR(p->cms, 0.0); tf = cmsCreateTransformTHR(p->cms, p->profile, TYPE_RGB_8, xyz, TYPE_XYZ_DBL, INTENT_ABSOLUTE_COLORIMETRIC, /* Note: These flags mostly don't do anything * anyway, but specify them regardless */ cmsFLAGS_NOCACHE | cmsFLAGS_NOOPTIMIZE); cmsSetAdaptationStateTHR(p->cms, prev_adapt); cmsCloseProfile(xyz); if (!tf) return false; enum { RED, GREEN, BLUE, WHITE, BLACK, GRAY, RAMP, }; static const uint8_t 
test[][3] = { [RED] = { 0xFF, 0, 0 }, [GREEN] = { 0, 0xFF, 0 }, [BLUE] = { 0, 0, 0xFF }, [WHITE] = { 0xFF, 0xFF, 0xFF }, [BLACK] = { 0x00, 0x00, 0x00 }, [GRAY] = { 0x80, 0x80, 0x80 }, // Grayscale ramp (excluding endpoints) #define V(d) { d, d, d } V(0x01), V(0x02), V(0x03), V(0x04), V(0x05), V(0x06), V(0x07), V(0x08), V(0x09), V(0x0A), V(0x0B), V(0x0C), V(0x0D), V(0x0E), V(0x0F), V(0x10), V(0x11), V(0x12), V(0x13), V(0x14), V(0x15), V(0x16), V(0x17), V(0x18), V(0x19), V(0x1A), V(0x1B), V(0x1C), V(0x1D), V(0x1E), V(0x1F), V(0x20), V(0x21), V(0x22), V(0x23), V(0x24), V(0x25), V(0x26), V(0x27), V(0x28), V(0x29), V(0x2A), V(0x2B), V(0x2C), V(0x2D), V(0x2E), V(0x2F), V(0x30), V(0x31), V(0x32), V(0x33), V(0x34), V(0x35), V(0x36), V(0x37), V(0x38), V(0x39), V(0x3A), V(0x3B), V(0x3C), V(0x3D), V(0x3E), V(0x3F), V(0x40), V(0x41), V(0x42), V(0x43), V(0x44), V(0x45), V(0x46), V(0x47), V(0x48), V(0x49), V(0x4A), V(0x4B), V(0x4C), V(0x4D), V(0x4E), V(0x4F), V(0x50), V(0x51), V(0x52), V(0x53), V(0x54), V(0x55), V(0x56), V(0x57), V(0x58), V(0x59), V(0x5A), V(0x5B), V(0x5C), V(0x5D), V(0x5E), V(0x5F), V(0x60), V(0x61), V(0x62), V(0x63), V(0x64), V(0x65), V(0x66), V(0x67), V(0x68), V(0x69), V(0x6A), V(0x6B), V(0x6C), V(0x6D), V(0x6E), V(0x6F), V(0x70), V(0x71), V(0x72), V(0x73), V(0x74), V(0x75), V(0x76), V(0x77), V(0x78), V(0x79), V(0x7A), V(0x7B), V(0x7C), V(0x7D), V(0x7E), V(0x7F), V(0x80), V(0x81), V(0x82), V(0x83), V(0x84), V(0x85), V(0x86), V(0x87), V(0x88), V(0x89), V(0x8A), V(0x8B), V(0x8C), V(0x8D), V(0x8E), V(0x8F), V(0x90), V(0x91), V(0x92), V(0x93), V(0x94), V(0x95), V(0x96), V(0x97), V(0x98), V(0x99), V(0x9A), V(0x9B), V(0x9C), V(0x9D), V(0x9E), V(0x9F), V(0xA0), V(0xA1), V(0xA2), V(0xA3), V(0xA4), V(0xA5), V(0xA6), V(0xA7), V(0xA8), V(0xA9), V(0xAA), V(0xAB), V(0xAC), V(0xAD), V(0xAE), V(0xAF), V(0xB0), V(0xB1), V(0xB2), V(0xB3), V(0xB4), V(0xB5), V(0xB6), V(0xB7), V(0xB8), V(0xB9), V(0xBA), V(0xBB), V(0xBC), V(0xBD), V(0xBE), V(0xBF), V(0xC0), V(0xC1), V(0xC2), V(0xC3), V(0xC4), V(0xC5), V(0xC6), V(0xC7), V(0xC8), V(0xC9), V(0xCA), V(0xCB), V(0xCC), V(0xCD), V(0xCE), V(0xCF), V(0xD0), V(0xD1), V(0xD2), V(0xD3), V(0xD4), V(0xD5), V(0xD6), V(0xD7), V(0xD8), V(0xD9), V(0xDA), V(0xDB), V(0xDC), V(0xDD), V(0xDE), V(0xDF), V(0xE0), V(0xE1), V(0xE2), V(0xE3), V(0xE4), V(0xE5), V(0xE6), V(0xE7), V(0xE8), V(0xE9), V(0xEA), V(0xEB), V(0xEC), V(0xED), V(0xEE), V(0xEF), V(0xF0), V(0xF1), V(0xF2), V(0xF3), V(0xF4), V(0xF5), V(0xF6), V(0xF7), V(0xF8), V(0xF9), V(0xFA), V(0xFB), V(0xFC), V(0xFD), V(0xFE), #undef V }; cmsCIEXYZ dst[PL_ARRAY_SIZE(test)] = {0}; cmsDoTransform(tf, test, dst, PL_ARRAY_SIZE(dst)); cmsDeleteTransform(tf); // Read primaries from transformed RGBW values struct pl_raw_primaries *measured = &icc->csp.hdr.prim; measured->red = pl_cie_from_XYZ(dst[RED].X, dst[RED].Y, dst[RED].Z); measured->green = pl_cie_from_XYZ(dst[GREEN].X, dst[GREEN].Y, dst[GREEN].Z); measured->blue = pl_cie_from_XYZ(dst[BLUE].X, dst[BLUE].Y, dst[BLUE].Z); measured->white = pl_cie_from_XYZ(dst[WHITE].X, dst[WHITE].Y, dst[WHITE].Z); // Detect best containing gamut const struct pl_raw_primaries *best = NULL; for (enum pl_color_primaries prim = 1; prim < PL_COLOR_PRIM_COUNT; prim++) { const struct pl_raw_primaries *raw = pl_raw_primaries_get(prim); if (!icc->csp.primaries && pl_raw_primaries_similar(raw, measured)) { icc->containing_primaries = icc->csp.primaries = prim; best = raw; break; } if (pl_primaries_superset(raw, measured) && (!best || pl_primaries_superset(best, raw))) { icc->containing_primaries = prim; 
best = raw; } } if (!best) { PL_WARN(p, "ICC profile too wide to handle, colors may be clipped!"); icc->containing_primaries = PL_COLOR_PRIM_ACES_AP0; } // Detect match for known transfer functions const float contrast = icc->csp.hdr.max_luma / icc->csp.hdr.min_luma; float best_errsum = 0.0f; for (enum pl_color_transfer trc = 1; trc < PL_COLOR_TRC_COUNT; trc++) { struct pl_color_space ref = { .primaries = icc->csp.primaries, .transfer = trc, .hdr.max_luma = PL_COLOR_SDR_WHITE, .hdr.min_luma = PL_COLOR_SDR_WHITE * contrast, }; float errsum = 0.0f; for (int i = RAMP; i < PL_ARRAY_SIZE(dst); i++) { const float x = test[i][0] / 255.0; float color[3] = { x, x, x }; pl_color_linearize(&ref, color); const float delta = dst[i].Y - color[0]; errsum += delta * delta; } const int N = PL_ARRAY_SIZE(dst) - RAMP; const float tolerance = 5e-3f; // 0.5% stddev(error), around JND if (errsum > N * PL_SQUARE(tolerance)) continue; if (!icc->csp.transfer || errsum < best_errsum) { icc->csp.transfer = trc; best_errsum = errsum; } } // TODO: re-use pl_shader_linearize() and a built-in parametric // profile, instead of a pure power gamma approximation? // Rough estimate of overall gamma and starting point for curve black point const float y_approx = dst[GRAY].Y ? log(dst[GRAY].Y) / log(0.5) : 1.0f; const float kb = fmaxf(dst[BLACK].Y, 0.0f); float b = powf(kb, 1 / y_approx); // Estimate mean and stddev of gamma (Welford's method) float M = 0.0, S = 0.0; int k = 1; for (int i = RAMP; i < PL_ARRAY_SIZE(dst); i++) { // exclude primaries if (dst[i].Y <= 0 || dst[i].Y >= 1) continue; float src = (1 - b) * (test[i][0] / 255.0) + b; float y = log(dst[i].Y) / log(src); float tmpM = M; M += (y - tmpM) / k; S += (y - tmpM) * (y - M); k++; // Update estimate of black point according to current gamma estimate b = powf(kb, 1 / M); } S = sqrt(S / (k - 1)); if (M <= 0) { PL_ERR(p, "Arithmetic error in ICC profile gamma estimation? " "Please open an issue"); return false; } icc->gamma = M; p->gamma_stddev = S; return true; } static bool detect_contrast(struct pl_icc_object_t *icc, struct pl_icc_params *params) { struct icc_priv *p = PL_PRIV(icc); enum pl_rendering_intent intent = params->intent; struct pl_hdr_metadata *hdr = &icc->csp.hdr; /* LittleCMS refuses to detect an intent in absolute colorimetric intent, * so fall back to relative colorimetric since we only care about the * brightness value here */ if (intent == PL_INTENT_ABSOLUTE_COLORIMETRIC) intent = PL_INTENT_RELATIVE_COLORIMETRIC; if (!cmsDetectDestinationBlackPoint(&p->black, p->profile, intent, 0)) { /* * v4 ICC profiles have a black point tag but only for * perceptual/saturation intents. So we change the rendering intent * to perceptual if we are provided a v4 ICC profile. */ if (cmsGetEncodedICCversion(p->profile) >= 0x4000000 && intent != PL_INTENT_PERCEPTUAL) { params->intent = PL_INTENT_PERCEPTUAL; return detect_contrast(icc, params); } PL_ERR(p, "Failed detecting ICC profile black point!"); return false; } float max_luma = params->max_luma; p->white = cmsReadTag(p->profile, cmsSigLuminanceTag); if (max_luma <= 0) max_luma = p->white ? 
p->white->Y : PL_COLOR_SDR_WHITE; hdr->max_luma = max_luma; hdr->min_luma = p->black.Y * max_luma; hdr->min_luma = PL_MAX(hdr->min_luma, 1e-6); // prevent true 0 return true; } static void infer_clut_size(struct pl_icc_object_t *icc) { struct icc_priv *p = PL_PRIV(icc); struct pl_icc_params *params = &icc->params; if (params->size_r && params->size_g && params->size_b) { PL_DEBUG(p, "Using fixed 3DLUT size: %dx%dx%d", (int) params->size_r, (int) params->size_g, (int) params->size_b); return; } #define REQUIRE_SIZE(N) \ params->size_r = PL_MAX(params->size_r, N); \ params->size_g = PL_MAX(params->size_g, N); \ params->size_b = PL_MAX(params->size_b, N) // Default size for sanity REQUIRE_SIZE(9); // Ensure enough precision to track the (absolute) black point if (p->black.Y > 1e-4) { float black_rel = powf(p->black.Y, 1.0f / icc->gamma); int min_size = 2 * (int) ceilf(1.0f / black_rel); REQUIRE_SIZE(min_size); } // Ensure enough precision to track the gamma curve if (p->gamma_stddev > 1e-2) { REQUIRE_SIZE(65); } else if (p->gamma_stddev > 1e-3) { REQUIRE_SIZE(33); } else if (p->gamma_stddev > 1e-4) { REQUIRE_SIZE(17); } // Ensure enough precision to track any internal CLUTs cmsPipeline *pipe = NULL; switch (icc->params.intent) { case PL_INTENT_SATURATION: pipe = cmsReadTag(p->profile, cmsSigBToA2Tag); if (pipe) break; // fall through case PL_INTENT_RELATIVE_COLORIMETRIC: case PL_INTENT_ABSOLUTE_COLORIMETRIC: default: pipe = cmsReadTag(p->profile, cmsSigBToA1Tag); if (pipe) break; // fall through case PL_INTENT_PERCEPTUAL: pipe = cmsReadTag(p->profile, cmsSigBToA0Tag); break; } if (!pipe) { switch (icc->params.intent) { case PL_INTENT_SATURATION: pipe = cmsReadTag(p->profile, cmsSigAToB2Tag); if (pipe) break; // fall through case PL_INTENT_RELATIVE_COLORIMETRIC: case PL_INTENT_ABSOLUTE_COLORIMETRIC: default: pipe = cmsReadTag(p->profile, cmsSigAToB1Tag); if (pipe) break; // fall through case PL_INTENT_PERCEPTUAL: pipe = cmsReadTag(p->profile, cmsSigAToB0Tag); break; } } if (pipe) { for (cmsStage *stage = cmsPipelineGetPtrToFirstStage(pipe); stage; stage = cmsStageNext(stage)) { switch (cmsStageType(stage)) { case cmsSigCLutElemType: ; _cmsStageCLutData *data = cmsStageData(stage); if (data->Params->nInputs != 3) continue; params->size_r = PL_MAX(params->size_r, data->Params->nSamples[0]); params->size_g = PL_MAX(params->size_g, data->Params->nSamples[1]); params->size_b = PL_MAX(params->size_b, data->Params->nSamples[2]); break; default: continue; } } } // Clamp the output size to make sure profiles are not too large params->size_r = PL_MIN(params->size_r, 129); params->size_g = PL_MIN(params->size_g, 129); params->size_b = PL_MIN(params->size_b, 129); // Constrain the total LUT size to roughly 1M entries const size_t max_size = 1000000; size_t total_size = params->size_r * params->size_g * params->size_b; if (total_size > max_size) { float factor = powf((float) max_size / total_size, 1/3.0f); params->size_r = ceilf(factor * params->size_r); params->size_g = ceilf(factor * params->size_g); params->size_b = ceilf(factor * params->size_b); } } static bool icc_init(struct pl_icc_object_t *icc) { struct icc_priv *p = PL_PRIV(icc); struct pl_icc_params *params = &icc->params; if (params->intent < 0 || params->intent > PL_INTENT_ABSOLUTE_COLORIMETRIC) params->intent = cmsGetHeaderRenderingIntent(p->profile); if (!detect_contrast(icc, params)) return false; if (!detect_csp(icc)) return false; infer_clut_size(icc); // Create approximation profile. 
Use a tone-curve based on a BT.1886-style // pure power curve, with an approximation gamma matched to the ICC // profile. We stretch the luminance range *before* the input to the gamma // function, to avoid numerical issues near the black point. (This removes // the need for a separate linear section) // // Y = scale * (aX + b)^y, where Y = PCS luma and X = encoded value ([0-1]) p->scale = pl_hdr_rescale(PL_HDR_NITS, PL_HDR_NORM, icc->csp.hdr.max_luma); p->b = powf(icc->csp.hdr.min_luma / icc->csp.hdr.max_luma, 1.0f / icc->gamma); p->a = (1 - p->b); cmsToneCurve *curve = cmsBuildParametricToneCurve(p->cms, 2, (double[3]) { icc->gamma, p->a, p->b }); if (!curve) return false; const struct pl_raw_primaries *prim = pl_raw_primaries_get(icc->containing_primaries); cmsCIExyY wp_xyY = { prim->white.x, prim->white.y, 1.0 }; cmsCIExyYTRIPLE prim_xyY = { .Red = { prim->red.x, prim->red.y, 1.0 }, .Green = { prim->green.x, prim->green.y, 1.0 }, .Blue = { prim->blue.x, prim->blue.y, 1.0 }, }; p->approx = cmsCreateRGBProfileTHR(p->cms, &wp_xyY, &prim_xyY, (cmsToneCurve *[3]){ curve, curve, curve }); cmsFreeToneCurve(curve); if (!p->approx) return false; // We need to create an ICC V2 profile because ICC V4 perceptual profiles // have normalized semantics, but we want colorimetric mapping with BPC cmsSetHeaderRenderingIntent(p->approx, icc->params.intent); cmsSetProfileVersion(p->approx, 2.2); // Hash all parameters affecting the generated 3DLUT p->lut_sig = CACHE_KEY_ICC_3DLUT; pl_hash_merge(&p->lut_sig, icc->signature); pl_hash_merge(&p->lut_sig, params->intent); pl_hash_merge(&p->lut_sig, params->size_r); pl_hash_merge(&p->lut_sig, params->size_g); pl_hash_merge(&p->lut_sig, params->size_b); pl_hash_merge(&p->lut_sig, params->force_bpc); union { double d; uint64_t u; } v = { .d = icc->csp.hdr.max_luma }; pl_hash_merge(&p->lut_sig, v.u); // min luma depends only on the max luma and profile // Backwards compatibility with old caching API if ((params->cache_save || params->cache_load) && !params->cache) { p->cache = pl_cache_create(pl_cache_params( .log = p->log, .set = params->cache_save ? set_callback : NULL, .get = params->cache_load ? get_callback : NULL, .priv = icc, )); } // Dump profile information PL_INFO(p, "Opened ICC profile:"); if (p->white) { PL_DEBUG(p, " Raw white point: X=%.2f Y=%.2f Z=%.2f cd/m^2", p->white->X, p->white->Y, p->white->Z); } PL_DEBUG(p, " Raw black point: X=%.6f%% Y=%.6f%% Z=%.6f%%", p->black.X * 100, p->black.Y * 100, p->black.Z * 100); PL_INFO(p, " Contrast = %.0f cd/m^2 : %.3f mcd/m^2 ≈ %.0f : 1", icc->csp.hdr.max_luma, icc->csp.hdr.min_luma * 1000, icc->csp.hdr.max_luma / icc->csp.hdr.min_luma); if (icc->csp.primaries) { PL_INFO(p, " Detected primaries: %s", pl_color_primaries_name(icc->csp.primaries)); } else { const struct pl_raw_primaries *raw = &icc->csp.hdr.prim; PL_DEBUG(p, " Measured primaries:"); PL_DEBUG(p, " White: x=%.6f, y=%.6f", raw->white.x, raw->white.y); PL_DEBUG(p, " Red: x=%.3f, y=%.3f", raw->red.x, raw->red.y); PL_DEBUG(p, " Green: x=%.3f, y=%.3f", raw->green.x, raw->green.y); PL_DEBUG(p, " Blue: x=%.3f, y=%.3f", raw->blue.x, raw->blue.y); PL_INFO(p, " Containing primaries: %s", pl_color_primaries_name(icc->containing_primaries)); } if (icc->csp.transfer) { PL_INFO(p, " Transfer function: %s", pl_color_transfer_name(icc->csp.transfer)); } else { PL_INFO(p, " Approximation gamma: %.3f (stddev %.1f%s)", icc->gamma, p->gamma_stddev, p->gamma_stddev > 0.5 ? ", inaccurate!" 
: ""); } return true; } pl_icc_object pl_icc_open(pl_log log, const struct pl_icc_profile *profile, const struct pl_icc_params *params) { if (!profile->len) return NULL; struct pl_icc_object_t *icc = pl_zalloc_obj(NULL, icc, struct icc_priv); struct icc_priv *p = PL_PRIV(icc); icc->params = params ? *params : pl_icc_default_params; icc->signature = profile->signature; p->log = log; p->cms = cmsCreateContext(NULL, (void *) log); if (!p->cms) { PL_ERR(p, "Failed creating LittleCMS context!"); goto error; } cmsSetLogErrorHandlerTHR(p->cms, error_callback); PL_DEBUG(p, "Opening new ICC profile"); p->profile = cmsOpenProfileFromMemTHR(p->cms, profile->data, profile->len); if (!p->profile) { PL_ERR(p, "Failed opening ICC profile"); goto error; } if (cmsGetColorSpace(p->profile) != cmsSigRgbData) { PL_ERR(p, "Invalid ICC profile: not RGB"); goto error; } if (!icc_init(icc)) goto error; return icc; error: pl_icc_close((pl_icc_object *) &icc); return NULL; } static bool icc_reopen(pl_icc_object kicc, const struct pl_icc_params *params) { struct pl_icc_object_t *icc = (struct pl_icc_object_t *) kicc; struct icc_priv *p = PL_PRIV(icc); cmsCloseProfile(p->approx); pl_cache_destroy(&p->cache); *icc = (struct pl_icc_object_t) { .params = *params, .signature = icc->signature, }; *p = (struct icc_priv) { .log = p->log, .cms = p->cms, .profile = p->profile, }; PL_DEBUG(p, "Reinitializing ICC profile in-place"); return icc_init(icc); } bool pl_icc_update(pl_log log, pl_icc_object *out_icc, const struct pl_icc_profile *profile, const struct pl_icc_params *params) { params = PL_DEF(params, &pl_icc_default_params); pl_icc_object icc = *out_icc; if (!icc && !profile) return false; // nothing to update uint64_t sig = profile ? profile->signature : icc->signature; if (!icc || icc->signature != sig) { pl_assert(profile); pl_icc_close(&icc); *out_icc = icc = pl_icc_open(log, profile, params); return icc != NULL; } int size_r = PL_DEF(params->size_r, icc->params.size_r); int size_g = PL_DEF(params->size_g, icc->params.size_g); int size_b = PL_DEF(params->size_b, icc->params.size_b); bool compat = params->intent == icc->params.intent && params->max_luma == icc->params.max_luma && params->force_bpc == icc->params.force_bpc && size_r == icc->params.size_r && size_g == icc->params.size_g && size_b == icc->params.size_b; if (compat) return true; // ICC signature is the same but parameters are different, re-open in-place if (!icc_reopen(icc, params)) { pl_icc_close(&icc); *out_icc = NULL; return false; } return true; } static void fill_lut(void *datap, const struct sh_lut_params *params, bool decode) { pl_icc_object icc = params->priv; struct icc_priv *p = PL_PRIV(icc); cmsHPROFILE srcp = decode ? p->profile : p->approx; cmsHPROFILE dstp = decode ? 
p->approx : p->profile; int s_r = params->width, s_g = params->height, s_b = params->depth; pl_clock_t start = pl_clock_now(); cmsHTRANSFORM tf = cmsCreateTransformTHR(p->cms, srcp, TYPE_RGB_16, dstp, TYPE_RGBA_16, icc->params.intent, cmsFLAGS_BLACKPOINTCOMPENSATION | cmsFLAGS_NOCACHE | cmsFLAGS_NOOPTIMIZE); if (!tf) return; pl_clock_t after_transform = pl_clock_now(); pl_log_cpu_time(p->log, start, after_transform, "creating ICC transform"); uint16_t *tmp = pl_alloc(NULL, s_r * 3 * sizeof(tmp[0])); for (int b = 0; b < s_b; b++) { for (int g = 0; g < s_g; g++) { // Transform a single line of the output buffer for (int r = 0; r < s_r; r++) { tmp[r * 3 + 0] = r * 65535 / (s_r - 1); tmp[r * 3 + 1] = g * 65535 / (s_g - 1); tmp[r * 3 + 2] = b * 65535 / (s_b - 1); } size_t offset = (b * s_g + g) * s_r * 4; uint16_t *data = ((uint16_t *) datap) + offset; cmsDoTransform(tf, tmp, data, s_r); if (!icc->params.force_bpc) continue; // Fix the black point manually. Work-around for "improper" // profiles, as black point compensation should already have // taken care of this normally. const uint16_t knee = 16u << 8; if (tmp[0] >= knee || tmp[1] >= knee) continue; for (int r = 0; r < s_r; r++) { uint16_t s = (2 * tmp[1] + tmp[2] + tmp[r * 3]) >> 2; if (s >= knee) break; for (int c = 0; c < 3; c++) data[r * 3 + c] = (s * data[r * 3 + c] + (knee - s) * s) >> 12; } } } pl_log_cpu_time(p->log, after_transform, pl_clock_now(), "generating ICC 3DLUT"); cmsDeleteTransform(tf); pl_free(tmp); } static void fill_decode(void *datap, const struct sh_lut_params *params) { fill_lut(datap, params, true); } static void fill_encode(void *datap, const struct sh_lut_params *params) { fill_lut(datap, params, false); } static pl_cache get_cache(pl_icc_object icc, pl_shader sh) { struct icc_priv *p = PL_PRIV(icc); return PL_DEF(icc->params.cache, PL_DEF(p->cache, SH_CACHE(sh))); } void pl_icc_decode(pl_shader sh, pl_icc_object icc, pl_shader_obj *lut_obj, struct pl_color_space *out_csp) { struct icc_priv *p = PL_PRIV(icc); if (!sh_require(sh, PL_SHADER_SIG_COLOR, 0, 0)) return; pl_fmt fmt = pl_find_fmt(SH_GPU(sh), PL_FMT_UNORM, 4, 16, 16, PL_FMT_CAP_LINEAR); if (!fmt) { SH_FAIL(sh, "Failed finding ICC 3DLUT texture format!"); return; } ident_t lut = sh_lut(sh, sh_lut_params( .object = lut_obj, .var_type = PL_VAR_FLOAT, .method = SH_LUT_TETRAHEDRAL, .fmt = fmt, .width = icc->params.size_r, .height = icc->params.size_g, .depth = icc->params.size_b, .comps = 4, .signature = p->lut_sig, .fill = fill_decode, .cache = get_cache(icc, sh), .priv = (void *) icc, )); if (!lut) { SH_FAIL(sh, "pl_icc_decode: failed generating LUT object"); return; } // Y = scale * (aX + b)^y sh_describe(sh, "ICC 3DLUT"); GLSL("// pl_icc_decode \n" "{ \n" "color.rgb = "$"(color.rgb).rgb; \n" "color.rgb = "$" * color.rgb + vec3("$"); \n" "color.rgb = pow(color.rgb, vec3("$")); \n" "color.rgb = "$" * color.rgb; \n" "} \n", lut, SH_FLOAT(p->a), SH_FLOAT(p->b), SH_FLOAT(icc->gamma), SH_FLOAT(p->scale)); if (out_csp) { *out_csp = (struct pl_color_space) { .primaries = icc->containing_primaries, .transfer = PL_COLOR_TRC_LINEAR, .hdr = icc->csp.hdr, }; } } void pl_icc_encode(pl_shader sh, pl_icc_object icc, pl_shader_obj *lut_obj) { struct icc_priv *p = PL_PRIV(icc); if (!sh_require(sh, PL_SHADER_SIG_COLOR, 0, 0)) return; pl_fmt fmt = pl_find_fmt(SH_GPU(sh), PL_FMT_UNORM, 4, 16, 16, PL_FMT_CAP_LINEAR); if (!fmt) { SH_FAIL(sh, "Failed finding ICC 3DLUT texture format!"); return; } ident_t lut = sh_lut(sh, sh_lut_params( .object = lut_obj, .var_type = PL_VAR_FLOAT, 
.method = SH_LUT_TETRAHEDRAL, .fmt = fmt, .width = icc->params.size_r, .height = icc->params.size_g, .depth = icc->params.size_b, .comps = 4, .signature = ~p->lut_sig, // avoid confusion with decoding LUTs .fill = fill_encode, .cache = get_cache(icc, sh), .priv = (void *) icc, )); if (!lut) { SH_FAIL(sh, "pl_icc_encode: failed generating LUT object"); return; } // X = 1/a * (Y/scale)^(1/y) - b/a sh_describe(sh, "ICC 3DLUT"); GLSL("// pl_icc_encode \n" "{ \n" "color.rgb = max(color.rgb, 0.0); \n" "color.rgb = 1.0/"$" * color.rgb; \n" "color.rgb = pow(color.rgb, vec3("$")); \n" "color.rgb = 1.0/"$" * color.rgb - "$"; \n" "color.rgb = "$"(color.rgb).rgb; \n" "} \n", SH_FLOAT(p->scale), SH_FLOAT(1.0f / icc->gamma), SH_FLOAT(p->a), SH_FLOAT(p->b / p->a), lut); } #else // !PL_HAVE_LCMS void pl_icc_close(pl_icc_object *picc) {}; pl_icc_object pl_icc_open(pl_log log, const struct pl_icc_profile *profile, const struct pl_icc_params *pparams) { pl_err(log, "libplacebo compiled without LittleCMS 2 support!"); return NULL; } bool pl_icc_update(pl_log log, pl_icc_object *obj, const struct pl_icc_profile *profile, const struct pl_icc_params *params) { static bool warned; if (!warned) { pl_err(log, "libplacebo compiled without LittleCMS 2 support!"); warned = true; } *obj = NULL; return false; } void pl_icc_decode(pl_shader sh, pl_icc_object icc, pl_shader_obj *lut_obj, struct pl_color_space *out_csp) { pl_unreachable(); // can't get a pl_icc_object } void pl_icc_encode(pl_shader sh, pl_icc_object icc, pl_shader_obj *lut_obj) { pl_unreachable(); } #endif libplacebo-v7.349.0/src/shaders/lut.c000066400000000000000000000732771463457750100174040ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . 
*/ #include #include #include "shaders.h" #include static inline bool isnumeric(char c) { return (c >= '0' && c <= '9') || c == '-'; } void pl_lut_free(struct pl_custom_lut **lut) { pl_free_ptr(lut); } struct pl_custom_lut *pl_lut_parse_cube(pl_log log, const char *cstr, size_t cstr_len) { struct pl_custom_lut *lut = pl_zalloc_ptr(NULL, lut); pl_str str = (pl_str) { (uint8_t *) cstr, cstr_len }; lut->signature = pl_str_hash(str); int entries = 0; float min[3] = { 0.0, 0.0, 0.0 }; float max[3] = { 1.0, 1.0, 1.0 }; // Parse header while (str.len && !isnumeric(str.buf[0])) { pl_str line = pl_str_strip(pl_str_getline(str, &str)); if (!line.len) continue; // skip empty line if (pl_str_eatstart0(&line, "TITLE")) { pl_info(log, "Loading LUT: %.*s", PL_STR_FMT(pl_str_strip(line))); continue; } if (pl_str_eatstart0(&line, "LUT_3D_SIZE")) { line = pl_str_strip(line); int size; if (!pl_str_parse_int(line, &size)) { pl_err(log, "Failed parsing dimension '%.*s'", PL_STR_FMT(line)); goto error; } if (size <= 0 || size > 1024) { pl_err(log, "Invalid 3DLUT size: %dx%d%x", size, size, size); goto error; } lut->size[0] = lut->size[1] = lut->size[2] = size; entries = size * size * size; continue; } if (pl_str_eatstart0(&line, "LUT_1D_SIZE")) { line = pl_str_strip(line); int size; if (!pl_str_parse_int(line, &size)) { pl_err(log, "Failed parsing dimension '%.*s'", PL_STR_FMT(line)); goto error; } if (size <= 0 || size > 65536) { pl_err(log, "Invalid 1DLUT size: %d", size); goto error; } lut->size[0] = size; lut->size[1] = lut->size[2] = 0; entries = size; continue; } if (pl_str_eatstart0(&line, "DOMAIN_MIN")) { line = pl_str_strip(line); if (!pl_str_parse_float(pl_str_split_char(line, ' ', &line), &min[0]) || !pl_str_parse_float(pl_str_split_char(line, ' ', &line), &min[1]) || !pl_str_parse_float(line, &min[2])) { pl_err(log, "Failed parsing domain: '%.*s'", PL_STR_FMT(line)); goto error; } continue; } if (pl_str_eatstart0(&line, "DOMAIN_MAX")) { line = pl_str_strip(line); if (!pl_str_parse_float(pl_str_split_char(line, ' ', &line), &max[0]) || !pl_str_parse_float(pl_str_split_char(line, ' ', &line), &max[1]) || !pl_str_parse_float(line, &max[2])) { pl_err(log, "Failed parsing domain: '%.*s'", PL_STR_FMT(line)); goto error; } continue; } if (pl_str_eatstart0(&line, "#")) { pl_debug(log, "Unhandled .cube comment: %.*s", PL_STR_FMT(pl_str_strip(line))); continue; } pl_warn(log, "Unhandled .cube line: %.*s", PL_STR_FMT(pl_str_strip(line))); } if (!entries) { pl_err(log, "Missing LUT size specification?"); goto error; } for (int i = 0; i < 3; i++) { if (max[i] - min[i] < 1e-6) { pl_err(log, "Invalid domain range: [%f, %f]", min[i], max[i]); goto error; } } float *data = pl_alloc(lut, sizeof(float[3]) * entries); lut->data = data; // Parse LUT body pl_clock_t start = pl_clock_now(); for (int n = 0; n < entries; n++) { for (int c = 0; c < 3; c++) { static const char * const digits = "0123456789.-+e"; // Extract valid digit sequence size_t len = pl_strspn(str, digits); pl_str entry = (pl_str) { str.buf, len }; str.buf += len; str.len -= len; if (!entry.len) { if (!str.len) { pl_err(log, "Failed parsing LUT: Unexpected EOF, expected " "%d entries, got %d", entries * 3, n * 3 + c + 1); } else { pl_err(log, "Failed parsing LUT: Unexpected '%c', expected " "digit", str.buf[0]); } goto error; } float num; if (!pl_str_parse_float(entry, &num)) { pl_err(log, "Failed parsing float value '%.*s'", PL_STR_FMT(entry)); goto error; } // Rescale to range 0.0 - 1.0 *data++ = (num - min[c]) / (max[c] - min[c]); // Skip whitespace 
between digits str = pl_str_strip(str); } } str = pl_str_strip(str); if (str.len) pl_warn(log, "Extra data after LUT?... ignoring '%c'", str.buf[0]); pl_log_cpu_time(log, start, pl_clock_now(), "parsing .cube LUT"); return lut; error: pl_free(lut); return NULL; } static void fill_lut(void *datap, const struct sh_lut_params *params) { const struct pl_custom_lut *lut = params->priv; int dim_r = params->width; int dim_g = PL_DEF(params->height, 1); int dim_b = PL_DEF(params->depth, 1); float *data = datap; for (int b = 0; b < dim_b; b++) { for (int g = 0; g < dim_g; g++) { for (int r = 0; r < dim_r; r++) { size_t offset = (b * dim_g + g) * dim_r + r; const float *src = &lut->data[offset * 3]; float *dst = &data[offset * 4]; dst[0] = src[0]; dst[1] = src[1]; dst[2] = src[2]; dst[3] = 0.0f; } } } } void pl_shader_custom_lut(pl_shader sh, const struct pl_custom_lut *lut, pl_shader_obj *lut_state) { if (!lut) return; int dims; if (lut->size[0] > 0 && lut->size[1] > 0 && lut->size[2] > 0) { dims = 3; } else if (lut->size[0] > 0 && !lut->size[1] && !lut->size[2]) { dims = 1; } else { SH_FAIL(sh, "Invalid dimensions %dx%dx%d for pl_custom_lut, must be 1D " "or 3D!", lut->size[0], lut->size[1], lut->size[2]); return; } if (!sh_require(sh, PL_SHADER_SIG_COLOR, 0, 0)) return; ident_t fun = sh_lut(sh, sh_lut_params( .object = lut_state, .var_type = PL_VAR_FLOAT, .method = SH_LUT_TETRAHEDRAL, .width = lut->size[0], .height = lut->size[1], .depth = lut->size[2], .comps = 4, // for better texel alignment .signature = lut->signature, .fill = fill_lut, .priv = (void *) lut, )); if (!fun) { SH_FAIL(sh, "pl_shader_custom_lut: failed generating LUT object"); return; } GLSL("// pl_shader_custom_lut \n"); static const pl_matrix3x3 zero = {0}; if (memcmp(&lut->shaper_in, &zero, sizeof(zero)) != 0) { GLSL("color.rgb = "$" * color.rgb; \n", sh_var(sh, (struct pl_shader_var) { .var = pl_var_mat3("shaper_in"), .data = PL_TRANSPOSE_3X3(lut->shaper_in.m), })); } switch (dims) { case 1: sh_describe(sh, "custom 1DLUT"); GLSL("color.rgb = vec3("$"(color.r).r, \n" " "$"(color.g).g, \n" " "$"(color.b).b); \n", fun, fun, fun); break; case 3: sh_describe(sh, "custom 3DLUT"); GLSL("color.rgb = "$"(color.rgb).rgb; \n", fun); break; } if (memcmp(&lut->shaper_out, &zero, sizeof(zero)) != 0) { GLSL("color.rgb = "$" * color.rgb; \n", sh_var(sh, (struct pl_shader_var) { .var = pl_var_mat3("shaper_out"), .data = PL_TRANSPOSE_3X3(lut->shaper_out.m), })); } } // Defines a LUT position helper macro. This translates from an absolute texel // scale (either in texels, or normalized to [0,1]) to the texture coordinate // scale for the corresponding sample in a texture of dimension `lut_size`. static ident_t texel_scale(pl_shader sh, int lut_size, bool normalized) { const float base = 0.5f / lut_size; const float end = 1.0f - 0.5f / lut_size; const float scale = (end - base) / (normalized ? 
1.0f : (lut_size - 1)); ident_t name = sh_fresh(sh, "LUT_SCALE"); GLSLH("#define "$"(x) ("$" * (x) + "$") \n", name, SH_FLOAT(scale), SH_FLOAT(base)); return name; } struct sh_lut_obj { enum sh_lut_type type; enum sh_lut_method method; enum pl_var_type vartype; pl_fmt fmt; int width, height, depth, comps; uint64_t signature; bool error; // reset if params change // weights, depending on the lut type pl_tex tex; pl_str str; void *data; }; static void sh_lut_uninit(pl_gpu gpu, void *ptr) { struct sh_lut_obj *lut = ptr; pl_tex_destroy(gpu, &lut->tex); pl_free(lut->str.buf); pl_free(lut->data); *lut = (struct sh_lut_obj) {0}; } // Maximum number of floats to embed as a literal array (when using SH_LUT_AUTO) #define SH_LUT_MAX_LITERAL_SOFT 64 #define SH_LUT_MAX_LITERAL_HARD 256 ident_t sh_lut(pl_shader sh, const struct sh_lut_params *params) { pl_gpu gpu = SH_GPU(sh); pl_cache_obj obj = { .key = CACHE_KEY_SH_LUT ^ params->signature }; const enum pl_var_type vartype = params->var_type; pl_assert(vartype != PL_VAR_INVALID); pl_assert(params->method == SH_LUT_NONE || vartype == PL_VAR_FLOAT); pl_assert(params->width > 0 && params->height >= 0 && params->depth >= 0); pl_assert(params->comps > 0); pl_assert(!params->cache || params->signature); int sizes[] = { params->width, params->height, params->depth }; int size = params->width * PL_DEF(params->height, 1) * PL_DEF(params->depth, 1); int dims = params->depth ? 3 : params->height ? 2 : 1; enum sh_lut_method method = params->method; if (method == SH_LUT_TETRAHEDRAL && dims != 3) method = SH_LUT_LINEAR; if (method == SH_LUT_CUBIC && dims != 3) method = SH_LUT_LINEAR; int texdim = 0; uint32_t max_tex_dim[] = { gpu ? gpu->limits.max_tex_1d_dim : 0, gpu ? gpu->limits.max_tex_2d_dim : 0, (gpu && gpu->glsl.version > 100) ? gpu->limits.max_tex_3d_dim : 0, }; struct sh_lut_obj *lut = SH_OBJ(sh, params->object, PL_SHADER_OBJ_LUT, struct sh_lut_obj, sh_lut_uninit); if (!lut) return NULL_IDENT; bool update = params->update || lut->signature != params->signature || vartype != lut->vartype || params->fmt != lut->fmt || params->width != lut->width || params->height != lut->height || params->depth != lut->depth || params->comps != lut->comps; if (lut->error && !update) return NULL_IDENT; // suppress error spam until something changes // Try picking the right number of dimensions for the texture LUT. This // allows e.g. falling back to 2D textures if 1D textures are unsupported. 
for (int d = dims; d <= PL_ARRAY_SIZE(max_tex_dim); d++) { // For a given dimension to be compatible, all coordinates need to be // within the maximum texture size for that dimension for (int i = 0; i < d; i++) { if (sizes[i] > max_tex_dim[d - 1]) goto next_dim; } // All dimensions are compatible, so pick this texture dimension texdim = d; break; next_dim: ; // `continue` out of the inner loop } static const enum pl_fmt_type fmt_type[PL_VAR_TYPE_COUNT] = { [PL_VAR_SINT] = PL_FMT_SINT, [PL_VAR_UINT] = PL_FMT_UINT, [PL_VAR_FLOAT] = PL_FMT_FLOAT, }; enum pl_fmt_caps texcaps = PL_FMT_CAP_SAMPLEABLE; bool is_linear = method == SH_LUT_LINEAR || method == SH_LUT_CUBIC; if (is_linear) texcaps |= PL_FMT_CAP_LINEAR; pl_fmt texfmt = params->fmt; if (texfmt) { bool ok; switch (texfmt->type) { case PL_FMT_SINT: ok = vartype == PL_VAR_SINT; break; case PL_FMT_UINT: ok = vartype == PL_VAR_UINT; break; default: ok = vartype == PL_VAR_FLOAT; break; } if (!ok) { PL_ERR(sh, "Specified texture format '%s' does not match LUT " "data type!", texfmt->name); goto error; } if (~texfmt->caps & texcaps) { PL_ERR(sh, "Specified texture format '%s' does not match " "required capabilities 0x%x!\n", texfmt->name, texcaps); goto error; } } if (texdim && !texfmt) { texfmt = pl_find_fmt(gpu, fmt_type[vartype], params->comps, vartype == PL_VAR_FLOAT ? 16 : 32, pl_var_type_size(vartype) * 8, texcaps); } enum sh_lut_type type = params->lut_type; // The linear sampling code currently only supports 1D linear interpolation if (is_linear && dims > 1) { if (texfmt) { type = SH_LUT_TEXTURE; } else { PL_ERR(sh, "Can't emulate linear LUTs for 2D/3D LUTs and no " "texture support available!"); goto error; } } bool can_uniform = gpu && gpu->limits.max_variable_comps >= size * params->comps; bool can_literal = sh_glsl(sh).version > 110; // needed for literal arrays can_literal &= size <= SH_LUT_MAX_LITERAL_HARD && !params->dynamic; // Deselect unsupported methods if (type == SH_LUT_UNIFORM && !can_uniform) type = SH_LUT_AUTO; if (type == SH_LUT_LITERAL && !can_literal) type = SH_LUT_AUTO; if (type == SH_LUT_TEXTURE && !texfmt) type = SH_LUT_AUTO; // Sorted by priority if (!type && can_literal && !method && size <= SH_LUT_MAX_LITERAL_SOFT) type = SH_LUT_LITERAL; if (!type && texfmt) type = SH_LUT_TEXTURE; if (!type && can_uniform) type = SH_LUT_UNIFORM; if (!type && can_literal) type = SH_LUT_LITERAL; if (!type) { PL_ERR(sh, "Can't generate LUT: no compatible methods!"); goto error; } // Reinitialize the existing LUT if needed update |= type != lut->type; update |= method != lut->method; if (update) { if (params->dynamic) pl_log_level_cap(sh->log, PL_LOG_TRACE); size_t el_size = params->comps * pl_var_type_size(vartype); if (type == SH_LUT_TEXTURE) el_size = texfmt->texel_size; size_t buf_size = size * el_size; if (pl_cache_get(params->cache, &obj) && obj.size == buf_size) { PL_DEBUG(sh, "Re-using cached LUT (0x%"PRIx64") with size %zu", obj.key, obj.size); } else { PL_DEBUG(sh, "LUT invalidated, regenerating.."); pl_cache_obj_resize(NULL, &obj, buf_size); pl_clock_t start = pl_clock_now(); params->fill(obj.data, params); pl_log_cpu_time(sh->log, start, pl_clock_now(), "generating shader LUT"); } pl_assert(obj.data && obj.size); if (params->dynamic) pl_log_level_cap(sh->log, PL_LOG_NONE); switch (type) { case SH_LUT_TEXTURE: { if (!texdim) { PL_ERR(sh, "Texture LUT exceeds texture dimensions!"); goto error; } if (!texfmt) { PL_ERR(sh, "Found no compatible texture format for LUT!"); goto error; } struct pl_tex_params tex_params = { .w = 
params->width, .h = PL_DEF(params->height, texdim >= 2 ? 1 : 0), .d = PL_DEF(params->depth, texdim >= 3 ? 1 : 0), .format = texfmt, .sampleable = true, .host_writable = params->dynamic, .initial_data = params->dynamic ? NULL : obj.data, .debug_tag = params->debug_tag, }; bool ok; if (params->dynamic) { ok = pl_tex_recreate(gpu, &lut->tex, &tex_params); if (ok) { ok = pl_tex_upload(gpu, pl_tex_transfer_params( .tex = lut->tex, .ptr = obj.data, )); } } else { // Can't use pl_tex_recreate because of `initial_data` pl_tex_destroy(gpu, &lut->tex); lut->tex = pl_tex_create(gpu, &tex_params); ok = lut->tex; } if (!ok) { PL_ERR(sh, "Failed creating LUT texture!"); goto error; } break; } case SH_LUT_UNIFORM: pl_free(lut->data); lut->data = pl_memdup(NULL, obj.data, obj.size); break; case SH_LUT_LITERAL: { lut->str.len = 0; static const char prefix[PL_VAR_TYPE_COUNT] = { [PL_VAR_SINT] = 'i', [PL_VAR_UINT] = 'u', [PL_VAR_FLOAT] = ' ', }; for (int i = 0; i < size * params->comps; i += params->comps) { if (i > 0) pl_str_append_asprintf_c(lut, &lut->str, ","); if (params->comps > 1) { pl_str_append_asprintf_c(lut, &lut->str, "%cvec%d(", prefix[vartype], params->comps); } for (int c = 0; c < params->comps; c++) { switch (vartype) { case PL_VAR_FLOAT: pl_str_append_asprintf_c(lut, &lut->str, "%s%f", c > 0 ? "," : "", ((float *) obj.data)[i+c]); break; case PL_VAR_UINT: pl_str_append_asprintf_c(lut, &lut->str, "%s%u", c > 0 ? "," : "", ((unsigned int *) obj.data)[i+c]); break; case PL_VAR_SINT: pl_str_append_asprintf_c(lut, &lut->str, "%s%d", c > 0 ? "," : "", ((int *) obj.data)[i+c]); break; case PL_VAR_INVALID: case PL_VAR_TYPE_COUNT: pl_unreachable(); } } if (params->comps > 1) pl_str_append_asprintf_c(lut, &lut->str, ")"); } break; } case SH_LUT_AUTO: pl_unreachable(); } lut->type = type; lut->method = method; lut->vartype = vartype; lut->fmt = params->fmt; lut->width = params->width; lut->height = params->height; lut->depth = params->depth; lut->comps = params->comps; lut->signature = params->signature; pl_cache_set(params->cache, &obj); } // Done updating, generate the GLSL ident_t name = sh_fresh(sh, "lut"); ident_t arr_name = NULL_IDENT; static const char * const swizzles[] = {"x", "xy", "xyz", "xyzw"}; static const char * const vartypes[PL_VAR_TYPE_COUNT][4] = { [PL_VAR_SINT] = { "int", "ivec2", "ivec3", "ivec4" }, [PL_VAR_UINT] = { "uint", "uvec2", "uvec3", "uvec4" }, [PL_VAR_FLOAT] = { "float", "vec2", "vec3", "vec4" }, }; switch (type) { case SH_LUT_TEXTURE: { assert(texdim); ident_t tex = sh_desc(sh, (struct pl_shader_desc) { .desc = { .name = "weights", .type = PL_DESC_SAMPLED_TEX, }, .binding = { .object = lut->tex, .sample_mode = is_linear ? PL_TEX_SAMPLE_LINEAR : PL_TEX_SAMPLE_NEAREST, } }); if (is_linear) { ident_t pos_macros[PL_ARRAY_SIZE(sizes)] = {0}; for (int i = 0; i < dims; i++) pos_macros[i] = texel_scale(sh, sizes[i], true); GLSLH("#define "$"(pos) (textureLod("$", %s(\\\n", name, tex, vartypes[PL_VAR_FLOAT][texdim - 1]); for (int i = 0; i < texdim; i++) { char sep = i == 0 ? 
' ' : ','; if (pos_macros[i]) { if (dims > 1) { GLSLH(" %c"$"(%s(pos).%c)\\\n", sep, pos_macros[i], vartypes[PL_VAR_FLOAT][dims - 1], "xyzw"[i]); } else { GLSLH(" %c"$"(float(pos))\\\n", sep, pos_macros[i]); } } else { GLSLH(" %c%f\\\n", sep, 0.5); } } GLSLH(" ), 0.0).%s)\n", swizzles[params->comps - 1]); } else { GLSLH("#define "$"(pos) (texelFetch("$", %s(pos", name, tex, vartypes[PL_VAR_SINT][texdim - 1]); // Fill up extra components of the index for (int i = dims; i < texdim; i++) GLSLH(", 0"); GLSLH("), 0).%s)\n", swizzles[params->comps - 1]); } break; } case SH_LUT_UNIFORM: arr_name = sh_var(sh, (struct pl_shader_var) { .var = { .name = "weights", .type = vartype, .dim_v = params->comps, .dim_m = 1, .dim_a = size, }, .data = lut->data, }); break; case SH_LUT_LITERAL: arr_name = sh_fresh(sh, "weights"); GLSLH("const %s "$"[%d] = %s[](\n ", vartypes[vartype][params->comps - 1], arr_name, size, vartypes[vartype][params->comps - 1]); sh_append_str(sh, SH_BUF_HEADER, lut->str); GLSLH(");\n"); break; case SH_LUT_AUTO: pl_unreachable(); } if (arr_name) { GLSLH("#define "$"(pos) ("$"[int((pos)%s)\\\n", name, arr_name, dims > 1 ? "[0]" : ""); int shift = params->width; for (int i = 1; i < dims; i++) { GLSLH(" + %d * int((pos)[%d])\\\n", shift, i); shift *= sizes[i]; } GLSLH(" ])\n"); if (is_linear) { pl_assert(dims == 1); pl_assert(vartype == PL_VAR_FLOAT); ident_t arr_lut = name; name = sh_fresh(sh, "lut_lin"); GLSLH("%s "$"(float fpos) { \n" " fpos = clamp(fpos, 0.0, 1.0) * %d.0; \n" " float fbase = floor(fpos); \n" " float fceil = ceil(fpos); \n" " float fcoord = fpos - fbase; \n" " return mix("$"(fbase), "$"(fceil), fcoord); \n" "} \n", vartypes[PL_VAR_FLOAT][params->comps - 1], name, size - 1, arr_lut, arr_lut); } } if (method == SH_LUT_CUBIC && dims == 3) { ident_t lin_lut = name; name = sh_fresh(sh, "lut_tricubic"); GLSLH("%s "$"(vec3 pos) { \n" " vec3 scale = vec3(%d.0, %d.0, %d.0); \n" " vec3 scale_inv = 1.0 / scale; \n" " pos *= scale; \n" " vec3 fpos = fract(pos); \n" " vec3 base = pos - fpos; \n" " vec3 fpos2 = fpos * fpos; \n" " vec3 inv = 1.0 - fpos; \n" " vec3 inv2 = inv * inv; \n" " vec3 w0 = 1.0/6.0 * inv2 * inv; \n" " vec3 w1 = 2.0/3.0 - 0.5 * fpos2 * (2.0 - fpos); \n" " vec3 w2 = 2.0/3.0 - 0.5 * inv2 * (2.0 - inv); \n" " vec3 w3 = 1.0/6.0 * fpos2 * fpos; \n" " vec3 g0 = w0 + w1; \n" " vec3 g1 = w2 + w3; \n" " vec3 h0 = scale_inv * ((w1 / g0) - 1.0 + base); \n" " vec3 h1 = scale_inv * ((w3 / g1) + 1.0 + base); \n" " %s c000, c001, c010, c011, c100, c101, c110, c111; \n" " c000 = "$"(h0); \n" " c100 = "$"(vec3(h1.x, h0.y, h0.z)); \n" " c000 = mix(c100, c000, g0.x); \n" " c010 = "$"(vec3(h0.x, h1.y, h0.z)); \n" " c110 = "$"(vec3(h1.x, h1.y, h0.z)); \n" " c010 = mix(c110, c010, g0.x); \n" " c000 = mix(c010, c000, g0.y); \n" " c001 = "$"(vec3(h0.x, h0.y, h1.z)); \n" " c101 = "$"(vec3(h1.x, h0.y, h1.z)); \n" " c001 = mix(c101, c001, g0.x); \n" " c011 = "$"(vec3(h0.x, h1.y, h1.z)); \n" " c111 = "$"(h1); \n" " c011 = mix(c111, c011, g0.x); \n" " c001 = mix(c011, c001, g0.y); \n" " return mix(c001, c000, g0.z); \n" "} \n", vartypes[PL_VAR_FLOAT][params->comps - 1], name, sizes[0] - 1, sizes[1] - 1, sizes[2] - 1, vartypes[PL_VAR_FLOAT][params->comps - 1], lin_lut, lin_lut, lin_lut, lin_lut, lin_lut, lin_lut, lin_lut, lin_lut); } if (method == SH_LUT_TETRAHEDRAL) { ident_t int_lut = name; name = sh_fresh(sh, "lut_barycentric"); GLSLH("%s "$"(vec3 pos) { \n" // Compute bounding vertices and fractional part " pos = clamp(pos, 0.0, 1.0) * vec3(%d.0, %d.0, %d.0); \n" " vec3 base = 
floor(pos); \n" " vec3 fpart = pos - base; \n" // v0 and v3 are always 'black' and 'white', respectively // v1 and v2 are the closest RGB and CMY vertices, respectively " ivec3 v0 = ivec3(base), v3 = ivec3(ceil(pos)); \n" " ivec3 v1 = v0, v2 = v3; \n" // Table of boolean checks to simplify following math " bvec3 c = greaterThanEqual(fpart.xyz, fpart.yzx); \n" " bool c_xy = c.x, c_yx = !c.x, \n" " c_yz = c.y, c_zy = !c.y, \n" " c_zx = c.z, c_xz = !c.z; \n" " vec3 s = fpart.xyz; \n" " bool cond; \n", vartypes[PL_VAR_FLOAT][params->comps - 1], name, sizes[0] - 1, sizes[1] - 1, sizes[2] - 1); // Subdivision of the cube into six congruent tetrahedras // // For each tetrahedron, test if the point is inside, and if so, update // the edge vertices. We test all six, even though only one case will // ever be true, because this avoids branches. static const char *indices[] = { "xyz", "xzy", "zxy", "zyx", "yzx", "yxz"}; for (int i = 0; i < PL_ARRAY_SIZE(indices); i++) { const char x = indices[i][0], y = indices[i][1], z = indices[i][2]; GLSLH("cond = c_%c%c && c_%c%c; \n" "s = cond ? fpart.%c%c%c : s; \n" "v1.%c = cond ? v3.%c : v1.%c; \n" "v2.%c = cond ? v0.%c : v2.%c; \n", x, y, y, z, x, y, z, x, x, x, z, z, z); } // Interpolate in barycentric coordinates, with four texel fetches GLSLH(" return (1.0 - s.x) * "$"(v0) + \n" " (s.x - s.y) * "$"(v1) + \n" " (s.y - s.z) * "$"(v2) + \n" " (s.z) * "$"(v3); \n" "} \n", int_lut, int_lut, int_lut, int_lut); } lut->error = false; pl_cache_obj_free(&obj); pl_assert(name); return name; error: lut->error = true; pl_cache_obj_free(&obj); return NULL_IDENT; } libplacebo-v7.349.0/src/shaders/meson.build000066400000000000000000000006051463457750100205570ustar00rootroot00000000000000shader_sources = [ 'colorspace.c', 'custom.c', 'custom_mpv.c', 'deinterlacing.c', 'dithering.c', 'film_grain.c', 'film_grain_av1.c', 'film_grain_h274.c', 'icc.c', 'lut.c', 'sampling.c', ] foreach s : shader_sources sources += custom_target(s, command: glsl_preproc, depend_files: glsl_deps, env: python_env, input: s, output: s, ) endforeach libplacebo-v7.349.0/src/shaders/sampling.c000066400000000000000000001374751463457750100204130ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . 
*/ #include #include "shaders.h" #include #include const struct pl_deband_params pl_deband_default_params = { PL_DEBAND_DEFAULTS }; static inline struct pl_tex_params src_params(const struct pl_sample_src *src) { if (src->tex) return src->tex->params; return (struct pl_tex_params) { .w = src->tex_w, .h = src->tex_h, }; } enum filter { NEAREST = PL_TEX_SAMPLE_NEAREST, LINEAR = PL_TEX_SAMPLE_LINEAR, BEST, FASTEST, }; // Helper function to compute the src/dst sizes and upscaling ratios static bool setup_src(pl_shader sh, const struct pl_sample_src *src, ident_t *src_tex, ident_t *pos, ident_t *pt, float *ratio_x, float *ratio_y, uint8_t *comp_mask, float *scale, bool resizeable, enum filter filter) { enum pl_shader_sig sig; float src_w, src_h; enum pl_tex_sample_mode sample_mode; if (src->tex) { pl_fmt fmt = src->tex->params.format; bool can_linear = fmt->caps & PL_FMT_CAP_LINEAR; pl_assert(pl_tex_params_dimension(src->tex->params) == 2); sig = PL_SHADER_SIG_NONE; src_w = pl_rect_w(src->rect); src_h = pl_rect_h(src->rect); switch (filter) { case FASTEST: case NEAREST: sample_mode = PL_TEX_SAMPLE_NEAREST; break; case LINEAR: if (!can_linear) { SH_FAIL(sh, "Trying to use a shader that requires linear " "sampling with a texture whose format (%s) does not " "support PL_FMT_CAP_LINEAR", fmt->name); return false; } sample_mode = PL_TEX_SAMPLE_LINEAR; break; case BEST: sample_mode = can_linear ? PL_TEX_SAMPLE_LINEAR : PL_TEX_SAMPLE_NEAREST; break; } } else { pl_assert(src->tex_w && src->tex_h); sig = PL_SHADER_SIG_SAMPLER; src_w = src->sampled_w; src_h = src->sampled_h; if (filter == BEST || filter == FASTEST) { sample_mode = src->mode; } else { sample_mode = (enum pl_tex_sample_mode) filter; if (sample_mode != src->mode) { SH_FAIL(sh, "Trying to use a shader that requires a different " "filter mode than the external sampler."); return false; } } } src_w = PL_DEF(src_w, src_params(src).w); src_h = PL_DEF(src_h, src_params(src).h); pl_assert(src_w && src_h); int out_w = PL_DEF(src->new_w, roundf(fabs(src_w))); int out_h = PL_DEF(src->new_h, roundf(fabs(src_h))); pl_assert(out_w && out_h); if (ratio_x) *ratio_x = out_w / fabs(src_w); if (ratio_y) *ratio_y = out_h / fabs(src_h); if (scale) *scale = PL_DEF(src->scale, 1.0); if (comp_mask) { uint8_t tex_mask = 0x0Fu; if (src->tex) { // Mask containing only the number of components in the texture tex_mask = (1 << src->tex->params.format->num_components) - 1; } uint8_t src_mask = src->component_mask; if (!src_mask) src_mask = (1 << PL_DEF(src->components, 4)) - 1; // Only actually sample components that are both requested and // available in the texture being sampled *comp_mask = tex_mask & src_mask; } if (resizeable) out_w = out_h = 0; if (!sh_require(sh, sig, out_w, out_h)) return false; if (src->tex) { pl_rect2df rect = { .x0 = src->rect.x0, .y0 = src->rect.y0, .x1 = src->rect.x0 + src_w, .y1 = src->rect.y0 + src_h, }; *src_tex = sh_bind(sh, src->tex, src->address_mode, sample_mode, "src_tex", &rect, pos, pt); } else { if (pt) { float sx = 1.0 / src->tex_w, sy = 1.0 / src->tex_h; if (src->sampler == PL_SAMPLER_RECT) sx = sy = 1.0; *pt = sh_var(sh, (struct pl_shader_var) { .var = pl_var_vec2("tex_pt"), .data = &(float[2]) { sx, sy }, }); } sh->sampler_type = src->sampler; pl_assert(src->format); switch (src->format) { case PL_FMT_UNKNOWN: case PL_FMT_FLOAT: case PL_FMT_UNORM: case PL_FMT_SNORM: sh->sampler_prefix = ' '; break; case PL_FMT_UINT: sh->sampler_prefix = 'u'; break; case PL_FMT_SINT: sh->sampler_prefix = 's'; break; case PL_FMT_TYPE_COUNT: 
pl_unreachable(); } *src_tex = sh_fresh(sh, "src_tex"); *pos = sh_fresh(sh, "pos"); GLSLH("#define "$" src_tex \n" "#define "$" pos \n", *src_tex, *pos); } return true; } void pl_shader_deband(pl_shader sh, const struct pl_sample_src *src, const struct pl_deband_params *params) { float scale; ident_t tex, pos, pt; uint8_t mask; if (!setup_src(sh, src, &tex, &pos, &pt, NULL, NULL, &mask, &scale, false, NEAREST)) return; params = PL_DEF(params, &pl_deband_default_params); sh_describe(sh, "debanding"); GLSL("vec4 color; \n" "// pl_shader_deband \n" "{ \n" "vec2 pos = "$", pt = "$"; \n" "color = textureLod("$", pos, 0.0);\n", pos, pt, tex); mask &= ~0x8u; // ignore alpha channel uint8_t num_comps = sh_num_comps(mask); const char *swiz = sh_swizzle(mask); pl_assert(num_comps <= 3); if (!num_comps) { GLSL("color *= "$"; \n" "} \n", SH_FLOAT(scale)); return; } GLSL("#define GET(X, Y) \\\n" " (textureLod("$", pos + pt * vec2(X, Y), 0.0).%s) \n" "#define T %s \n", tex, swiz, sh_float_type(mask)); ident_t prng = sh_prng(sh, true, NULL); GLSL("T avg, diff, bound; \n" "T res = color.%s; \n" "vec2 d; \n", swiz); if (params->iterations > 0) { ident_t radius = sh_const_float(sh, "radius", params->radius); ident_t threshold = sh_const_float(sh, "threshold", params->threshold / (1000 * scale)); // For each iteration, compute the average at a given distance and // pick it instead of the color if the difference is below the threshold. for (int i = 1; i <= params->iterations; i++) { GLSL(// Compute a random angle and distance "d = "$".xy * vec2(%d.0 * "$", %f); \n" "d = d.x * vec2(cos(d.y), sin(d.y)); \n" // Sample at quarter-turn intervals around the source pixel "avg = T(0.0); \n" "avg += GET(+d.x, +d.y); \n" "avg += GET(-d.x, +d.y); \n" "avg += GET(-d.x, -d.y); \n" "avg += GET(+d.x, -d.y); \n" "avg *= 0.25; \n" // Compare the (normalized) average against the pixel "diff = abs(res - avg); \n" "bound = T("$" / %d.0); \n", prng, i, radius, M_PI * 2, threshold, i); if (num_comps > 1) { GLSL("res = mix(avg, res, greaterThan(diff, bound)); \n"); } else { GLSL("res = mix(avg, res, diff > bound); \n"); } } } // Add some random noise to smooth out residual differences if (params->grain > 0) { // Avoid adding grain near true black GLSL("bound = T(\n"); for (int c = 0; c < num_comps; c++) { GLSL("%c"$, c > 0 ? 
',' : ' ', SH_FLOAT(params->grain_neutral[c] / scale)); } GLSL("); \n" "T strength = min(abs(res - bound), "$"); \n" "res += strength * (T("$") - T(0.5)); \n", SH_FLOAT(params->grain / (1000.0 * scale)), prng); } GLSL("color.%s = res; \n" "color *= "$"; \n" "#undef T \n" "#undef GET \n" "} \n", swiz, SH_FLOAT(scale)); } bool pl_shader_sample_direct(pl_shader sh, const struct pl_sample_src *src) { float scale; ident_t tex, pos; if (!setup_src(sh, src, &tex, &pos, NULL, NULL, NULL, NULL, &scale, true, BEST)) return false; GLSL("// pl_shader_sample_direct \n" "vec4 color = vec4("$") * textureLod("$", "$", 0.0); \n", SH_FLOAT(scale), tex, pos); return true; } bool pl_shader_sample_nearest(pl_shader sh, const struct pl_sample_src *src) { float scale; ident_t tex, pos; if (!setup_src(sh, src, &tex, &pos, NULL, NULL, NULL, NULL, &scale, true, NEAREST)) return false; sh_describe(sh, "nearest"); GLSL("// pl_shader_sample_nearest \n" "vec4 color = vec4("$") * textureLod("$", "$", 0.0); \n", SH_FLOAT(scale), tex, pos); return true; } bool pl_shader_sample_bilinear(pl_shader sh, const struct pl_sample_src *src) { float scale; ident_t tex, pos; if (!setup_src(sh, src, &tex, &pos, NULL, NULL, NULL, NULL, &scale, true, LINEAR)) return false; sh_describe(sh, "bilinear"); GLSL("// pl_shader_sample_bilinear \n" "vec4 color = vec4("$") * textureLod("$", "$", 0.0); \n", SH_FLOAT(scale), tex, pos); return true; } bool pl_shader_sample_bicubic(pl_shader sh, const struct pl_sample_src *src) { ident_t tex, pos, pt; float rx, ry, scale; if (!setup_src(sh, src, &tex, &pos, &pt, &rx, &ry, NULL, &scale, true, LINEAR)) return false; if (rx < 1 || ry < 1) { PL_TRACE(sh, "Using fast bicubic sampling when downscaling. This " "will most likely result in nasty aliasing!"); } // Explanation of how bicubic scaling with only 4 texel fetches is done: // http://www.mate.tue.nl/mate/pdfs/10318.pdf // 'Efficient GPU-Based Texture Interpolation using Uniform B-Splines' sh_describe(sh, "bicubic"); #pragma GLSL /* pl_shader_sample_bicubic */ \ vec4 color; \ { \ vec2 pos = $pos; \ vec2 size = vec2(textureSize($tex, 0)); \ vec2 frac = fract(pos * size + vec2(0.5)); \ vec2 frac2 = frac * frac; \ vec2 inv = vec2(1.0) - frac; \ vec2 inv2 = inv * inv; \ /* compute filter weights directly */ \ vec2 w0 = 1.0/6.0 * inv2 * inv; \ vec2 w1 = 2.0/3.0 - 0.5 * frac2 * (2.0 - frac); \ vec2 w2 = 2.0/3.0 - 0.5 * inv2 * (2.0 - inv); \ vec2 w3 = 1.0/6.0 * frac2 * frac; \ vec4 g = vec4(w0 + w1, w2 + w3); \ vec4 h = vec4(w1, w3) / g + inv.xyxy; \ h.xy -= vec2(2.0); \ /* sample four corners, then interpolate */ \ vec4 p = pos.xyxy + $pt.xyxy * h; \ vec4 c00 = textureLod($tex, p.xy, 0.0); \ vec4 c01 = textureLod($tex, p.xw, 0.0); \ vec4 c0 = mix(c01, c00, g.y); \ vec4 c10 = textureLod($tex, p.zy, 0.0); \ vec4 c11 = textureLod($tex, p.zw, 0.0); \ vec4 c1 = mix(c11, c10, g.y); \ color = ${float:scale} * mix(c1, c0, g.x); \ } return true; } bool pl_shader_sample_hermite(pl_shader sh, const struct pl_sample_src *src) { ident_t tex, pos, pt; float rx, ry, scale; if (!setup_src(sh, src, &tex, &pos, &pt, &rx, &ry, NULL, &scale, true, LINEAR)) return false; if (rx < 1 || ry < 1) { PL_TRACE(sh, "Using fast hermite sampling when downscaling. 
This " "will most likely result in nasty aliasing!"); } sh_describe(sh, "hermite"); #pragma GLSL /* pl_shader_sample_hermite */ \ vec4 color; \ { \ vec2 pos = $pos; \ vec2 size = vec2(textureSize($tex, 0)); \ vec2 frac = fract(pos * size + vec2(0.5)); \ pos += $pt * (smoothstep(0.0, 1.0, frac) - frac); \ color = ${float:scale} * textureLod($tex, pos, 0.0); \ } return true; } bool pl_shader_sample_gaussian(pl_shader sh, const struct pl_sample_src *src) { ident_t tex, pos, pt; float rx, ry, scale; if (!setup_src(sh, src, &tex, &pos, &pt, &rx, &ry, NULL, &scale, true, LINEAR)) return false; if (rx < 1 || ry < 1) { PL_TRACE(sh, "Using fast gaussian sampling when downscaling. This " "will most likely result in nasty aliasing!"); } sh_describe(sh, "gaussian"); #pragma GLSL /* pl_shader_sample_gaussian */ \ vec4 color; \ { \ vec2 pos = $pos; \ vec2 size = vec2(textureSize($tex, 0)); \ vec2 off = -fract(pos * size + vec2(0.5)); \ vec2 off2 = -2.0 * off * off; \ /* compute gaussian weights */ \ vec2 w0 = exp(off2 + 4.0 * off - vec2(2.0)); \ vec2 w1 = exp(off2); \ vec2 w2 = exp(off2 - 4.0 * off - vec2(2.0)); \ vec2 w3 = exp(off2 - 8.0 * off - vec2(8.0)); \ vec4 g = vec4(w0 + w1, w2 + w3); \ vec4 h = vec4(w1, w3) / g; \ h.xy -= vec2(1.0); \ h.zw += vec2(1.0); \ g.xy /= g.xy + g.zw; /* explicitly normalize */ \ /* sample four corners, then interpolate */ \ vec4 p = pos.xyxy + $pt.xyxy * (h + off.xyxy); \ vec4 c00 = textureLod($tex, p.xy, 0.0); \ vec4 c01 = textureLod($tex, p.xw, 0.0); \ vec4 c0 = mix(c01, c00, g.y); \ vec4 c10 = textureLod($tex, p.zy, 0.0); \ vec4 c11 = textureLod($tex, p.zw, 0.0); \ vec4 c1 = mix(c11, c10, g.y); \ color = ${float:scale} * mix(c1, c0, g.x); \ } return true; } bool pl_shader_sample_oversample(pl_shader sh, const struct pl_sample_src *src, float threshold) { ident_t tex, pos, pt; float rx, ry, scale; if (!setup_src(sh, src, &tex, &pos, &pt, &rx, &ry, NULL, &scale, true, LINEAR)) return false; threshold = PL_CLAMP(threshold, 0.0f, 0.5f); sh_describe(sh, "oversample"); #pragma GLSL /* pl_shader_sample_oversample */ \ vec4 color; \ { \ vec2 pos = $pos; \ vec2 size = vec2(textureSize($tex, 0)); \ /* Round the position to the nearest pixel */ \ vec2 fcoord = fract(pos * size - vec2(0.5)); \ float rx = ${dynamic float:rx}; \ float ry = ${dynamic float:ry}; \ vec2 coeff = (fcoord - vec2(0.5)) * vec2(rx, ry); \ coeff = clamp(coeff + vec2(0.5), 0.0, 1.0); \ @if (threshold > 0) { \ float thresh = ${float:threshold}; \ coeff = mix(coeff, vec2(0.0), \ lessThan(coeff, vec2(thresh))); \ coeff = mix(coeff, vec2(1.0), \ greaterThan(coeff, vec2(1.0 - thresh))); \ @} \ \ /* Compute the right output blend of colors */ \ pos += (coeff - fcoord) * $pt; \ color = ${float:scale} * textureLod($tex, pos, 0.0); \ } return true; } static void describe_filter(pl_shader sh, const struct pl_filter_config *cfg, const char *stage, float rx, float ry) { const char *dir; if (rx > 1 && ry > 1) { dir = "up"; } else if (rx < 1 && ry < 1) { dir = "down"; } else if (rx == 1 && ry == 1) { dir = "noop"; } else { dir = "ana"; } if (cfg->name) { sh_describef(sh, "%s %sscaling (%s)", stage, dir, cfg->name); } else if (cfg->window) { sh_describef(sh, "%s %sscaling (%s+%s)", stage, dir, PL_DEF(cfg->kernel->name, "unknown"), PL_DEF(cfg->window->name, "unknown")); } else { sh_describef(sh, "%s %sscaling (%s)", stage, dir, PL_DEF(cfg->kernel->name, "unknown")); } } // Subroutine for computing and adding an individual texel contribution // If `in` is NULL, samples directly // If `in` is set, takes the pixel from 
inX[idx] where X is the component, // `in` is the given identifier, and `idx` must be defined by the caller static void polar_sample(pl_shader sh, pl_filter filter, ident_t tex, ident_t lut, ident_t radius, int x, int y, uint8_t comp_mask, ident_t in, bool use_ar, ident_t scale) { // Since we can't know the subpixel position in advance, assume a // worst case scenario int yy = y > 0 ? y-1 : y; int xx = x > 0 ? x-1 : x; float dmin = sqrt(xx*xx + yy*yy); // Skip samples definitely outside the radius if (dmin >= filter->radius) return; // Check for samples that might be skippable bool maybe_skippable = dmin >= filter->radius - M_SQRT2; // Check for samples that definitely won't contribute to anti-ringing const float ar_radius = filter->radius_zero; use_ar &= dmin < ar_radius; #pragma GLSL \ offset = ivec2(${const int: x}, ${const int: y}); \ d = length(vec2(offset) - fcoord); \ @if (maybe_skippable) \ if (d < $radius) { \ w = $lut(d * 1.0 / $radius); \ wsum += w; \ @if (in != NULL_IDENT) { \ @for (c : comp_mask) \ c[@c] = ${in}_@c[idx]; \ @} else { \ c = textureLod($tex, base + pt * vec2(offset), 0.0); \ @} \ @for (c : comp_mask) \ color[@c] += w * c[@c]; \ @if (use_ar) { \ if (d <= ${const float: ar_radius}) { \ @for (c : comp_mask) { \ cc = vec2($scale * c[@c]); \ cc.x = 1.0 - cc.x; \ ww = cc + vec2(0.10); \ ww = ww * ww; \ ww = ww * ww; \ ww = ww * ww; \ ww = ww * ww; \ ww = ww * ww; \ ww = w * ww; \ ar@c += ww * cc; \ wwsum@c += ww; \ @} \ } \ @} \ @if (maybe_skippable) \ } } struct sh_sampler_obj { pl_filter filter; pl_shader_obj lut; pl_shader_obj pass2; // for pl_shader_sample_ortho }; #define SCALER_LUT_SIZE 256 #define SCALER_LUT_CUTOFF 1e-3f static void sh_sampler_uninit(pl_gpu gpu, void *ptr) { struct sh_sampler_obj *obj = ptr; pl_shader_obj_destroy(&obj->lut); pl_shader_obj_destroy(&obj->pass2); pl_filter_free(&obj->filter); *obj = (struct sh_sampler_obj) {0}; } static void fill_polar_lut(void *data, const struct sh_lut_params *params) { const struct sh_sampler_obj *obj = params->priv; pl_filter filt = obj->filter; pl_assert(params->width == filt->params.lut_entries && params->comps == 1); memcpy(data, filt->weights, params->width * sizeof(float)); } bool pl_shader_sample_polar(pl_shader sh, const struct pl_sample_src *src, const struct pl_sample_filter_params *params) { pl_assert(params); if (!params->filter.polar) { SH_FAIL(sh, "Trying to use polar sampling with a non-polar filter?"); return false; } uint8_t cmask; float rx, ry, scalef; ident_t src_tex, pos, pt, scale; if (!setup_src(sh, src, &src_tex, &pos, &pt, &rx, &ry, &cmask, &scalef, false, FASTEST)) return false; struct sh_sampler_obj *obj; obj = SH_OBJ(sh, params->lut, PL_SHADER_OBJ_SAMPLER, struct sh_sampler_obj, sh_sampler_uninit); if (!obj) return false; float inv_scale = 1.0 / PL_MIN(rx, ry); inv_scale = PL_MAX(inv_scale, 1.0); if (params->no_widening) inv_scale = 1.0; scale = sh_const_float(sh, "scale", scalef); struct pl_filter_config cfg = params->filter; cfg.antiring = PL_DEF(cfg.antiring, params->antiring); cfg.blur = PL_DEF(cfg.blur, 1.0f) * inv_scale; bool update = !obj->filter || !pl_filter_config_eq(&obj->filter->params.config, &cfg); if (update) { pl_filter_free(&obj->filter); obj->filter = pl_filter_generate(sh->log, pl_filter_params( .config = cfg, .lut_entries = SCALER_LUT_SIZE, .cutoff = SCALER_LUT_CUTOFF, )); if (!obj->filter) { // This should never happen, but just in case .. 
SH_FAIL(sh, "Failed initializing polar filter!"); return false; } } describe_filter(sh, &cfg, "polar", rx, ry); GLSL("// pl_shader_sample_polar \n" "vec4 color = vec4(0.0); \n" "{ \n" "vec2 pos = "$", pt = "$"; \n" "vec2 size = vec2(textureSize("$", 0)); \n" "vec2 fcoord = fract(pos * size - vec2(0.5)); \n" "vec2 base = pos - pt * fcoord; \n" "vec2 center = base + pt * vec2(0.5); \n" "ivec2 offset; \n" "float w, d, wsum = 0.0; \n" "int idx; \n" "vec4 c; \n", pos, pt, src_tex); bool use_ar = cfg.antiring > 0; if (use_ar) { #pragma GLSL \ vec2 ww, cc; \ @for (c : cmask) \ vec2 ar@c = vec2(0.0), wwsum@c = vec2(0.0); } pl_gpu gpu = SH_GPU(sh); const int num_comps = __builtin_popcount(cmask); const bool dynamic_size = SH_PARAMS(sh).dynamic_constants || !gpu || !gpu->limits.array_size_constants; int bound = ceil(obj->filter->radius); int offset = bound - 1; // padding top/left int padding = offset + bound; // total padding // Determined experimentally on modern AMD and Nvidia hardware. 32 is a // good tradeoff for the horizontal work group size. Apart from that, // just use as many threads as possible. int bw = 32, bh = sh_glsl(sh).max_group_threads / bw; int sizew, sizeh, iw, ih; // Disable compute shaders after a (hard-coded) radius of 6, since the // gather kernel generally pulls ahead here. bool is_compute = !params->no_compute && sh_glsl(sh).compute; is_compute &= obj->filter->radius < 6.0; while (is_compute) { // We need to sample everything from base_min to base_max, so make sure // we have enough room in shmem. The extra margin on the ceilf guards // against floating point inaccuracy on near-integer scaling ratios. const float margin = 1e-5; sizew = iw = (int) ceilf(bw / rx - margin) + padding + 1; sizeh = ih = (int) ceilf(bh / ry - margin) + padding + 1; if (dynamic_size) { // Overallocate slightly to reduce recompilation overhead sizew = PL_ALIGN2(sizew, 8); sizeh = PL_ALIGN2(sizeh, 8); } const int shmem_req = (sizew * sizeh * num_comps + 2) * sizeof(float); if (shmem_req > sh_glsl(sh).max_shmem_size && bh > 1) { // Try again with smaller work group size bh >>= 1; continue; } is_compute = sh_try_compute(sh, bw, bh, false, shmem_req); break; } // Note: SH_LUT_LITERAL might be faster in some specific cases, but not by // much, and it's catastrophically slow on other platforms. 
ident_t lut = sh_lut(sh, sh_lut_params( .object = &obj->lut, .lut_type = SH_LUT_TEXTURE, .var_type = PL_VAR_FLOAT, .method = SH_LUT_LINEAR, .width = SCALER_LUT_SIZE, .comps = 1, .update = update, .fill = fill_polar_lut, .priv = obj, )); if (!lut) { SH_FAIL(sh, "Failed initializing polar LUT!"); return false; } ident_t radius_c = sh_const_float(sh, "radius", obj->filter->radius); ident_t in = sh_fresh(sh, "in"); if (is_compute) { // Compute shader kernel GLSL("uvec2 base_id = uvec2(0u); \n"); if (src->rect.x0 > src->rect.x1) GLSL("base_id.x = gl_WorkGroupSize.x - 1u; \n"); if (src->rect.y0 > src->rect.y1) GLSL("base_id.y = gl_WorkGroupSize.y - 1u; \n"); GLSLH("shared vec2 "$"_base; \n", in); GLSL("if (gl_LocalInvocationID.xy == base_id) \n" " "$"_base = base; \n" "barrier(); \n" "ivec2 rel = ivec2(round((base - "$"_base) * size)); \n", in, in); ident_t sizew_c = sh_const(sh, (struct pl_shader_const) { .type = PL_VAR_SINT, .compile_time = true, .name = "sizew", .data = &sizew, }); ident_t sizeh_c = sh_const(sh, (struct pl_shader_const) { .type = PL_VAR_SINT, .compile_time = true, .name = "sizeh", .data = &sizeh, }); ident_t iw_c = sizew_c, ih_c = sizeh_c; if (dynamic_size) { iw_c = sh_const_int(sh, "iw", iw); ih_c = sh_const_int(sh, "ih", ih); } // Load all relevant texels into shmem GLSL("for (int y = int(gl_LocalInvocationID.y); y < "$"; y += %d) { \n" "for (int x = int(gl_LocalInvocationID.x); x < "$"; x += %d) { \n" "c = textureLod("$", "$"_base + pt * vec2(x - %d, y - %d), 0.0); \n", ih_c, bh, iw_c, bw, src_tex, in, offset, offset); for (uint8_t comps = cmask; comps;) { uint8_t c = __builtin_ctz(comps); GLSLH("shared float "$"_%d["$" * "$"]; \n", in, c, sizeh_c, sizew_c); GLSL(""$"_%d["$" * y + x] = c[%d]; \n", in, c, sizew_c, c); comps &= ~(1 << c); } GLSL("}} \n" "barrier(); \n"); // Dispatch the actual samples for (int y = 1 - bound; y <= bound; y++) { for (int x = 1 - bound; x <= bound; x++) { GLSL("idx = "$" * rel.y + rel.x + "$" * %d + %d; \n", sizew_c, sizew_c, y + offset, x + offset); polar_sample(sh, obj->filter, src_tex, lut, radius_c, x, y, cmask, in, use_ar, scale); } } } else { // Fragment shader sampling for (uint8_t comps = cmask; comps;) { uint8_t c = __builtin_ctz(comps); GLSL("vec4 "$"_%d; \n", in, c); comps &= ~(1 << c); } // For maximum efficiency, we want to use textureGather() if // possible, rather than direct sampling. Since this is not // always possible/sensible, we need to possibly intermix gathering // with regular sampling. This requires keeping track of which // pixels in the next row were already gathered by the previous // row. uint64_t gathered_cur = 0x0, gathered_next = 0x0; const float radius2 = PL_SQUARE(obj->filter->radius); const int base = bound - 1; if (base + bound >= 8 * sizeof(gathered_cur)) { SH_FAIL(sh, "Polar radius %f exceeds implementation capacity!", obj->filter->radius); return false; } for (int y = 1 - bound; y <= bound; y++) { for (int x = 1 - bound; x <= bound; x++) { // Skip already gathered texels uint64_t bit = 1llu << (base + x); if (gathered_cur & bit) continue; // Using texture gathering is only more efficient than direct // sampling in the case where we expect to be able to use all // four gathered texels, without having to discard any. So // only do it if we suspect it will be a win rather than a // loss. 
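                // A single gather covers the 2x2 quad of texels at offsets
                // {x, x+1} x {y, y+1}, so only use it when even the farthest
                // corner of that quad still lies within the filter radius
                // (tested against radius^2 below) and the offsets stay inside
                // the implementation's gather offset limits.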
int xx = x*x, xx1 = (x+1)*(x+1); int yy = y*y, yy1 = (y+1)*(y+1); bool use_gather = PL_MAX(xx, xx1) + PL_MAX(yy, yy1) < radius2; use_gather &= PL_MAX(x, y) <= sh_glsl(sh).max_gather_offset; use_gather &= PL_MIN(x, y) >= sh_glsl(sh).min_gather_offset; use_gather &= !src->tex || src->tex->params.format->gatherable; // Gathering from components other than the R channel requires // support for GLSL 400, which introduces the overload of // textureGather* that allows specifying the component. // // This is also the minimum requirement if we don't know the // texture format capabilities, for the sampler2D interface if (cmask != 0x1 || !src->tex) use_gather &= sh_glsl(sh).version >= 400; if (!use_gather) { // Switch to direct sampling instead polar_sample(sh, obj->filter, src_tex, lut, radius_c, x, y, cmask, NULL_IDENT, use_ar, scale); continue; } // Gather the four surrounding texels simultaneously for (uint8_t comps = cmask; comps;) { uint8_t c = __builtin_ctz(comps); if (x || y) { if (c) { GLSL($"_%d = textureGatherOffset("$", " "center, ivec2(%d, %d), %d); \n", in, c, src_tex, x, y, c); } else { GLSL($"_0 = textureGatherOffset("$", " "center, ivec2(%d, %d)); \n", in, src_tex, x, y); } } else { if (c) { GLSL($"_%d = textureGather("$", center, %d); \n", in, c, src_tex, c); } else { GLSL($"_0 = textureGather("$", center); \n", in, src_tex); } } comps &= ~(1 << c); } // Mix in all of the points with their weights for (int p = 0; p < 4; p++) { // The four texels are gathered counterclockwise starting // from the bottom left static const int xo[4] = {0, 1, 1, 0}; static const int yo[4] = {1, 1, 0, 0}; if (x+xo[p] > bound || y+yo[p] > bound) continue; // next subpixel if (!yo[p] && (gathered_cur & (bit << xo[p]))) continue; // already sampled GLSL("idx = %d;\n", p); polar_sample(sh, obj->filter, src_tex, lut, radius_c, x+xo[p], y+yo[p], cmask, in, use_ar, scale); } // Mark the other next row's pixels as already gathered gathered_next |= bit | (bit << 1); x++; // skip adjacent pixel } // Prepare for new row gathered_cur = gathered_next; gathered_next = 0; } } #pragma GLSL \ color = $scale / wsum * color; \ @if (use_ar) { \ @for (c : cmask) { \ ww = ar@c / wwsum@c; \ ww.x = 1.0 - ww.x; \ w = clamp(color[@c], ww.x, ww.y); \ w = mix(w, dot(ww, vec2(0.5)), ww.x > ww.y); \ color[@c] = mix(color[@c], w, ${float:cfg.antiring}); \ @} \ @} \ @if (!(cmask & (1 << PL_CHANNEL_A))) \ color.a = 1.0; \ } return true; } static void fill_ortho_lut(void *data, const struct sh_lut_params *params) { const struct sh_sampler_obj *obj = params->priv; pl_filter filt = obj->filter; if (filt->radius == filt->radius_zero) { // Main lobe covers entire radius, so all weights are positive, meaning // we can use the linear resampling trick for (int n = 0; n < SCALER_LUT_SIZE; n++) { const float *weights = filt->weights + n * filt->row_stride; float *row = (float *) data + n * filt->row_stride; pl_assert(filt->row_size % 2 == 0); for (int i = 0; i < filt->row_size; i += 2) { const float w0 = weights[i], w1 = weights[i+1]; assert(w0 + w1 >= 0.0f); row[i] = w0 + w1; row[i+1] = w1 / (w0 + w1); } } } else { size_t entries = SCALER_LUT_SIZE * filt->row_stride; pl_assert(params->width * params->height * params->comps == entries); memcpy(data, filt->weights, entries * sizeof(float)); } } enum { SEP_VERT = 0, SEP_HORIZ, SEP_PASSES }; bool pl_shader_sample_ortho2(pl_shader sh, const struct pl_sample_src *src, const struct pl_sample_filter_params *params) { pl_assert(params); if (params->filter.polar) { SH_FAIL(sh, "Trying to use separated 
sampling with a polar filter?"); return false; } pl_gpu gpu = SH_GPU(sh); pl_assert(gpu); uint8_t comps; float ratio[SEP_PASSES], scale; ident_t src_tex, pos, pt; if (!setup_src(sh, src, &src_tex, &pos, &pt, &ratio[SEP_HORIZ], &ratio[SEP_VERT], &comps, &scale, false, LINEAR)) return false; int pass; if (fabs(ratio[SEP_HORIZ] - 1.0f) < 1e-6f) { pass = SEP_VERT; } else if (fabs(ratio[SEP_VERT] - 1.0f) < 1e-6f) { pass = SEP_HORIZ; } else { SH_FAIL(sh, "Trying to use pl_shader_sample_ortho with a " "pl_sample_src that requires scaling in multiple directions " "(rx=%f, ry=%f), this is not possible!", ratio[SEP_HORIZ], ratio[SEP_VERT]); return false; } // We can store a separate sampler object per dimension, so dispatch the // right one. This is needed for two reasons: // 1. Anamorphic content can have a different scaling ratio for each // dimension. In particular, you could be upscaling in one and // downscaling in the other. // 2. After fixing the source for `setup_src`, we lose information about // the scaling ratio of the other component. (Although this is only a // minor reason and could easily be changed with some boilerplate) struct sh_sampler_obj *obj; obj = SH_OBJ(sh, params->lut, PL_SHADER_OBJ_SAMPLER, struct sh_sampler_obj, sh_sampler_uninit); if (!obj) return false; if (pass != 0) { obj = SH_OBJ(sh, &obj->pass2, PL_SHADER_OBJ_SAMPLER, struct sh_sampler_obj, sh_sampler_uninit); assert(obj); } float inv_scale = 1.0 / ratio[pass]; inv_scale = PL_MAX(inv_scale, 1.0); if (params->no_widening) inv_scale = 1.0; struct pl_filter_config cfg = params->filter; cfg.antiring = PL_DEF(cfg.antiring, params->antiring); cfg.blur = PL_DEF(cfg.blur, 1.0f) * inv_scale; bool update = !obj->filter || !pl_filter_config_eq(&obj->filter->params.config, &cfg); if (update) { pl_filter_free(&obj->filter); obj->filter = pl_filter_generate(sh->log, pl_filter_params( .config = cfg, .lut_entries = SCALER_LUT_SIZE, .max_row_size = gpu->limits.max_tex_2d_dim / 4, .row_stride_align = 4, )); if (!obj->filter) { // This should never happen, but just in case .. 
SH_FAIL(sh, "Failed initializing separated filter!"); return false; } } int N = obj->filter->row_size; // number of samples to convolve int width = obj->filter->row_stride / 4; // width of the LUT texture ident_t lut = sh_lut(sh, sh_lut_params( .object = &obj->lut, .var_type = PL_VAR_FLOAT, .method = SH_LUT_LINEAR, .width = width, .height = SCALER_LUT_SIZE, .comps = 4, .update = update, .fill = fill_ortho_lut, .priv = obj, )); if (!lut) { SH_FAIL(sh, "Failed initializing separated LUT!"); return false; } const int dir[SEP_PASSES][2] = { [SEP_HORIZ] = {1, 0}, [SEP_VERT] = {0, 1}, }; static const char *names[SEP_PASSES] = { [SEP_HORIZ] = "ortho (horiz)", [SEP_VERT] = "ortho (vert)", }; describe_filter(sh, &cfg, names[pass], ratio[pass], ratio[pass]); float denom = PL_MAX(1, width - 1); // avoid division by zero bool use_ar = cfg.antiring > 0 && ratio[pass] > 1.0; bool use_linear = obj->filter->radius == obj->filter->radius_zero; use_ar &= !use_linear; // filter has no negative weights #pragma GLSL /* pl_shader_sample_ortho */ \ vec4 color = vec4(0.0, 0.0, 0.0, 1.0); \ { \ vec2 pos = $pos, pt = $pt; \ vec2 size = vec2(textureSize($src_tex, 0)); \ vec2 dir = vec2(${const float:dir[pass][0]}, ${const float: dir[pass][1]}); \ pt *= dir; \ vec2 fcoord2 = fract(pos * size - vec2(0.5)); \ float fcoord = dot(fcoord2, dir); \ vec2 base = pos - fcoord * pt - pt * vec2(${const float: N / 2 - 1}); \ vec4 ws; \ float off; \ ${vecType: comps} c, ca = ${vecType: comps}(0.0); \ @if (use_ar) { \ ${vecType: comps} hi = ${vecType: comps}(0.0); \ ${vecType: comps} lo = ${vecType: comps}(1e9); \ @} \ #pragma unroll 4 \ for (uint n = 0u; n < ${uint: N}; n += ${const uint: use_linear ? 2u : 1u}) { \ if (n % 4u == 0u) \ ws = $lut(vec2(float(n / 4u) / ${const float: denom}, fcoord)); \ off = float(n); \ @if (use_linear) \ off += ws[n % 4u + 1u]; \ c = textureLod($src_tex, base + pt * off, 0.0).${swizzle: comps}; \ @if (use_ar) { \ if (n == ${uint: N} / 2u - 1u || n == ${uint: N} / 2u) { \ lo = min(lo, c); \ hi = max(hi, c); \ } \ @} \ ca += ws[n % 4u] * c; \ } \ @if (use_ar) \ ca = mix(ca, clamp(ca, lo, hi), ${float: cfg.antiring}); \ color.${swizzle: comps} = ${float: scale} * ca; \ } return true; } const struct pl_distort_params pl_distort_default_params = { PL_DISTORT_DEFAULTS }; void pl_shader_distort(pl_shader sh, pl_tex src_tex, int out_w, int out_h, const struct pl_distort_params *params) { pl_assert(params); if (!sh_require(sh, PL_SHADER_SIG_NONE, out_w, out_h)) return; const int src_w = src_tex->params.w, src_h = src_tex->params.h; float rx = 1.0f, ry = 1.0f; if (src_w > src_h) { ry = (float) src_h / src_w; } else { rx = (float) src_w / src_h; } // Map from texel coordinates [0,1]² to aspect-normalized representation const pl_transform2x2 tex2norm = { .mat.m = { { 2 * rx, 0 }, { 0, -2 * ry }, }, .c = { -rx, ry }, }; // Map from aspect-normalized representation to canvas coords [-1,1]² const float sx = params->unscaled ? (float) src_w / out_w : 1.0f; const float sy = params->unscaled ? 
(float) src_h / out_h : 1.0f; const pl_transform2x2 norm2canvas = { .mat.m = { { sx / rx, 0 }, { 0, sy / ry }, }, }; struct pl_transform2x2 transform = params->transform; pl_transform2x2_mul(&transform, &tex2norm); pl_transform2x2_rmul(&norm2canvas, &transform); if (params->constrain) { pl_rect2df bb = pl_transform2x2_bounds(&transform, &(pl_rect2df) { .x1 = 1, .y1 = 1, }); const float k = fmaxf(fmaxf(pl_rect_w(bb), pl_rect_h(bb)), 2.0f); pl_transform2x2_scale(&transform, 2.0f / k); }; // Bind the canvas coordinates as [-1,1]², flipped vertically to correspond // to normal mathematical axis conventions static const pl_rect2df canvas = { .x0 = -1.0f, .x1 = 1.0f, .y0 = 1.0f, .y1 = -1.0f, }; ident_t pos = sh_attr_vec2(sh, "pos", &canvas); ident_t pt, tex = sh_bind(sh, src_tex, params->address_mode, PL_TEX_SAMPLE_LINEAR, "tex", NULL, NULL, &pt); // Bind the inverse of the tex2canvas transform (i.e. canvas2tex) pl_transform2x2_invert(&transform); ident_t tf = sh_var(sh, (struct pl_shader_var) { .var = pl_var_mat2("tf"), .data = PL_TRANSPOSE_2X2(transform.mat.m), }); ident_t tf_c = sh_var(sh, (struct pl_shader_var) { .var = pl_var_vec2("tf_c"), .data = transform.c, }); // See pl_shader_sample_bicubic sh_describe(sh, "distortion"); #pragma GLSL /* pl_shader_sample_distort */ \ vec4 color; \ { \ vec2 pos = $tf * $pos + $tf_c; \ vec2 pt = $pt; \ @if (params->bicubic) { \ vec2 size = vec2(textureSize($tex, 0)); \ vec2 frac = fract(pos * size + vec2(0.5)); \ vec2 frac2 = frac * frac; \ vec2 inv = vec2(1.0) - frac; \ vec2 inv2 = inv * inv; \ vec2 w0 = 1.0/6.0 * inv2 * inv; \ vec2 w1 = 2.0/3.0 - 0.5 * frac2 * (2.0 - frac); \ vec2 w2 = 2.0/3.0 - 0.5 * inv2 * (2.0 - inv); \ vec2 w3 = 1.0/6.0 * frac2 * frac; \ vec4 g = vec4(w0 + w1, w2 + w3); \ vec4 h = vec4(w1, w3) / g + inv.xyxy; \ h.xy -= vec2(2.0); \ vec4 p = pos.xyxy + pt.xyxy * h; \ vec4 c00 = textureLod($tex, p.xy, 0.0); \ vec4 c01 = textureLod($tex, p.xw, 0.0); \ vec4 c0 = mix(c01, c00, g.y); \ vec4 c10 = textureLod($tex, p.zy, 0.0); \ vec4 c11 = textureLod($tex, p.zw, 0.0); \ vec4 c1 = mix(c11, c10, g.y); \ color = mix(c1, c0, g.x); \ @} else { \ color = texture($tex, pos); \ @} \ @if (params->alpha_mode) { \ vec2 border = min(pos, vec2(1.0) - pos); \ border = smoothstep(vec2(0.0), pt, border); \ @if (params->alpha_mode == PL_ALPHA_PREMULTIPLIED) \ color.rgba *= border.x * border.y; \ @else \ color.a *= border.x * border.y; \ @} \ } } libplacebo-v7.349.0/src/swapchain.c000066400000000000000000000045741463457750100171160ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . 
*/ #include "common.h" #include "log.h" #include "swapchain.h" void pl_swapchain_destroy(pl_swapchain *ptr) { pl_swapchain sw = *ptr; if (!sw) return; const struct pl_sw_fns *impl = PL_PRIV(sw); impl->destroy(sw); *ptr = NULL; } int pl_swapchain_latency(pl_swapchain sw) { const struct pl_sw_fns *impl = PL_PRIV(sw); if (!impl->latency) return 0; return impl->latency(sw); } bool pl_swapchain_resize(pl_swapchain sw, int *width, int *height) { int dummy[2] = {0}; width = PL_DEF(width, &dummy[0]); height = PL_DEF(height, &dummy[1]); const struct pl_sw_fns *impl = PL_PRIV(sw); if (!impl->resize) { *width = *height = 0; return true; } return impl->resize(sw, width, height); } void pl_swapchain_colorspace_hint(pl_swapchain sw, const struct pl_color_space *csp) { const struct pl_sw_fns *impl = PL_PRIV(sw); if (!impl->colorspace_hint) return; struct pl_swapchain_colors fix = {0}; if (csp) { fix = *csp; // Ensure we have valid values set for all the fields pl_color_space_infer(&fix); } impl->colorspace_hint(sw, &fix); } bool pl_swapchain_start_frame(pl_swapchain sw, struct pl_swapchain_frame *out_frame) { *out_frame = (struct pl_swapchain_frame) {0}; // sanity const struct pl_sw_fns *impl = PL_PRIV(sw); return impl->start_frame(sw, out_frame); } bool pl_swapchain_submit_frame(pl_swapchain sw) { const struct pl_sw_fns *impl = PL_PRIV(sw); return impl->submit_frame(sw); } void pl_swapchain_swap_buffers(pl_swapchain sw) { const struct pl_sw_fns *impl = PL_PRIV(sw); impl->swap_buffers(sw); } libplacebo-v7.349.0/src/swapchain.h000066400000000000000000000024311463457750100171110ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #pragma once #include "common.h" #include // This struct must be the first member of the swapchains's priv struct. The // `pl_swapchain` helpers will cast the priv struct to this struct! 
#define SW_PFN(name) __typeof__(pl_swapchain_##name) *name struct pl_sw_fns { // This destructor follows the same rules as `pl_gpu_fns` void (*destroy)(pl_swapchain sw); SW_PFN(latency); // optional SW_PFN(resize); // optional SW_PFN(colorspace_hint); // optional SW_PFN(start_frame); SW_PFN(submit_frame); SW_PFN(swap_buffers); }; #undef SW_PFN libplacebo-v7.349.0/src/tests/000077500000000000000000000000001463457750100161255ustar00rootroot00000000000000libplacebo-v7.349.0/src/tests/bench.c000066400000000000000000000412631463457750100173560ustar00rootroot00000000000000#include "utils.h" #include #include #include #include #include enum { // Image configuration NUM_TEX = 16, WIDTH = 1920, HEIGHT = 1080, DEPTH = 16, COMPS = 4, // Queue configuration NUM_QUEUES = NUM_TEX, ASYNC_TX = 1, ASYNC_COMP = 1, // Test configuration TEST_MS = 1000, WARMUP_MS = 500, }; static pl_tex create_test_img(pl_gpu gpu) { pl_fmt fmt = pl_find_fmt(gpu, PL_FMT_FLOAT, COMPS, DEPTH, 32, PL_FMT_CAP_LINEAR); REQUIRE(fmt); const float xc = (WIDTH - 1) / 2.0f; const float yc = (HEIGHT - 1) / 2.0f; const float kf = 0.5f / sqrtf(xc * xc + yc * yc); const float invphi = 0.61803398874989; const float freqR = kf * M_PI * 0.2f; const float freqG = freqR * invphi; const float freqB = freqG * invphi; float *data = malloc(WIDTH * HEIGHT * COMPS * sizeof(float)); for (int y = 0; y < HEIGHT; y++) { for (int x = 0; x < WIDTH; x++) { float *color = &data[(y * WIDTH + x) * COMPS]; float xx = x - xc, yy = y - yc; float r2 = xx * xx + yy * yy; switch (COMPS) { case 4: color[3] = 1.0; case 3: color[2] = 0.5f * sinf(freqB * r2) + 0.5f;; case 2: color[1] = 0.5f * sinf(freqG * r2) + 0.5f;; case 1: color[0] = 0.5f * sinf(freqR * r2) + 0.5f;; } } } pl_tex tex = pl_tex_create(gpu, pl_tex_params( .format = fmt, .w = WIDTH, .h = HEIGHT, .sampleable = true, .initial_data = data, )); free(data); REQUIRE(tex); return tex; } struct bench { void (*run_sh)(pl_shader sh, pl_shader_obj *state, pl_tex src); void (*run_tex)(pl_gpu gpu, pl_tex tex); }; static void run_bench(pl_gpu gpu, pl_dispatch dp, pl_shader_obj *state, pl_tex src, pl_tex fbo, pl_timer timer, const struct bench *bench) { REQUIRE(bench); REQUIRE(bench->run_sh || bench->run_tex); if (bench->run_sh) { pl_shader sh = pl_dispatch_begin(dp); bench->run_sh(sh, state, src); pl_dispatch_finish(dp, pl_dispatch_params( .shader = &sh, .target = fbo, .timer = timer, )); } else { bench->run_tex(gpu, fbo); } } static void benchmark(pl_gpu gpu, const char *name, const struct bench *bench) { pl_dispatch dp = pl_dispatch_create(gpu->log, gpu); REQUIRE(dp); pl_shader_obj state = NULL; pl_tex src = create_test_img(gpu); // Create the FBOs pl_fmt fmt = pl_find_fmt(gpu, PL_FMT_FLOAT, COMPS, DEPTH, 32, PL_FMT_CAP_RENDERABLE | PL_FMT_CAP_BLITTABLE); REQUIRE(fmt); pl_tex fbos[NUM_TEX] = {0}; for (int i = 0; i < NUM_TEX; i++) { fbos[i] = pl_tex_create(gpu, pl_tex_params( .format = fmt, .w = WIDTH, .h = HEIGHT, .renderable = true, .blit_dst = true, .host_writable = true, .host_readable = true, .storable = !!(fmt->caps & PL_FMT_CAP_STORABLE), )); REQUIRE(fbos[i]); pl_tex_clear(gpu, fbos[i], (float[4]){ 0.0 }); } // Run the benchmark and flush+block once to force shader compilation etc. 
run_bench(gpu, dp, &state, src, fbos[0], NULL, bench); pl_gpu_finish(gpu); // Perform the actual benchmark pl_clock_t start_warmup = 0, start_test = 0; unsigned long frames = 0, frames_warmup = 0; pl_timer timer = pl_timer_create(gpu); uint64_t gputime_total = 0; unsigned long gputime_count = 0; uint64_t gputime; start_warmup = pl_clock_now(); do { const int idx = frames % NUM_TEX; while (pl_tex_poll(gpu, fbos[idx], UINT64_MAX)) ; // do nothing run_bench(gpu, dp, &state, src, fbos[idx], start_test ? timer : NULL, bench); pl_gpu_flush(gpu); frames++; if (start_test) { while ((gputime = pl_timer_query(gpu, timer))) { gputime_total += gputime; gputime_count++; } } pl_clock_t now = pl_clock_now(); if (start_test) { if (pl_clock_diff(now, start_test) > TEST_MS * 1e-3) break; } else if (pl_clock_diff(now, start_warmup) > WARMUP_MS * 1e-3) { start_test = now; frames_warmup = frames; } } while (true); // Force the GPU to finish execution and re-measure the final stop time pl_gpu_finish(gpu); pl_clock_t stop = pl_clock_now(); while ((gputime = pl_timer_query(gpu, timer))) { gputime_total += gputime; gputime_count++; } frames -= frames_warmup; double secs = pl_clock_diff(stop, start_test); printf("'%s':\t%4lu frames in %1.6f seconds => %2.6f ms/frame (%5.2f FPS)", name, frames, secs, 1000 * secs / frames, frames / secs); if (gputime_count) printf(", gpu time: %2.6f ms", 1e-6 * gputime_total / gputime_count); printf("\n"); pl_timer_destroy(gpu, &timer); pl_shader_obj_destroy(&state); pl_dispatch_destroy(&dp); pl_tex_destroy(gpu, &src); for (int i = 0; i < NUM_TEX; i++) pl_tex_destroy(gpu, &fbos[i]); } // List of benchmarks static void bench_deband(pl_shader sh, pl_shader_obj *state, pl_tex src) { pl_shader_deband(sh, pl_sample_src( .tex = src ), NULL); } static void bench_deband_heavy(pl_shader sh, pl_shader_obj *state, pl_tex src) { pl_shader_deband(sh, pl_sample_src( .tex = src ), pl_deband_params( .iterations = 4, .threshold = 4.0, .radius = 4.0, .grain = 16.0, )); } static void bench_bilinear(pl_shader sh, pl_shader_obj *state, pl_tex src) { REQUIRE(pl_shader_sample_bilinear(sh, pl_sample_src( .tex = src ))); } static void bench_bicubic(pl_shader sh, pl_shader_obj *state, pl_tex src) { REQUIRE(pl_shader_sample_bicubic(sh, pl_sample_src( .tex = src ))); } static void bench_hermite(pl_shader sh, pl_shader_obj *state, pl_tex src) { REQUIRE(pl_shader_sample_hermite(sh, pl_sample_src( .tex = src ))); } static void bench_gaussian(pl_shader sh, pl_shader_obj *state, pl_tex src) { REQUIRE(pl_shader_sample_gaussian(sh, pl_sample_src( .tex = src ))); } static void bench_dither_blue(pl_shader sh, pl_shader_obj *state, pl_tex src) { REQUIRE(pl_shader_sample_direct(sh, pl_sample_src( .tex = src ))); pl_shader_dither(sh, 8, state, pl_dither_params( .method = PL_DITHER_BLUE_NOISE, )); } static void bench_dither_white(pl_shader sh, pl_shader_obj *state, pl_tex src) { REQUIRE(pl_shader_sample_direct(sh, pl_sample_src( .tex = src ))); pl_shader_dither(sh, 8, state, pl_dither_params( .method = PL_DITHER_WHITE_NOISE, )); } static void bench_dither_ordered_fix(pl_shader sh, pl_shader_obj *state, pl_tex src) { REQUIRE(pl_shader_sample_direct(sh, pl_sample_src( .tex = src ))); pl_shader_dither(sh, 8, state, pl_dither_params( .method = PL_DITHER_ORDERED_FIXED, )); } static void bench_polar(pl_shader sh, pl_shader_obj *state, pl_tex src) { struct pl_sample_filter_params params = { .filter = pl_filter_ewa_lanczos, .lut = state, }; REQUIRE(pl_shader_sample_polar(sh, pl_sample_src( .tex = src ), ¶ms)); } static void 
bench_polar_nocompute(pl_shader sh, pl_shader_obj *state, pl_tex src) { struct pl_sample_filter_params params = { .filter = pl_filter_ewa_lanczos, .no_compute = true, .lut = state, }; REQUIRE(pl_shader_sample_polar(sh, pl_sample_src( .tex = src ), ¶ms)); } static void bench_hdr_peak(pl_shader sh, pl_shader_obj *state, pl_tex src) { REQUIRE(pl_shader_sample_direct(sh, pl_sample_src( .tex = src ))); REQUIRE(pl_shader_detect_peak(sh, pl_color_space_hdr10, state, &pl_peak_detect_default_params)); } static void bench_hdr_peak_hq(pl_shader sh, pl_shader_obj *state, pl_tex src) { REQUIRE(pl_shader_sample_direct(sh, pl_sample_src( .tex = src ))); REQUIRE(pl_shader_detect_peak(sh, pl_color_space_hdr10, state, &pl_peak_detect_high_quality_params)); } static void bench_hdr_lut(pl_shader sh, pl_shader_obj *state, pl_tex src) { struct pl_color_map_params params = { PL_COLOR_MAP_DEFAULTS .tone_mapping_function = &pl_tone_map_bt2390, .tone_mapping_mode = PL_TONE_MAP_RGB, }; REQUIRE(pl_shader_sample_direct(sh, pl_sample_src( .tex = src ))); pl_shader_color_map_ex(sh, ¶ms, pl_color_map_args( .src = pl_color_space_hdr10, .dst = pl_color_space_monitor, .state = state, )); } static void bench_hdr_clip(pl_shader sh, pl_shader_obj *state, pl_tex src) { struct pl_color_map_params params = { PL_COLOR_MAP_DEFAULTS .tone_mapping_function = &pl_tone_map_clip, .tone_mapping_mode = PL_TONE_MAP_RGB, }; REQUIRE(pl_shader_sample_direct(sh, pl_sample_src( .tex = src ))); pl_shader_color_map_ex(sh, ¶ms, pl_color_map_args( .src = pl_color_space_hdr10, .dst = pl_color_space_monitor, .state = state, )); } static void bench_weave(pl_shader sh, pl_shader_obj *state, pl_tex src) { struct pl_deinterlace_source dsrc = { .cur = pl_field_pair(src), .field = PL_FIELD_TOP, }; pl_shader_deinterlace(sh, &dsrc, pl_deinterlace_params( .algo = PL_DEINTERLACE_WEAVE, )); } static void bench_bob(pl_shader sh, pl_shader_obj *state, pl_tex src) { struct pl_deinterlace_source dsrc = { .cur = pl_field_pair(src), .field = PL_FIELD_TOP, }; pl_shader_deinterlace(sh, &dsrc, pl_deinterlace_params( .algo = PL_DEINTERLACE_BOB, )); } static void bench_yadif(pl_shader sh, pl_shader_obj *state, pl_tex src) { struct pl_deinterlace_source dsrc = { .prev = pl_field_pair(src), .cur = pl_field_pair(src), .next = pl_field_pair(src), .field = PL_FIELD_TOP, }; pl_shader_deinterlace(sh, &dsrc, pl_deinterlace_params( .algo = PL_DEINTERLACE_YADIF, )); } static void bench_av1_grain(pl_shader sh, pl_shader_obj *state, pl_tex src) { struct pl_film_grain_params params = { .data = { .type = PL_FILM_GRAIN_AV1, .params.av1 = av1_grain_data, .seed = rand(), }, .tex = src, .components = 3, .component_mapping = {0, 1, 2}, .repr = &(struct pl_color_repr) {0}, }; REQUIRE(pl_shader_film_grain(sh, state, ¶ms)); } static void bench_av1_grain_lap(pl_shader sh, pl_shader_obj *state, pl_tex src) { struct pl_film_grain_params params = { .data = { .type = PL_FILM_GRAIN_AV1, .params.av1 = av1_grain_data, .seed = rand(), }, .tex = src, .components = 3, .component_mapping = {0, 1, 2}, .repr = &(struct pl_color_repr) {0}, }; params.data.params.av1.overlap = true; REQUIRE(pl_shader_film_grain(sh, state, ¶ms)); } static void bench_h274_grain(pl_shader sh, pl_shader_obj *state, pl_tex src) { struct pl_film_grain_params params = { .data = { .type = PL_FILM_GRAIN_H274, .params.h274 = h274_grain_data, .seed = rand(), }, .tex = src, .components = 3, .component_mapping = {0, 1, 2}, .repr = &(struct pl_color_repr) {0}, }; REQUIRE(pl_shader_film_grain(sh, state, ¶ms)); } static void 
bench_reshape_poly(pl_shader sh, pl_shader_obj *state, pl_tex src) { REQUIRE(pl_shader_sample_direct(sh, pl_sample_src( .tex = src ))); pl_shader_dovi_reshape(sh, &(struct pl_dovi_metadata) { .comp = { { .num_pivots = 8, .pivots = {0.0, 0.00488758553, 0.0420332365, 0.177908108, 0.428152502, 0.678396881, 0.92864126, 1.0}, .method = {0, 0, 0, 0, 0, 0, 0}, .poly_coeffs = { {0.00290930271, 2.30019712, 50.1446037}, {0.00725257397, 1.88119054, -4.49443769}, {0.0150123835, 1.61106598, -1.64833081}, {0.0498571396, 1.2059114, -0.430627108}, {0.0878019333, 1.01845241, -0.19669354}, {0.120447636, 0.920134187, -0.122338772}, {2.12430835, -3.30913281, 2.10893941}, }, }, { .num_pivots = 2, .pivots = {0.0, 1.0}, .method = {0}, .poly_coeffs = {{-0.397901177, 1.85908031, 0}}, }, { .num_pivots = 2, .pivots = {0.0, 1.0}, .method = {0}, .poly_coeffs = {{-0.399355531, 1.85591626, 0}}, }, }}); } static void bench_reshape_mmr(pl_shader sh, pl_shader_obj *state, pl_tex src) { REQUIRE(pl_shader_sample_direct(sh, pl_sample_src( .tex = src ))); pl_shader_dovi_reshape(sh, &dovi_meta); // this includes MMR } static float data[WIDTH * HEIGHT * COMPS + 8192]; static void bench_download(pl_gpu gpu, pl_tex tex) { REQUIRE(pl_tex_download(gpu, pl_tex_transfer_params( .tex = tex, .ptr = (uint8_t *) PL_ALIGN((uintptr_t) data, 4096), ))); } static void bench_upload(pl_gpu gpu, pl_tex tex) { REQUIRE(pl_tex_upload(gpu, pl_tex_transfer_params( .tex = tex, .ptr = (uint8_t *) PL_ALIGN((uintptr_t) data, 4096), ))); } static void dummy_cb(void *arg) {} static void bench_download_async(pl_gpu gpu, pl_tex tex) { REQUIRE(pl_tex_download(gpu, pl_tex_transfer_params( .tex = tex, .ptr = (uint8_t *) PL_ALIGN((uintptr_t) data, 4096), .callback = dummy_cb, ))); } static void bench_upload_async(pl_gpu gpu, pl_tex tex) { REQUIRE(pl_tex_upload(gpu, pl_tex_transfer_params( .tex = tex, .ptr = (uint8_t *) PL_ALIGN((uintptr_t) data, 4096), .callback = dummy_cb, ))); } int main() { setbuf(stdout, NULL); setbuf(stderr, NULL); pl_log log = pl_log_create(PL_API_VER, pl_log_params( .log_cb = isatty(fileno(stdout)) ? 
pl_log_color : pl_log_simple, .log_level = PL_LOG_WARN, )); pl_vulkan vk = pl_vulkan_create(log, pl_vulkan_params( .allow_software = true, .async_transfer = ASYNC_TX, .async_compute = ASYNC_COMP, .queue_count = NUM_QUEUES, )); if (!vk) return SKIP; #define BENCH_SH(fn) &(struct bench) { .run_sh = fn } #define BENCH_TEX(fn) &(struct bench) { .run_tex = fn } printf("= Running benchmarks =\n"); benchmark(vk->gpu, "tex_download ptr", BENCH_TEX(bench_download)); benchmark(vk->gpu, "tex_download ptr async", BENCH_TEX(bench_download_async)); benchmark(vk->gpu, "tex_upload ptr", BENCH_TEX(bench_upload)); benchmark(vk->gpu, "tex_upload ptr async", BENCH_TEX(bench_upload_async)); benchmark(vk->gpu, "bilinear", BENCH_SH(bench_bilinear)); benchmark(vk->gpu, "bicubic", BENCH_SH(bench_bicubic)); benchmark(vk->gpu, "hermite", BENCH_SH(bench_hermite)); benchmark(vk->gpu, "gaussian", BENCH_SH(bench_gaussian)); benchmark(vk->gpu, "deband", BENCH_SH(bench_deband)); benchmark(vk->gpu, "deband_heavy", BENCH_SH(bench_deband_heavy)); // Deinterlacing benchmark(vk->gpu, "weave", BENCH_SH(bench_weave)); benchmark(vk->gpu, "bob", BENCH_SH(bench_bob)); benchmark(vk->gpu, "yadif", BENCH_SH(bench_yadif)); // Polar sampling benchmark(vk->gpu, "polar", BENCH_SH(bench_polar)); if (vk->gpu->glsl.compute) benchmark(vk->gpu, "polar_nocompute", BENCH_SH(bench_polar_nocompute)); // Dithering algorithms benchmark(vk->gpu, "dither_blue", BENCH_SH(bench_dither_blue)); benchmark(vk->gpu, "dither_white", BENCH_SH(bench_dither_white)); benchmark(vk->gpu, "dither_ordered_fixed", BENCH_SH(bench_dither_ordered_fix)); // HDR peak detection if (vk->gpu->glsl.compute) { benchmark(vk->gpu, "hdr_peakdetect", BENCH_SH(bench_hdr_peak)); benchmark(vk->gpu, "hdr_peakdetect_hq", BENCH_SH(bench_hdr_peak_hq)); } // Tone mapping benchmark(vk->gpu, "hdr_lut", BENCH_SH(bench_hdr_lut)); benchmark(vk->gpu, "hdr_clip", BENCH_SH(bench_hdr_clip)); // Misc stuff benchmark(vk->gpu, "av1_grain", BENCH_SH(bench_av1_grain)); benchmark(vk->gpu, "av1_grain_lap", BENCH_SH(bench_av1_grain_lap)); benchmark(vk->gpu, "h274_grain", BENCH_SH(bench_h274_grain)); benchmark(vk->gpu, "reshape_poly", BENCH_SH(bench_reshape_poly)); benchmark(vk->gpu, "reshape_mmr", BENCH_SH(bench_reshape_mmr)); pl_vulkan_destroy(&vk); pl_log_destroy(&log); return 0; } libplacebo-v7.349.0/src/tests/cache.c000066400000000000000000000203261463457750100173370ustar00rootroot00000000000000#include "utils.h" #include // Returns "foo" for even keys, "bar" for odd static pl_cache_obj lookup_foobar(void *priv, uint64_t key) { return (pl_cache_obj) { .key = 0xFFFF, // test key sanity .data = (key & 1) ? "bar" : "foo", .size = 3, }; } static void update_count(void *priv, pl_cache_obj obj) { int *count = priv; *count += obj.size ? 
1 : -1; } enum { KEY1 = 0x9c65575f419288f5, KEY2 = 0x92da969be9b88086, KEY3 = 0x7fcb62540b00bc8b, KEY4 = 0x46c60ec11af9dde3, KEY5 = 0xcb6760b98ece2477, KEY6 = 0xf37dc72b7f9e5c88, KEY7 = 0x30c18c962d82e5f5, }; int main() { pl_log log = pl_test_logger(); pl_cache test = pl_cache_create(pl_cache_params( .log = log, .max_object_size = 16, .max_total_size = 32, )); pl_cache_obj obj1 = { .key = KEY1, .data = "abc", .size = 3 }; pl_cache_obj obj2 = { .key = KEY2, .data = "de", .size = 2 }; pl_cache_obj obj3 = { .key = KEY3, .data = "xyzw", .size = 4 }; REQUIRE_CMP(pl_cache_signature(test), ==, 0x0, PRIu64); REQUIRE(pl_cache_try_set(test, &obj1)); REQUIRE_CMP(pl_cache_signature(test), ==, KEY1, PRIu64); REQUIRE(pl_cache_try_set(test, &obj2)); REQUIRE_CMP(pl_cache_signature(test), ==, KEY1 ^ KEY2, PRIu64); REQUIRE(pl_cache_try_set(test, &obj3)); REQUIRE_CMP(pl_cache_signature(test), ==, KEY1 ^ KEY2 ^ KEY3, PRIu64); REQUIRE_CMP(pl_cache_size(test), ==, 9, "zu"); REQUIRE_CMP(pl_cache_objects(test), ==, 3, "d"); REQUIRE(pl_cache_try_set(test, &obj2)); // delete KEY2 REQUIRE_CMP(pl_cache_size(test), ==, 7, "zu"); REQUIRE_CMP(pl_cache_objects(test), ==, 2, "d"); REQUIRE_CMP(pl_cache_signature(test), ==, KEY1 ^ KEY3, PRIu64); REQUIRE(pl_cache_get(test, &obj1)); REQUIRE(!pl_cache_get(test, &obj2)); REQUIRE(pl_cache_get(test, &obj3)); REQUIRE_CMP(pl_cache_size(test), ==, 0, "zu"); REQUIRE_CMP(pl_cache_objects(test), ==, 0, "d"); REQUIRE_MEMEQ(obj1.data, "abc", 3); REQUIRE_MEMEQ(obj3.data, "xyzw", 4); // Re-insert removed objects (in reversed order) REQUIRE(pl_cache_try_set(test, &obj3)); REQUIRE(pl_cache_try_set(test, &obj1)); REQUIRE_CMP(pl_cache_size(test), ==, 7, "zu"); REQUIRE_CMP(pl_cache_objects(test), ==, 2, "d"); uint8_t ref[72]; memset(ref, 0xbe, sizeof(ref)); uint8_t *refp = ref; #define PAD_ALIGN(x) PL_ALIGN2(x, sizeof(uint32_t)) #define W(type, ...) 
\ do { \ size_t sz = sizeof((type){__VA_ARGS__}); \ pl_assert(ref + sizeof(ref) - refp >= sz); \ memcpy(refp, &(type){__VA_ARGS__}, sz); \ refp += sz; \ size_t pad_sz = PAD_ALIGN(sz) - sz; \ pl_assert(ref + sizeof(ref) - refp >= pad_sz); \ memcpy(refp, &(char[PAD_ALIGN(1)]){0}, pad_sz); \ refp += pad_sz; \ } while (0) W(char[], 'p', 'l', '_', 'c', 'a', 'c', 'h', 'e'); // cache magic W(uint32_t, 1); // cache version W(uint32_t, 2); // number of objects // object 3 W(uint64_t, KEY3); // key W(uint64_t, 4); // size #ifdef PL_HAVE_XXHASH W(uint64_t, 0xd43612ef3fbee8be); // hash #else W(uint64_t, 0xec18884e5e471117); // hash #endif W(char[], 'x', 'y', 'z', 'w'); // data // object 1 W(uint64_t, KEY1); // key W(uint64_t, 3); // size #ifdef PL_HAVE_XXHASH W(uint64_t, 0x78af5f94892f3950); // hash #else W(uint64_t, 0x3a204d408a2e2d77); // hash #endif W(char[], 'a', 'b', 'c'); // data #undef W #undef PAD_ALIGN uint8_t data[100]; pl_static_assert(sizeof(data) >= sizeof(ref)); REQUIRE_CMP(pl_cache_save(test, data, sizeof(data)), ==, sizeof(ref), "zu"); REQUIRE_MEMEQ(data, ref, sizeof(ref)); pl_cache test2 = pl_cache_create(pl_cache_params( .log = log )); REQUIRE_CMP(pl_cache_load(test2, data, sizeof(data)), ==, 2, "d"); REQUIRE_CMP(pl_cache_signature(test), ==, pl_cache_signature(test2), PRIu64); REQUIRE_CMP(pl_cache_size(test2), ==, 7, "zu"); REQUIRE_CMP(pl_cache_save(test2, NULL, 0), ==, sizeof(ref), "zu"); REQUIRE_CMP(pl_cache_save(test2, data, sizeof(data)), ==, sizeof(ref), "zu"); REQUIRE_MEMEQ(data, ref, sizeof(ref)); // Test loading invalid data REQUIRE_CMP(pl_cache_load(test2, ref, 0), <, 0, "d"); // empty file REQUIRE_CMP(pl_cache_load(test2, ref, 5), <, 0, "d"); // truncated header REQUIRE_CMP(pl_cache_load(test2, ref, 64), ==, 1, "d"); // truncated object data data[sizeof(ref) - 2] = 'X'; // corrupt data REQUIRE_CMP(pl_cache_load(test2, data, sizeof(ref)), ==, 1, "d"); // bad checksum pl_cache_destroy(&test2); // Inserting too large object should fail uint8_t zero[32] = {0}; pl_cache_obj obj4 = { .key = KEY4, .data = zero, .size = 32 }; REQUIRE(!pl_cache_try_set(test, &obj4)); REQUIRE(!pl_cache_get(test, &obj4)); REQUIRE_CMP(pl_cache_size(test), ==, 7, "zu"); REQUIRE_CMP(pl_cache_objects(test), ==, 2, "d"); // Inserting 16-byte object should succeed, and not purge old entries obj4 = (pl_cache_obj) { .key = KEY4, .data = zero, .size = 16 }; REQUIRE(pl_cache_try_set(test, &obj4)); REQUIRE_CMP(pl_cache_size(test), ==, 23, "zu"); REQUIRE_CMP(pl_cache_objects(test), ==, 3, "d"); REQUIRE(pl_cache_get(test, &obj1)); REQUIRE(pl_cache_get(test, &obj3)); REQUIRE(pl_cache_get(test, &obj4)); pl_cache_set(test, &obj1); pl_cache_set(test, &obj3); pl_cache_set(test, &obj4); REQUIRE_CMP(pl_cache_size(test), ==, 23, "zu"); REQUIRE_CMP(pl_cache_objects(test), ==, 3, "d"); // Inserting another 10-byte object should purge entry KEY1 pl_cache_obj obj5 = { .key = KEY5, .data = zero, .size = 10 }; REQUIRE(pl_cache_try_set(test, &obj5)); REQUIRE_CMP(pl_cache_size(test), ==, 30, "zu"); REQUIRE_CMP(pl_cache_objects(test), ==, 3, "d"); REQUIRE(!pl_cache_get(test, &obj1)); REQUIRE(pl_cache_get(test, &obj3)); REQUIRE(pl_cache_get(test, &obj4)); REQUIRE(pl_cache_get(test, &obj5)); pl_cache_set(test, &obj3); pl_cache_set(test, &obj4); pl_cache_set(test, &obj5); REQUIRE_CMP(pl_cache_size(test), ==, 30, "zu"); REQUIRE_CMP(pl_cache_objects(test), ==, 3, "d"); // Inserting final 6-byte object should purge entry KEY3 pl_cache_obj obj6 = { .key = KEY6, .data = zero, .size = 6 }; REQUIRE(pl_cache_try_set(test, &obj6)); 
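    // Expected state: obj4 (16) + obj5 (10) + obj6 (6) = 32 bytes, which is
    // exactly max_total_size; the least recently used entry KEY3 (4 bytes)
    // had to be evicted to make room.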
REQUIRE_CMP(pl_cache_size(test), ==, 32, "zu"); REQUIRE_CMP(pl_cache_objects(test), ==, 3, "d"); REQUIRE(!pl_cache_get(test, &obj3)); REQUIRE(pl_cache_get(test, &obj4)); REQUIRE(pl_cache_get(test, &obj5)); REQUIRE(pl_cache_get(test, &obj6)); REQUIRE_CMP(pl_cache_size(test), ==, 0, "zu"); REQUIRE_CMP(pl_cache_objects(test), ==, 0, "d"); pl_cache_obj_free(&obj4); pl_cache_obj_free(&obj5); pl_cache_obj_free(&obj6); // Test callback API int num_objects = 0; test2 = pl_cache_create(pl_cache_params( .get = lookup_foobar, .set = update_count, .priv = &num_objects, )); REQUIRE(pl_cache_get(test2, &obj1)); REQUIRE_CMP(obj1.key, ==, KEY1, PRIu64); REQUIRE_CMP(obj1.size, ==, 3, "zu"); REQUIRE_MEMEQ(obj1.data, "bar", 3); REQUIRE(pl_cache_get(test2, &obj2)); REQUIRE_CMP(obj2.key, ==, KEY2, PRIu64); REQUIRE_CMP(obj2.size, ==, 3, "zu"); REQUIRE_MEMEQ(obj2.data, "foo", 3); REQUIRE_CMP(pl_cache_objects(test2), ==, 0, "d"); REQUIRE_CMP(num_objects, ==, 0, "d"); REQUIRE(pl_cache_try_set(test2, &obj1)); REQUIRE(pl_cache_try_set(test2, &obj2)); REQUIRE(pl_cache_try_set(test2, &(pl_cache_obj) { .key = KEY7, .data = "abcde", .size = 5 })); REQUIRE_CMP(pl_cache_objects(test2), ==, 3, "d"); REQUIRE_CMP(num_objects, ==, 3, "d"); REQUIRE(pl_cache_try_set(test2, &obj1)); REQUIRE(pl_cache_try_set(test2, &obj2)); REQUIRE_CMP(pl_cache_objects(test2), ==, 1, "d"); REQUIRE_CMP(num_objects, ==, 1, "d"); pl_cache_destroy(&test2); pl_cache_destroy(&test); pl_log_destroy(&log); return 0; } libplacebo-v7.349.0/src/tests/colorspace.c000066400000000000000000000554331463457750100204350ustar00rootroot00000000000000#include "utils.h" int main() { for (enum pl_color_system sys = 0; sys < PL_COLOR_SYSTEM_COUNT; sys++) { bool ycbcr = sys >= PL_COLOR_SYSTEM_BT_601 && sys <= PL_COLOR_SYSTEM_YCGCO; REQUIRE_CMP(ycbcr, ==, pl_color_system_is_ycbcr_like(sys), "d"); } for (enum pl_color_transfer trc = 0; trc < PL_COLOR_TRC_COUNT; trc++) { printf("Testing color transfer: %s\n", pl_color_transfer_name(trc)); bool hdr = trc >= PL_COLOR_TRC_PQ && trc <= PL_COLOR_TRC_S_LOG2; REQUIRE_CMP(hdr, ==, pl_color_transfer_is_hdr(trc), "d"); REQUIRE_CMP(pl_color_transfer_nominal_peak(trc), >=, 1.0, "f"); if (trc == PL_COLOR_TRC_LINEAR) continue; // Test round trip const float peak = 1.0f, contrast = 1000; const struct pl_color_space csp = { .transfer = trc, .hdr.max_luma = PL_COLOR_SDR_WHITE * peak, .hdr.min_luma = PL_COLOR_SDR_WHITE * peak / contrast, }; for (float x = 0.0f; x <= 1.0f; x += 0.01f) { float color[3] = { x, x, x }; pl_color_linearize(&csp, color); if (trc == PL_COLOR_TRC_PQ) REQUIRE_FEQ(color[0], pl_hdr_rescale(PL_HDR_PQ, PL_HDR_NORM, x), 1e-5f); if (pl_color_space_is_black_scaled(&csp) || trc == PL_COLOR_TRC_BT_1886) REQUIRE_CMP(color[0] + 1e-6f, >=, peak / contrast, "f"); if (!pl_color_space_is_hdr(&csp) && trc != PL_COLOR_TRC_ST428) REQUIRE_CMP(color[0] - 1e-6f, <=, peak, "f"); switch (trc) { case PL_COLOR_TRC_V_LOG: case PL_COLOR_TRC_S_LOG1: case PL_COLOR_TRC_S_LOG2: // FIXME: these don't currently round-trip on subzero values break; default: pl_color_delinearize(&csp, color); REQUIRE_FEQ(color[0], x, 1e-5f); break; } } } float pq_peak = pl_color_transfer_nominal_peak(PL_COLOR_TRC_PQ); REQUIRE_FEQ(PL_COLOR_SDR_WHITE * pq_peak, 10000, 1e-7); struct pl_color_repr tv_repr = { .sys = PL_COLOR_SYSTEM_BT_709, .levels = PL_COLOR_LEVELS_LIMITED, }; struct pl_color_repr pc_repr = { .sys = PL_COLOR_SYSTEM_RGB, .levels = PL_COLOR_LEVELS_FULL, }; // Ensure this is a no-op for bits == bits for (int bits = 1; bits <= 16; bits++) { 
tv_repr.bits.color_depth = tv_repr.bits.sample_depth = bits; pc_repr.bits.color_depth = pc_repr.bits.sample_depth = bits; REQUIRE_FEQ(pl_color_repr_normalize(&tv_repr), 1.0, 1e-7); REQUIRE_FEQ(pl_color_repr_normalize(&pc_repr), 1.0, 1e-7); } tv_repr.bits.color_depth = 8; tv_repr.bits.sample_depth = 10; float tv8to10 = pl_color_repr_normalize(&tv_repr); tv_repr.bits.color_depth = 8; tv_repr.bits.sample_depth = 12; float tv8to12 = pl_color_repr_normalize(&tv_repr); // Simulate the effect of GPU texture sampling on UNORM texture REQUIRE_FEQ(tv8to10 * 16 /1023., 64/1023., 1e-7); // black REQUIRE_FEQ(tv8to10 * 235/1023., 940/1023., 1e-7); // nominal white REQUIRE_FEQ(tv8to10 * 128/1023., 512/1023., 1e-7); // achromatic REQUIRE_FEQ(tv8to10 * 240/1023., 960/1023., 1e-7); // nominal chroma peak REQUIRE_FEQ(tv8to12 * 16 /4095., 256 /4095., 1e-7); // black REQUIRE_FEQ(tv8to12 * 235/4095., 3760/4095., 1e-7); // nominal white REQUIRE_FEQ(tv8to12 * 128/4095., 2048/4095., 1e-7); // achromatic REQUIRE_FEQ(tv8to12 * 240/4095., 3840/4095., 1e-7); // nominal chroma peak // Ensure lavc's xyz12 is handled correctly struct pl_color_repr xyz12 = { .sys = PL_COLOR_SYSTEM_XYZ, .levels = PL_COLOR_LEVELS_UNKNOWN, .bits = { .sample_depth = 16, .color_depth = 12, .bit_shift = 4, }, }; float xyz = pl_color_repr_normalize(&xyz12); REQUIRE_FEQ(xyz * (4095 << 4), 65535, 1e-7); // Assume we uploaded a 10-bit source directly (unshifted) as a 16-bit // texture. This texture multiplication factor should make it behave as if // it was uploaded as a 10-bit texture instead. pc_repr.bits.color_depth = 10; pc_repr.bits.sample_depth = 16; float pc10to16 = pl_color_repr_normalize(&pc_repr); REQUIRE_FEQ(pc10to16 * 1000/65535., 1000/1023., 1e-7); const struct pl_raw_primaries *bt709, *bt2020, *dcip3; bt709 = pl_raw_primaries_get(PL_COLOR_PRIM_BT_709); bt2020 = pl_raw_primaries_get(PL_COLOR_PRIM_BT_2020); dcip3 = pl_raw_primaries_get(PL_COLOR_PRIM_DCI_P3); REQUIRE(pl_primaries_superset(bt2020, bt709)); REQUIRE(!pl_primaries_superset(bt2020, dcip3)); // small region doesn't overlap REQUIRE(pl_primaries_superset(dcip3, bt709)); REQUIRE(!pl_primaries_superset(bt709, bt2020)); REQUIRE(pl_primaries_compatible(bt2020, bt2020)); REQUIRE(pl_primaries_compatible(bt2020, bt709)); REQUIRE(pl_primaries_compatible(bt709, bt2020)); REQUIRE(pl_primaries_compatible(bt2020, dcip3)); REQUIRE(pl_primaries_compatible(bt709, dcip3)); struct pl_raw_primaries bt709_2020 = pl_primaries_clip(bt709, bt2020); struct pl_raw_primaries bt2020_709 = pl_primaries_clip(bt2020, bt709); REQUIRE(pl_raw_primaries_similar(&bt709_2020, bt709)); REQUIRE(pl_raw_primaries_similar(&bt2020_709, bt709)); struct pl_raw_primaries dcip3_bt2020 = pl_primaries_clip(dcip3, bt2020); struct pl_raw_primaries dcip3_bt709 = pl_primaries_clip(dcip3, bt709); REQUIRE(pl_primaries_superset(dcip3, &dcip3_bt2020)); REQUIRE(pl_primaries_superset(dcip3, &dcip3_bt709)); REQUIRE(pl_primaries_superset(bt2020, &dcip3_bt2020)); REQUIRE(pl_primaries_superset(bt709, &dcip3_bt709)); pl_matrix3x3 rgb2xyz, rgb2xyz_; rgb2xyz = rgb2xyz_ = pl_get_rgb2xyz_matrix(bt709); pl_matrix3x3_invert(&rgb2xyz_); pl_matrix3x3_invert(&rgb2xyz_); // Make sure the double-inversion round trips for (int y = 0; y < 3; y++) { for (int x = 0; x < 3; x++) REQUIRE_FEQ(rgb2xyz.m[y][x], rgb2xyz_.m[y][x], 1e-6); } // Make sure mapping the spectral RGB colors (i.e. 
the matrix rows) matches // our original primaries float Y = rgb2xyz.m[1][0]; REQUIRE_FEQ(rgb2xyz.m[0][0], pl_cie_X(bt709->red) * Y, 1e-7); REQUIRE_FEQ(rgb2xyz.m[2][0], pl_cie_Z(bt709->red) * Y, 1e-7); Y = rgb2xyz.m[1][1]; REQUIRE_FEQ(rgb2xyz.m[0][1], pl_cie_X(bt709->green) * Y, 1e-7); REQUIRE_FEQ(rgb2xyz.m[2][1], pl_cie_Z(bt709->green) * Y, 1e-7); Y = rgb2xyz.m[1][2]; REQUIRE_FEQ(rgb2xyz.m[0][2], pl_cie_X(bt709->blue) * Y, 1e-7); REQUIRE_FEQ(rgb2xyz.m[2][2], pl_cie_Z(bt709->blue) * Y, 1e-7); // Make sure the gamut mapping round-trips pl_matrix3x3 bt709_bt2020, bt2020_bt709; bt709_bt2020 = pl_get_color_mapping_matrix(bt709, bt2020, PL_INTENT_RELATIVE_COLORIMETRIC); bt2020_bt709 = pl_get_color_mapping_matrix(bt2020, bt709, PL_INTENT_RELATIVE_COLORIMETRIC); for (int n = 0; n < 10; n++) { float vec[3] = { RANDOM, RANDOM, RANDOM }; float dst[3] = { vec[0], vec[1], vec[2] }; pl_matrix3x3_apply(&bt709_bt2020, dst); pl_matrix3x3_apply(&bt2020_bt709, dst); for (int i = 0; i < 3; i++) REQUIRE_FEQ(dst[i], vec[i], 1e-6); } // Ensure the decoding matrix round-trips to white/black for (enum pl_color_system sys = 0; sys < PL_COLOR_SYSTEM_COUNT; sys++) { if (!pl_color_system_is_linear(sys)) continue; printf("Testing color system: %s\n", pl_color_system_name(sys)); struct pl_color_repr repr = { .levels = PL_COLOR_LEVELS_LIMITED, .sys = sys, .bits = { // synthetic test .color_depth = 8, .sample_depth = 10, }, }; float scale = pl_color_repr_normalize(&repr); pl_transform3x3 yuv2rgb = pl_color_repr_decode(&repr, NULL); pl_matrix3x3_scale(&yuv2rgb.mat, scale); static const float white_ycbcr[3] = { 235/1023., 128/1023., 128/1023. }; static const float black_ycbcr[3] = { 16/1023., 128/1023., 128/1023. }; static const float white_other[3] = { 235/1023., 235/1023., 235/1023. }; static const float black_other[3] = { 16/1023., 16/1023., 16/1023. }; float white[3], black[3]; for (int i = 0; i < 3; i++) { if (pl_color_system_is_ycbcr_like(sys)) { white[i] = white_ycbcr[i]; black[i] = black_ycbcr[i]; } else { white[i] = white_other[i]; black[i] = black_other[i]; } } pl_transform3x3_apply(&yuv2rgb, white); REQUIRE_FEQ(white[0], 1.0, 1e-6); REQUIRE_FEQ(white[1], 1.0, 1e-6); REQUIRE_FEQ(white[2], 1.0, 1e-6); pl_transform3x3_apply(&yuv2rgb, black); REQUIRE_FEQ(black[0], 0.0, 1e-6); REQUIRE_FEQ(black[1], 0.0, 1e-6); REQUIRE_FEQ(black[2], 0.0, 1e-6); } // Make sure chromatic adaptation works struct pl_raw_primaries bt709_d50; bt709_d50 = *pl_raw_primaries_get(PL_COLOR_PRIM_BT_709); bt709_d50.white = (struct pl_cie_xy) { 0.34567, 0.35850 }; pl_matrix3x3 d50_d65; d50_d65 = pl_get_color_mapping_matrix(&bt709_d50, bt709, PL_INTENT_RELATIVE_COLORIMETRIC); float white[3] = { 1.0, 1.0, 1.0 }; pl_matrix3x3_apply(&d50_d65, white); REQUIRE_FEQ(white[0], 1.0, 1e-6); REQUIRE_FEQ(white[1], 1.0, 1e-6); REQUIRE_FEQ(white[2], 1.0, 1e-6); // Simulate a typical 10-bit YCbCr -> 16 bit texture conversion tv_repr.bits.color_depth = 10; tv_repr.bits.sample_depth = 16; pl_transform3x3 yuv2rgb; yuv2rgb = pl_color_repr_decode(&tv_repr, NULL); float test[3] = { 575/65535., 336/65535., 640/65535. 
}; pl_transform3x3_apply(&yuv2rgb, test); REQUIRE_FEQ(test[0], 0.808305, 1e-6); REQUIRE_FEQ(test[1], 0.553254, 1e-6); REQUIRE_FEQ(test[2], 0.218841, 1e-6); // DVD REQUIRE_CMP(pl_color_system_guess_ycbcr(720, 480), ==, PL_COLOR_SYSTEM_BT_601, "u"); REQUIRE_CMP(pl_color_system_guess_ycbcr(720, 576), ==, PL_COLOR_SYSTEM_BT_601, "u"); REQUIRE_CMP(pl_color_primaries_guess(720, 576), ==, PL_COLOR_PRIM_BT_601_625, "u"); REQUIRE_CMP(pl_color_primaries_guess(720, 480), ==, PL_COLOR_PRIM_BT_601_525, "u"); // PAL 16:9 REQUIRE_CMP(pl_color_system_guess_ycbcr(1024, 576), ==, PL_COLOR_SYSTEM_BT_601, "u"); REQUIRE_CMP(pl_color_primaries_guess(1024, 576), ==, PL_COLOR_PRIM_BT_601_625, "u"); // HD REQUIRE_CMP(pl_color_system_guess_ycbcr(1280, 720), ==, PL_COLOR_SYSTEM_BT_709, "u"); REQUIRE_CMP(pl_color_system_guess_ycbcr(1920, 1080), ==, PL_COLOR_SYSTEM_BT_709, "u"); REQUIRE_CMP(pl_color_primaries_guess(1280, 720), ==, PL_COLOR_PRIM_BT_709, "u"); REQUIRE_CMP(pl_color_primaries_guess(1920, 1080), ==, PL_COLOR_PRIM_BT_709, "u"); // Odd/weird videos REQUIRE_CMP(pl_color_primaries_guess(2000, 576), ==, PL_COLOR_PRIM_BT_709, "u"); REQUIRE_CMP(pl_color_primaries_guess(200, 200), ==, PL_COLOR_PRIM_BT_709, "u"); REQUIRE(pl_color_repr_equal(&pl_color_repr_sdtv, &pl_color_repr_sdtv)); REQUIRE(!pl_color_repr_equal(&pl_color_repr_sdtv, &pl_color_repr_hdtv)); struct pl_color_repr repr = pl_color_repr_unknown; pl_color_repr_merge(&repr, &pl_color_repr_uhdtv); REQUIRE(pl_color_repr_equal(&repr, &pl_color_repr_uhdtv)); REQUIRE(!pl_color_primaries_is_wide_gamut(PL_COLOR_PRIM_UNKNOWN)); REQUIRE(!pl_color_primaries_is_wide_gamut(PL_COLOR_PRIM_BT_601_525)); REQUIRE(!pl_color_primaries_is_wide_gamut(PL_COLOR_PRIM_BT_601_625)); REQUIRE(!pl_color_primaries_is_wide_gamut(PL_COLOR_PRIM_BT_709)); REQUIRE(!pl_color_primaries_is_wide_gamut(PL_COLOR_PRIM_BT_470M)); REQUIRE(pl_color_primaries_is_wide_gamut(PL_COLOR_PRIM_BT_2020)); REQUIRE(pl_color_primaries_is_wide_gamut(PL_COLOR_PRIM_APPLE)); REQUIRE(pl_color_primaries_is_wide_gamut(PL_COLOR_PRIM_ADOBE)); REQUIRE(pl_color_primaries_is_wide_gamut(PL_COLOR_PRIM_PRO_PHOTO)); REQUIRE(pl_color_primaries_is_wide_gamut(PL_COLOR_PRIM_CIE_1931)); REQUIRE(pl_color_primaries_is_wide_gamut(PL_COLOR_PRIM_DCI_P3)); REQUIRE(pl_color_primaries_is_wide_gamut(PL_COLOR_PRIM_DISPLAY_P3)); REQUIRE(pl_color_primaries_is_wide_gamut(PL_COLOR_PRIM_V_GAMUT)); REQUIRE(pl_color_primaries_is_wide_gamut(PL_COLOR_PRIM_S_GAMUT)); struct pl_color_space space = pl_color_space_unknown; pl_color_space_merge(&space, &pl_color_space_bt709); REQUIRE(pl_color_space_equal(&space, &pl_color_space_bt709)); // Infer some color spaces struct pl_color_space hlg = { .primaries = PL_COLOR_PRIM_BT_2020, .transfer = PL_COLOR_TRC_HLG, }; pl_color_space_infer(&hlg); REQUIRE_CMP(hlg.hdr.max_luma, ==, PL_COLOR_HLG_PEAK, "f"); struct pl_color_space unknown = {0}; struct pl_color_space display = { .primaries = PL_COLOR_PRIM_BT_709, .transfer = PL_COLOR_TRC_BT_1886, }; pl_color_space_infer(&unknown); pl_color_space_infer(&display); REQUIRE(pl_color_space_equal(&unknown, &display)); float x, y; pl_chroma_location_offset(PL_CHROMA_LEFT, &x, &y); REQUIRE_CMP(x, ==, -0.5f, "f"); REQUIRE_CMP(y, ==, 0.0f, "f"); pl_chroma_location_offset(PL_CHROMA_TOP_LEFT, &x, &y); REQUIRE_CMP(x, ==, -0.5f, "f"); REQUIRE_CMP(y, ==, -0.5f, "f"); pl_chroma_location_offset(PL_CHROMA_CENTER, &x, &y); REQUIRE_CMP(x, ==, 0.0f, "f"); REQUIRE_CMP(y, ==, 0.0f, "f"); pl_chroma_location_offset(PL_CHROMA_BOTTOM_CENTER, &x, &y); REQUIRE_CMP(x, ==, 0.0f, "f"); REQUIRE_CMP(y, 
==, 0.5f, "f"); REQUIRE_CMP(pl_raw_primaries_get(PL_COLOR_PRIM_UNKNOWN), ==, pl_raw_primaries_get(PL_COLOR_PRIM_BT_709), "p"); // Color blindness tests float red[3] = { 1.0, 0.0, 0.0 }; float green[3] = { 0.0, 1.0, 0.0 }; float blue[3] = { 0.0, 0.0, 1.0 }; #define TEST_CONE(model, color) \ do { \ float tmp[3] = { (color)[0], (color)[1], (color)[2] }; \ pl_matrix3x3 mat = pl_get_cone_matrix(&(model), bt709); \ pl_matrix3x3_apply(&mat, tmp); \ printf("%s + %s = %f %f %f\n", #model, #color, tmp[0], tmp[1], tmp[2]); \ for (int i = 0; i < 3; i++) \ REQUIRE_FEQ((color)[i], tmp[i], 1e-5f); \ } while(0) struct pl_cone_params red_only = { .cones = PL_CONE_MS }; struct pl_cone_params green_only = { .cones = PL_CONE_LS }; struct pl_cone_params blue_only = pl_vision_monochromacy; // These models should all round-trip white TEST_CONE(pl_vision_normal, white); TEST_CONE(pl_vision_protanopia, white); TEST_CONE(pl_vision_protanomaly, white); TEST_CONE(pl_vision_deuteranomaly, white); TEST_CONE(pl_vision_tritanomaly, white); TEST_CONE(pl_vision_achromatopsia, white); TEST_CONE(red_only, white); TEST_CONE(green_only, white); TEST_CONE(blue_only, white); // These models should round-trip blue TEST_CONE(pl_vision_normal, blue); TEST_CONE(pl_vision_protanomaly, blue); TEST_CONE(pl_vision_deuteranomaly, blue); // These models should round-trip red TEST_CONE(pl_vision_normal, red); TEST_CONE(pl_vision_tritanomaly, red); TEST_CONE(pl_vision_tritanopia, red); // These models should round-trip green TEST_CONE(pl_vision_normal, green); // Color adaptation tests struct pl_cie_xy d65 = pl_white_from_temp(6504); REQUIRE_FEQ(d65.x, 0.31271, 1e-3); REQUIRE_FEQ(d65.y, 0.32902, 1e-3); struct pl_cie_xy d55 = pl_white_from_temp(5503); REQUIRE_FEQ(d55.x, 0.33242, 1e-3); REQUIRE_FEQ(d55.y, 0.34743, 1e-3); // Make sure we infer the correct set of metadata parameters #define TEST_METADATA(CSP, TYPE, MIN, MAX, AVG) \ do { \ float _min, _max, _avg; \ pl_color_space_nominal_luma_ex(pl_nominal_luma_params( \ .color = &(CSP), \ .metadata = TYPE, \ .scaling = PL_HDR_PQ, \ .out_min = &_min, \ .out_max = &_max, \ .out_avg = &_avg, \ )); \ const float _min_ref = pl_hdr_rescale(PL_HDR_NITS, PL_HDR_PQ, MIN); \ const float _max_ref = pl_hdr_rescale(PL_HDR_NITS, PL_HDR_PQ, MAX); \ const float _avg_ref = pl_hdr_rescale(PL_HDR_NITS, PL_HDR_PQ, AVG); \ REQUIRE_FEQ(_min, _min_ref, 1e-5); \ REQUIRE_FEQ(_max, _max_ref, 1e-5); \ REQUIRE_FEQ(_avg, _avg_ref, 1e-5); \ } while (0) const struct pl_color_space hdr10plus = { .primaries = PL_COLOR_PRIM_BT_2020, .transfer = PL_COLOR_TRC_PQ, .hdr = { .min_luma = 0.005, .max_luma = 4000, .scene_max = {596.69, 1200, 500}, .scene_avg = 300, }, }; REQUIRE(pl_hdr_metadata_contains(&hdr10plus.hdr, PL_HDR_METADATA_ANY)); REQUIRE(pl_hdr_metadata_contains(&hdr10plus.hdr, PL_HDR_METADATA_NONE)); REQUIRE(pl_hdr_metadata_contains(&hdr10plus.hdr, PL_HDR_METADATA_HDR10)); REQUIRE(pl_hdr_metadata_contains(&hdr10plus.hdr, PL_HDR_METADATA_HDR10PLUS)); REQUIRE(!pl_hdr_metadata_contains(&hdr10plus.hdr, PL_HDR_METADATA_CIE_Y)); TEST_METADATA(hdr10plus, PL_HDR_METADATA_NONE, PL_COLOR_HDR_BLACK, 10000, 0); TEST_METADATA(hdr10plus, PL_HDR_METADATA_CIE_Y, PL_COLOR_HDR_BLACK, 4000, 0); TEST_METADATA(hdr10plus, PL_HDR_METADATA_HDR10, PL_COLOR_HDR_BLACK, 4000, 0); TEST_METADATA(hdr10plus, PL_HDR_METADATA_HDR10PLUS, PL_COLOR_HDR_BLACK, 1000, 250); TEST_METADATA(hdr10plus, PL_HDR_METADATA_ANY, PL_COLOR_HDR_BLACK, 1000, 250); const struct pl_color_space dovi = { .primaries = PL_COLOR_PRIM_BT_2020, .transfer = PL_COLOR_TRC_PQ, .hdr = { 
.min_luma = 0.005, .max_luma = 4000, .max_pq_y = pl_hdr_rescale(PL_HDR_NITS, PL_HDR_PQ, 1000), .avg_pq_y = pl_hdr_rescale(PL_HDR_NITS, PL_HDR_PQ, 250), }, }; REQUIRE(pl_hdr_metadata_contains(&dovi.hdr, PL_HDR_METADATA_ANY)); REQUIRE(pl_hdr_metadata_contains(&dovi.hdr, PL_HDR_METADATA_NONE)); REQUIRE(pl_hdr_metadata_contains(&dovi.hdr, PL_HDR_METADATA_HDR10)); REQUIRE(pl_hdr_metadata_contains(&dovi.hdr, PL_HDR_METADATA_CIE_Y)); REQUIRE(!pl_hdr_metadata_contains(&dovi.hdr, PL_HDR_METADATA_HDR10PLUS)); TEST_METADATA(dovi, PL_HDR_METADATA_NONE, PL_COLOR_HDR_BLACK, 10000, 0); TEST_METADATA(dovi, PL_HDR_METADATA_HDR10, PL_COLOR_HDR_BLACK, 4000, 0); TEST_METADATA(dovi, PL_HDR_METADATA_HDR10PLUS, PL_COLOR_HDR_BLACK, 4000, 0); TEST_METADATA(dovi, PL_HDR_METADATA_CIE_Y, PL_COLOR_HDR_BLACK, 1000, 250); TEST_METADATA(dovi, PL_HDR_METADATA_ANY, PL_COLOR_HDR_BLACK, 1000, 250); const struct pl_color_space hlg4000 = { .primaries = PL_COLOR_PRIM_BT_2020, .transfer = PL_COLOR_TRC_HLG, .hdr.max_luma = 4000, .hdr.min_luma = 0.005, }; TEST_METADATA(hlg4000, PL_HDR_METADATA_NONE, PL_COLOR_HDR_BLACK, PL_COLOR_HLG_PEAK, 0); TEST_METADATA(hlg4000, PL_HDR_METADATA_HDR10, 0.005, 4000, 0); TEST_METADATA(hlg4000, PL_HDR_METADATA_ANY, 0.005, 4000, 0); const struct pl_color_space untagged = { .primaries = PL_COLOR_PRIM_BT_709, .transfer = PL_COLOR_TRC_BT_1886, }; REQUIRE(pl_hdr_metadata_contains(&untagged.hdr, PL_HDR_METADATA_NONE)); REQUIRE(!pl_hdr_metadata_contains(&untagged.hdr, PL_HDR_METADATA_ANY)); REQUIRE(!pl_hdr_metadata_contains(&untagged.hdr, PL_HDR_METADATA_HDR10)); REQUIRE(!pl_hdr_metadata_contains(&untagged.hdr, PL_HDR_METADATA_CIE_Y)); REQUIRE(!pl_hdr_metadata_contains(&untagged.hdr, PL_HDR_METADATA_HDR10PLUS)); const float sdr_black = PL_COLOR_SDR_WHITE / PL_COLOR_SDR_CONTRAST; TEST_METADATA(untagged, PL_HDR_METADATA_NONE, sdr_black, PL_COLOR_SDR_WHITE, 0); TEST_METADATA(untagged, PL_HDR_METADATA_ANY, sdr_black, PL_COLOR_SDR_WHITE, 0); const struct pl_color_space sdr50 = { .primaries = PL_COLOR_PRIM_BT_709, .transfer = PL_COLOR_TRC_BT_1886, .hdr.max_luma = 50, }; REQUIRE(pl_hdr_metadata_contains(&sdr50.hdr, PL_HDR_METADATA_NONE)); REQUIRE(pl_hdr_metadata_contains(&sdr50.hdr, PL_HDR_METADATA_ANY)); REQUIRE(pl_hdr_metadata_contains(&sdr50.hdr, PL_HDR_METADATA_HDR10)); REQUIRE(!pl_hdr_metadata_contains(&sdr50.hdr, PL_HDR_METADATA_CIE_Y)); REQUIRE(!pl_hdr_metadata_contains(&sdr50.hdr, PL_HDR_METADATA_HDR10PLUS)); TEST_METADATA(sdr50, PL_HDR_METADATA_NONE, sdr_black, PL_COLOR_SDR_WHITE, 0); TEST_METADATA(sdr50, PL_HDR_METADATA_HDR10, 50 / PL_COLOR_SDR_CONTRAST, 50, 0); TEST_METADATA(sdr50, PL_HDR_METADATA_ANY, 50 / PL_COLOR_SDR_CONTRAST, 50, 0); const struct pl_color_space sdr10k = { .primaries = PL_COLOR_PRIM_BT_709, .transfer = PL_COLOR_TRC_BT_1886, .hdr.min_luma = PL_COLOR_SDR_WHITE / 10000, }; REQUIRE(pl_hdr_metadata_contains(&sdr10k.hdr, PL_HDR_METADATA_NONE)); REQUIRE(!pl_hdr_metadata_contains(&sdr10k.hdr, PL_HDR_METADATA_ANY)); REQUIRE(!pl_hdr_metadata_contains(&sdr10k.hdr, PL_HDR_METADATA_HDR10)); TEST_METADATA(sdr10k, PL_HDR_METADATA_NONE, sdr_black, PL_COLOR_SDR_WHITE, 0); TEST_METADATA(sdr10k, PL_HDR_METADATA_HDR10, PL_COLOR_SDR_WHITE / 10000, PL_COLOR_SDR_WHITE, 0); TEST_METADATA(sdr10k, PL_HDR_METADATA_ANY, PL_COLOR_SDR_WHITE / 10000, PL_COLOR_SDR_WHITE, 0); const struct pl_color_space bogus_vals = { .primaries = PL_COLOR_PRIM_BT_2020, .transfer = PL_COLOR_TRC_HLG, .hdr.min_luma = 1e-9, .hdr.max_luma = 1000000, }; const struct pl_color_space bogus_flip = { .primaries = PL_COLOR_PRIM_BT_2020, 
.transfer = PL_COLOR_TRC_PQ, .hdr.min_luma = 4000, .hdr.max_luma = 0.05, }; const struct pl_color_space bogus_sign = { .primaries = PL_COLOR_PRIM_BT_2020, .transfer = PL_COLOR_TRC_HLG, .hdr.min_luma = -0.5, .hdr.max_luma = -4000, }; TEST_METADATA(bogus_vals, PL_HDR_METADATA_HDR10, PL_COLOR_HDR_BLACK, 10000, 0); TEST_METADATA(bogus_flip, PL_HDR_METADATA_HDR10, PL_COLOR_HDR_BLACK, 10000, 0); TEST_METADATA(bogus_sign, PL_HDR_METADATA_HDR10, PL_COLOR_HDR_BLACK, PL_COLOR_HLG_PEAK, 0); } libplacebo-v7.349.0/src/tests/common.c000066400000000000000000000104471463457750100175670ustar00rootroot00000000000000#include "utils.h" static int irand() { return rand() - RAND_MAX / 2; } int main() { pl_log log = pl_test_logger(); pl_log_update(log, NULL); pl_log_destroy(&log); // Test some misc helper functions pl_rect2d rc2 = { irand(), irand(), irand(), irand(), }; pl_rect3d rc3 = { irand(), irand(), irand(), irand(), irand(), irand(), }; pl_rect2d_normalize(&rc2); REQUIRE_CMP(rc2.x1, >=, rc2.x0, "d"); REQUIRE_CMP(rc2.y1, >=, rc2.y0, "d"); pl_rect3d_normalize(&rc3); REQUIRE_CMP(rc3.x1, >=, rc3.x0, "d"); REQUIRE_CMP(rc3.y1, >=, rc3.y0, "d"); REQUIRE_CMP(rc3.z1, >=, rc3.z0, "d"); pl_rect2df rc2f = { RANDOM, RANDOM, RANDOM, RANDOM, }; pl_rect3df rc3f = { RANDOM, RANDOM, RANDOM, RANDOM, RANDOM, RANDOM, }; pl_rect2df_normalize(&rc2f); REQUIRE_CMP(rc2f.x1, >=, rc2f.x0, "f"); REQUIRE_CMP(rc2f.y1, >=, rc2f.y0, "f"); pl_rect3df_normalize(&rc3f); REQUIRE_CMP(rc3f.x1, >=, rc3f.x0, "f"); REQUIRE_CMP(rc3f.y1, >=, rc3f.y0, "f"); REQUIRE_CMP(rc3f.z1, >=, rc3f.z0, "f"); pl_rect2d rc2r = pl_rect2df_round(&rc2f); pl_rect3d rc3r = pl_rect3df_round(&rc3f); REQUIRE_CMP(fabs(rc2r.x0 - rc2f.x0), <=, 0.5, "f"); REQUIRE_CMP(fabs(rc2r.x1 - rc2f.x1), <=, 0.5, "f"); REQUIRE_CMP(fabs(rc2r.y0 - rc2f.y0), <=, 0.5, "f"); REQUIRE_CMP(fabs(rc2r.y1 - rc2f.y1), <=, 0.5, "f"); REQUIRE_CMP(fabs(rc3r.x0 - rc3f.x0), <=, 0.5, "f"); REQUIRE_CMP(fabs(rc3r.x1 - rc3f.x1), <=, 0.5, "f"); REQUIRE_CMP(fabs(rc3r.y0 - rc3f.y0), <=, 0.5, "f"); REQUIRE_CMP(fabs(rc3r.y1 - rc3f.y1), <=, 0.5, "f"); REQUIRE_CMP(fabs(rc3r.z0 - rc3f.z0), <=, 0.5, "f"); REQUIRE_CMP(fabs(rc3r.z1 - rc3f.z1), <=, 0.5, "f"); pl_transform3x3 tr = { .mat = {{ { RANDOM, RANDOM, RANDOM }, { RANDOM, RANDOM, RANDOM }, { RANDOM, RANDOM, RANDOM }, }}, .c = { RANDOM, RANDOM, RANDOM }, }; pl_transform3x3 tr2 = tr; float scale = 1.0 + RANDOM; pl_transform3x3_scale(&tr2, scale); pl_transform3x3_invert(&tr2); pl_transform3x3_invert(&tr2); pl_transform3x3_scale(&tr2, 1.0 / scale); for (int i = 0; i < 3; i++) { for (int j = 0; j < 3; j++) { printf("%f %f\n", tr.mat.m[i][j], tr2.mat.m[i][j]); REQUIRE_FEQ(tr.mat.m[i][j], tr2.mat.m[i][j], 1e-4); } REQUIRE_FEQ(tr.c[i], tr2.c[i], 1e-4); } // Test aspect ratio code const pl_rect2df rc1080p = {0, 0, 1920, 1080}; const pl_rect2df rc43 = {0, 0, 1024, 768}; pl_rect2df rc; REQUIRE_FEQ(pl_rect2df_aspect(&rc1080p), 16.0/9.0, 1e-8); REQUIRE_FEQ(pl_rect2df_aspect(&rc43), 4.0/3.0, 1e-8); #define pl_rect2df_midx(rc) (((rc).x0 + (rc).x1) / 2.0) #define pl_rect2df_midy(rc) (((rc).y0 + (rc).y1) / 2.0) for (float aspect = 0.2; aspect < 3.0; aspect += 0.4) { for (float scan = 0.0; scan <= 1.0; scan += 0.5) { rc = rc1080p; pl_rect2df_aspect_set(&rc, aspect, scan); printf("aspect %.2f, panscan %.1f: {%f %f} -> {%f %f}\n", aspect, scan, rc.x0, rc.y0, rc.x1, rc.y1); REQUIRE_FEQ(pl_rect2df_aspect(&rc), aspect, 1e-6); REQUIRE_FEQ(pl_rect2df_midx(rc), pl_rect2df_midx(rc1080p), 1e-6); REQUIRE_FEQ(pl_rect2df_midy(rc), pl_rect2df_midy(rc1080p), 1e-6); } } rc = rc1080p; 
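// Fitting the 4:3 rect into the 16:9 canvas should keep the canvas center while adopting the 4:3 dimensions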
pl_rect2df_aspect_fit(&rc, &rc43, 0.0); REQUIRE_FEQ(pl_rect2df_aspect(&rc), pl_rect2df_aspect(&rc43), 1e-6); REQUIRE_FEQ(pl_rect2df_midx(rc), pl_rect2df_midx(rc1080p), 1e-6); REQUIRE_FEQ(pl_rect2df_midy(rc), pl_rect2df_midy(rc1080p), 1e-6); REQUIRE_FEQ(pl_rect_w(rc), pl_rect_w(rc43), 1e-6); REQUIRE_FEQ(pl_rect_h(rc), pl_rect_h(rc43), 1e-6); rc = rc43; pl_rect2df_aspect_fit(&rc, &rc1080p, 0.0); REQUIRE_FEQ(pl_rect2df_aspect(&rc), pl_rect2df_aspect(&rc1080p), 1e-6); REQUIRE_FEQ(pl_rect2df_midx(rc), pl_rect2df_midx(rc43), 1e-6); REQUIRE_FEQ(pl_rect2df_midy(rc), pl_rect2df_midy(rc43), 1e-6); REQUIRE_FEQ(pl_rect_w(rc), pl_rect_w(rc43), 1e-6); rc = (pl_rect2df) { 1920, 1080, 0, 0 }; pl_rect2df_offset(&rc, 50, 100); REQUIRE_FEQ(rc.x0, 1870, 1e-6); REQUIRE_FEQ(rc.x1, -50, 1e-6); REQUIRE_FEQ(rc.y0, 980, 1e-6); REQUIRE_FEQ(rc.y1, -100, 1e-6); } libplacebo-v7.349.0/src/tests/d3d11.c000066400000000000000000000027261463457750100171140ustar00rootroot00000000000000#include "gpu_tests.h" #include "d3d11/gpu.h" #include #include int main() { pl_log log = pl_test_logger(); IDXGIFactory1 *factory = NULL; IDXGIAdapter1 *adapter1 = NULL; HRESULT hr; HMODULE dxgi = LoadLibraryW(L"dxgi.dll"); if (!dxgi) return SKIP; __typeof__(&CreateDXGIFactory1) pCreateDXGIFactory1 = (void *) GetProcAddress(dxgi, "CreateDXGIFactory1"); if (!pCreateDXGIFactory1) return SKIP; hr = pCreateDXGIFactory1(&IID_IDXGIFactory1, (void **) &factory); if (FAILED(hr)) { printf("Failed to create DXGI factory\n"); return SKIP; } // Test all attached devices for (int i = 0;; i++) { hr = IDXGIFactory1_EnumAdapters1(factory, i, &adapter1); if (hr == DXGI_ERROR_NOT_FOUND) break; if (FAILED(hr)) { printf("Failed to enumerate adapters\n"); return SKIP; } DXGI_ADAPTER_DESC1 desc; hr = IDXGIAdapter1_GetDesc1(adapter1, &desc); if (FAILED(hr)) { printf("Failed to enumerate adapters\n"); return SKIP; } SAFE_RELEASE(adapter1); const struct pl_d3d11_t *d3d11 = pl_d3d11_create(log, pl_d3d11_params( .debug = true, .adapter_luid = desc.AdapterLuid, )); REQUIRE(d3d11); gpu_shader_tests(d3d11->gpu); pl_d3d11_destroy(&d3d11); } SAFE_RELEASE(factory); } libplacebo-v7.349.0/src/tests/dav1d.c000066400000000000000000000032351463457750100172730ustar00rootroot00000000000000#include "utils.h" #include "libplacebo/utils/dav1d.h" int main() { // Test enum functions for (enum pl_color_system sys = 0; sys < PL_COLOR_SYSTEM_COUNT; sys++) { // Exceptions to the rule, due to different handling in dav1d if (sys == PL_COLOR_SYSTEM_BT_2100_HLG || sys == PL_COLOR_SYSTEM_XYZ) continue; enum Dav1dMatrixCoefficients mc = pl_system_to_dav1d(sys); enum pl_color_system sys2 = pl_system_from_dav1d(mc); if (sys2) REQUIRE_CMP(sys, ==, sys2, "u"); } for (enum pl_color_levels lev = 0; lev < PL_COLOR_LEVELS_COUNT; lev++) { int range = pl_levels_to_dav1d(lev); enum pl_color_levels lev2 = pl_levels_from_dav1d(range); if (lev != PL_COLOR_LEVELS_UNKNOWN) REQUIRE_CMP(lev, ==, lev2, "u"); } for (enum pl_color_primaries prim = 0; prim < PL_COLOR_PRIM_COUNT; prim++) { enum Dav1dColorPrimaries dpri = pl_primaries_to_dav1d(prim); enum pl_color_primaries prim2 = pl_primaries_from_dav1d(dpri); if (prim2) REQUIRE_CMP(prim, ==, prim2, "u"); } for (enum pl_color_transfer trc = 0; trc < PL_COLOR_TRC_COUNT; trc++) { enum Dav1dTransferCharacteristics dtrc = pl_transfer_to_dav1d(trc); enum pl_color_transfer trc2 = pl_transfer_from_dav1d(dtrc); if (trc2) REQUIRE_CMP(trc, ==, trc2, "u"); } for (enum pl_chroma_location loc = 0; loc < PL_CHROMA_COUNT; loc++) { enum Dav1dChromaSamplePosition dloc = 
pl_chroma_to_dav1d(loc); enum pl_chroma_location loc2 = pl_chroma_from_dav1d(dloc); if (loc2) REQUIRE_CMP(loc, ==, loc2, "u"); } } libplacebo-v7.349.0/src/tests/dither.c000066400000000000000000000021261463457750100175510ustar00rootroot00000000000000#include "utils.h" #include #include #define SHIFT 4 #define SIZE (1 << SHIFT) float data[SIZE][SIZE]; int main() { printf("Ordered dither matrix:\n"); pl_generate_bayer_matrix(&data[0][0], SIZE); for (int y = 0; y < SIZE; y++) { for (int x = 0; x < SIZE; x++) printf(" %3d", (int)(data[y][x] * SIZE * SIZE)); printf("\n"); } printf("Blue noise dither matrix:\n"); pl_generate_blue_noise(&data[0][0], SHIFT); for (int y = 0; y < SIZE; y++) { for (int x = 0; x < SIZE; x++) printf(" %3d", (int)(data[y][x] * SIZE * SIZE)); printf("\n"); } // Generate an example of a dither shader pl_log log = pl_test_logger(); pl_shader sh = pl_shader_alloc(log, NULL); pl_shader_obj obj = NULL; pl_shader_dither(sh, 8, &obj, NULL); const struct pl_shader_res *res = pl_shader_finalize(sh); REQUIRE(res); printf("Generated dither shader:\n%s\n", res->glsl); pl_shader_obj_destroy(&obj); pl_shader_free(&sh); pl_log_destroy(&log); } libplacebo-v7.349.0/src/tests/dummy.c000066400000000000000000000037761463457750100174410ustar00rootroot00000000000000#include "gpu_tests.h" #include #include int main() { pl_log log = pl_test_logger(); pl_gpu gpu = pl_gpu_dummy_create(log, NULL); pl_buffer_tests(gpu); pl_texture_tests(gpu); // Attempt creating a shader and accessing the resulting LUT pl_tex dummy = pl_tex_dummy_create(gpu, pl_tex_dummy_params( .w = 100, .h = 100, .format = pl_find_named_fmt(gpu, "rgba8"), )); struct pl_sample_src src = { .tex = dummy, .new_w = 1000, .new_h = 1000, }; pl_shader_obj lut = NULL; struct pl_sample_filter_params filter_params = { .filter = pl_filter_ewa_lanczos, .lut = &lut, }; pl_shader sh = pl_shader_alloc(log, pl_shader_params( .gpu = gpu )); REQUIRE(pl_shader_sample_polar(sh, &src, &filter_params)); const struct pl_shader_res *res = pl_shader_finalize(sh); REQUIRE(res); for (int n = 0; n < res->num_descriptors; n++) { const struct pl_shader_desc *sd = &res->descriptors[n]; if (sd->desc.type != PL_DESC_SAMPLED_TEX) continue; pl_tex tex = sd->binding.object; const float *data = (float *) pl_tex_dummy_data(tex); if (!data) continue; // means this was the `dummy` texture #ifdef PRINT_LUTS for (int i = 0; i < tex->params.w; i++) printf("lut[%d] = %f\n", i, data[i]); #endif } // Try out generation of the sampler2D interface src.tex = NULL; src.tex_w = 100; src.tex_h = 100; src.format = PL_FMT_UNORM; src.sampler = PL_SAMPLER_NORMAL; src.mode = PL_TEX_SAMPLE_LINEAR; pl_shader_reset(sh, pl_shader_params( .gpu = gpu )); REQUIRE(pl_shader_sample_polar(sh, &src, &filter_params)); REQUIRE((res = pl_shader_finalize(sh))); REQUIRE_CMP(res->input, ==, PL_SHADER_SIG_SAMPLER, "u"); pl_shader_free(&sh); pl_shader_obj_destroy(&lut); pl_tex_destroy(gpu, &dummy); pl_gpu_dummy_destroy(&gpu); pl_log_destroy(&log); } libplacebo-v7.349.0/src/tests/filters.c000066400000000000000000000054621463457750100177500ustar00rootroot00000000000000#include "utils.h" #include int main() { pl_log log = pl_test_logger(); for (int i = 0; i < pl_num_filter_functions; i++) { const struct pl_filter_function *fun = pl_filter_functions[i]; if (fun->opaque) continue; printf("Testing filter function '%s'\n", fun->name); struct pl_filter_ctx ctx = { .radius = fun->radius }; memcpy(ctx.params, fun->params, sizeof(ctx.params)); // Ensure the kernel is correctly scaled REQUIRE_FEQ(fun->weight(&ctx, 
0.0), 1.0, 1e-7); // Only box filters are radius 1, these are unwindowed by design. // Gaussian technically never reaches 0 even at its preconfigured radius. if (fun->radius > 1.0 && fun != &pl_filter_function_gaussian) REQUIRE_FEQ(fun->weight(&ctx, fun->radius), 0.0, 1e-7); } for (int c = 0; c < pl_num_filter_configs; c++) { const struct pl_filter_config *conf = pl_filter_configs[c]; if (conf->kernel->opaque) continue; printf("Testing filter config '%s'\n", conf->name); pl_filter flt = pl_filter_generate(log, pl_filter_params( .config = *conf, .lut_entries = 256, .cutoff = 1e-3, )); REQUIRE(flt); const float radius = PL_DEF(conf->radius, conf->kernel->radius); REQUIRE_CMP(flt->radius, <=, radius, "f"); REQUIRE_CMP(flt->radius_zero, >, 0.0, "f"); REQUIRE_CMP(flt->radius_zero, <=, flt->radius, "f"); if (conf->polar) { // Test LUT accuracy const int range = flt->params.lut_entries - 1; double scale = flt->weights[0] / pl_filter_sample(conf, 0.0); double err = 0.0; for (float k = 0.0; k <= 1.0; k += 1e-3f) { double ref = scale * pl_filter_sample(conf, k * flt->radius); double idx = k * range; int base = floorf(idx); double fpart = idx - base; int next = PL_MIN(base + 1, range); double interp = PL_MIX(flt->weights[base], flt->weights[next], fpart); err = fmaxf(err, fabs(interp - ref)); } REQUIRE_CMP(err, <=, 1e-4, "g"); } else { // Ensure the weights for each row add up to unity for (int i = 0; i < flt->params.lut_entries; i++) { const float *row = flt->weights + i * flt->row_stride; float sum = 0.0; REQUIRE(flt->row_size); REQUIRE_CMP(flt->row_stride, >=, flt->row_size, "d"); for (int n = 0; n < flt->row_size; n++) sum += row[n]; REQUIRE_FEQ(sum, 1.0, 1e-6); } } pl_filter_free(&flt); } pl_log_destroy(&log); } libplacebo-v7.349.0/src/tests/fuzz/000077500000000000000000000000001463457750100171235ustar00rootroot00000000000000libplacebo-v7.349.0/src/tests/fuzz/lut.c000066400000000000000000000006631463457750100201000ustar00rootroot00000000000000#include "../tests.h" #include __AFL_FUZZ_INIT(); #pragma clang optimize off int main() { struct pl_custom_lut *lut; #ifdef __AFL_HAVE_MANUAL_CONTROL __AFL_INIT(); #endif unsigned char *buf = __AFL_FUZZ_TESTCASE_BUF; while (__AFL_LOOP(100000)) { size_t len = __AFL_FUZZ_TESTCASE_LEN; lut = pl_lut_parse_cube(NULL, (char *) buf, len); pl_lut_free(&lut); } } libplacebo-v7.349.0/src/tests/fuzz/options.c000066400000000000000000000010401463457750100207550ustar00rootroot00000000000000#include "../tests.h" #include __AFL_FUZZ_INIT(); #pragma clang optimize off int main() { pl_options opts = pl_options_alloc(NULL); #ifdef __AFL_HAVE_MANUAL_CONTROL __AFL_INIT(); #endif unsigned char *buf = __AFL_FUZZ_TESTCASE_BUF; while (__AFL_LOOP(100000)) { size_t len = __AFL_FUZZ_TESTCASE_LEN; buf[len - 1] = '\0'; // ensure proper null termination pl_options_load(opts, (const char *) buf); pl_options_save(opts); pl_options_reset(opts, NULL); } } libplacebo-v7.349.0/src/tests/fuzz/shaders.c000066400000000000000000000113571463457750100207270ustar00rootroot00000000000000#include "../tests.h" #include "shaders.h" #include #include #include #include __AFL_FUZZ_INIT(); #pragma clang optimize off int main() { pl_gpu gpu = pl_gpu_dummy_create(NULL, NULL); #define WIDTH 64 #define HEIGHT 64 #define COMPS 4 static const float empty[HEIGHT][WIDTH][COMPS] = {0}; struct pl_sample_src src = { .tex = pl_tex_create(gpu, pl_tex_params( .format = pl_find_fmt(gpu, PL_FMT_FLOAT, COMPS, 0, 32, PL_FMT_CAP_SAMPLEABLE), .initial_data = empty, .sampleable = true, .w = WIDTH, .h = HEIGHT, )), .new_w = WIDTH 
* 2, .new_h = HEIGHT * 2, }; if (!src.tex) return 1; #ifdef __AFL_HAVE_MANUAL_CONTROL __AFL_INIT(); #endif unsigned char *buf = __AFL_FUZZ_TESTCASE_BUF; while (__AFL_LOOP(10000)) { #define STACK_SIZE 16 pl_shader stack[STACK_SIZE] = {0}; int idx = 0; stack[0] = pl_shader_alloc(NULL, pl_shader_params( .gpu = gpu, )); pl_shader sh = stack[idx]; pl_shader_obj polar = NULL, ortho = NULL, peak = NULL, dither = NULL; size_t len = __AFL_FUZZ_TESTCASE_LEN; for (size_t pos = 0; pos < len; pos++) { switch (buf[pos]) { // Sampling steps case 'S': pl_shader_sample_direct(sh, &src); break; case 'D': pl_shader_deband(sh, &src, NULL); break; case 'P': pl_shader_sample_polar(sh, &src, pl_sample_filter_params( .filter = pl_filter_ewa_lanczos, .lut = &polar, )); break; case 'O': ; struct pl_sample_src srcfix = src; srcfix.new_w = WIDTH; pl_shader_sample_ortho2(sh, &srcfix, pl_sample_filter_params( .filter = pl_filter_spline36, .lut = &ortho, )); break; case 'X': pl_shader_custom(sh, &(struct pl_custom_shader) { .input = PL_SHADER_SIG_NONE, .output = PL_SHADER_SIG_COLOR, .body = "// merge subpasses", }); break; // Colorspace transformation steps case 'y': { struct pl_color_repr repr = pl_color_repr_jpeg; pl_shader_decode_color(sh, &repr, NULL); break; } case 'p': pl_shader_detect_peak(sh, pl_color_space_hdr10, &peak, NULL); break; case 'm': pl_shader_color_map(sh, NULL, pl_color_space_bt709, pl_color_space_monitor, NULL, false); break; case 't': pl_shader_color_map(sh, NULL, pl_color_space_hdr10, pl_color_space_monitor, &peak, false); break; case 'd': pl_shader_dither(sh, 8, &dither, pl_dither_params( // Picked to speed up calculation .method = PL_DITHER_ORDERED_LUT, .lut_size = 2, )); break; // Push and pop subshader commands case '(': if (idx+1 == STACK_SIZE) goto invalid; idx++; if (!stack[idx]) { stack[idx] = pl_shader_alloc(NULL, pl_shader_params( .gpu = gpu, .id = idx, )); } sh = stack[idx]; break; case ')': if (idx == 0) goto invalid; idx--; sh_subpass(stack[idx], stack[idx + 1]); pl_shader_reset(stack[idx + 1], pl_shader_params( .gpu = gpu, .id = idx + 1, )); sh = stack[idx]; break; default: goto invalid; } } // Merge remaining shaders while (idx > 0) { sh_subpass(stack[idx - 1], stack[idx]); idx--; } pl_shader_finalize(stack[0]); invalid: for (int i = 0; i < STACK_SIZE; i++) pl_shader_free(&stack[i]); pl_shader_obj_destroy(&polar); pl_shader_obj_destroy(&ortho); pl_shader_obj_destroy(&peak); pl_shader_obj_destroy(&dither); } pl_tex_destroy(gpu, &src.tex); pl_gpu_dummy_destroy(&gpu); } libplacebo-v7.349.0/src/tests/fuzz/user_shaders.c000066400000000000000000000010771463457750100217630ustar00rootroot00000000000000#include "../tests.h" #include #include __AFL_FUZZ_INIT(); #pragma clang optimize off int main() { pl_gpu gpu = pl_gpu_dummy_create(NULL, NULL); const struct pl_hook *hook; #ifdef __AFL_HAVE_MANUAL_CONTROL __AFL_INIT(); #endif unsigned char *buf = __AFL_FUZZ_TESTCASE_BUF; while (__AFL_LOOP(100000)) { size_t len = __AFL_FUZZ_TESTCASE_LEN; hook = pl_mpv_user_shader_parse(gpu, (char *) buf, len); pl_mpv_user_shader_destroy(&hook); } pl_gpu_dummy_destroy(&gpu); } libplacebo-v7.349.0/src/tests/gpu_tests.c000066400000000000000000001747141463457750100203240ustar00rootroot00000000000000#include "gpu_tests.h" #include "shaders.h" #include #include #include //#define PRINT_OUTPUT void pl_buffer_tests(pl_gpu gpu) { const size_t buf_size = 1024; if (buf_size > gpu->limits.max_buf_size) return; uint8_t *test_src = malloc(buf_size * 2); uint8_t *test_dst = test_src + buf_size; assert(test_src && test_dst); 
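// Fill the source with random bytes (and zero the destination) so every readback below can be checked against known data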
memset(test_dst, 0, buf_size); for (int i = 0; i < buf_size; i++) test_src[i] = RANDOM_U8; pl_buf buf = NULL, tbuf = NULL; printf("test buffer static creation and readback\n"); buf = pl_buf_create(gpu, pl_buf_params( .size = buf_size, .host_readable = true, .initial_data = test_src, )); REQUIRE(buf); REQUIRE(pl_buf_read(gpu, buf, 0, test_dst, buf_size)); REQUIRE_MEMEQ(test_src, test_dst, buf_size); pl_buf_destroy(gpu, &buf); printf("test buffer empty creation, update and readback\n"); memset(test_dst, 0, buf_size); buf = pl_buf_create(gpu, pl_buf_params( .size = buf_size, .host_writable = true, .host_readable = true, )); REQUIRE(buf); pl_buf_write(gpu, buf, 0, test_src, buf_size); REQUIRE(pl_buf_read(gpu, buf, 0, test_dst, buf_size)); REQUIRE_MEMEQ(test_src, test_dst, buf_size); pl_buf_destroy(gpu, &buf); printf("test buffer-buffer copy and readback\n"); memset(test_dst, 0, buf_size); buf = pl_buf_create(gpu, pl_buf_params( .size = buf_size, .initial_data = test_src, )); tbuf = pl_buf_create(gpu, pl_buf_params( .size = buf_size, .host_readable = true, )); REQUIRE(buf && tbuf); pl_buf_copy(gpu, tbuf, 0, buf, 0, buf_size); REQUIRE(pl_buf_read(gpu, tbuf, 0, test_dst, buf_size)); REQUIRE_MEMEQ(test_src, test_dst, buf_size); pl_buf_destroy(gpu, &buf); pl_buf_destroy(gpu, &tbuf); if (buf_size <= gpu->limits.max_mapped_size) { printf("test host mapped buffer readback\n"); buf = pl_buf_create(gpu, pl_buf_params( .size = buf_size, .host_mapped = true, .initial_data = test_src, )); REQUIRE(buf); REQUIRE(!pl_buf_poll(gpu, buf, 0)); REQUIRE_MEMEQ(test_src, buf->data, buf_size); pl_buf_destroy(gpu, &buf); } // `compute_queues` check is to exclude dummy GPUs here if (buf_size <= gpu->limits.max_ssbo_size && gpu->limits.compute_queues) { printf("test endian swapping\n"); buf = pl_buf_create(gpu, pl_buf_params( .size = buf_size, .storable = true, .initial_data = test_src, )); tbuf = pl_buf_create(gpu, pl_buf_params( .size = buf_size, .storable = true, .host_readable = true, )); REQUIRE(buf && tbuf); REQUIRE(pl_buf_copy_swap(gpu, &(struct pl_buf_copy_swap_params) { .src = buf, .dst = tbuf, .size = buf_size, .wordsize = 2, })); REQUIRE(pl_buf_read(gpu, tbuf, 0, test_dst, buf_size)); for (int i = 0; i < buf_size / 2; i++) { REQUIRE_CMP(test_src[2 * i + 0], ==, test_dst[2 * i + 1], PRIu8); REQUIRE_CMP(test_src[2 * i + 1], ==, test_dst[2 * i + 0], PRIu8); } // test endian swap in-place REQUIRE(pl_buf_copy_swap(gpu, &(struct pl_buf_copy_swap_params) { .src = tbuf, .dst = tbuf, .size = buf_size, .wordsize = 4, })); REQUIRE(pl_buf_read(gpu, tbuf, 0, test_dst, buf_size)); for (int i = 0; i < buf_size / 4; i++) { REQUIRE_CMP(test_src[4 * i + 0], ==, test_dst[4 * i + 2], PRIu8); REQUIRE_CMP(test_src[4 * i + 1], ==, test_dst[4 * i + 3], PRIu8); REQUIRE_CMP(test_src[4 * i + 2], ==, test_dst[4 * i + 0], PRIu8); REQUIRE_CMP(test_src[4 * i + 3], ==, test_dst[4 * i + 1], PRIu8); } pl_buf_destroy(gpu, &buf); pl_buf_destroy(gpu, &tbuf); } free(test_src); } static void test_cb(void *priv) { bool *flag = priv; *flag = true; } static void pl_test_roundtrip(pl_gpu gpu, pl_tex tex[2], uint8_t *src, uint8_t *dst) { if (!tex[0] || !tex[1]) { printf("failed creating test textures... skipping this test\n"); return; } int texels = tex[0]->params.w; texels *= tex[0]->params.h ? tex[0]->params.h : 1; texels *= tex[0]->params.d ? 
tex[0]->params.d : 1; pl_fmt fmt = tex[0]->params.format; size_t bytes = texels * fmt->texel_size; memset(src, 0, bytes); memset(dst, 0, bytes); for (int i = 0; i < texels; i++) { uint8_t *data = &src[i * fmt->texel_size]; if (fmt->type == PL_FMT_FLOAT) { for (int n = 0; n < fmt->num_components; n++) { switch (fmt->component_depth[n]) { case 16: *(uint16_t *) data = RANDOM_F16; data += 2; break; case 32: *(float *) data = RANDOM_F32; data += 4; break; case 64: *(double *) data = RANDOM_F64; data += 8; break; } } } else { for (int n = 0; n < fmt->texel_size; n++) data[n] = RANDOM_U8; } } pl_timer ul, dl; ul = pl_timer_create(gpu); dl = pl_timer_create(gpu); bool ran_ul = false, ran_dl = false; REQUIRE(pl_tex_upload(gpu, &(struct pl_tex_transfer_params){ .tex = tex[0], .ptr = src, .timer = ul, .callback = gpu->limits.callbacks ? test_cb : NULL, .priv = &ran_ul, })); // Test blitting, if possible for this format pl_tex dst_tex = tex[0]; if (tex[0]->params.blit_src && tex[1]->params.blit_dst) { pl_tex_clear_ex(gpu, tex[1], (union pl_clear_color){0}); // for testing pl_tex_blit(gpu, &(struct pl_tex_blit_params) { .src = tex[0], .dst = tex[1], }); dst_tex = tex[1]; } REQUIRE(pl_tex_download(gpu, &(struct pl_tex_transfer_params){ .tex = dst_tex, .ptr = dst, .timer = dl, .callback = gpu->limits.callbacks ? test_cb : NULL, .priv = &ran_dl, })); pl_gpu_finish(gpu); if (gpu->limits.callbacks) REQUIRE(ran_ul && ran_dl); if (fmt->emulated && fmt->type == PL_FMT_FLOAT) { // TODO: can't memcmp here because bits might be lost due to the // emulated 16/32 bit upload paths, figure out a better way to // generate data and verify the roundtrip! } else { REQUIRE_MEMEQ(src, dst, bytes); } // Report timer results printf("upload time: %"PRIu64", download time: %"PRIu64"\n", pl_timer_query(gpu, ul), pl_timer_query(gpu, dl)); pl_timer_destroy(gpu, &ul); pl_timer_destroy(gpu, &dl); } void pl_texture_tests(pl_gpu gpu) { const size_t max_size = 16*16*16 * 4 *sizeof(double); uint8_t *test_src = malloc(max_size * 2); uint8_t *test_dst = test_src + max_size; for (int f = 0; f < gpu->num_formats; f++) { pl_fmt fmt = gpu->formats[f]; if (fmt->opaque || !(fmt->caps & PL_FMT_CAP_HOST_READABLE)) continue; printf("testing texture roundtrip for format %s\n", fmt->name); assert(fmt->texel_size <= 4 * sizeof(double)); struct pl_tex_params ref_params = { .format = fmt, .blit_src = (fmt->caps & PL_FMT_CAP_BLITTABLE), .blit_dst = (fmt->caps & PL_FMT_CAP_BLITTABLE), .host_writable = true, .host_readable = true, .debug_tag = PL_DEBUG_TAG, }; pl_tex tex[2]; if (gpu->limits.max_tex_1d_dim >= 16) { printf("... 1D\n"); struct pl_tex_params params = ref_params; params.w = 16; if (!gpu->limits.blittable_1d_3d) params.blit_src = params.blit_dst = false; for (int i = 0; i < PL_ARRAY_SIZE(tex); i++) tex[i] = pl_tex_create(gpu, ¶ms); pl_test_roundtrip(gpu, tex, test_src, test_dst); for (int i = 0; i < PL_ARRAY_SIZE(tex); i++) pl_tex_destroy(gpu, &tex[i]); } if (gpu->limits.max_tex_2d_dim >= 16) { printf("... 2D\n"); struct pl_tex_params params = ref_params; params.w = params.h = 16; for (int i = 0; i < PL_ARRAY_SIZE(tex); i++) tex[i] = pl_tex_create(gpu, ¶ms); pl_test_roundtrip(gpu, tex, test_src, test_dst); for (int i = 0; i < PL_ARRAY_SIZE(tex); i++) pl_tex_destroy(gpu, &tex[i]); } if (gpu->limits.max_tex_3d_dim >= 16) { printf("... 
3D\n"); struct pl_tex_params params = ref_params; params.w = params.h = params.d = 16; if (!gpu->limits.blittable_1d_3d) params.blit_src = params.blit_dst = false; for (int i = 0; i < PL_ARRAY_SIZE(tex); i++) tex[i] = pl_tex_create(gpu, ¶ms); pl_test_roundtrip(gpu, tex, test_src, test_dst); for (int i = 0; i < PL_ARRAY_SIZE(tex); i++) pl_tex_destroy(gpu, &tex[i]); } } free(test_src); } static void pl_planar_tests(pl_gpu gpu) { pl_fmt fmt = pl_find_named_fmt(gpu, "g8_b8_r8_420"); if (!fmt) return; REQUIRE_CMP(fmt->num_planes, ==, 3, "d"); const int width = 64, height = 32; pl_tex tex = pl_tex_create(gpu, pl_tex_params( .w = width, .h = height, .format = fmt, .blit_dst = true, .host_readable = true, )); if (!tex) return; for (int i = 0; i < fmt->num_planes; i++) REQUIRE(tex->planes[i]); pl_tex plane = tex->planes[1]; uint8_t data[(width * height) >> 2]; REQUIRE_CMP(plane->params.w * plane->params.h, ==, PL_ARRAY_SIZE(data), "d"); pl_tex_clear(gpu, plane, (float[]){ (float) 0x80 / 0xFF, 0.0, 0.0, 1.0 }); REQUIRE(pl_tex_download(gpu, pl_tex_transfer_params( .tex = plane, .ptr = data, ))); uint8_t ref[PL_ARRAY_SIZE(data)]; memset(ref, 0x80, sizeof(ref)); REQUIRE_MEMEQ(data, ref, PL_ARRAY_SIZE(data)); pl_tex_destroy(gpu, &tex); } static void pl_shader_tests(pl_gpu gpu) { if (gpu->glsl.version < 410) return; const char *vert_shader = "#version 410 \n" "layout(location=0) in vec2 vertex_pos; \n" "layout(location=1) in vec3 vertex_color; \n" "layout(location=0) out vec3 frag_color; \n" "void main() { \n" " gl_Position = vec4(vertex_pos, 0, 1); \n" " frag_color = vertex_color; \n" "}"; const char *frag_shader = "#version 410 \n" "layout(location=0) in vec3 frag_color; \n" "layout(location=0) out vec4 out_color; \n" "void main() { \n" " out_color = vec4(frag_color, 1.0); \n" "}"; pl_fmt fbo_fmt; enum pl_fmt_caps caps = PL_FMT_CAP_RENDERABLE | PL_FMT_CAP_BLITTABLE | PL_FMT_CAP_LINEAR; fbo_fmt = pl_find_fmt(gpu, PL_FMT_FLOAT, 4, 16, 32, caps); if (!fbo_fmt) return; #define FBO_W 16 #define FBO_H 16 pl_tex fbo; fbo = pl_tex_create(gpu, &(struct pl_tex_params) { .format = fbo_fmt, .w = FBO_W, .h = FBO_H, .renderable = true, .storable = !!(fbo_fmt->caps & PL_FMT_CAP_STORABLE), .host_readable = true, .blit_dst = true, }); REQUIRE(fbo); pl_tex_clear_ex(gpu, fbo, (union pl_clear_color){0}); pl_fmt vert_fmt; vert_fmt = pl_find_vertex_fmt(gpu, PL_FMT_FLOAT, 3); REQUIRE(vert_fmt); static const struct vertex { float pos[2]; float color[3]; } vertices[] = { {{-1.0, -1.0}, {0, 0, 0}}, {{ 1.0, -1.0}, {1, 0, 0}}, {{-1.0, 1.0}, {0, 1, 0}}, {{ 1.0, 1.0}, {1, 1, 0}}, }; pl_pass pass; pass = pl_pass_create(gpu, &(struct pl_pass_params) { .type = PL_PASS_RASTER, .target_format = fbo_fmt, .vertex_shader = vert_shader, .glsl_shader = frag_shader, .vertex_type = PL_PRIM_TRIANGLE_STRIP, .vertex_stride = sizeof(struct vertex), .num_vertex_attribs = 2, .vertex_attribs = (struct pl_vertex_attrib[]) {{ .name = "vertex_pos", .fmt = pl_find_vertex_fmt(gpu, PL_FMT_FLOAT, 2), .location = 0, .offset = offsetof(struct vertex, pos), }, { .name = "vertex_color", .fmt = pl_find_vertex_fmt(gpu, PL_FMT_FLOAT, 3), .location = 1, .offset = offsetof(struct vertex, color), }}, }); REQUIRE(pass); if (pass->params.cached_program || pass->params.cached_program_len) { // Ensure both are set if either one is set REQUIRE(pass->params.cached_program); REQUIRE(pass->params.cached_program_len); } pl_timer timer = pl_timer_create(gpu); pl_pass_run(gpu, &(struct pl_pass_run_params) { .pass = pass, .target = fbo, .vertex_count = PL_ARRAY_SIZE(vertices), 
.vertex_data = vertices, .timer = timer, }); // Wait until this pass is complete and report the timer result pl_gpu_finish(gpu); printf("timer query result: %"PRIu64"\n", pl_timer_query(gpu, timer)); pl_timer_destroy(gpu, &timer); static float test_data[FBO_H * FBO_W * 4] = {0}; // Test against the known pattern of `src`, only useful for roundtrip tests #define TEST_FBO_PATTERN(eps, fmt, ...) \ do { \ printf("testing pattern of " fmt "\n", __VA_ARGS__); \ REQUIRE(pl_tex_download(gpu, &(struct pl_tex_transfer_params) { \ .tex = fbo, \ .ptr = test_data, \ })); \ \ for (int y = 0; y < FBO_H; y++) { \ for (int x = 0; x < FBO_W; x++) { \ float *color = &test_data[(y * FBO_W + x) * 4]; \ REQUIRE_FEQ(color[0], (x + 0.5) / FBO_W, eps); \ REQUIRE_FEQ(color[1], (y + 0.5) / FBO_H, eps); \ REQUIRE_FEQ(color[2], 0.0, eps); \ REQUIRE_FEQ(color[3], 1.0, eps); \ } \ } \ } while (0) TEST_FBO_PATTERN(1e-6, "%s", "initial rendering"); if (sizeof(vertices) <= gpu->limits.max_vbo_size) { // Test the use of an explicit vertex buffer pl_buf vert = pl_buf_create(gpu, &(struct pl_buf_params) { .size = sizeof(vertices), .initial_data = vertices, .drawable = true, }); REQUIRE(vert); pl_pass_run(gpu, &(struct pl_pass_run_params) { .pass = pass, .target = fbo, .vertex_count = sizeof(vertices) / sizeof(struct vertex), .vertex_buf = vert, .buf_offset = 0, }); pl_buf_destroy(gpu, &vert); TEST_FBO_PATTERN(1e-6, "%s", "using vertex buffer"); } // Test the use of index buffers static const uint16_t indices[] = { 3, 2, 1, 0 }; pl_pass_run(gpu, &(struct pl_pass_run_params) { .pass = pass, .target = fbo, .vertex_count = PL_ARRAY_SIZE(indices), .vertex_data = vertices, .index_data = indices, }); pl_pass_destroy(gpu, &pass); TEST_FBO_PATTERN(1e-6, "%s", "using indexed rendering"); // Test the use of pl_dispatch pl_dispatch dp = pl_dispatch_create(gpu->log, gpu); pl_shader sh = pl_dispatch_begin(dp); REQUIRE(pl_shader_custom(sh, &(struct pl_custom_shader) { .body = "color = vec4(col, 1.0);", .input = PL_SHADER_SIG_NONE, .output = PL_SHADER_SIG_COLOR, })); REQUIRE(pl_dispatch_vertex(dp, &(struct pl_dispatch_vertex_params) { .shader = &sh, .target = fbo, .vertex_stride = sizeof(struct vertex), .vertex_position_idx = 0, .num_vertex_attribs = 2, .vertex_attribs = (struct pl_vertex_attrib[]) {{ .name = "pos", .fmt = pl_find_vertex_fmt(gpu, PL_FMT_FLOAT, 2), .offset = offsetof(struct vertex, pos), }, { .name = "col", .fmt = pl_find_vertex_fmt(gpu, PL_FMT_FLOAT, 3), .offset = offsetof(struct vertex, color), }}, .vertex_type = PL_PRIM_TRIANGLE_STRIP, .vertex_coords = PL_COORDS_NORMALIZED, .vertex_count = PL_ARRAY_SIZE(vertices), .vertex_data = vertices, })); TEST_FBO_PATTERN(1e-6, "%s", "using custom vertices"); static float src_data[FBO_H * FBO_W * 4] = {0}; memcpy(src_data, test_data, sizeof(src_data)); pl_tex src; src = pl_tex_create(gpu, &(struct pl_tex_params) { .format = fbo_fmt, .w = FBO_W, .h = FBO_H, .storable = fbo->params.storable, .sampleable = true, .initial_data = src_data, }); if (fbo->params.storable) { // Test 1x1 blit, to make sure the scaling code runs REQUIRE(pl_tex_blit_compute(gpu, &(struct pl_tex_blit_params) { .src = src, .dst = fbo, .src_rc = {0, 0, 0, 1, 1, 1}, .dst_rc = {0, 0, 0, FBO_W, FBO_H, 1}, .sample_mode = PL_TEX_SAMPLE_NEAREST, })); // Test non-resizing blit, which uses the efficient imageLoad path REQUIRE(pl_tex_blit_compute(gpu, &(struct pl_tex_blit_params) { .src = src, .dst = fbo, .src_rc = {0, 0, 0, FBO_W, FBO_H, 1}, .dst_rc = {0, 0, 0, FBO_W, FBO_H, 1}, .sample_mode = PL_TEX_SAMPLE_NEAREST, })); 
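// The second, non-scaling blit restores the gradient from `src`, which TEST_FBO_PATTERN verifies below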
TEST_FBO_PATTERN(1e-6, "%s", "pl_tex_blit_compute"); } // Test encoding/decoding of all gamma functions, color spaces, etc. for (enum pl_color_transfer trc = 0; trc < PL_COLOR_TRC_COUNT; trc++) { struct pl_color_space test_csp = { .transfer = trc, .hdr.min_luma = PL_COLOR_HDR_BLACK, }; sh = pl_dispatch_begin(dp); pl_shader_sample_nearest(sh, pl_sample_src( .tex = src )); pl_shader_delinearize(sh, &test_csp); pl_shader_linearize(sh, &test_csp); REQUIRE(pl_dispatch_finish(dp, pl_dispatch_params( .shader = &sh, .target = fbo, ))); float epsilon = pl_color_transfer_is_hdr(trc) ? 1e-4 : 1e-6; TEST_FBO_PATTERN(epsilon, "transfer function: %s", pl_color_transfer_name(trc)); } for (enum pl_color_system sys = 0; sys < PL_COLOR_SYSTEM_COUNT; sys++) { if (sys == PL_COLOR_SYSTEM_DOLBYVISION) continue; // requires metadata sh = pl_dispatch_begin(dp); pl_shader_sample_nearest(sh, pl_sample_src( .tex = src )); pl_shader_encode_color(sh, &(struct pl_color_repr) { .sys = sys }); pl_shader_decode_color(sh, &(struct pl_color_repr) { .sys = sys }, NULL); REQUIRE(pl_dispatch_finish(dp, &(struct pl_dispatch_params) { .shader = &sh, .target = fbo, })); float epsilon; switch (sys) { case PL_COLOR_SYSTEM_BT_2020_C: case PL_COLOR_SYSTEM_XYZ: epsilon = 1e-5; break; case PL_COLOR_SYSTEM_BT_2100_PQ: case PL_COLOR_SYSTEM_BT_2100_HLG: // These seem to be horrifically noisy and prone to breaking on // edge cases for some reason // TODO: figure out why! continue; default: epsilon = 1e-6; break; } TEST_FBO_PATTERN(epsilon, "color system: %s", pl_color_system_name(sys)); } // Repeat this a few times to test the caching pl_cache cache = pl_cache_create(pl_cache_params( .log = gpu->log )); pl_gpu_set_cache(gpu, cache); for (int i = 0; i < 10; i++) { if (i == 5) { printf("Recreating pl_dispatch to test the caching\n"); size_t size = pl_dispatch_save(dp, NULL); REQUIRE(size); uint8_t *cache_data = malloc(size); REQUIRE(cache_data); REQUIRE_CMP(pl_dispatch_save(dp, cache_data), ==, size, "zu"); pl_dispatch_destroy(&dp); dp = pl_dispatch_create(gpu->log, gpu); pl_dispatch_load(dp, cache_data); // Test to make sure the pass regenerates the same cache uint64_t hash = pl_str_hash((pl_str) { cache_data, size }); REQUIRE_CMP(pl_dispatch_save(dp, NULL), ==, size, "zu"); REQUIRE_CMP(pl_dispatch_save(dp, cache_data), ==, size, "zu"); REQUIRE_CMP(pl_str_hash((pl_str) { cache_data, size }), ==, hash, PRIu64); free(cache_data); } sh = pl_dispatch_begin(dp); // For testing, force the use of CS if possible if (gpu->glsl.compute) { sh->type = SH_COMPUTE; sh->group_size[0] = 8; sh->group_size[1] = 8; } pl_shader_deband(sh, pl_sample_src( .tex = src ), pl_deband_params( .iterations = 0, .grain = 0.0, )); REQUIRE(pl_dispatch_finish(dp, &(struct pl_dispatch_params) { .shader = &sh, .target = fbo, })); TEST_FBO_PATTERN(1e-6, "deband iter %d", i); } pl_gpu_set_cache(gpu, NULL); pl_cache_destroy(&cache); // Test peak detection and readback if possible sh = pl_dispatch_begin(dp); pl_shader_sample_nearest(sh, pl_sample_src( .tex = src )); pl_shader_obj peak_state = NULL; struct pl_color_space csp_gamma22 = { .transfer = PL_COLOR_TRC_GAMMA22 }; struct pl_peak_detect_params peak_params = { .minimum_peak = 0.01 }; if (pl_shader_detect_peak(sh, csp_gamma22, &peak_state, &peak_params)) { REQUIRE(pl_dispatch_compute(dp, &(struct pl_dispatch_compute_params) { .shader = &sh, .width = fbo->params.w, .height = fbo->params.h, })); struct pl_hdr_metadata hdr; REQUIRE(pl_get_detected_hdr_metadata(peak_state, &hdr)); float real_peak = 0, real_avg = 0; for (int y = 
0; y < FBO_H; y++) { for (int x = 0; x < FBO_W; x++) { float *color = &src_data[(y * FBO_W + x) * 4]; float luma = 0.212639f * powf(color[0], 2.2f) + 0.715169f * powf(color[1], 2.2f) + 0.072192f * powf(color[2], 2.2f); luma = pl_hdr_rescale(PL_HDR_NORM, PL_HDR_PQ, luma); real_peak = PL_MAX(real_peak, luma); real_avg += luma; } } real_avg = real_avg / (FBO_W * FBO_H); REQUIRE_FEQ(hdr.max_pq_y, real_peak, 1e-4); REQUIRE_FEQ(hdr.avg_pq_y, real_avg, 1e-3); } pl_dispatch_abort(dp, &sh); pl_shader_obj_destroy(&peak_state); // Test film grain synthesis pl_shader_obj grain = NULL; struct pl_film_grain_params grain_params = { .tex = src, .components = 3, .component_mapping = { 0, 1, 2}, .repr = &(struct pl_color_repr) { .sys = PL_COLOR_SYSTEM_BT_709, .levels = PL_COLOR_LEVELS_LIMITED, .bits = { .color_depth = 10, .sample_depth = 10 }, }, }; for (int i = 0; i < 2; i++) { grain_params.data.type = PL_FILM_GRAIN_AV1; grain_params.data.params.av1 = av1_grain_data; grain_params.data.params.av1.overlap = !!i; grain_params.data.seed = rand(); sh = pl_dispatch_begin(dp); pl_shader_film_grain(sh, &grain, &grain_params); REQUIRE(pl_dispatch_finish(dp, &(struct pl_dispatch_params) { .shader = &sh, .target = fbo, })); } if (gpu->glsl.compute) { grain_params.data.type = PL_FILM_GRAIN_H274; grain_params.data.params.h274 = h274_grain_data; grain_params.data.seed = rand(); sh = pl_dispatch_begin(dp); pl_shader_film_grain(sh, &grain, &grain_params); REQUIRE(pl_dispatch_finish(dp, &(struct pl_dispatch_params) { .shader = &sh, .target = fbo, })); } pl_shader_obj_destroy(&grain); // Test custom shaders struct pl_custom_shader custom = { .header = "vec3 invert(vec3 color) \n" "{ \n" " return vec3(1.0) - color; \n" "} \n", .body = "color = vec4(gl_FragCoord.xy, 0.0, 1.0); \n" "color.rgb = invert(color.rgb) + offset; \n", .input = PL_SHADER_SIG_NONE, .output = PL_SHADER_SIG_COLOR, .num_variables = 1, .variables = &(struct pl_shader_var) { .var = pl_var_float("offset"), .data = &(float) { 0.1 }, }, }; sh = pl_dispatch_begin(dp); REQUIRE(pl_shader_custom(sh, &custom)); REQUIRE(pl_dispatch_finish(dp, &(struct pl_dispatch_params) { .shader = &sh, .target = fbo, })); // Test dolbyvision struct pl_color_repr repr = { .sys = PL_COLOR_SYSTEM_DOLBYVISION, .dovi = &dovi_meta, }; sh = pl_dispatch_begin(dp); pl_shader_sample_direct(sh, pl_sample_src( .tex = src )); pl_shader_decode_color(sh, &repr, NULL); REQUIRE(pl_dispatch_finish(dp, &(struct pl_dispatch_params) { .shader = &sh, .target = fbo, })); // Test deinterlacing sh = pl_dispatch_begin(dp); pl_shader_deinterlace(sh, pl_deinterlace_source( .cur = pl_field_pair(src) ), NULL); REQUIRE(pl_dispatch_finish(dp, pl_dispatch_params( .shader = &sh, .target = fbo, ))); // Test error diffusion if (fbo->params.storable) { for (int i = 0; i < pl_num_error_diffusion_kernels; i++) { const struct pl_error_diffusion_kernel *k = pl_error_diffusion_kernels[i]; printf("testing error diffusion kernel '%s'\n", k->name); sh = pl_dispatch_begin(dp); bool ok = pl_shader_error_diffusion(sh, pl_error_diffusion_params( .input_tex = src, .output_tex = fbo, .new_depth = 8, .kernel = k, )); if (!ok) { fprintf(stderr, "kernel '%s' exceeds GPU limits, skipping...\n", k->name); continue; } REQUIRE(pl_dispatch_compute(dp, pl_dispatch_compute_params( .shader = &sh, .dispatch_size = {1, 1, 1}, ))); } } pl_dispatch_destroy(&dp); pl_tex_destroy(gpu, &src); pl_tex_destroy(gpu, &fbo); } static void pl_scaler_tests(pl_gpu gpu) { pl_fmt src_fmt = pl_find_fmt(gpu, PL_FMT_FLOAT, 1, 16, 32, PL_FMT_CAP_LINEAR); pl_fmt 
fbo_fmt = pl_find_fmt(gpu, PL_FMT_FLOAT, 1, 16, 32, PL_FMT_CAP_RENDERABLE); if (!src_fmt || !fbo_fmt) return; float *fbo_data = NULL; pl_shader_obj lut = NULL; static float data_5x5[5][5] = { { 0, 0, 0, 0, 0 }, { 0, 0, 0, 0, 0 }, { 0, 0, 1, 0, 0 }, { 0, 0, 0, 0, 0 }, { 0, 0, 0, 0, 0 }, }; pl_tex dot5x5 = pl_tex_create(gpu, &(struct pl_tex_params) { .w = 5, .h = 5, .format = src_fmt, .sampleable = true, .initial_data = &data_5x5[0][0], }); struct pl_tex_params fbo_params = { .w = 100, .h = 100, .format = fbo_fmt, .renderable = true, .storable = fbo_fmt->caps & PL_FMT_CAP_STORABLE, .host_readable = fbo_fmt->caps & PL_FMT_CAP_HOST_READABLE, }; pl_tex fbo = pl_tex_create(gpu, &fbo_params); pl_dispatch dp = pl_dispatch_create(gpu->log, gpu); if (!dot5x5 || !fbo || !dp) goto error; pl_shader sh = pl_dispatch_begin(dp); REQUIRE(pl_shader_sample_polar(sh, pl_sample_src( .tex = dot5x5, .new_w = fbo->params.w, .new_h = fbo->params.h, ), pl_sample_filter_params( .filter = pl_filter_ewa_lanczos, .lut = &lut, .no_compute = !fbo->params.storable, ) )); REQUIRE(pl_dispatch_finish(dp, &(struct pl_dispatch_params) { .shader = &sh, .target = fbo, })); if (fbo->params.host_readable) { fbo_data = malloc(fbo->params.w * fbo->params.h * sizeof(float)); REQUIRE(pl_tex_download(gpu, &(struct pl_tex_transfer_params) { .tex = fbo, .ptr = fbo_data, })); #ifdef PRINT_OUTPUT int max = 255; printf("P2\n%d %d\n%d\n", fbo->params.w, fbo->params.h, max); for (int y = 0; y < fbo->params.h; y++) { for (int x = 0; x < fbo->params.w; x++) { float v = fbo_data[y * fbo->params.h + x]; printf("%d ", (int) round(fmin(fmax(v, 0.0), 1.0) * max)); } printf("\n"); } #endif } error: free(fbo_data); pl_shader_obj_destroy(&lut); pl_dispatch_destroy(&dp); pl_tex_destroy(gpu, &dot5x5); pl_tex_destroy(gpu, &fbo); } static const char *user_shader_tests[] = { // Test hooking, saving and loading "// Example of a comment at the beginning \n" " \n" "//!HOOK NATIVE \n" "//!DESC upscale image \n" "//!BIND HOOKED \n" "//!WIDTH HOOKED.w 10 * \n" "//!HEIGHT HOOKED.h 10 * \n" "//!SAVE NATIVEBIG \n" "//!WHEN NATIVE.w 500 < \n" " \n" "vec4 hook() \n" "{ \n" " return HOOKED_texOff(0); \n" "} \n" " \n" "//!HOOK MAIN \n" "//!DESC downscale bigger image \n" "//!WHEN NATIVE.w 500 < \n" "//!BIND NATIVEBIG \n" " \n" "vec4 hook() \n" "{ \n" " return NATIVEBIG_texOff(0); \n" "} \n", // Test use of textures "//!HOOK MAIN \n" "//!DESC turn everything into colorful pixels \n" "//!BIND HOOKED \n" "//!BIND DISCO \n" "//!COMPONENTS 3 \n" " \n" "vec4 hook() \n" "{ \n" " return vec4(DISCO_tex(HOOKED_pos * 10.0).rgb, 1); \n" "} \n" " \n" "//!TEXTURE DISCO \n" "//!SIZE 3 3 \n" "//!FORMAT rgba8 \n" "//!FILTER NEAREST \n" "//!BORDER REPEAT \n" "ff0000ff00ff00ff0000ffff00ffffffff00ffffffff00ff4c4c4cff999999ffffffffff\n" // Test custom parameters "//!PARAM test \n" "//!DESC test parameter \n" "//!TYPE DYNAMIC float \n" "//!MINIMUM 0.0 \n" "//!MAXIMUM 100.0 \n" "1.0 \n" " \n" "//!PARAM testconst \n" "//!TYPE CONSTANT uint \n" "//!MAXIMUM 16 \n" "3 \n" " \n" "//!PARAM testdefine \n" "//!TYPE DEFINE \n" "100 \n" " \n" "//!PARAM testenum \n" "//!TYPE ENUM DEFINE \n" "FOO \n" "BAR \n" " \n" "//!HOOK MAIN \n" "//!WHEN testconst 30 > \n" "#error should not be run \n" " \n" "//!HOOK MAIN \n" "//!WHEN testenum FOO = \n" "#if testenum == BAR \n" " #error bad \n" "#endif \n" "vec4 hook() { return vec4(0.0); } \n" }; static const char *compute_shader_tests[] = { // Test use of storage/buffer resources "//!HOOK MAIN \n" "//!DESC attach some storage objects \n" "//!BIND tex_storage \n" 
"//!BIND buf_uniform \n" "//!BIND buf_storage \n" "//!COMPONENTS 4 \n" " \n" "vec4 hook() \n" "{ \n" " return vec4(foo, bar, bat); \n" "} \n" " \n" "//!TEXTURE tex_storage \n" "//!SIZE 100 100 \n" "//!FORMAT r32f \n" "//!STORAGE \n" " \n" "//!BUFFER buf_uniform \n" "//!VAR float foo \n" "//!VAR float bar \n" "0000000000000000 \n" " \n" "//!BUFFER buf_storage \n" "//!VAR vec2 bat \n" "//!VAR int big[32]; \n" "//!STORAGE \n", }; static const char *test_luts[] = { "TITLE \"1D identity\" \n" "LUT_1D_SIZE 2 \n" "0.0 0.0 0.0 \n" "1.0 1.0 1.0 \n", "TITLE \"3D identity\" \n" "LUT_3D_SIZE 2 \n" "0.0 0.0 0.0 \n" "1.0 0.0 0.0 \n" "0.0 1.0 0.0 \n" "1.0 1.0 0.0 \n" "0.0 0.0 1.0 \n" "1.0 0.0 1.0 \n" "0.0 1.0 1.0 \n" "1.0 1.0 1.0 \n" }; static bool frame_passthrough(pl_gpu gpu, pl_tex *tex, const struct pl_source_frame *src, struct pl_frame *out_frame) { const struct pl_frame *frame = src->frame_data; *out_frame = *frame; return true; } static enum pl_queue_status get_frame_ptr(struct pl_source_frame *out_frame, const struct pl_queue_params *qparams) { const struct pl_source_frame **pframe = qparams->priv; if (!(*pframe)->frame_data) return PL_QUEUE_EOF; *out_frame = *(*pframe)++; return PL_QUEUE_OK; } static void render_info_cb(void *priv, const struct pl_render_info *info) { printf("{%d} Executed shader: %s\n", info->index, info->pass->shader->description); } static void pl_render_tests(pl_gpu gpu) { pl_tex img_tex = NULL, fbo = NULL; pl_renderer rr = NULL; enum { width = 50, height = 50 }; static float data[width][height]; for (int y = 0; y < height; y++) { for (int x = 0; x < width; x++) data[y][x] = RANDOM; } struct pl_plane img_plane = {0}; struct pl_plane_data plane_data = { .type = PL_FMT_FLOAT, .width = width, .height = height, .component_size = { 8 * sizeof(float) }, .component_map = { 0 }, .pixel_stride = sizeof(float), .pixels = data, }; if (!pl_recreate_plane(gpu, NULL, &fbo, &plane_data)) return; if (!pl_upload_plane(gpu, &img_plane, &img_tex, &plane_data)) goto error; rr = pl_renderer_create(gpu->log, gpu); pl_tex_clear_ex(gpu, fbo, (union pl_clear_color){0}); struct pl_frame image = { .num_planes = 1, .planes = { img_plane }, .repr = { .sys = PL_COLOR_SYSTEM_BT_709, .levels = PL_COLOR_LEVELS_FULL, }, .color = pl_color_space_srgb, }; struct pl_frame target = { .num_planes = 1, .planes = {{ .texture = fbo, .components = 3, .component_mapping = {0, 1, 2}, }}, .repr = { .sys = PL_COLOR_SYSTEM_RGB, .levels = PL_COLOR_LEVELS_FULL, .bits.color_depth = 32, }, .color = pl_color_space_srgb, }; REQUIRE(pl_render_image(rr, &image, &target, NULL)); REQUIRE(pl_renderer_get_errors(rr).errors == PL_RENDER_ERR_NONE); // TODO: embed a reference texture and ensure it matches // Test a bunch of different params #define TEST(SNAME, STYPE, DEFAULT, FIELD, LIMIT) \ do { \ for (int i = 0; i <= LIMIT; i++) { \ printf("testing `" #STYPE "." 
#FIELD " = %d`\n", i); \ struct pl_render_params params = pl_render_default_params; \ params.force_dither = true; \ struct STYPE tmp = DEFAULT; \ tmp.FIELD = i; \ params.SNAME = &tmp; \ REQUIRE(pl_render_image(rr, &image, &target, ¶ms)); \ pl_gpu_flush(gpu); \ REQUIRE(pl_renderer_get_errors(rr).errors == PL_RENDER_ERR_NONE); \ } \ } while (0) #define TEST_PARAMS(NAME, FIELD, LIMIT) \ TEST(NAME##_params, pl_##NAME##_params, pl_##NAME##_default_params, FIELD, LIMIT) image.crop.x1 = width / 2.0; image.crop.y1 = height / 2.0; for (int i = 0; i < pl_num_scale_filters; i++) { struct pl_render_params params = pl_render_default_params; params.upscaler = pl_scale_filters[i].filter; printf("testing `params.upscaler = /* %s */`\n", pl_scale_filters[i].name); REQUIRE(pl_render_image(rr, &image, &target, ¶ms)); pl_gpu_flush(gpu); REQUIRE(pl_renderer_get_errors(rr).errors == PL_RENDER_ERR_NONE); } image.crop.x1 = image.crop.y1 = 0; target.crop.x1 = width / 2.0; target.crop.y1 = height / 2.0; for (int i = 0; i < pl_num_scale_filters; i++) { struct pl_render_params params = pl_render_default_params; params.downscaler = pl_scale_filters[i].filter; printf("testing `params.downscaler = /* %s */`\n", pl_scale_filters[i].name); REQUIRE(pl_render_image(rr, &image, &target, ¶ms)); pl_gpu_flush(gpu); REQUIRE(pl_renderer_get_errors(rr).errors == PL_RENDER_ERR_NONE); } target.crop.x1 = target.crop.y1 = 0; TEST_PARAMS(deband, iterations, 3); TEST_PARAMS(sigmoid, center, 1); TEST_PARAMS(color_map, intent, PL_INTENT_ABSOLUTE_COLORIMETRIC); TEST_PARAMS(dither, method, PL_DITHER_WHITE_NOISE); TEST_PARAMS(dither, temporal, true); TEST_PARAMS(distort, alpha_mode, PL_ALPHA_INDEPENDENT); TEST_PARAMS(distort, constrain, true); TEST_PARAMS(distort, bicubic, true); TEST(cone_params, pl_cone_params, pl_vision_deuteranomaly, strength, 0); // Test gamma-correct dithering target.repr.bits.color_depth = 2; TEST_PARAMS(dither, transfer, PL_COLOR_TRC_GAMMA22); target.repr.bits.color_depth = 32; // Test HDR tone mapping image.color = pl_color_space_hdr10; TEST_PARAMS(color_map, visualize_lut, true); if (gpu->limits.max_ssbo_size) TEST_PARAMS(peak_detect, allow_delayed, true); // Test inverse tone-mapping and pure BPC image.color.hdr.max_luma = 1000; target.color.hdr.max_luma = 4000; target.color.hdr.min_luma = 0.02; TEST_PARAMS(color_map, inverse_tone_mapping, true); image.color = pl_color_space_srgb; target.color = pl_color_space_srgb; // Test some misc stuff struct pl_render_params params = pl_render_default_params; params.color_adjustment = &(struct pl_color_adjustment) { .brightness = 0.1, .contrast = 0.9, .saturation = 1.5, .gamma = 0.8, .temperature = 0.3, }; REQUIRE(pl_render_image(rr, &image, &target, ¶ms)); REQUIRE(pl_renderer_get_errors(rr).errors == PL_RENDER_ERR_NONE); params = pl_render_default_params; struct pl_frame inferred_image = image, inferred_target = target; pl_frames_infer(rr, &inferred_image, &inferred_target); REQUIRE(pl_render_image(rr, &inferred_image, &inferred_target, ¶ms)); REQUIRE(pl_renderer_get_errors(rr).errors == PL_RENDER_ERR_NONE); // Test background blending and alpha transparency params.blend_against_tiles = true; params.corner_rounding = 0.25f; REQUIRE(pl_render_image(rr, &image, &target, ¶ms)); REQUIRE(pl_renderer_get_errors(rr).errors == PL_RENDER_ERR_NONE); params = pl_render_default_params; // Test film grain synthesis image.film_grain.type = PL_FILM_GRAIN_AV1; image.film_grain.params.av1 = av1_grain_data; REQUIRE(pl_render_image(rr, &image, &target, ¶ms)); 
REQUIRE(pl_renderer_get_errors(rr).errors == PL_RENDER_ERR_NONE); image.film_grain.type = PL_FILM_GRAIN_H274; image.film_grain.params.h274 = h274_grain_data; REQUIRE(pl_render_image(rr, &image, &target, ¶ms)); // H.274 film grain synthesis requires compute shaders if (gpu->glsl.compute) { REQUIRE(pl_renderer_get_errors(rr).errors == PL_RENDER_ERR_NONE); } else { const struct pl_render_errors rr_err = pl_renderer_get_errors(rr); REQUIRE(pl_renderer_get_errors(rr).errors == PL_RENDER_ERR_FILM_GRAIN); pl_renderer_reset_errors(rr, &rr_err); } image.film_grain = (struct pl_film_grain_data) {0}; // Test mpv-style custom shaders for (int i = 0; i < PL_ARRAY_SIZE(user_shader_tests); i++) { printf("testing user shader:\n\n%s\n", user_shader_tests[i]); const struct pl_hook *hook; hook = pl_mpv_user_shader_parse(gpu, user_shader_tests[i], strlen(user_shader_tests[i])); REQUIRE(hook); params.hooks = &hook; params.num_hooks = 1; REQUIRE(pl_render_image(rr, &image, &target, ¶ms)); REQUIRE(pl_renderer_get_errors(rr).errors == PL_RENDER_ERR_NONE); pl_mpv_user_shader_destroy(&hook); } if (gpu->glsl.compute && gpu->limits.max_ssbo_size) { for (int i = 0; i < PL_ARRAY_SIZE(compute_shader_tests); i++) { printf("testing user shader:\n\n%s\n", compute_shader_tests[i]); const struct pl_hook *hook; hook = pl_mpv_user_shader_parse(gpu, compute_shader_tests[i], strlen(compute_shader_tests[i])); REQUIRE(hook); params.hooks = &hook; params.num_hooks = 1; REQUIRE(pl_render_image(rr, &image, &target, ¶ms)); REQUIRE(pl_renderer_get_errors(rr).errors == PL_RENDER_ERR_NONE); pl_mpv_user_shader_destroy(&hook); } } params = pl_render_default_params; // Test custom LUTs for (int i = 0; i < PL_ARRAY_SIZE(test_luts); i++) { printf("testing custom lut %d\n", i); struct pl_custom_lut *lut; lut = pl_lut_parse_cube(gpu->log, test_luts[i], strlen(test_luts[i])); REQUIRE(lut); bool has_3dlut = gpu->limits.max_tex_3d_dim && gpu->glsl.version > 100; if (lut->size[2] && !has_3dlut) { pl_lut_free(&lut); continue; } // Test all three at the same time to reduce the number of tests image.lut = target.lut = params.lut = lut; for (enum pl_lut_type t = PL_LUT_UNKNOWN; t <= PL_LUT_CONVERSION; t++) { printf("testing LUT method %d\n", t); image.lut_type = target.lut_type = params.lut_type = t; REQUIRE(pl_render_image(rr, &image, &target, ¶ms)); REQUIRE(pl_renderer_get_errors(rr).errors == PL_RENDER_ERR_NONE); } image.lut = target.lut = params.lut = NULL; pl_lut_free(&lut); } #ifdef PL_HAVE_LCMS // It doesn't fit without use of 3D textures on GLES2 if (gpu->glsl.version > 100) { // Test ICC profiles image.profile = TEST_PROFILE(sRGB_v2_nano_icc); REQUIRE(pl_render_image(rr, &image, &target, ¶ms)); REQUIRE(pl_renderer_get_errors(rr).errors == PL_RENDER_ERR_NONE); image.profile = (struct pl_icc_profile) {0}; target.profile = TEST_PROFILE(sRGB_v2_nano_icc); REQUIRE(pl_render_image(rr, &image, &target, ¶ms)); REQUIRE(pl_renderer_get_errors(rr).errors == PL_RENDER_ERR_NONE); target.profile = (struct pl_icc_profile) {0}; image.profile = TEST_PROFILE(sRGB_v2_nano_icc); target.profile = image.profile; REQUIRE(pl_render_image(rr, &image, &target, ¶ms)); REQUIRE(pl_renderer_get_errors(rr).errors == PL_RENDER_ERR_NONE); image.profile = (struct pl_icc_profile) {0}; target.profile = (struct pl_icc_profile) {0}; } #endif // Test overlays image.num_overlays = 1; image.overlays = &(struct pl_overlay) { .tex = img_plane.texture, .mode = PL_OVERLAY_NORMAL, .num_parts = 2, .parts = (struct pl_overlay_part[]) {{ .src = {0, 0, 2, 2}, .dst = {30, 100, 40, 200}, }, { 
.src = {2, 2, 5, 5}, .dst = {1000, -1, 3, 5}, }}, }; REQUIRE(pl_render_image(rr, &image, &target, ¶ms)); REQUIRE(pl_renderer_get_errors(rr).errors == PL_RENDER_ERR_NONE); params.disable_fbos = true; REQUIRE(pl_render_image(rr, &image, &target, ¶ms)); REQUIRE(pl_renderer_get_errors(rr).errors == PL_RENDER_ERR_NONE); image.num_overlays = 0; params = pl_render_default_params; target.num_overlays = 1; target.overlays = &(struct pl_overlay) { .tex = img_plane.texture, .mode = PL_OVERLAY_MONOCHROME, .num_parts = 1, .parts = &(struct pl_overlay_part) { .src = {5, 5, 15, 15}, .dst = {5, 5, 15, 15}, .color = {1.0, 0.5, 0.0}, }, }; REQUIRE(pl_render_image(rr, &image, &target, ¶ms)); REQUIRE(pl_renderer_get_errors(rr).errors == PL_RENDER_ERR_NONE); REQUIRE(pl_render_image(rr, NULL, &target, ¶ms)); REQUIRE(pl_renderer_get_errors(rr).errors == PL_RENDER_ERR_NONE); target.num_overlays = 0; // Test rotation for (pl_rotation rot = 0; rot < PL_ROTATION_360; rot += PL_ROTATION_90) { image.rotation = rot; REQUIRE(pl_render_image(rr, &image, &target, ¶ms)); REQUIRE(pl_renderer_get_errors(rr).errors == PL_RENDER_ERR_NONE); } // Attempt frame mixing, using the mixer queue helper printf("testing frame mixing \n"); struct pl_render_params mix_params = { .frame_mixer = &pl_filter_mitchell_clamp, .info_callback = render_info_cb, }; struct pl_queue_params qparams = { .radius = pl_frame_mix_radius(&mix_params), .vsync_duration = 1.0 / 60.0, }; // Test large PTS jumps in frame mix struct pl_frame_mix mix = (struct pl_frame_mix) { .num_frames = 2, .frames = (const struct pl_frame *[]) { &image, &image }, .signatures = (uint64_t[]) { 0xFFF1, 0xFFF2 }, .timestamps = (float[]) { -100, 100 }, .vsync_duration = 1.6, }; REQUIRE(pl_render_image_mix(rr, &mix, &target, &mix_params)); // Test inferring frame mix inferred_target = target; pl_frames_infer_mix(rr, &mix, &inferred_target, &inferred_image); REQUIRE(pl_render_image_mix(rr, &mix, &target, &mix_params)); // Test empty frame mix mix = (struct pl_frame_mix) {0}; REQUIRE(pl_render_image_mix(rr, &mix, &target, &mix_params)); // Test inferring empty frame mix inferred_target = target; pl_frames_infer_mix(rr, &mix, &inferred_target, &inferred_image); REQUIRE(pl_render_image_mix(rr, &mix, &target, &mix_params)); // Test mixer queue #define NUM_MIX_FRAMES 20 const float frame_duration = 1.0 / 24.0; struct pl_source_frame srcframes[NUM_MIX_FRAMES+1]; srcframes[NUM_MIX_FRAMES] = (struct pl_source_frame) {0}; for (int i = 0; i < NUM_MIX_FRAMES; i++) { srcframes[i] = (struct pl_source_frame) { .pts = i * frame_duration, .duration = frame_duration, .map = frame_passthrough, .frame_data = &image, }; } pl_queue queue = pl_queue_create(gpu); enum pl_queue_status ret; // Test pre-pushing all frames, with delayed EOF. 
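    // Descriptive note (added): frames past index 10 are deliberately pushed out of order below,
    // to exercise the queue's internal reordering; falling back from pl_queue_push_block() to
    // pl_queue_push() additionally covers the non-blocking push path.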
for (int i = 0; i < NUM_MIX_FRAMES; i++) { const struct pl_source_frame *src = &srcframes[i]; if (i > 10) // test pushing in reverse order src = &srcframes[NUM_MIX_FRAMES + 10 - i]; if (!pl_queue_push_block(queue, 1, src)) // mini-sleep pl_queue_push(queue, src); // push it anyway, for testing } while ((ret = pl_queue_update(queue, &mix, &qparams)) != PL_QUEUE_EOF) { if (ret == PL_QUEUE_MORE) { REQUIRE_CMP(qparams.pts, >, 0.0f, "f"); pl_queue_push(queue, NULL); // push delayed EOF continue; } REQUIRE_CMP(ret, ==, PL_QUEUE_OK, "u"); REQUIRE(pl_render_image_mix(rr, &mix, &target, &mix_params)); // Simulate advancing vsync qparams.pts += qparams.vsync_duration; } // Test dynamically pulling all frames, with oversample mixer const struct pl_source_frame *frame_ptr = &srcframes[0]; mix_params.frame_mixer = &pl_oversample_frame_mixer; qparams = (struct pl_queue_params) { .radius = pl_frame_mix_radius(&mix_params), .vsync_duration = qparams.vsync_duration, .get_frame = get_frame_ptr, .priv = &frame_ptr, }; pl_queue_reset(queue); while ((ret = pl_queue_update(queue, &mix, &qparams)) != PL_QUEUE_EOF) { REQUIRE_CMP(ret, ==, PL_QUEUE_OK, "u"); REQUIRE_CMP(mix.num_frames, <=, 2, "d"); REQUIRE(pl_render_image_mix(rr, &mix, &target, &mix_params)); qparams.pts += qparams.vsync_duration; } // Test large PTS jump pl_queue_reset(queue); REQUIRE(pl_queue_update(queue, &mix, &qparams) == PL_QUEUE_EOF); // Test deinterlacing pl_queue_reset(queue); printf("testing deinterlacing \n"); for (int i = 0; i < NUM_MIX_FRAMES; i++) { struct pl_source_frame *src = &srcframes[i]; if (i > 10) src = &srcframes[NUM_MIX_FRAMES + 10 - i]; src->first_field = PL_FIELD_EVEN; pl_queue_push(queue, src); } pl_queue_push(queue, NULL); qparams.pts = 0; qparams.get_frame = NULL; while ((ret = pl_queue_update(queue, &mix, &qparams)) != PL_QUEUE_EOF) { REQUIRE_CMP(ret, ==, PL_QUEUE_OK, "u"); REQUIRE(pl_render_image_mix(rr, &mix, &target, &mix_params)); qparams.pts += qparams.vsync_duration; } pl_queue_destroy(&queue); error: pl_renderer_destroy(&rr); pl_tex_destroy(gpu, &img_tex); pl_tex_destroy(gpu, &fbo); } static struct pl_hook_res noop_hook(void *priv, const struct pl_hook_params *params) { return (struct pl_hook_res) {0}; } static void pl_ycbcr_tests(pl_gpu gpu) { struct pl_plane_data data[3]; for (int i = 0; i < 3; i++) { const int sub = i > 0 ? 
1 : 0; const int width = (323 + sub) >> sub; const int height = (255 + sub) >> sub; data[i] = (struct pl_plane_data) { .type = PL_FMT_UNORM, .width = width, .height = height, .component_size = {16}, .component_map = {i}, .pixel_stride = sizeof(uint16_t), .row_stride = PL_ALIGN2(width * sizeof(uint16_t), gpu->limits.align_tex_xfer_pitch), }; } pl_fmt fmt = pl_plane_find_fmt(gpu, NULL, &data[0]); enum pl_fmt_caps caps = PL_FMT_CAP_RENDERABLE | PL_FMT_CAP_HOST_READABLE; if (!fmt || (fmt->caps & caps) != caps) return; pl_renderer rr = pl_renderer_create(gpu->log, gpu); if (!rr) return; pl_tex src_tex[3] = {0}; pl_tex dst_tex[3] = {0}; struct pl_frame img = { .num_planes = 3, .repr = pl_color_repr_hdtv, .color = pl_color_space_bt709, }; struct pl_frame target = { .num_planes = 3, .repr = pl_color_repr_hdtv, .color = pl_color_space_bt709, }; uint8_t *src_buffer[3] = {0}; uint8_t *dst_buffer = NULL; for (int i = 0; i < 3; i++) { // Generate some arbitrary data for the buffer src_buffer[i] = malloc(data[i].height * data[i].row_stride); if (!src_buffer[i]) goto error; data[i].pixels = src_buffer[i]; for (int y = 0; y < data[i].height; y++) { for (int x = 0; x < data[i].width; x++) { size_t off = y * data[i].row_stride + x * data[i].pixel_stride; uint16_t *pixel = (uint16_t *) &src_buffer[i][off]; int gx = 200 + 100 * i, gy = 300 + 150 * i; *pixel = (gx * x) ^ (gy * y); // whatever } } REQUIRE(pl_upload_plane(gpu, &img.planes[i], &src_tex[i], &data[i])); } // This co-sites chroma pixels with pixels in the RGB image, meaning we // get an exact round-trip when sampling both ways. This makes it useful // as a test case, even though it's not common in the real world. pl_frame_set_chroma_location(&img, PL_CHROMA_TOP_LEFT); for (int i = 0; i < 3; i++) { dst_tex[i] = pl_tex_create(gpu, &(struct pl_tex_params) { .format = fmt, .w = data[i].width, .h = data[i].height, .renderable = true, .host_readable = true, .storable = fmt->caps & PL_FMT_CAP_STORABLE, .blit_dst = fmt->caps & PL_FMT_CAP_BLITTABLE, }); if (!dst_tex[i]) goto error; target.planes[i] = img.planes[i]; target.planes[i].texture = dst_tex[i]; } REQUIRE(pl_render_image(rr, &img, &target, &(struct pl_render_params) { .num_hooks = 1, .hooks = &(const struct pl_hook *){&(struct pl_hook) { // Forces chroma merging, to test the chroma merging code .stages = PL_HOOK_CHROMA_INPUT, .hook = noop_hook, }}, })); REQUIRE(pl_renderer_get_errors(rr).errors == PL_RENDER_ERR_NONE); size_t buf_size = data[0].height * data[0].row_stride; dst_buffer = malloc(buf_size); if (!dst_buffer) goto error; for (int i = 0; i < 3; i++) { memset(dst_buffer, 0xAA, buf_size); REQUIRE(pl_tex_download(gpu, &(struct pl_tex_transfer_params) { .tex = dst_tex[i], .ptr = dst_buffer, .row_pitch = data[i].row_stride, })); for (int y = 0; y < data[i].height; y++) { for (int x = 0; x < data[i].width; x++) { size_t off = y * data[i].row_stride + x * data[i].pixel_stride; uint16_t *src_pixel = (uint16_t *) &src_buffer[i][off]; uint16_t *dst_pixel = (uint16_t *) &dst_buffer[off]; int diff = abs((int) *src_pixel - (int) *dst_pixel); REQUIRE_CMP(diff, <=, 50, "d"); // a little under 0.1% } } } error: pl_renderer_destroy(&rr); free(dst_buffer); for (int i = 0; i < 3; i++) { free(src_buffer[i]); pl_tex_destroy(gpu, &src_tex[i]); pl_tex_destroy(gpu, &dst_tex[i]); } } static void pl_test_export_import(pl_gpu gpu, enum pl_handle_type handle_type) { // Test texture roundtrip if (!(gpu->export_caps.tex & handle_type) || !(gpu->import_caps.tex & handle_type)) goto skip_tex; pl_fmt fmt = pl_find_fmt(gpu, 
PL_FMT_UNORM, 4, 0, 0, PL_FMT_CAP_BLITTABLE); if (!fmt) goto skip_tex; printf("testing texture import/export with fmt %s\n", fmt->name); pl_tex export = pl_tex_create(gpu, &(struct pl_tex_params) { .w = 32, .h = 32, .format = fmt, .export_handle = handle_type, }); REQUIRE(export); REQUIRE_HANDLE(export->shared_mem, handle_type); pl_tex import = pl_tex_create(gpu, &(struct pl_tex_params) { .w = export->params.w, .h = export->params.h, .format = fmt, .import_handle = handle_type, .shared_mem = export->shared_mem, }); REQUIRE(import); pl_tex_destroy(gpu, &import); pl_tex_destroy(gpu, &export); skip_tex: ; // Test buffer roundtrip if (!(gpu->export_caps.buf & handle_type) || !(gpu->import_caps.buf & handle_type)) return; printf("testing buffer import/export\n"); pl_buf exp_buf = pl_buf_create(gpu, &(struct pl_buf_params) { .size = 32, .export_handle = handle_type, }); REQUIRE(exp_buf); REQUIRE_HANDLE(exp_buf->shared_mem, handle_type); pl_buf imp_buf = pl_buf_create(gpu, &(struct pl_buf_params) { .size = 32, .import_handle = handle_type, .shared_mem = exp_buf->shared_mem, }); REQUIRE(imp_buf); pl_buf_destroy(gpu, &imp_buf); pl_buf_destroy(gpu, &exp_buf); } static void pl_test_host_ptr(pl_gpu gpu) { if (!(gpu->import_caps.buf & PL_HANDLE_HOST_PTR)) return; #ifdef __unix__ printf("testing host ptr\n"); REQUIRE(gpu->limits.max_mapped_size); const size_t size = 2 << 20; const size_t offset = 2 << 10; const size_t slice = 2 << 16; uint8_t *data = aligned_alloc(0x1000, size); for (int i = 0; i < size; i++) data[i] = (uint8_t) i; pl_buf buf = pl_buf_create(gpu, &(struct pl_buf_params) { .size = slice, .import_handle = PL_HANDLE_HOST_PTR, .shared_mem = { .handle.ptr = data, .size = size, .offset = offset, }, .host_mapped = true, }); REQUIRE(buf); REQUIRE_MEMEQ(data + offset, buf->data, slice); pl_buf_destroy(gpu, &buf); free(data); #endif // unix } void gpu_shader_tests(pl_gpu gpu) { pl_buffer_tests(gpu); pl_texture_tests(gpu); pl_planar_tests(gpu); pl_shader_tests(gpu); pl_scaler_tests(gpu); pl_render_tests(gpu); pl_ycbcr_tests(gpu); REQUIRE(!pl_gpu_is_failed(gpu)); } void gpu_interop_tests(pl_gpu gpu) { pl_test_export_import(gpu, PL_HANDLE_DMA_BUF); pl_test_host_ptr(gpu); REQUIRE(!pl_gpu_is_failed(gpu)); } libplacebo-v7.349.0/src/tests/gpu_tests.h000066400000000000000000000016221463457750100203140ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . 
*/ #pragma once #include "utils.h" #include void pl_buffer_tests(pl_gpu gpu); void pl_texture_tests(pl_gpu gpu); void gpu_shader_tests(pl_gpu gpu); void gpu_interop_tests(pl_gpu gpu); libplacebo-v7.349.0/src/tests/icc.c000066400000000000000000000144061463457750100170340ustar00rootroot00000000000000#include "utils.h" #include static const uint8_t DisplayP3_v2_micro_icc[] = { 0x00, 0x00, 0x01, 0xc8, 0x6c, 0x63, 0x6d, 0x73, 0x02, 0x10, 0x00, 0x00, 0x6d, 0x6e, 0x74, 0x72, 0x52, 0x47, 0x42, 0x20, 0x58, 0x59, 0x5a, 0x20, 0x07, 0xe2, 0x00, 0x03, 0x00, 0x14, 0x00, 0x09, 0x00, 0x0e, 0x00, 0x1d, 0x61, 0x63, 0x73, 0x70, 0x4d, 0x53, 0x46, 0x54, 0x00, 0x00, 0x00, 0x00, 0x73, 0x61, 0x77, 0x73, 0x63, 0x74, 0x72, 0x6c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf6, 0xd6, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0xd3, 0x2d, 0x68, 0x61, 0x6e, 0x64, 0xb4, 0xaa, 0xdd, 0x1f, 0x13, 0xc8, 0x03, 0x3c, 0xf5, 0x51, 0x14, 0x45, 0x28, 0x7a, 0x98, 0xe2, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, 0x64, 0x65, 0x73, 0x63, 0x00, 0x00, 0x00, 0xf0, 0x00, 0x00, 0x00, 0x5e, 0x63, 0x70, 0x72, 0x74, 0x00, 0x00, 0x01, 0x0c, 0x00, 0x00, 0x00, 0x0c, 0x77, 0x74, 0x70, 0x74, 0x00, 0x00, 0x01, 0x18, 0x00, 0x00, 0x00, 0x14, 0x72, 0x58, 0x59, 0x5a, 0x00, 0x00, 0x01, 0x2c, 0x00, 0x00, 0x00, 0x14, 0x67, 0x58, 0x59, 0x5a, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00, 0x00, 0x14, 0x62, 0x58, 0x59, 0x5a, 0x00, 0x00, 0x01, 0x54, 0x00, 0x00, 0x00, 0x14, 0x72, 0x54, 0x52, 0x43, 0x00, 0x00, 0x01, 0x68, 0x00, 0x00, 0x00, 0x60, 0x67, 0x54, 0x52, 0x43, 0x00, 0x00, 0x01, 0x68, 0x00, 0x00, 0x00, 0x60, 0x62, 0x54, 0x52, 0x43, 0x00, 0x00, 0x01, 0x68, 0x00, 0x00, 0x00, 0x60, 0x64, 0x65, 0x73, 0x63, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x75, 0x50, 0x33, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x74, 0x65, 0x78, 0x74, 0x00, 0x00, 0x00, 0x00, 0x43, 0x43, 0x30, 0x00, 0x58, 0x59, 0x5a, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf3, 0x51, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x16, 0xcc, 0x58, 0x59, 0x5a, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x83, 0xdf, 0x00, 0x00, 0x3d, 0xbf, 0xff, 0xff, 0xff, 0xbb, 0x58, 0x59, 0x5a, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x4a, 0xbf, 0x00, 0x00, 0xb1, 0x37, 0x00, 0x00, 0x0a, 0xb9, 0x58, 0x59, 0x5a, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x28, 0x38, 0x00, 0x00, 0x11, 0x0a, 0x00, 0x00, 0xc8, 0xb9, 0x63, 0x75, 0x72, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2a, 0x00, 0x00, 0x00, 0x7c, 0x00, 0xf8, 0x01, 0x9c, 0x02, 0x75, 0x03, 0x83, 0x04, 0xc9, 0x06, 0x4e, 0x08, 0x12, 0x0a, 0x18, 0x0c, 0x62, 0x0e, 0xf4, 0x11, 0xcf, 0x14, 0xf6, 0x18, 0x6a, 0x1c, 0x2e, 0x20, 0x43, 0x24, 0xac, 0x29, 0x6a, 0x2e, 0x7e, 0x33, 0xeb, 0x39, 0xb3, 0x3f, 0xd6, 0x46, 0x57, 0x4d, 0x36, 0x54, 0x76, 0x5c, 0x17, 0x64, 0x1d, 0x6c, 0x86, 0x75, 0x56, 0x7e, 0x8d, 0x88, 0x2c, 0x92, 0x36, 0x9c, 0xab, 0xa7, 0x8c, 0xb2, 0xdb, 0xbe, 0x99, 0xca, 0xc7, 0xd7, 0x65, 0xe4, 0x77, 0xf1, 0xf9, 0xff, 0xff }; static const uint8_t Rec2020_v2_micro_icc[] = { 0x00, 0x00, 0x01, 0xcc, 0x6c, 0x63, 0x6d, 0x73, 0x02, 0x10, 0x00, 0x00, 0x6d, 0x6e, 0x74, 0x72, 0x52, 0x47, 0x42, 0x20, 0x58, 0x59, 0x5a, 0x20, 0x07, 0xe2, 0x00, 0x03, 0x00, 0x14, 0x00, 0x09, 0x00, 0x0e, 0x00, 0x1d, 0x61, 0x63, 0x73, 0x70, 0x4d, 0x53, 0x46, 0x54, 0x00, 0x00, 0x00, 0x00, 0x73, 0x61, 0x77, 0x73, 0x63, 0x74, 0x72, 0x6c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0xf6, 0xd6, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0xd3, 0x2d, 0x68, 0x61, 0x6e, 0x64, 0x17, 0xcb, 0x44, 0xd1, 0x0d, 0xca, 0xe1, 0xc9, 0x03, 0x3e, 0x20, 0x85, 0x4a, 0x67, 0x4e, 0xa9, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, 0x64, 0x65, 0x73, 0x63, 0x00, 0x00, 0x00, 0xf0, 0x00, 0x00, 0x00, 0x5f, 0x63, 0x70, 0x72, 0x74, 0x00, 0x00, 0x01, 0x0c, 0x00, 0x00, 0x00, 0x0c, 0x77, 0x74, 0x70, 0x74, 0x00, 0x00, 0x01, 0x18, 0x00, 0x00, 0x00, 0x14, 0x72, 0x58, 0x59, 0x5a, 0x00, 0x00, 0x01, 0x2c, 0x00, 0x00, 0x00, 0x14, 0x67, 0x58, 0x59, 0x5a, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00, 0x00, 0x14, 0x62, 0x58, 0x59, 0x5a, 0x00, 0x00, 0x01, 0x54, 0x00, 0x00, 0x00, 0x14, 0x72, 0x54, 0x52, 0x43, 0x00, 0x00, 0x01, 0x68, 0x00, 0x00, 0x00, 0x64, 0x67, 0x54, 0x52, 0x43, 0x00, 0x00, 0x01, 0x68, 0x00, 0x00, 0x00, 0x64, 0x62, 0x54, 0x52, 0x43, 0x00, 0x00, 0x01, 0x68, 0x00, 0x00, 0x00, 0x64, 0x64, 0x65, 0x73, 0x63, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x75, 0x32, 0x30, 0x32, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x74, 0x65, 0x78, 0x74, 0x00, 0x00, 0x00, 0x00, 0x43, 0x43, 0x30, 0x00, 0x58, 0x59, 0x5a, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf3, 0x51, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x16, 0xcc, 0x58, 0x59, 0x5a, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xac, 0x69, 0x00, 0x00, 0x47, 0x70, 0xff, 0xff, 0xff, 0x81, 0x58, 0x59, 0x5a, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2a, 0x6a, 0x00, 0x00, 0xac, 0xe3, 0x00, 0x00, 0x07, 0xad, 0x58, 0x59, 0x5a, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x03, 0x00, 0x00, 0x0b, 0xad, 0x00, 0x00, 0xcb, 0xff, 0x63, 0x75, 0x72, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x01, 0x53, 0x02, 0xa5, 0x03, 0xf8, 0x05, 0x4e, 0x06, 0xd6, 0x08, 0x98, 0x0a, 0x8f, 0x0c, 0xc3, 0x0f, 0x31, 0x11, 0xdc, 0x14, 0xc3, 0x17, 0xe8, 0x1b, 0x4c, 0x1e, 0xf0, 0x22, 0xd5, 0x26, 0xfa, 0x2b, 0x62, 0x30, 0x0c, 0x34, 0xfa, 0x3a, 0x2b, 0x3f, 0xa2, 0x45, 0x5d, 0x4b, 0x5f, 0x51, 0xa7, 0x58, 0x37, 0x5f, 0x0d, 0x66, 0x2c, 0x6d, 0x94, 0x75, 0x45, 0x7d, 0x3f, 0x85, 0x84, 0x8e, 0x13, 0x96, 0xee, 0xa0, 0x13, 0xa9, 0x86, 0xb3, 0x44, 0xbd, 0x4f, 0xc7, 0xa8, 0xd2, 0x4e, 0xdd, 0x42, 0xe8, 0x86, 0xf4, 0x16, 0xff, 0xff }; int main() { pl_log log = pl_test_logger(); pl_icc_object icc; icc = pl_icc_open(log, &TEST_PROFILE(sRGB_v2_nano_icc), NULL); REQUIRE_CMP(icc->csp.primaries, ==, PL_COLOR_PRIM_BT_709, "u"); pl_icc_close(&icc); icc = pl_icc_open(log, &TEST_PROFILE(DisplayP3_v2_micro_icc), NULL); REQUIRE_CMP(icc->csp.primaries, ==, PL_COLOR_PRIM_DISPLAY_P3, "u"); pl_icc_close(&icc); icc = pl_icc_open(log, &TEST_PROFILE(Rec2020_v2_micro_icc), NULL); REQUIRE_CMP(icc->csp.primaries, ==, PL_COLOR_PRIM_BT_2020, "u"); pl_icc_close(&icc); pl_log_destroy(&log); } libplacebo-v7.349.0/src/tests/include/000077500000000000000000000000001463457750100175505ustar00rootroot00000000000000libplacebo-v7.349.0/src/tests/include/include_tmpl.c000066400000000000000000000000371463457750100223730ustar00rootroot00000000000000#include libplacebo-v7.349.0/src/tests/include/include_tmpl.cpp000066400000000000000000000001431463457750100227310ustar00rootroot00000000000000#define PL_LIBAV_IMPLEMENTATION 0 #define PL_DAV1D_IMPLEMENTATION 0 #include libplacebo-v7.349.0/src/tests/include/meson.build000066400000000000000000000015701463457750100217150ustar00rootroot00000000000000include_tmpl_langs = ['c', 'cpp'] # 
Ensure all headers compile test_include_sources = [] foreach h : headers if (h.contains('internal') or h.contains('dav1d') and not dav1d.found() or h.contains('libav') and not libav_found or h.contains('d3d11') and not d3d11_header) continue endif foreach lang : include_tmpl_langs test_include_sources += configure_file( input: 'include_tmpl.' + lang, output: 'include_@0@.@1@'.format(h.underscorify(), lang), configuration: { 'header': h }, ) endforeach endforeach static_library('test_include', test_include_sources, dependencies: [tdep_static, lavu, lavc, lavf], include_directories: [inc, vulkan_headers_inc], implicit_include_directories: false, c_args: ['-Wall', '-Wextra', '-Wpedantic'], cpp_args: ['-Wall', '-Wextra', '-Wpedantic'], ) libplacebo-v7.349.0/src/tests/libav.c000066400000000000000000000252151463457750100173730ustar00rootroot00000000000000#include "utils.h" #include "libplacebo/utils/libav.h" int main() { struct pl_plane_data data[4] = {0}; struct pl_bit_encoding bits; // Make sure we don't crash on any av pixfmt const AVPixFmtDescriptor *desc = NULL; while ((desc = av_pix_fmt_desc_next(desc))) pl_plane_data_from_pixfmt(data, &bits, av_pix_fmt_desc_get_id(desc)); #define TEST(pixfmt, reference) \ do { \ int planes = pl_plane_data_from_pixfmt(data, &bits, pixfmt); \ REQUIRE_CMP(planes, ==, sizeof(reference) / sizeof(*reference), "d"); \ REQUIRE_MEMEQ(data, reference, sizeof(reference)); \ } while (0) // Planar and semiplanar formats static const struct pl_plane_data yuvp8[] = { { .type = PL_FMT_UNORM, .component_size = {8}, .component_map = {0}, .pixel_stride = 1, }, { .type = PL_FMT_UNORM, .component_size = {8}, .component_map = {1}, .pixel_stride = 1, }, { .type = PL_FMT_UNORM, .component_size = {8}, .component_map = {2}, .pixel_stride = 1, } }; TEST(AV_PIX_FMT_YUV420P, yuvp8); TEST(AV_PIX_FMT_YUV422P, yuvp8); TEST(AV_PIX_FMT_YUV444P, yuvp8); TEST(AV_PIX_FMT_YUV410P, yuvp8); TEST(AV_PIX_FMT_YUV411P, yuvp8); TEST(AV_PIX_FMT_YUV440P, yuvp8); static const struct pl_plane_data yuvap8[] = { { .type = PL_FMT_UNORM, .component_size = {8}, .component_map = {0}, .pixel_stride = 1, }, { .type = PL_FMT_UNORM, .component_size = {8}, .component_map = {1}, .pixel_stride = 1, }, { .type = PL_FMT_UNORM, .component_size = {8}, .component_map = {2}, .pixel_stride = 1, }, { .type = PL_FMT_UNORM, .component_size = {8}, .component_map = {3}, .pixel_stride = 1, } }; TEST(AV_PIX_FMT_YUVA420P, yuvap8); static const struct pl_plane_data yuvp16[] = { { .type = PL_FMT_UNORM, .component_size = {16}, .component_map = {0}, .pixel_stride = 2, }, { .type = PL_FMT_UNORM, .component_size = {16}, .component_map = {1}, .pixel_stride = 2, }, { .type = PL_FMT_UNORM, .component_size = {16}, .component_map = {2}, .pixel_stride = 2, } }; TEST(AV_PIX_FMT_YUV420P10LE, yuvp16); TEST(AV_PIX_FMT_YUV420P16LE, yuvp16); static const struct pl_plane_data nv12[] = { { .type = PL_FMT_UNORM, .component_size = {8}, .component_map = {0}, .pixel_stride = 1, }, { .type = PL_FMT_UNORM, .component_size = {8, 8}, .component_map = {1, 2}, .pixel_stride = 2, } }; TEST(AV_PIX_FMT_NV12, nv12); static const struct pl_plane_data nv21[] = { { .type = PL_FMT_UNORM, .component_size = {8}, .component_map = {0}, .pixel_stride = 1, }, { .type = PL_FMT_UNORM, .component_size = {8, 8}, .component_map = {2, 1}, .pixel_stride = 2, } }; TEST(AV_PIX_FMT_NV21, nv21); static const struct pl_plane_data p016[] = { { .type = PL_FMT_UNORM, .component_size = {16}, .component_map = {0}, .pixel_stride = 2, }, { .type = PL_FMT_UNORM, .component_size = {16, 16}, 
.component_map = {1, 2}, .pixel_stride = 4, } }; TEST(AV_PIX_FMT_P010LE, p016); TEST(AV_PIX_FMT_P016LE, p016); // Packed formats static const struct pl_plane_data r8[] = { { .type = PL_FMT_UNORM, .component_size = {8}, .component_map = {0}, .pixel_stride = 1, } }; TEST(AV_PIX_FMT_GRAY8, r8); static const struct pl_plane_data rg8[] = { { .type = PL_FMT_UNORM, .component_size = {8, 8}, .component_map = {0, 1}, .pixel_stride = 2, } }; TEST(AV_PIX_FMT_GRAY8A, rg8); static const struct pl_plane_data rgb8[] = { { .type = PL_FMT_UNORM, .component_size = {8, 8, 8}, .component_map = {0, 1, 2}, .pixel_stride = 3, } }; TEST(AV_PIX_FMT_RGB24, rgb8); static const struct pl_plane_data bgr8[] = { { .type = PL_FMT_UNORM, .component_size = {8, 8, 8}, .component_map = {2, 1, 0}, .pixel_stride = 3, } }; TEST(AV_PIX_FMT_BGR24, bgr8); static const struct pl_plane_data rgbx8[] = { { .type = PL_FMT_UNORM, .component_size = {8, 8, 8}, .component_map = {0, 1, 2}, .pixel_stride = 4, } }; TEST(AV_PIX_FMT_RGB0, rgbx8); static const struct pl_plane_data xrgb8[] = { { .type = PL_FMT_UNORM, .component_size = {8, 8, 8}, .component_map = {0, 1, 2}, .component_pad = {8, 0, 0}, .pixel_stride = 4, } }; TEST(AV_PIX_FMT_0RGB, xrgb8); static const struct pl_plane_data rgba8[] = { { .type = PL_FMT_UNORM, .component_size = {8, 8, 8, 8}, .component_map = {0, 1, 2, 3}, .pixel_stride = 4, } }; TEST(AV_PIX_FMT_RGBA, rgba8); static const struct pl_plane_data argb8[] = { { .type = PL_FMT_UNORM, .component_size = {8, 8, 8, 8}, .component_map = {3, 0, 1, 2}, .pixel_stride = 4, } }; TEST(AV_PIX_FMT_ARGB, argb8); static const struct pl_plane_data bgra8[] = { { .type = PL_FMT_UNORM, .component_size = {8, 8, 8, 8}, .component_map = {2, 1, 0, 3}, .pixel_stride = 4, } }; TEST(AV_PIX_FMT_BGRA, bgra8); static const struct pl_plane_data abgr8[] = { { .type = PL_FMT_UNORM, .component_size = {8, 8, 8, 8}, .component_map = {3, 2, 1, 0}, .pixel_stride = 4, } }; TEST(AV_PIX_FMT_ABGR, abgr8); static const struct pl_plane_data r16[] = { { .type = PL_FMT_UNORM, .component_size = {16}, .component_map = {0}, .pixel_stride = 2, } }; TEST(AV_PIX_FMT_GRAY16LE, r16); static const struct pl_plane_data rgb16[] = { { .type = PL_FMT_UNORM, .component_size = {16, 16, 16}, .component_map = {0, 1, 2}, .pixel_stride = 6, } }; TEST(AV_PIX_FMT_RGB48LE, rgb16); static const struct pl_plane_data rgb16be[] = { { .type = PL_FMT_UNORM, .component_size = {16, 16, 16}, .component_map = {0, 1, 2}, .pixel_stride = 6, .swapped = true, } }; TEST(AV_PIX_FMT_RGB48BE, rgb16be); static const struct pl_plane_data rgba16[] = { { .type = PL_FMT_UNORM, .component_size = {16, 16, 16, 16}, .component_map = {0, 1, 2, 3}, .pixel_stride = 8, } }; TEST(AV_PIX_FMT_RGBA64LE, rgba16); static const struct pl_plane_data rgba16be[] = { { .type = PL_FMT_UNORM, .component_size = {16, 16, 16, 16}, .component_map = {0, 1, 2, 3}, .pixel_stride = 8, .swapped = true, } }; TEST(AV_PIX_FMT_RGBA64BE, rgba16be); static const struct pl_plane_data rgb565[] = { { .type = PL_FMT_UNORM, .component_size = {5, 6, 5}, .component_map = {2, 1, 0}, // LSB to MSB .pixel_stride = 2, } }; TEST(AV_PIX_FMT_RGB565LE, rgb565); #if LIBAVUTIL_VERSION_INT >= AV_VERSION_INT(57, 37, 100) static const struct pl_plane_data rgb32f[] = { { .type = PL_FMT_FLOAT, .component_size = {32, 32, 32}, .component_map = {0, 1, 2}, .pixel_stride = 12, } }; TEST(AV_PIX_FMT_RGBF32LE, rgb32f); #endif // Test pl_frame <- AVFrame bridge struct pl_frame image; AVFrame *frame = av_frame_alloc(); frame->format = AV_PIX_FMT_RGBA; 
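    // Descriptive note (added): mapping this AVFrame should yield a single packed RGBA plane
    // with an RGB color system, as asserted below.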
pl_frame_from_avframe(&image, frame); REQUIRE_CMP(image.num_planes, ==, 1, "d"); REQUIRE_CMP(image.repr.sys, ==, PL_COLOR_SYSTEM_RGB, "u"); // Test inverse mapping struct pl_color_space csp = image.color; pl_color_space_infer(&csp); pl_avframe_set_color(frame, csp); pl_avframe_set_repr(frame, image.repr); pl_avframe_set_profile(frame, image.profile); pl_frame_from_avframe(&image, frame); pl_color_space_infer(&image.color); REQUIRE(pl_color_space_equal(&csp, &image.color)); av_frame_free(&frame); // Test enum functions for (enum pl_color_system sys = 0; sys < PL_COLOR_SYSTEM_COUNT; sys++) { enum AVColorSpace spc = pl_system_to_av(sys); enum pl_color_system sys2 = pl_system_from_av(spc); // Exception to the rule, due to different handling in libav* if (sys2 && sys != PL_COLOR_SYSTEM_BT_2100_HLG) REQUIRE_CMP(sys, ==, sys2, "u"); } for (enum pl_color_levels lev = 0; lev < PL_COLOR_LEVELS_COUNT; lev++) { enum AVColorRange range = pl_levels_to_av(lev); enum pl_color_levels lev2 = pl_levels_from_av(range); REQUIRE_CMP(lev, ==, lev2, "u"); } for (enum pl_color_primaries prim = 0; prim < PL_COLOR_PRIM_COUNT; prim++) { enum AVColorPrimaries avpri = pl_primaries_to_av(prim); enum pl_color_primaries prim2 = pl_primaries_from_av(avpri); if (prim2) REQUIRE_CMP(prim, ==, prim2, "u"); } for (enum pl_color_transfer trc = 0; trc < PL_COLOR_TRC_COUNT; trc++) { enum AVColorTransferCharacteristic avtrc = pl_transfer_to_av(trc); enum pl_color_transfer trc2 = pl_transfer_from_av(avtrc); if (trc2) REQUIRE_CMP(trc, ==, trc2, "u"); } for (enum pl_chroma_location loc = 0; loc < PL_CHROMA_COUNT; loc++) { enum AVChromaLocation avloc = pl_chroma_to_av(loc); enum pl_chroma_location loc2 = pl_chroma_from_av(avloc); REQUIRE_CMP(loc, ==, loc2, "u"); } } libplacebo-v7.349.0/src/tests/lut.c000066400000000000000000000051241463457750100170770ustar00rootroot00000000000000#include "utils.h" #include #include static const char *luts[] = { "TITLE \"1D LUT example\" \n" "LUT_1D_SIZE 11 \n" "# Random comment \n" "0.0 0.0 0.0 \n" "0.1 0.1 0.1 \n" "0.2 0.2 0.2 \n" "0.3 0.3 0.3 \n" "0.4 0.4 0.4 \n" "0.5 0.5 0.5 \n" "0.6 0.6 0.6 \n" "0.7 0.7 0.7 \n" "0.8 0.8 0.8 \n" "0.9 0.9 0.9 \n" "0.10 0.10 0.10 \n", "LUT_3D_SIZE 3 \n" "TITLE \"3D LUT example\" \n" "0.0 0.0 0.0 \n" "0.5 0.0 0.0 \n" "1.0 0.0 0.0 \n" "0.0 0.5 0.0 \n" "0.5 0.5 0.0 \n" "1.0 0.5 0.0 \n" "0.0 1.0 0.0 \n" "0.5 1.0 0.0 \n" "1.0 1.0 0.0 \n" "0.0 0.0 0.5 \n" "0.5 0.0 0.5 \n" "1.0 0.0 0.5 \n" "0.0 0.5 0.5 \n" "0.5 0.5 0.5 \n" "1.0 0.5 0.5 \n" "0.0 1.0 0.5 \n" "0.5 1.0 0.5 \n" "1.0 1.0 0.5 \n" "0.0 0.0 1.0 \n" "0.5 0.0 1.0 \n" "1.0 0.0 1.0 \n" "0.0 0.5 1.0 \n" "0.5 0.5 1.0 \n" "1.0 0.5 1.0 \n" "0.0 1.0 1.0 \n" "0.5 1.0 1.0 \n" "1.0 1.0 1.0 \n", "LUT_1D_SIZE 3 \n" "TITLE \"custom domain\" \n" "DOMAIN_MAX 255 255 255 \n" "0 0 0 \n" "128 128 128 \n" "255 255 255 \n" }; int main() { pl_log log = pl_test_logger(); pl_gpu gpu = pl_gpu_dummy_create(log, NULL); pl_shader sh = pl_shader_alloc(log, NULL); pl_shader_obj obj = NULL; for (int i = 0; i < PL_ARRAY_SIZE(luts); i++) { struct pl_custom_lut *lut; lut = pl_lut_parse_cube(log, luts[i], strlen(luts[i])); REQUIRE(lut); pl_shader_reset(sh, pl_shader_params( .gpu = gpu )); pl_shader_custom_lut(sh, lut, &obj); const struct pl_shader_res *res = pl_shader_finalize(sh); REQUIRE(res); printf("Generated LUT shader:\n%s\n", res->glsl); pl_lut_free(&lut); } pl_shader_obj_destroy(&obj); pl_shader_free(&sh); pl_gpu_dummy_destroy(&gpu); pl_log_destroy(&log); } 
libplacebo-v7.349.0/src/tests/meson.build000066400000000000000000000024021463457750100202650ustar00rootroot00000000000000gpu_tests = [ 'vulkan.c', 'opengl_surfaceless.c', 'd3d11.c', 'dummy.c' ] ts = [] foreach t : tests deps = [tdep_static] if t == 'opengl_surfaceless.c' deps += glad_dep endif sources = [] if gpu_tests.contains(t) sources += 'gpu_tests.c' endif # TODO: Define objects in tdep_static once Meson 1.1.0 is ok to use ts += { 'test': t, 'sources': sources, 'deps': deps, 'objects': lib.extract_all_objects(recursive: false) } endforeach dav1d = dependency('dav1d', required: false) if dav1d.found() ts += { 'test': 'dav1d.c', 'deps': [dav1d, tdep_shared] } endif lavu = dependency('libavutil', version: '>=55.74.100', required: false) lavc = dependency('libavcodec', required: false) lavf = dependency('libavformat', required: false) libav_found = lavu.found() and lavc.found() and lavf.found() if libav_found ts += { 'test': 'libav.c', 'deps': [lavu, lavc, lavf, tdep_shared] } endif foreach t : ts e = executable('test.' + t['test'], [t['test']] + t.get('sources', []), objects: t.get('objects', []), c_args: [ '-Wno-unused-function' ], dependencies: t.get('deps', []), link_args: link_args, link_depends: link_depends, ) test(t['test'], e, timeout: 120) endforeach subdir('include') libplacebo-v7.349.0/src/tests/opengl_surfaceless.c000066400000000000000000000162731463457750100221650ustar00rootroot00000000000000#include "gpu_tests.h" #include "opengl/utils.h" #include #include static void opengl_interop_tests(pl_gpu gpu) { pl_fmt fmt = pl_find_fmt(gpu, PL_FMT_UNORM, 1, 0, 0, PL_FMT_CAP_RENDERABLE | PL_FMT_CAP_LINEAR); if (!fmt) return; pl_tex export = pl_tex_create(gpu, pl_tex_params( .w = 32, .h = 32, .format = fmt, .sampleable = true, .renderable = true, .blit_dst = fmt->caps & PL_FMT_CAP_BLITTABLE, )); REQUIRE(export); struct pl_opengl_wrap_params wrap = { .width = export->params.w, .height = export->params.h, .depth = export->params.d, }; wrap.texture = pl_opengl_unwrap(gpu, export, &wrap.target, &wrap.iformat, NULL); REQUIRE(wrap.texture); pl_tex import = pl_opengl_wrap(gpu, &wrap); REQUIRE(import); REQUIRE(import->params.renderable); REQUIRE_CMP(import->params.blit_dst, ==, export->params.blit_dst, "d"); pl_tex_destroy(gpu, &import); pl_tex_destroy(gpu, &export); } #define PBUFFER_WIDTH 640 #define PBUFFER_HEIGHT 480 struct swapchain_priv { EGLDisplay display; EGLSurface surface; }; static void swap_buffers(void *priv) { struct swapchain_priv *p = priv; eglSwapBuffers(p->display, p->surface); } static void opengl_swapchain_tests(pl_opengl gl, EGLDisplay display, EGLSurface surface) { if (surface == EGL_NO_SURFACE) return; printf("testing opengl swapchain\n"); pl_gpu gpu = gl->gpu; pl_swapchain sw; sw = pl_opengl_create_swapchain(gl, pl_opengl_swapchain_params( .swap_buffers = swap_buffers, .priv = &(struct swapchain_priv) { display, surface }, )); REQUIRE(sw); int w = PBUFFER_WIDTH, h = PBUFFER_HEIGHT; REQUIRE(pl_swapchain_resize(sw, &w, &h)); for (int i = 0; i < 10; i++) { struct pl_swapchain_frame frame; REQUIRE(pl_swapchain_start_frame(sw, &frame)); if (frame.fbo->params.blit_dst) pl_tex_clear(gpu, frame.fbo, (float[4]){0}); // TODO: test this with an actual pl_renderer instance struct pl_frame target; pl_frame_from_swapchain(&target, &frame); REQUIRE(pl_swapchain_submit_frame(sw)); pl_swapchain_swap_buffers(sw); } pl_swapchain_destroy(&sw); } int main() { if (!gladLoaderLoadEGL(EGL_NO_DISPLAY)) return SKIP; const char *extstr = eglQueryString(EGL_NO_DISPLAY, EGL_EXTENSIONS); if 
(!extstr || !strstr(extstr, "EGL_MESA_platform_surfaceless")) return SKIP; // Create the OpenGL context EGLDisplay dpy = eglGetPlatformDisplayEXT(EGL_PLATFORM_SURFACELESS_MESA, (void *) EGL_DEFAULT_DISPLAY, NULL); if (dpy == EGL_NO_DISPLAY) return SKIP; EGLint major, minor; if (!eglInitialize(dpy, &major, &minor)) return SKIP; if (!gladLoaderLoadEGL(dpy)) return SKIP; printf("Initialized EGL v%d.%d\n", major, minor); int egl_ver = major * 10 + minor; struct { EGLenum api; EGLenum render; int major, minor; int glsl_ver; EGLenum profile; } egl_vers[] = { { EGL_OPENGL_API, EGL_OPENGL_BIT, 4, 6, 460, EGL_CONTEXT_OPENGL_CORE_PROFILE_BIT }, { EGL_OPENGL_API, EGL_OPENGL_BIT, 3, 3, 330, EGL_CONTEXT_OPENGL_CORE_PROFILE_BIT }, { EGL_OPENGL_API, EGL_OPENGL_BIT, 3, 0, 130, EGL_CONTEXT_OPENGL_COMPATIBILITY_PROFILE_BIT, }, { EGL_OPENGL_ES_API, EGL_OPENGL_ES3_BIT, 3, 0, 300, }, }; struct pl_glsl_version last_glsl = {0}; struct pl_gpu_limits last_limits = {0}; pl_log log = pl_test_logger(); for (int i = 0; i < PL_ARRAY_SIZE(egl_vers); i++) { const int cfg_attribs[] = { EGL_SURFACE_TYPE, EGL_PBUFFER_BIT, EGL_RENDERABLE_TYPE, egl_vers[i].render, EGL_NONE }; EGLConfig config = 0; EGLint num_configs = 0; bool ok = eglChooseConfig(dpy, cfg_attribs, &config, 1, &num_configs); if (!ok || !num_configs) goto error; if (!eglBindAPI(egl_vers[i].api)) goto error; EGLContext egl; if (egl_vers[i].api == EGL_OPENGL_ES_API) { // OpenGL ES const EGLint egl_attribs[] = { EGL_CONTEXT_CLIENT_VERSION, egl_vers[i].major, (egl_ver >= 15) ? EGL_CONTEXT_OPENGL_DEBUG : EGL_NONE, EGL_TRUE, EGL_NONE }; printf("Attempting creation of OpenGL ES v%d context\n", egl_vers[i].major); egl = eglCreateContext(dpy, config, EGL_NO_CONTEXT, egl_attribs); } else { // Desktop OpenGL const int egl_attribs[] = { EGL_CONTEXT_MAJOR_VERSION, egl_vers[i].major, EGL_CONTEXT_MINOR_VERSION, egl_vers[i].minor, EGL_CONTEXT_OPENGL_PROFILE_MASK, egl_vers[i].profile, (egl_ver >= 15) ? 
EGL_CONTEXT_OPENGL_DEBUG : EGL_NONE, EGL_TRUE, EGL_NONE }; printf("Attempting creation of Desktop OpenGL v%d.%d context\n", egl_vers[i].major, egl_vers[i].minor); egl = eglCreateContext(dpy, config, EGL_NO_CONTEXT, egl_attribs); } if (!egl) goto error; const EGLint pbuffer_attribs[] = { EGL_WIDTH, PBUFFER_WIDTH, EGL_HEIGHT, PBUFFER_HEIGHT, EGL_NONE }; EGLSurface surf = eglCreatePbufferSurface(dpy, config, pbuffer_attribs); if (!eglMakeCurrent(dpy, surf, surf, egl)) goto error; pl_opengl gl = pl_opengl_create(log, pl_opengl_params( .get_proc_addr = (pl_voidfunc_t (*)(const char *)) eglGetProcAddress, .max_glsl_version = egl_vers[i].glsl_ver, .debug = true, .egl_display = dpy, .egl_context = egl, #ifdef CI_ALLOW_SW .allow_software = true, #endif )); if (!gl) goto next; // Skip repeat tests pl_gpu gpu = gl->gpu; if (memcmp(&last_glsl, &gpu->glsl, sizeof(last_glsl)) == 0 && memcmp(&last_limits, &gpu->limits, sizeof(last_limits)) == 0) { printf("Skipping tests due to duplicate capabilities/version\n"); goto next; } #ifdef CI_MAXGL if (last_glsl.version && last_glsl.gles == gpu->glsl.gles) goto next; #endif last_glsl = gpu->glsl; last_limits = gpu->limits; gpu_shader_tests(gpu); gpu_interop_tests(gpu); opengl_interop_tests(gpu); opengl_swapchain_tests(gl, dpy, surf); // Reduce log spam after first successful test pl_log_level_update(log, PL_LOG_INFO); next: pl_opengl_destroy(&gl); eglDestroySurface(dpy, surf); eglDestroyContext(dpy, egl); continue; error: ; EGLint error = eglGetError(); if (error != EGL_SUCCESS) fprintf(stderr, "EGL error: %s\n", egl_err_str(error)); } eglTerminate(dpy); gladLoaderUnloadEGL(); pl_log_destroy(&log); if (!last_glsl.version) return SKIP; } libplacebo-v7.349.0/src/tests/options.c000066400000000000000000000116631463457750100177730ustar00rootroot00000000000000#include "utils.h" #include static void count_cb(void *priv, pl_opt_data data) { int *num = priv; printf("Iterating over option: %s = %s\n", data->opt->key, data->text); (*num)++; } static void set_cb(void *priv, pl_opt_data data) { pl_options dst = priv; REQUIRE(pl_options_set_str(dst, data->opt->key, data->text)); } int main() { pl_log log = pl_test_logger(); pl_options test = pl_options_alloc(log); REQUIRE_STREQ(pl_options_save(test), ""); REQUIRE(pl_options_load(test, "")); REQUIRE_STREQ(pl_options_save(test), ""); pl_options_reset(test, &pl_render_fast_params); REQUIRE_STREQ(pl_options_save(test), ""); REQUIRE(pl_options_load(test, "preset=fast")); REQUIRE_STREQ(pl_options_save(test), ""); const char *def_opts = "upscaler=lanczos,downscaler=hermite,frame_mixer=oversample,sigmoid=yes,peak_detect=yes,dither=yes"; pl_options_reset(test, &pl_render_default_params); REQUIRE_STREQ(pl_options_save(test), def_opts); struct pl_options_t def_pre = *test; pl_options_reset(test, NULL); REQUIRE_STREQ(pl_options_save(test), ""); REQUIRE(pl_options_load(test, def_opts)); REQUIRE_STREQ(pl_options_save(test), def_opts); REQUIRE_MEMEQ(test, &def_pre, sizeof(*test)); pl_options_reset(test, NULL); REQUIRE(pl_options_load(test, "preset=default")); REQUIRE_STREQ(pl_options_save(test), def_opts); REQUIRE_MEMEQ(test, &def_pre, sizeof(*test)); int num = 0; pl_options_iterate(test, count_cb, &num); REQUIRE_CMP(num, ==, 6, "d"); pl_opt_data data; REQUIRE((data = pl_options_get(test, "tile_size"))); REQUIRE_STREQ(data->opt->key, "tile_size"); REQUIRE_CMP(*(int *) data->value, =, pl_render_default_params.tile_size, "d"); REQUIRE_STREQ(data->text, "32"); const char *hq_opts = 
"upscaler=ewa_lanczossharp,downscaler=hermite,frame_mixer=oversample,deband=yes,sigmoid=yes,peak_detect=yes,peak_percentile=99.99500274658203,contrast_recovery=0.30000001192092896,dither=yes"; // fallback can produce different precision const char *hq_opts2 = "upscaler=ewa_lanczossharp,downscaler=hermite,frame_mixer=oversample,deband=yes,sigmoid=yes,peak_detect=yes,peak_percentile=99.99500274658203125,contrast_recovery=0.30000001192092896,dither=yes"; pl_options_reset(test, &pl_render_high_quality_params); const char *opts = pl_options_save(test); if (!strcmp(opts, hq_opts2)) hq_opts = hq_opts2; REQUIRE_STREQ(opts, hq_opts); struct pl_options_t hq_pre = *test; pl_options_reset(test, NULL); REQUIRE_STREQ(pl_options_save(test), ""); REQUIRE(pl_options_load(test, hq_opts)); REQUIRE_STREQ(pl_options_save(test), hq_opts); REQUIRE_MEMEQ(test, &hq_pre, sizeof(*test)); REQUIRE(pl_options_load(test, "preset=high_quality")); REQUIRE_STREQ(pl_options_save(test), hq_opts); REQUIRE_MEMEQ(test, &hq_pre, sizeof(*test)); pl_options test2 = pl_options_alloc(log); pl_options_iterate(test, set_cb, test2); REQUIRE_STREQ(pl_options_save(test), pl_options_save(test2)); pl_options_free(&test2); // Test custom scalers pl_options_reset(test, pl_render_params( .upscaler = &(struct pl_filter_config) { .kernel = &pl_filter_function_jinc, .window = &pl_filter_function_jinc, .radius = 4.0, .polar = true, }, )); const char *jinc4_opts = "upscaler=custom,upscaler_kernel=jinc,upscaler_window=jinc,upscaler_radius=4,upscaler_polar=yes"; REQUIRE_STREQ(pl_options_save(test), jinc4_opts); struct pl_options_t jinc4_pre = *test; pl_options_reset(test, NULL); REQUIRE(pl_options_load(test, "upscaler=custom,upscaler_preset=ewa_lanczos,upscaler_radius=4.0,upscaler_clamp=0.0")); REQUIRE_STREQ(pl_options_save(test), jinc4_opts); REQUIRE_MEMEQ(test, &jinc4_pre, sizeof(*test)); // Test params presets pl_options_reset(test, NULL); REQUIRE(pl_options_load(test, "cone=yes,cone_preset=deuteranomaly")); REQUIRE_STREQ(pl_options_save(test), "cone=yes,cones=m,cone_strength=0.5"); // Test error paths pl_options bad = pl_options_alloc(NULL); REQUIRE(!pl_options_load(bad, "scale_preset=help")); REQUIRE(!pl_options_load(bad, "dither_method=invalid")); REQUIRE(!pl_options_load(bad, "lut_entries=-1")); REQUIRE(!pl_options_load(bad, "deband_iterations=100")); REQUIRE(!pl_options_load(bad, "tone_lut_size=abc")); REQUIRE(!pl_options_load(bad, "show_clipping=hello")); REQUIRE(!pl_options_load(bad, "brightness=2.0")); REQUIRE(!pl_options_load(bad, "gamma=oops")); REQUIRE(!pl_options_load(bad, "invalid")); REQUIRE(!pl_options_load(bad, "=")); REQUIRE(!pl_options_load(bad, "preset==bar")); REQUIRE(!pl_options_load(bad, "peak_percentile=E8203125")); REQUIRE(!pl_options_get(bad, "invalid")); REQUIRE_STREQ(pl_options_save(bad), ""); pl_options_free(&bad); pl_options_free(&test); pl_log_destroy(&log); return 0; } libplacebo-v7.349.0/src/tests/string.c000066400000000000000000000151751463457750100176100ustar00rootroot00000000000000#include "utils.h" static const pl_str null = {0}; static const pl_str test = PL_STR0("test"); static const pl_str empty = PL_STR0(""); static inline bool is_null(pl_str str) { return !str.len && !str.buf; } static inline bool is_empty(pl_str str) { return !str.len; } int main() { void *tmp = pl_tmp(NULL); REQUIRE(is_null(pl_str0(NULL))); REQUIRE(is_null(pl_strdup(tmp, null))); char *empty0 = pl_strdup0(tmp, null); REQUIRE(empty0 && !empty0[0]); REQUIRE(pl_str_equals0(empty, empty0)); pl_str buf = {0}; pl_str_append(tmp, &buf, null); 
REQUIRE(is_empty(buf)); pl_str_append_asprintf(tmp, &buf, "%.*s", PL_STR_FMT(test)); REQUIRE(pl_str_equals(buf, test)); pl_str_append_asprintf_c(tmp, &buf, "%d %f %f %f %lld %zu %.*sx %hx %hx %hx %hx", 1, 1.0f, 4294967295.56, 83224965647295.65, 0xFFll, (size_t) 0, PL_STR_FMT(empty), (unsigned short) 0xCAFEu, (unsigned short) 0x1, (unsigned short) 0, (unsigned short) 0xFFFFu); const char *expected = "test1 1 4294967295.56 83224965647295.66 255 0 x cafe 1 0 ffff"; // fallback can produce different precision const char *expected2 = "test1 1 4294967295.55999994277954102 83224965647295.65625 255 0 x cafe 1 0 ffff"; REQUIRE(pl_str_equals0(buf, expected) || pl_str_equals0(buf, expected2)); REQUIRE_CMP(pl_strchr(null, ' '), <, 0, "d"); REQUIRE_CMP((int) pl_strspn(null, " "), ==, 0, "d"); REQUIRE_CMP((int) pl_strcspn(null, " "), ==, 0, "d"); REQUIRE(is_null(pl_str_strip(null))); REQUIRE_CMP(pl_strchr(test, 's'), ==, 2, "d"); REQUIRE_CMP((int) pl_strspn(test, "et"), ==, 2, "d"); REQUIRE_CMP((int) pl_strcspn(test, "xs"), ==, 2, "d"); REQUIRE(is_null(pl_str_take(null, 10))); REQUIRE(is_empty(pl_str_take(test, 0))); REQUIRE(is_null(pl_str_drop(null, 10))); REQUIRE(is_null(pl_str_drop(test, test.len))); REQUIRE(pl_str_equals(pl_str_drop(test, 0), test)); REQUIRE_CMP(pl_str_find(null, test), <, 0, "d"); REQUIRE_CMP(pl_str_find(null, null), ==, 0, "d"); REQUIRE_CMP(pl_str_find(test, null), ==, 0, "d"); REQUIRE_CMP(pl_str_find(test, test), ==, 0, "d"); pl_str rest; REQUIRE(is_null(pl_str_split_char(null, ' ', &rest)) && is_null(rest)); REQUIRE(is_null(pl_str_split_str(null, test, &rest)) && is_null(rest)); REQUIRE(is_empty(pl_str_split_str(test, test, &rest)) && is_empty(rest)); REQUIRE(is_null(pl_str_getline(null, &rest)) && is_null(rest)); pl_str right, left = pl_str_split_char(pl_str0("left right"), ' ', &right); REQUIRE(pl_str_equals0(left, "left")); REQUIRE(pl_str_equals0(right, "right")); left = pl_str_split_str0(pl_str0("leftTESTright"), "TEST", &right); REQUIRE(pl_str_equals0(left, "left")); REQUIRE(pl_str_equals0(right, "right")); pl_str out; REQUIRE(pl_str_decode_hex(tmp, null, &out) && is_empty(out)); REQUIRE(!pl_str_decode_hex(tmp, pl_str0("invalid"), &out)); REQUIRE(pl_str_equals(null, null)); REQUIRE(pl_str_equals(null, empty)); REQUIRE(pl_str_startswith(null, null)); REQUIRE(pl_str_startswith(test, null)); REQUIRE(pl_str_startswith(test, test)); REQUIRE(pl_str_endswith(null, null)); REQUIRE(pl_str_endswith(test, null)); REQUIRE(pl_str_endswith(test, test)); double d; float f; int i; unsigned u; int64_t i64; uint64_t u64; REQUIRE(pl_str_parse_double(pl_str0("4294967295.56"), &d)); REQUIRE_FEQ(d, 4294967295.56, 1e-20); REQUIRE(pl_str_parse_double(pl_str0("-4294967295.56"), &d)); REQUIRE_FEQ(d, -4294967295.56, 1e-20); REQUIRE(pl_str_parse_double(pl_str0("83224965647295.65"), &d)); REQUIRE_FEQ(d, 83224965647295.65, 1e-20); REQUIRE(pl_str_parse_double(pl_str0("-83224965647295.65"), &d)); REQUIRE_FEQ(d, -83224965647295.65, 1e-20); REQUIRE(pl_str_parse_float(pl_str0("4294967295.56"), &f)); REQUIRE_FEQ(f, 4294967295.56f, 1e-8); REQUIRE(pl_str_parse_float(pl_str0("-4294967295.56"), &f)); REQUIRE_FEQ(f, -4294967295.56f, 1e-8); REQUIRE(pl_str_parse_float(pl_str0("83224965647295.65"), &f)); REQUIRE_FEQ(f, 83224965647295.65f, 1e-8); REQUIRE(pl_str_parse_float(pl_str0("-83224965647295.65"), &f)); REQUIRE_FEQ(f, -83224965647295.65f, 1e-8); REQUIRE(pl_str_parse_float(pl_str0("1.3984"), &f)); REQUIRE_FEQ(f, 1.3984f, 1e-8); REQUIRE(pl_str_parse_float(pl_str0("-8.9100083"), &f)); REQUIRE_FEQ(f, -8.9100083f, 
1e-8); REQUIRE(pl_str_parse_float(pl_str0("-0"), &f)); REQUIRE_FEQ(f, 0.0f, 1e-8); REQUIRE(pl_str_parse_float(pl_str0("-3.14e20"), &f)); REQUIRE_FEQ(f, -3.14e20f, 1e-8); REQUIRE(pl_str_parse_float(pl_str0("0.5e-5"), &f)); REQUIRE_FEQ(f, 0.5e-5f, 1e-8); REQUIRE(pl_str_parse_float(pl_str0("0.5e+5"), &f)); REQUIRE_FEQ(f, 0.5e+5f, 1e-8); REQUIRE(pl_str_parse_int(pl_str0("64239"), &i)); REQUIRE_CMP(i, ==, 64239, "d"); REQUIRE(pl_str_parse_int(pl_str0("-102"), &i)); REQUIRE_CMP(i, ==, -102, "d"); REQUIRE(pl_str_parse_int(pl_str0("1"), &i)); REQUIRE_CMP(i, ==, 1, "d"); REQUIRE(pl_str_parse_int(pl_str0("-0"), &i)); REQUIRE_CMP(i, ==, 0, "d"); REQUIRE(pl_str_parse_uint(pl_str0("64239"), &u)); REQUIRE_CMP(u, ==, 64239, "u"); REQUIRE(pl_str_parse_uint(pl_str0("1"), &u)); REQUIRE_CMP(u, ==, 1, "u"); REQUIRE(pl_str_parse_int64(pl_str0("9223372036854775799"), &i64)); REQUIRE_CMP(i64, ==, 9223372036854775799LL, PRIi64); REQUIRE(pl_str_parse_int64(pl_str0("-9223372036854775799"), &i64)); REQUIRE_CMP(i64, ==, -9223372036854775799LL, PRIi64); REQUIRE(pl_str_parse_uint64(pl_str0("18446744073709551609"), &u64)); REQUIRE_CMP(u64, ==, 18446744073709551609LLU, PRIu64); REQUIRE(!pl_str_parse_float(null, &f)); REQUIRE(!pl_str_parse_float(test, &f)); REQUIRE(!pl_str_parse_float(empty, &f)); REQUIRE(!pl_str_parse_int(null, &i)); REQUIRE(!pl_str_parse_int(test, &i)); REQUIRE(!pl_str_parse_int(empty, &i)); REQUIRE(!pl_str_parse_uint(null, &u)); REQUIRE(!pl_str_parse_uint(test, &u)); REQUIRE(!pl_str_parse_uint(empty, &u)); pl_str_builder builder = pl_str_builder_alloc(tmp); pl_str_builder_const_str(builder, "hello"); pl_str_builder_str(builder, pl_str0("world")); pl_str res = pl_str_builder_exec(builder); REQUIRE(pl_str_equals0(res, "helloworld")); pl_str_builder_reset(builder); pl_str_builder_printf_c(builder, "foo %d bar %u bat %s baz %lld", 123, 56u, "quack", 0xDEADBEEFll); pl_str_builder_printf_c(builder, " %.*s", PL_STR_FMT(pl_str0("test123"))); res = pl_str_builder_exec(builder); REQUIRE(pl_str_equals0(res, "foo 123 bar 56 bat quack baz 3735928559 test123")); pl_free(tmp); return 0; } libplacebo-v7.349.0/src/tests/tone_mapping.c000066400000000000000000000170711463457750100207570ustar00rootroot00000000000000#include "utils.h" #include "log.h" #include #include //#define PRINT_LUTS int main() { pl_log log = pl_test_logger(); // PQ unit tests REQUIRE_FEQ(pl_hdr_rescale(PL_HDR_PQ, PL_HDR_NITS, 0.0), 0.0, 1e-2); REQUIRE_FEQ(pl_hdr_rescale(PL_HDR_PQ, PL_HDR_NITS, 1.0), 10000.0, 1e-2); REQUIRE_FEQ(pl_hdr_rescale(PL_HDR_PQ, PL_HDR_NITS, 0.58), 203.0, 1e-2); // Test round-trip for (float x = 0.0f; x < 1.0f; x += 0.01f) { REQUIRE_FEQ(x, pl_hdr_rescale(PL_HDR_NORM, PL_HDR_PQ, pl_hdr_rescale(PL_HDR_PQ, PL_HDR_NORM, x)), 1e-5); } static float lut[128]; struct pl_tone_map_params params = { .constants = { PL_TONE_MAP_CONSTANTS }, .input_scaling = PL_HDR_PQ, .output_scaling = PL_HDR_PQ, .lut_size = PL_ARRAY_SIZE(lut), }; // Test regular tone-mapping params.input_min = pl_hdr_rescale(PL_HDR_NITS, params.input_scaling, 0.005); params.input_max = pl_hdr_rescale(PL_HDR_NITS, params.input_scaling, 1000.0); params.output_min = pl_hdr_rescale(PL_HDR_NORM, params.output_scaling, 0.001); params.output_max = pl_hdr_rescale(PL_HDR_NORM, params.output_scaling, 1.0); struct pl_tone_map_params params_inv = params; PL_SWAP(params_inv.input_min, params_inv.output_min); PL_SWAP(params_inv.input_max, params_inv.output_max); int tested_pure_bpc = 0; // Generate example tone mapping curves, forward and inverse for (int i = 0; i < 
pl_num_tone_map_functions; i++) { const struct pl_tone_map_function *fun = pl_tone_map_functions[i]; printf("Testing tone-mapping function %s\n", fun->name); params.function = params_inv.function = fun; pl_clock_t start = pl_clock_now(); pl_tone_map_generate(lut, ¶ms); pl_log_cpu_time(log, start, pl_clock_now(), "generating LUT"); for (int j = 0; j < PL_ARRAY_SIZE(lut); j++) { REQUIRE(isfinite(lut[j]) && !isnan(lut[j])); if (j > 0) REQUIRE_CMP(lut[j], >=, lut[j - 1], "f"); #ifdef PRINT_LUTS printf("%f, %f\n", j / (PL_ARRAY_SIZE(lut) - 1.0f), lut[j]); #endif } if (fun->map_inverse || !tested_pure_bpc++) { start = pl_clock_now(); pl_tone_map_generate(lut, ¶ms_inv); pl_log_cpu_time(log, start, pl_clock_now(), "generating inverse LUT"); for (int j = 0; j < PL_ARRAY_SIZE(lut); j++) { REQUIRE(isfinite(lut[j]) && !isnan(lut[j])); if (j > 0) REQUIRE_CMP(lut[j], >=, lut[j - 1], "f"); #ifdef PRINT_LUTS printf("%f, %f\n", j / (PL_ARRAY_SIZE(lut) - 1.0f), lut[j]); #endif } } } // Test that `spline` is a no-op for 1:1 tone mapping params.output_min = params.input_min; params.output_max = params.input_max; params.function = &pl_tone_map_spline; pl_tone_map_generate(lut, ¶ms); for (int j = 0; j < PL_ARRAY_SIZE(lut); j++) { float x = j / (PL_ARRAY_SIZE(lut) - 1.0f); x = PL_MIX(params.input_min, params.input_max, x); REQUIRE_FEQ(x, lut[j], 1e-5); } // Test some gamut mapping methods for (int i = 0; i < pl_num_gamut_map_functions; i++) { static const float min_rgb = 0.1f, max_rgb = PL_COLOR_SDR_WHITE; struct pl_gamut_map_params gamut = { .function = pl_gamut_map_functions[i], .input_gamut = *pl_raw_primaries_get(PL_COLOR_PRIM_BT_2020), .output_gamut = *pl_raw_primaries_get(PL_COLOR_PRIM_BT_709), .min_luma = pl_hdr_rescale(PL_HDR_NITS, PL_HDR_PQ, min_rgb), .max_luma = pl_hdr_rescale(PL_HDR_NITS, PL_HDR_PQ, max_rgb), }; printf("Testing gamut-mapping function %s\n", gamut.function->name); // Require that black maps to black and white maps to white float black[3] = { gamut.min_luma, 0.0f, 0.0f }; float white[3] = { gamut.max_luma, 0.0f, 0.0f }; pl_gamut_map_sample(black, &gamut); pl_gamut_map_sample(white, &gamut); REQUIRE_FEQ(black[0], gamut.min_luma, 1e-4); REQUIRE_FEQ(black[1], 0.0f, 1e-4); REQUIRE_FEQ(black[2], 0.0f, 1e-4); if (gamut.function != &pl_gamut_map_darken) REQUIRE_FEQ(white[0], gamut.max_luma, 1e-4); REQUIRE_FEQ(white[1], 0.0f, 1e-4); REQUIRE_FEQ(white[2], 0.0f, 1e-4); } enum { LUT3D_SIZE = 65 }; // for benchmarking struct pl_gamut_map_params perceptual = { .function = &pl_gamut_map_perceptual, .input_gamut = *pl_raw_primaries_get(PL_COLOR_PRIM_BT_2020), .output_gamut = *pl_raw_primaries_get(PL_COLOR_PRIM_BT_709), .max_luma = pl_hdr_rescale(PL_HDR_NORM, PL_HDR_PQ, 1.0f), .lut_size_I = LUT3D_SIZE, .lut_size_C = LUT3D_SIZE, .lut_size_h = LUT3D_SIZE, .lut_stride = 3, // Set strength to maximum, because otherwise the saturation mapping // code will not fully apply, invalidating the following test .constants.perceptual_strength = 1.0f, }; // Test that primaries round-trip for perceptual gamut mapping const pl_matrix3x3 rgb2lms_src = pl_ipt_rgb2lms(&perceptual.input_gamut); const pl_matrix3x3 rgb2lms_dst = pl_ipt_rgb2lms(&perceptual.output_gamut); const pl_matrix3x3 lms2rgb_dst = pl_ipt_lms2rgb(&perceptual.output_gamut); static const float refpoints[][3] = { {1, 0, 0}, {0, 1, 0}, {0, 0, 1}, {0, 1, 1}, {1, 0, 1}, {1, 1, 0}, }; for (int i = 0; i < PL_ARRAY_SIZE(refpoints); i++) { float c[3] = { refpoints[i][0], refpoints[i][1], refpoints[i][2] }; float ref[3] = { refpoints[i][0], refpoints[i][1], 
refpoints[i][2] }; printf("Testing primary: RGB {%.0f %.0f %.0f}\n", c[0], c[1], c[2]); pl_matrix3x3_apply(&rgb2lms_src, c); c[0] = pl_hdr_rescale(PL_HDR_NORM, PL_HDR_PQ, c[0]); c[1] = pl_hdr_rescale(PL_HDR_NORM, PL_HDR_PQ, c[1]); c[2] = pl_hdr_rescale(PL_HDR_NORM, PL_HDR_PQ, c[2]); pl_matrix3x3_apply(&pl_ipt_lms2ipt, c); printf("Before: ICh {%f %f %f}\n", c[0], sqrtf(c[1]*c[1] + c[2]*c[2]), atan2f(c[2], c[1])); pl_gamut_map_sample(c, &perceptual); float rgb[3] = { c[0], c[1], c[2] }; pl_matrix3x3_apply(&pl_ipt_ipt2lms, rgb); rgb[0] = pl_hdr_rescale(PL_HDR_PQ, PL_HDR_NORM, rgb[0]); rgb[1] = pl_hdr_rescale(PL_HDR_PQ, PL_HDR_NORM, rgb[1]); rgb[2] = pl_hdr_rescale(PL_HDR_PQ, PL_HDR_NORM, rgb[2]); pl_matrix3x3_apply(&lms2rgb_dst, rgb); const float hue = atan2f(c[2], c[1]); printf("After: ICh {%f %f %f} = RGB {%f %f %f}\n", c[0], sqrtf(c[1]*c[1] + c[2]*c[2]), hue, rgb[0], rgb[1], rgb[2]); pl_matrix3x3_apply(&rgb2lms_dst, ref); ref[0] = pl_hdr_rescale(PL_HDR_NORM, PL_HDR_PQ, ref[0]); ref[1] = pl_hdr_rescale(PL_HDR_NORM, PL_HDR_PQ, ref[1]); ref[2] = pl_hdr_rescale(PL_HDR_NORM, PL_HDR_PQ, ref[2]); pl_matrix3x3_apply(&pl_ipt_lms2ipt, ref); const float hue_ref = atan2f(ref[2], ref[1]); printf("Should be: ICh {%f %f %f}\n", ref[0], sqrtf(ref[1]*ref[1] + ref[2]*ref[2]), hue_ref); REQUIRE_FEQ(hue, hue_ref, 3.0e-3); } float *tmp = malloc(sizeof(float[LUT3D_SIZE][LUT3D_SIZE][LUT3D_SIZE][3])); if (tmp) { pl_clock_t start = pl_clock_now(); pl_gamut_map_generate(tmp, &perceptual); pl_log_cpu_time(log, start, pl_clock_now(), "generating 3DLUT"); free(tmp); } pl_log_destroy(&log); } libplacebo-v7.349.0/src/tests/utils.c000066400000000000000000000132401463457750100174310ustar00rootroot00000000000000#include "utils.h" #include "gpu.h" #include int main() { struct pl_bit_encoding bits = {0}; struct pl_plane_data data = {0}; static const struct pl_bit_encoding bits0 = {0}; static const struct pl_bit_encoding bits8 = { .sample_depth = 8, .color_depth = 8, }; static const struct pl_bit_encoding bits16 = { .sample_depth = 16, .color_depth = 16, }; static const struct pl_bit_encoding bits10_16 = { .sample_depth = 16, .color_depth = 10, }; static const struct pl_bit_encoding bits10_16_6 = { .sample_depth = 16, .color_depth = 10, .bit_shift = 6, }; #define TEST_ALIGN(ref, ref_align, ref_bits, ...) \ do { \ pl_plane_data_from_mask(&data, (uint64_t[4]){ __VA_ARGS__ }); \ REQUIRE_MEMEQ(&data, &ref, sizeof(ref)); \ pl_plane_data_align(&data, &bits); \ REQUIRE_MEMEQ(&data, &ref_align, sizeof(ref_align)); \ REQUIRE_MEMEQ(&bits, &ref_bits, sizeof(bits)); \ } while (0) #define TEST(ref, bits, ...) 
TEST_ALIGN(ref, ref, bits, __VA_ARGS__) static const struct pl_plane_data rgb8 = { .component_size = {8, 8, 8}, .component_map = {0, 1, 2}, }; TEST(rgb8, bits8, 0xFF, 0xFF00, 0xFF0000); static const struct pl_plane_data bgra8 = { .component_size = {8, 8, 8, 8}, .component_map = {2, 1, 0, 3}, }; TEST(bgra8, bits8, 0xFF0000, 0xFF00, 0xFF, 0xFF000000); static const struct pl_plane_data gr16 = { .component_size = {16, 16}, .component_map = {1, 0}, }; TEST(gr16, bits16, 0xFFFF0000, 0xFFFF); static const struct pl_plane_data r10x6g10 = { .component_size = {10, 10}, .component_map = {1, 0}, // LSB -> MSB ordering .component_pad = {0, 6}, }; TEST_ALIGN(r10x6g10, gr16, bits10_16, 0x03FF0000, 0x03FF); static const struct pl_plane_data rgb565 = { .component_size = {5, 6, 5}, .component_map = {2, 1, 0}, // LSB -> MSB ordering }; TEST(rgb565, bits0, 0xF800, 0x07E0, 0x001F); static const struct pl_plane_data rgba16 = { .component_size = {16, 16, 16, 16}, .component_map = {0, 1, 2, 3}, }; TEST(rgba16, bits16, 0xFFFFllu, 0xFFFF0000llu, 0xFFFF00000000llu, 0xFFFF000000000000llu); static const struct pl_plane_data p010 = { .component_size = {10, 10, 10}, .component_map = {0, 1, 2}, .component_pad = {6, 6, 6}, }; static const struct pl_plane_data rgb16 = { .component_size = {16, 16, 16}, .component_map = {0, 1, 2}, }; TEST_ALIGN(p010, rgb16, bits10_16_6, 0xFFC0llu, 0xFFC00000llu, 0xFFC000000000llu); // Test GLSL structure packing struct pl_var vec1 = pl_var_float(""), vec2 = pl_var_vec2(""), vec3 = pl_var_vec3(""), mat2 = pl_var_mat2(""), mat3 = pl_var_mat3(""); struct pl_var_layout layout; layout = pl_std140_layout(0, &vec2); REQUIRE_CMP(layout.offset, ==, 0 * sizeof(float), "zu"); REQUIRE_CMP(layout.stride, ==, 2 * sizeof(float), "zu"); REQUIRE_CMP(layout.size, ==, 2 * sizeof(float), "zu"); layout = pl_std140_layout(3 * sizeof(float), &vec3); REQUIRE_CMP(layout.offset, ==, 4 * sizeof(float), "zu"); REQUIRE_CMP(layout.stride, ==, 3 * sizeof(float), "zu"); REQUIRE_CMP(layout.size, ==, 3 * sizeof(float), "zu"); layout = pl_std140_layout(2 * sizeof(float), &mat3); REQUIRE_CMP(layout.offset, ==, 4 * sizeof(float), "zu"); REQUIRE_CMP(layout.stride, ==, 4 * sizeof(float), "zu"); REQUIRE_CMP(layout.size, ==, 3 * 4 * sizeof(float), "zu"); layout = pl_std430_layout(2 * sizeof(float), &mat3); REQUIRE_CMP(layout.offset, ==, 4 * sizeof(float), "zu"); REQUIRE_CMP(layout.stride, ==, 4 * sizeof(float), "zu"); REQUIRE_CMP(layout.size, ==, 4 * 3 * sizeof(float), "zu"); layout = pl_std140_layout(3 * sizeof(float), &vec1); REQUIRE_CMP(layout.offset, ==, 3 * sizeof(float), "zu"); REQUIRE_CMP(layout.stride, ==, sizeof(float), "zu"); REQUIRE_CMP(layout.size, ==, sizeof(float), "zu"); struct pl_var vec2a = vec2; vec2a.dim_a = 50; layout = pl_std140_layout(sizeof(float), &vec2a); REQUIRE_CMP(layout.offset, ==, 4 * sizeof(float), "zu"); REQUIRE_CMP(layout.stride, ==, 4 * sizeof(float), "zu"); REQUIRE_CMP(layout.size, ==, 50 * 4 * sizeof(float), "zu"); layout = pl_std430_layout(sizeof(float), &vec2a); REQUIRE_CMP(layout.offset, ==, 2 * sizeof(float), "zu"); REQUIRE_CMP(layout.stride, ==, 2 * sizeof(float), "zu"); REQUIRE_CMP(layout.size, ==, 50 * 2 * sizeof(float), "zu"); struct pl_var mat2a = mat2; mat2a.dim_a = 20; layout = pl_std140_layout(5 * sizeof(float), &mat2a); REQUIRE_CMP(layout.offset, ==, 8 * sizeof(float), "zu"); REQUIRE_CMP(layout.stride, ==, 4 * sizeof(float), "zu"); REQUIRE_CMP(layout.size, ==, 20 * 2 * 4 * sizeof(float), "zu"); layout = pl_std430_layout(5 * sizeof(float), &mat2a); REQUIRE_CMP(layout.offset, ==, 6 * 
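/* Informal summary of the packing rules these assertions exercise:
 * - std140 rounds array strides and matrix column strides up to a vec4
 *   (16 bytes), hence the 4-float strides checked for vec2[50] and mat2[20]
 *   in the std140 cases.
 * - std430 drops that rounding, so the same arrays shrink to a 2-float
 *   stride.
 * - vec3 is aligned like a vec4 in both layouts, which is why a vec3 placed
 *   at a 3-float offset starts at the next 4-float boundary.
 */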
sizeof(float), "zu"); REQUIRE_CMP(layout.stride, ==, 2 * sizeof(float), "zu"); REQUIRE_CMP(layout.size, ==, 20 * 2 * 2 * sizeof(float), "zu"); for (const struct pl_named_var *nvar = pl_var_glsl_types; nvar->glsl_name; nvar++) { struct pl_var var = nvar->var; REQUIRE_CMP(nvar->glsl_name, ==, pl_var_glsl_type_name(var), "s"); var.dim_a = 100; REQUIRE_CMP(nvar->glsl_name, ==, pl_var_glsl_type_name(var), "s"); } } libplacebo-v7.349.0/src/tests/utils.h000066400000000000000000000376461463457750100174560ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #pragma once #include "common.h" #include #include #include #include #include #include #include #ifdef PL_HAVE_WIN32 #include #define isatty _isatty #define fileno _fileno #else #include #endif static void pl_log_timestamp(void *stream, enum pl_log_level level, const char *msg) { static char letter[] = { [PL_LOG_FATAL] = 'f', [PL_LOG_ERR] = 'e', [PL_LOG_WARN] = 'w', [PL_LOG_INFO] = 'i', [PL_LOG_DEBUG] = 'd', [PL_LOG_TRACE] = 't', }; // Log time relative to the first message static pl_clock_t base = 0; if (!base) base = pl_clock_now(); double secs = pl_clock_diff(pl_clock_now(), base); printf("[%2.3f][%c] %s\n", secs, letter[level], msg); if (level <= PL_LOG_WARN) { // duplicate warnings/errors to stderr fprintf(stderr, "[%2.3f][%c] %s\n", secs, letter[level], msg); fflush(stderr); } } static inline pl_log pl_test_logger(void) { setbuf(stdout, NULL); setbuf(stderr, NULL); return pl_log_create(PL_API_VER, pl_log_params( .log_cb = isatty(fileno(stdout)) ? 
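/* Typical use in these tests (sketch): the logger is created once at the top
 * of main() and destroyed at the end, e.g.
 *
 *     pl_log log = pl_test_logger();
 *     ...
 *     pl_log_destroy(&log);
 *
 * On a TTY the stock pl_log_color callback is used; otherwise the
 * pl_log_timestamp callback above prints timestamps relative to the first
 * message and mirrors warnings/errors to stderr.
 */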
pl_log_color : pl_log_timestamp, .log_level = PL_LOG_DEBUG, )); } #define RANDOM (rand() / (float) RAND_MAX) #define RANDOM_U8 ((uint8_t) (256.0 * rand() / (RAND_MAX + 1.0))) #define SKIP 77 static inline uint16_t random_f16(void) { union { uint16_t u; uint8_t b[2]; } x; do { for (int i = 0; i < PL_ARRAY_SIZE(x.b); i++) x.b[i] = RANDOM_U8; } while ((x.u & 0x7C00) == 0x7C00); /* infinity or nan */ return x.u; } static inline float random_f32(void) { union { float f; uint8_t b[4]; } x; do { for (int i = 0; i < PL_ARRAY_SIZE(x.b); i++) x.b[i] = RANDOM_U8; } while (isnan(x.f) || isinf(x.f)); return x.f; } static inline double random_f64(void) { union { double f; uint8_t b[8]; } x; do { for (int i = 0; i < PL_ARRAY_SIZE(x.b); i++) x.b[i] = RANDOM_U8; } while (isnan(x.f) || isinf(x.f)); return x.f; } #define RANDOM_F16 random_f16() #define RANDOM_F32 random_f32() #define RANDOM_F64 random_f64() // Helpers for performing various checks #define REQUIRE(cond) do \ { \ if (!(cond)) { \ fprintf(stderr, "=== FAILED: '"#cond"' at "__FILE__":%d\n\n", __LINE__);\ exit(1); \ } \ } while (0) #define REQUIRE_CMP(a, op, b, fmt) do \ { \ __typeof__(a) _va = (a), _vb = (b); \ \ if (!(_va op _vb)) { \ fprintf(stderr, "=== FAILED: '"#a" "#op" "#b"' at "__FILE__":%d\n" \ " %-31s = %"fmt"\n" \ " %-31s = %"fmt"\n\n", \ __LINE__, #a, _va, #b, _vb); \ exit(1); \ } \ } while (0) #define REQUIRE_FEQ(a, b, epsilon) do \ { \ float _va = (a); \ float _vb = (b); \ float _delta = (epsilon) * fmax(1.0, fabs(_va)); \ \ if (fabs(_va - _vb) > _delta) { \ fprintf(stderr, "=== FAILED: '"#a" ≈ "#b"' at "__FILE__":%d\n" \ " %-31s = %f\n" \ " %-31s = %f\n" \ " %-31s = %f\n\n", \ __LINE__, #a, _va, #b, _vb, \ "epsilon "#epsilon" -> max delta", _delta); \ exit(1); \ } \ } while (0) #define REQUIRE_STREQ(a, b) do \ { \ const char *_a = (a); \ const char *_b = (b); \ if (strcmp(_a, _b) != 0) { \ fprintf(stderr, "=== FAILED: !strcmp("#a", "#b") at "__FILE__":%d\n" \ " %-31s = %s\n" \ " %-31s = %s\n\n", \ __LINE__, #a, _a, #b, _b); \ exit(1); \ } \ } while (0) static inline void log_array(const uint8_t *a, const uint8_t *ref, size_t off, size_t size) { const int width = 16; unsigned errors = 0; for (size_t n = 0; n < size; n++) { const char *prefix = "", *suffix = ""; bool newline = false; int idx = n % width; if (a[n + off] != ref[n + off]) { prefix = "\033[31;1m"; suffix = "\033[0m"; errors |= 1 << idx; } if (n + 1 == size || idx == width - 1) newline = true; fprintf(stderr, "%s%02"PRIx8"%s%c", prefix, a[n + off], suffix, newline ? '\n' : ' '); if (newline && errors) { for (int i = 0; i <= idx; i++) { const char mark = errors & (1 << i) ? '^' : ' '; fprintf(stderr, "%c%c%c", mark, mark, i == idx ? '\n' : ' '); } errors = 0; } } } static inline void require_memeq(const void *aptr, const void *bptr, size_t size, const char *astr, const char *bstr, const char *sizestr, const char *file, int line) { const uint8_t *a = aptr, *b = bptr; for (size_t i = 0; i < size; i++) { if (a[i] == b[i]) continue; fprintf(stderr, "=== FAILED: memcmp(%s, %s, %s) == 0 at %s:%d\n" "at position %zu: 0x%02"PRIx8" != 0x%02"PRIx8"\n\n", astr, bstr, sizestr, file, line, i, a[i], b[i]); size_t start = i >= 256 ? 
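/* On a mismatch, the code below dumps a window of up to 256 bytes on either
 * side of the first differing offset (clamped to the buffer bounds) for both
 * inputs; log_array() above prints 16 bytes per row, highlights differing
 * bytes and marks them with '^^' on the following row.
 */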
i - 256 : 0; size_t end = PL_MIN(size, i + 256); fprintf(stderr, "%zu bytes of '%s' at offset %zu:\n", end - start, astr, start); log_array(a, b, start, end - start); fprintf(stderr, "\n%zu bytes of '%s' at offset %zu:\n", end - start, bstr, start); log_array(b, a, start, end - start); exit(1); } } #define REQUIRE_MEMEQ(a, b, size) require_memeq(a, b, size, #a, #b, #size, __FILE__, __LINE__) #define REQUIRE_HANDLE(shmem, type) \ switch (type) { \ case PL_HANDLE_FD: \ case PL_HANDLE_DMA_BUF: \ REQUIRE(shmem.handle.fd > -1); \ break; \ case PL_HANDLE_WIN32: \ case PL_HANDLE_WIN32_KMT: \ /* INVALID_HANDLE_VALUE = (-1) */ \ REQUIRE(shmem.handle.handle != (void *)(intptr_t) (-1)); \ /* fallthrough */ \ case PL_HANDLE_MTL_TEX: \ case PL_HANDLE_IOSURFACE: \ REQUIRE(shmem.handle.handle); \ break; \ case PL_HANDLE_HOST_PTR: \ REQUIRE(shmem.handle.ptr); \ break; \ } static const struct pl_av1_grain_data av1_grain_data = { .num_points_y = 6, .points_y = {{0, 4}, {27, 33}, {54, 55}, {67, 61}, {108, 71}, {255, 72}}, .chroma_scaling_from_luma = false, .num_points_uv = {2, 2}, .points_uv = {{{0, 64}, {255, 64}}, {{0, 64}, {255, 64}}}, .scaling_shift = 11, .ar_coeff_lag = 3, .ar_coeffs_y = {4, 1, 3, 0, 1, -3, 8, -3, 7, -23, 1, -25, 0, -10, 6, -17, -4, 53, 36, 5, -5, -17, 8, 66}, .ar_coeffs_uv = { {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 127}, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 127}, }, .ar_coeff_shift = 7, .grain_scale_shift = 0, .uv_mult = {0, 0}, .uv_mult_luma = {64, 64}, .uv_offset = {0, 0}, }; static const uint8_t h274_lower_bound = 10; static const uint8_t h274_upper_bound = 250; static const int16_t h274_values[6] = {16, 12, 14}; static const struct pl_h274_grain_data h274_grain_data = { .model_id = 0, .blending_mode_id = 0, .log2_scale_factor = 2, .component_model_present = {true}, .num_intensity_intervals = {1}, .num_model_values = {3}, .intensity_interval_lower_bound = {&h274_lower_bound}, .intensity_interval_upper_bound = {&h274_upper_bound}, .comp_model_value = {&h274_values}, }; static const struct pl_dovi_metadata dovi_meta = { .nonlinear = {{{1, 0, 0}, {0, 1, 0}, {0, 0, 1}}}, .linear = {{{1, 0, 0}, {0, 1, 0}, {0, 0, 1}}}, .comp = { { .num_pivots = 9, .pivots = {0.0615835786, 0.129032254, 0.353861183, 0.604105592, 0.854349971, 0.890518069, 0.906158328, 0.913978517, 0.92082113}, .method = {0, 0, 0, 0, 0, 0, 0, 0}, .poly_coeffs = { {-0.0488376617, 1.99335372, -2.41716385}, {-0.0141925812, 1.61829138, -1.53397191}, { 0.157061458, 0.63640213, -0.11302495}, {0.25272119, 0.246226311, 0.27281332}, {0.951621532, -1.35507894, 1.18898678}, {6.41251612, -13.6188488, 8.07336903}, {13.467535, -29.1869125, 16.6612244}, {28.2321472, -61.8516273, 34.7264938} }, }, { .num_pivots = 2, .pivots = {0.0, 1.0}, .method = {1}, .mmr_order = {3}, .mmr_constant = {-0.500733018}, .mmr_coeffs = {{ {1.08411026, 3.80807829, 0.0881733894, -3.23097038, -0.409078479, -1.31310081, 2.71297002}, {-0.241833091, -3.57880807, -0.108109117, 3.13198471, 0.869203091, 1.96561158, -9.30871677}, {-0.177356839, 1.48970401, 0.0908923149, -0.510447979, -0.687603354, -0.934977889, 12.3544884}, }}, }, { .num_pivots = 2, .pivots = {0.0, 1.0}, .method = {1}, .mmr_order = {3}, .mmr_constant = {-1.23833287}, .mmr_coeffs = {{ {3.52909589, 0.383154511, 5.50820637, -1.02094889, -6.36386824, 0.194121242, 0.64683497}, {-2.57899785, -0.626081586, -6.05729723, 2.29143763, 9.14653015, -0.0507702827, -4.17724133}, {0.705404401, 0.341412306, 2.98387456, -1.71712542, -4.91501331, 
0.1465137, 6.38665438}, }}, }, }, }; static const uint8_t sRGB_v2_nano_icc[] = { 0x00, 0x00, 0x01, 0x9a, 0x6c, 0x63, 0x6d, 0x73, 0x02, 0x10, 0x00, 0x00, 0x6d, 0x6e, 0x74, 0x72, 0x52, 0x47, 0x42, 0x20, 0x58, 0x59, 0x5a, 0x20, 0x07, 0xe2, 0x00, 0x03, 0x00, 0x14, 0x00, 0x09, 0x00, 0x0e, 0x00, 0x1d, 0x61, 0x63, 0x73, 0x70, 0x4d, 0x53, 0x46, 0x54, 0x00, 0x00, 0x00, 0x00, 0x73, 0x61, 0x77, 0x73, 0x63, 0x74, 0x72, 0x6c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf6, 0xd6, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0xd3, 0x2d, 0x68, 0x61, 0x6e, 0x64, 0xeb, 0x77, 0x1f, 0x3c, 0xaa, 0x53, 0x51, 0x02, 0xe9, 0x3e, 0x28, 0x6c, 0x91, 0x46, 0xae, 0x57, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, 0x64, 0x65, 0x73, 0x63, 0x00, 0x00, 0x00, 0xf0, 0x00, 0x00, 0x00, 0x5f, 0x77, 0x74, 0x70, 0x74, 0x00, 0x00, 0x01, 0x0c, 0x00, 0x00, 0x00, 0x14, 0x72, 0x58, 0x59, 0x5a, 0x00, 0x00, 0x01, 0x20, 0x00, 0x00, 0x00, 0x14, 0x67, 0x58, 0x59, 0x5a, 0x00, 0x00, 0x01, 0x34, 0x00, 0x00, 0x00, 0x14, 0x62, 0x58, 0x59, 0x5a, 0x00, 0x00, 0x01, 0x48, 0x00, 0x00, 0x00, 0x14, 0x72, 0x54, 0x52, 0x43, 0x00, 0x00, 0x01, 0x5c, 0x00, 0x00, 0x00, 0x34, 0x67, 0x54, 0x52, 0x43, 0x00, 0x00, 0x01, 0x5c, 0x00, 0x00, 0x00, 0x34, 0x62, 0x54, 0x52, 0x43, 0x00, 0x00, 0x01, 0x5c, 0x00, 0x00, 0x00, 0x34, 0x63, 0x70, 0x72, 0x74, 0x00, 0x00, 0x01, 0x90, 0x00, 0x00, 0x00, 0x0a, 0x64, 0x65, 0x73, 0x63, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x6e, 0x52, 0x47, 0x42, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x58, 0x59, 0x5a, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf3, 0x54, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x16, 0xc9, 0x58, 0x59, 0x5a, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x6f, 0xa0, 0x00, 0x00, 0x38, 0xf2, 0x00, 0x00, 0x03, 0x8f, 0x58, 0x59, 0x5a, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x62, 0x96, 0x00, 0x00, 0xb7, 0x89, 0x00, 0x00, 0x18, 0xda, 0x58, 0x59, 0x5a, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0xa0, 0x00, 0x00, 0x0f, 0x85, 0x00, 0x00, 0xb6, 0xc4, 0x63, 0x75, 0x72, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x01, 0x07, 0x02, 0xb5, 0x05, 0x6b, 0x09, 0x36, 0x0e, 0x50, 0x14, 0xb1, 0x1c, 0x80, 0x25, 0xc8, 0x30, 0xa1, 0x3d, 0x19, 0x4b, 0x40, 0x5b, 0x27, 0x6c, 0xdb, 0x80, 0x6b, 0x95, 0xe3, 0xad, 0x50, 0xc6, 0xc2, 0xe2, 0x31, 0xff, 0xff, 0x74, 0x65, 0x78, 0x74, 0x00, 0x00, 0x00, 0x00, 0x30, 0x00 }; #define TEST_PROFILE(arr) ((struct pl_icc_profile) { \ .data = (arr), \ .len = PL_ARRAY_SIZE(arr), \ .signature = (uintptr_t) (arr), \ }) libplacebo-v7.349.0/src/tests/vulkan.c000066400000000000000000000177061463457750100176040ustar00rootroot00000000000000#include #include "gpu_tests.h" #include "vulkan/command.h" #include "vulkan/gpu.h" #include #include static void vulkan_interop_tests(pl_vulkan pl_vk, enum pl_handle_type handle_type) { pl_gpu gpu = pl_vk->gpu; printf("testing vulkan interop for handle type 0x%x\n", handle_type); if (gpu->export_caps.buf & handle_type) { pl_buf buf = pl_buf_create(gpu, pl_buf_params( .size = 1024, .export_handle = handle_type, )); REQUIRE(buf); REQUIRE_HANDLE(buf->shared_mem, handle_type); REQUIRE_CMP(buf->shared_mem.size, >=, buf->params.size, "zu"); REQUIRE(pl_buf_export(gpu, buf)); pl_buf_destroy(gpu, &buf); } pl_fmt fmt = pl_find_fmt(gpu, PL_FMT_UNORM, 1, 0, 0, PL_FMT_CAP_BLITTABLE); if (!fmt) return; // Test interop API if (gpu->export_caps.tex & 
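/* Interop pattern exercised below (informal description): when the GPU can
 * export textures with this handle type, an exportable pl_tex is created and
 * handed to a hypothetical external user via pl_vulkan_hold_ex() -- general
 * layout, VK_QUEUE_FAMILY_EXTERNAL, signalled through a timeline semaphore --
 * then immediately handed back with pl_vulkan_release_ex() on the same
 * semaphore value. The texture must remain usable by libplacebo afterwards,
 * which the clear/poll at the end verifies.
 */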
handle_type) { VkSemaphore sem = pl_vulkan_sem_create(gpu, pl_vulkan_sem_params( .type = VK_SEMAPHORE_TYPE_TIMELINE, .initial_value = 0, )); pl_tex tex = pl_tex_create(gpu, pl_tex_params( .w = 32, .h = 32, .format = fmt, .blit_dst = true, .export_handle = handle_type, )); REQUIRE(sem); REQUIRE(tex); REQUIRE(pl_vulkan_hold_ex(gpu, pl_vulkan_hold_params( .tex = tex, .layout = VK_IMAGE_LAYOUT_GENERAL, .qf = VK_QUEUE_FAMILY_EXTERNAL, .semaphore = { sem, 1 }, ))); pl_vulkan_release_ex(gpu, pl_vulkan_release_params( .tex = tex, .layout = VK_IMAGE_LAYOUT_GENERAL, .qf = VK_QUEUE_FAMILY_EXTERNAL, .semaphore = { sem, 1 }, )); pl_tex_clear(gpu, tex, (float[4]){0}); pl_gpu_finish(gpu); REQUIRE(!pl_tex_poll(gpu, tex, 0)); pl_vulkan_sem_destroy(gpu, &sem); pl_tex_destroy(gpu, &tex); } } static void vulkan_swapchain_tests(pl_vulkan vk, VkSurfaceKHR surf) { if (!surf) return; printf("testing vulkan swapchain\n"); pl_gpu gpu = vk->gpu; pl_swapchain sw; sw = pl_vulkan_create_swapchain(vk, pl_vulkan_swapchain_params( .surface = surf, )); REQUIRE(sw); // Attempt actually initializing the swapchain int w = 640, h = 480; REQUIRE(pl_swapchain_resize(sw, &w, &h)); for (int i = 0; i < 10; i++) { struct pl_swapchain_frame frame; REQUIRE(pl_swapchain_start_frame(sw, &frame)); if (frame.fbo->params.blit_dst) pl_tex_clear(gpu, frame.fbo, (float[4]){0}); // TODO: test this with an actual pl_renderer instance struct pl_frame target; pl_frame_from_swapchain(&target, &frame); REQUIRE(pl_swapchain_submit_frame(sw)); pl_swapchain_swap_buffers(sw); // Try resizing the swapchain in the middle of rendering if (i == 5) { w = 320; h = 240; REQUIRE(pl_swapchain_resize(sw, &w, &h)); } } pl_swapchain_destroy(&sw); } int main() { pl_log log = pl_test_logger(); pl_vk_inst inst = pl_vk_inst_create(log, pl_vk_inst_params( .debug = true, .debug_extra = true, .get_proc_addr = vkGetInstanceProcAddr, .opt_extensions = (const char *[]){ VK_KHR_SURFACE_EXTENSION_NAME, VK_EXT_HEADLESS_SURFACE_EXTENSION_NAME, }, .num_opt_extensions = 2, )); if (!inst) return SKIP; PL_VK_LOAD_FUN(inst->instance, EnumeratePhysicalDevices, inst->get_proc_addr); PL_VK_LOAD_FUN(inst->instance, GetPhysicalDeviceProperties, inst->get_proc_addr); uint32_t num = 0; EnumeratePhysicalDevices(inst->instance, &num, NULL); if (!num) return SKIP; VkPhysicalDevice *devices = calloc(num, sizeof(*devices)); if (!devices) return 1; EnumeratePhysicalDevices(inst->instance, &num, devices); VkSurfaceKHR surf = VK_NULL_HANDLE; PL_VK_LOAD_FUN(inst->instance, CreateHeadlessSurfaceEXT, inst->get_proc_addr); if (CreateHeadlessSurfaceEXT) { VkHeadlessSurfaceCreateInfoEXT info = { .sType = VK_STRUCTURE_TYPE_HEADLESS_SURFACE_CREATE_INFO_EXT, }; VkResult res = CreateHeadlessSurfaceEXT(inst->instance, &info, NULL, &surf); REQUIRE_CMP(res, ==, VK_SUCCESS, "u"); } // Make sure choosing any device works VkPhysicalDevice dev; dev = pl_vulkan_choose_device(log, pl_vulkan_device_params( .instance = inst->instance, .get_proc_addr = inst->get_proc_addr, .allow_software = true, .surface = surf, )); if (!dev) return SKIP; // Test all attached devices for (int i = 0; i < num; i++) { VkPhysicalDeviceProperties props = {0}; GetPhysicalDeviceProperties(devices[i], &props); #ifndef CI_ALLOW_SW if (props.deviceType == VK_PHYSICAL_DEVICE_TYPE_CPU) { printf("Skipping device %d: %s\n", i, props.deviceName); continue; } #endif printf("Testing device %d: %s\n", i, props.deviceName); // Make sure we can choose this device by name dev = pl_vulkan_choose_device(log, pl_vulkan_device_params( .instance = 
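/* Per-device test plan (informal): pl_vulkan_choose_device() is invoked again
 * with .device_name set and must return the same VkPhysicalDevice that was
 * enumerated directly. The device is then created with queue_count = 8 to
 * stress inter-queue synchronization, run through the shader, swapchain and
 * interop tests, and finally re-created with async compute/transfer disabled
 * so the export/import paths are also covered without the extra queues.
 */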
inst->instance, .get_proc_addr = inst->get_proc_addr, .device_name = props.deviceName, )); REQUIRE_CMP(dev, ==, devices[i], "p"); struct pl_vulkan_params params = *pl_vulkan_params( .instance = inst->instance, .get_proc_addr = inst->get_proc_addr, .device = devices[i], .queue_count = 8, // test inter-queue stuff .surface = surf, ); pl_vulkan vk = pl_vulkan_create(log, ¶ms); if (!vk) continue; gpu_shader_tests(vk->gpu); vulkan_swapchain_tests(vk, surf); // Print heap statistics pl_vk_print_heap(vk->gpu, PL_LOG_DEBUG); // Test importing this context via the vulkan interop API pl_vulkan vk2 = pl_vulkan_import(log, pl_vulkan_import_params( .instance = vk->instance, .get_proc_addr = inst->get_proc_addr, .phys_device = vk->phys_device, .device = vk->device, .extensions = vk->extensions, .num_extensions = vk->num_extensions, .features = vk->features, .queue_graphics = vk->queue_graphics, .queue_compute = vk->queue_compute, .queue_transfer = vk->queue_transfer, )); REQUIRE(vk2); pl_vulkan_destroy(&vk2); // Run these tests last because they disable some validation layers #ifdef PL_HAVE_UNIX vulkan_interop_tests(vk, PL_HANDLE_FD); vulkan_interop_tests(vk, PL_HANDLE_DMA_BUF); #endif #ifdef PL_HAVE_WIN32 vulkan_interop_tests(vk, PL_HANDLE_WIN32); vulkan_interop_tests(vk, PL_HANDLE_WIN32_KMT); #endif gpu_interop_tests(vk->gpu); pl_vulkan_destroy(&vk); // Re-run the same export/import tests with async queues disabled params.async_compute = false; params.async_transfer = false; vk = pl_vulkan_create(log, ¶ms); REQUIRE(vk); // it succeeded the first time #ifdef PL_HAVE_UNIX vulkan_interop_tests(vk, PL_HANDLE_FD); vulkan_interop_tests(vk, PL_HANDLE_DMA_BUF); #endif #ifdef PL_HAVE_WIN32 vulkan_interop_tests(vk, PL_HANDLE_WIN32); vulkan_interop_tests(vk, PL_HANDLE_WIN32_KMT); #endif gpu_interop_tests(vk->gpu); pl_vulkan_destroy(&vk); // Reduce log spam after first tested device pl_log_level_update(log, PL_LOG_INFO); } if (surf) vkDestroySurfaceKHR(inst->instance, surf, NULL); pl_vk_inst_destroy(&inst); pl_log_destroy(&log); free(devices); } libplacebo-v7.349.0/src/tone_mapping.c000066400000000000000000000644471463457750100176260ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . 
*/ #include #include "common.h" #include #define fclampf(x, lo, hi) fminf(fmaxf(x, lo), hi) static void fix_constants(struct pl_tone_map_constants *c) { const float eps = 1e-6f; c->knee_adaptation = fclampf(c->knee_adaptation, 0.0f, 1.0f); c->knee_minimum = fclampf(c->knee_minimum, eps, 0.5f - eps); c->knee_maximum = fclampf(c->knee_maximum, 0.5f + eps, 1.0f - eps); c->knee_default = fclampf(c->knee_default, c->knee_minimum, c->knee_maximum); c->knee_offset = fclampf(c->knee_offset, 0.5f, 2.0f); c->slope_tuning = fclampf(c->slope_tuning, 0.0f, 10.0f); c->slope_offset = fclampf(c->slope_offset, 0.0f, 1.0f); c->spline_contrast = fclampf(c->spline_contrast, 0.0f, 1.5f); c->reinhard_contrast = fclampf(c->reinhard_contrast, eps, 1.0f - eps); c->linear_knee = fclampf(c->linear_knee, eps, 1.0f - eps); c->exposure = fclampf(c->exposure, eps, 10.0f); } static inline bool constants_equal(const struct pl_tone_map_constants *a, const struct pl_tone_map_constants *b) { pl_static_assert(sizeof(*a) % sizeof(float) == 0); return !memcmp(a, b, sizeof(*a)); } bool pl_tone_map_params_equal(const struct pl_tone_map_params *a, const struct pl_tone_map_params *b) { return a->function == b->function && a->param == b->param && a->input_scaling == b->input_scaling && a->output_scaling == b->output_scaling && a->lut_size == b->lut_size && a->input_min == b->input_min && a->input_max == b->input_max && a->input_avg == b->input_avg && a->output_min == b->output_min && a->output_max == b->output_max && constants_equal(&a->constants, &b->constants) && pl_hdr_metadata_equal(&a->hdr, &b->hdr); } bool pl_tone_map_params_noop(const struct pl_tone_map_params *p) { float in_min = pl_hdr_rescale(p->input_scaling, PL_HDR_NITS, p->input_min); float in_max = pl_hdr_rescale(p->input_scaling, PL_HDR_NITS, p->input_max); float out_min = pl_hdr_rescale(p->output_scaling, PL_HDR_NITS, p->output_min); float out_max = pl_hdr_rescale(p->output_scaling, PL_HDR_NITS, p->output_max); bool can_inverse = p->function->map_inverse; return fabs(in_min - out_min) < 1e-4 && // no BPC in_max < out_max + 1e-2 && // no range reduction (out_max < in_max + 1e-2 || !can_inverse); // no inverse tone-mapping } void pl_tone_map_params_infer(struct pl_tone_map_params *par) { if (!par->function) par->function = &pl_tone_map_clip; if (par->param) { // Backwards compatibility for older API if (par->function == &pl_tone_map_st2094_40 || par->function == &pl_tone_map_st2094_10) par->constants.knee_adaptation = par->param; if (par->function == &pl_tone_map_bt2390) par->constants.knee_offset = par->param; if (par->function == &pl_tone_map_spline) par->constants.spline_contrast = par->param; if (par->function == &pl_tone_map_reinhard) par->constants.reinhard_contrast = par->param; if (par->function == &pl_tone_map_mobius || par->function == &pl_tone_map_gamma) par->constants.linear_knee = par->param; if (par->function == &pl_tone_map_linear || par->function == &pl_tone_map_linear_light) par->constants.exposure = par->param; } fix_constants(&par->constants); // Constrain the input peak to be no less than target SDR white float sdr = pl_hdr_rescale(par->output_scaling, par->input_scaling, par->output_max); sdr = fminf(sdr, pl_hdr_rescale(PL_HDR_NITS, par->input_scaling, PL_COLOR_SDR_WHITE)); par->input_max = fmaxf(par->input_max, sdr); // Constrain the output peak if function does not support inverse mapping if (!par->function->map_inverse) par->output_max = fminf(par->output_max, par->input_max); } // Infer params and rescale to function scaling static struct 
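/* The fix_params() helper being defined here normalizes a caller's
 * parameters before dispatch: it runs pl_tone_map_params_infer() and then
 * rescales input_min/max/avg and output_min/max from the caller's
 * input_scaling / output_scaling into the curve's native scaling (the
 * function's own `scaling` field), so each map() implementation only sees
 * values in the units it was written for. For example, a curve declared with
 * .scaling = PL_HDR_PQ receives its peak/black levels as PQ values even if
 * the caller passed nits.
 */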
pl_tone_map_params fix_params(const struct pl_tone_map_params *params) { struct pl_tone_map_params fixed = *params; pl_tone_map_params_infer(&fixed); const struct pl_tone_map_function *fun = params->function; fixed.input_scaling = fun->scaling; fixed.output_scaling = fun->scaling; fixed.input_min = pl_hdr_rescale(params->input_scaling, fun->scaling, fixed.input_min); fixed.input_max = pl_hdr_rescale(params->input_scaling, fun->scaling, fixed.input_max); fixed.input_avg = pl_hdr_rescale(params->input_scaling, fun->scaling, fixed.input_avg); fixed.output_min = pl_hdr_rescale(params->output_scaling, fun->scaling, fixed.output_min); fixed.output_max = pl_hdr_rescale(params->output_scaling, fun->scaling, fixed.output_max); return fixed; } #define FOREACH_LUT(lut, V) \ for (float *_iter = lut, *_end = lut + params->lut_size, V; \ _iter < _end && ( V = *_iter, 1 ); *_iter++ = V) static void map_lut(float *lut, const struct pl_tone_map_params *params) { if (params->output_max > params->input_max + 1e-4) { // Inverse tone-mapping pl_assert(params->function->map_inverse); params->function->map_inverse(lut, params); } else { // Forward tone-mapping params->function->map(lut, params); } } void pl_tone_map_generate(float *out, const struct pl_tone_map_params *params) { struct pl_tone_map_params fixed = fix_params(params); // Generate input values evenly spaced in `params->input_scaling` for (size_t i = 0; i < params->lut_size; i++) { float x = (float) i / (params->lut_size - 1); x = PL_MIX(params->input_min, params->input_max, x); out[i] = pl_hdr_rescale(params->input_scaling, fixed.function->scaling, x); } map_lut(out, &fixed); // Sanitize outputs and adapt back to `params->scaling` for (size_t i = 0; i < params->lut_size; i++) { float x = PL_CLAMP(out[i], fixed.output_min, fixed.output_max); out[i] = pl_hdr_rescale(fixed.function->scaling, params->output_scaling, x); } } float pl_tone_map_sample(float x, const struct pl_tone_map_params *params) { struct pl_tone_map_params fixed = fix_params(params); fixed.lut_size = 1; x = PL_CLAMP(x, params->input_min, params->input_max); x = pl_hdr_rescale(params->input_scaling, fixed.function->scaling, x); map_lut(&x, &fixed); x = PL_CLAMP(x, fixed.output_min, fixed.output_max); x = pl_hdr_rescale(fixed.function->scaling, params->output_scaling, x); return x; } // Rescale from input-absolute to input-relative static inline float rescale_in(float x, const struct pl_tone_map_params *params) { return (x - params->input_min) / (params->input_max - params->input_min); } // Rescale from input-absolute to output-relative static inline float rescale(float x, const struct pl_tone_map_params *params) { return (x - params->input_min) / (params->output_max - params->output_min); } // Rescale from output-relative to output-absolute static inline float rescale_out(float x, const struct pl_tone_map_params *params) { return x * (params->output_max - params->output_min) + params->output_min; } static inline float bt1886_eotf(float x, float min, float max) { const float lb = powf(min, 1/2.4f); const float lw = powf(max, 1/2.4f); return powf((lw - lb) * x + lb, 2.4f); } static inline float bt1886_oetf(float x, float min, float max) { const float lb = powf(min, 1/2.4f); const float lw = powf(max, 1/2.4f); return (powf(x, 1/2.4f) - lb) / (lw - lb); } static void noop(float *lut, const struct pl_tone_map_params *params) { return; } const struct pl_tone_map_function pl_tone_map_clip = { .name = "clip", .description = "No tone mapping (clip)", .map = noop, .map_inverse = noop, }; // 
Helper function to pick a knee point (for suitable methods) based on the // HDR10+ brightness metadata and scene brightness average matching. // // Inspired by SMPTE ST2094-10, with some modifications static void st2094_pick_knee(float *out_src_knee, float *out_dst_knee, const struct pl_tone_map_params *params) { const float src_min = pl_hdr_rescale(params->input_scaling, PL_HDR_PQ, params->input_min); const float src_max = pl_hdr_rescale(params->input_scaling, PL_HDR_PQ, params->input_max); const float src_avg = pl_hdr_rescale(params->input_scaling, PL_HDR_PQ, params->input_avg); const float dst_min = pl_hdr_rescale(params->output_scaling, PL_HDR_PQ, params->output_min); const float dst_max = pl_hdr_rescale(params->output_scaling, PL_HDR_PQ, params->output_max); const float min_knee = params->constants.knee_minimum; const float max_knee = params->constants.knee_maximum; const float def_knee = params->constants.knee_default; const float src_knee_min = PL_MIX(src_min, src_max, min_knee); const float src_knee_max = PL_MIX(src_min, src_max, max_knee); const float dst_knee_min = PL_MIX(dst_min, dst_max, min_knee); const float dst_knee_max = PL_MIX(dst_min, dst_max, max_knee); // Choose source knee based on source scene brightness float src_knee = PL_DEF(src_avg, PL_MIX(src_min, src_max, def_knee)); src_knee = fclampf(src_knee, src_knee_min, src_knee_max); // Choose target adaptation point based on linearly re-scaling source knee float target = (src_knee - src_min) / (src_max - src_min); float adapted = PL_MIX(dst_min, dst_max, target); // Choose the destnation knee by picking the perceptual adaptation point // between the source knee and the desired target. This moves the knee // point, on the vertical axis, closer to the 1:1 (neutral) line. // // Adjust the adaptation strength towards 1 based on how close the knee // point is to its extreme values (min/max knee) float tuning = 1.0f - pl_smoothstep(max_knee, def_knee, target) * pl_smoothstep(min_knee, def_knee, target); float adaptation = PL_MIX(params->constants.knee_adaptation, 1.0f, tuning); float dst_knee = PL_MIX(src_knee, adapted, adaptation); dst_knee = fclampf(dst_knee, dst_knee_min, dst_knee_max); *out_src_knee = pl_hdr_rescale(PL_HDR_PQ, params->input_scaling, src_knee); *out_dst_knee = pl_hdr_rescale(PL_HDR_PQ, params->output_scaling, dst_knee); } // Pascal's triangle static const uint16_t binom[17][17] = { {1}, {1,1}, {1,2,1}, {1,3,3,1}, {1,4,6,4,1}, {1,5,10,10,5,1}, {1,6,15,20,15,6,1}, {1,7,21,35,35,21,7,1}, {1,8,28,56,70,56,28,8,1}, {1,9,36,84,126,126,84,36,9,1}, {1,10,45,120,210,252,210,120,45,10,1}, {1,11,55,165,330,462,462,330,165,55,11,1}, {1,12,66,220,495,792,924,792,495,220,66,12,1}, {1,13,78,286,715,1287,1716,1716,1287,715,286,78,13,1}, {1,14,91,364,1001,2002,3003,3432,3003,2002,1001,364,91,14,1}, {1,15,105,455,1365,3003,5005,6435,6435,5005,3003,1365,455,105,15,1}, {1,16,120,560,1820,4368,8008,11440,12870,11440,8008,4368,1820,560,120,16,1}, }; static inline float st2094_intercept(uint8_t N, float Kx, float Ky) { if (Kx <= 0 || Ky >= 1) return 1.0f / N; const float slope = Ky / Kx * (1 - Kx) / (1 - Ky); return fminf(slope / N, 1.0f); } static void st2094_40(float *lut, const struct pl_tone_map_params *params) { const float D = params->output_max; // Allocate space for the adjusted bezier control points, plus endpoints float P[17], Kx, Ky, T; uint8_t N; if (params->hdr.ootf.num_anchors) { // Use bezier curve from metadata Kx = PL_CLAMP(params->hdr.ootf.knee_x, 0, 1); Ky = PL_CLAMP(params->hdr.ootf.knee_y, 0, 1); T = 
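/* Reference shape of the ST 2094-40 curve evaluated further below: with knee
 * (Kx, Ky) and N+1 Bernstein control points P[0..N] (P[0] = 0, P[N] = 1), a
 * normalized input x maps to
 *
 *     y = x * Ky / Kx                                          for x <= Kx
 *     y = Ky + (1 - Ky) * sum(p = 0..N) C(N,p) t^p (1-t)^(N-p) P[p],
 *         with t = (x - Kx) / (1 - Kx)                         otherwise
 *
 * where C(N,p) is the binomial coefficient from the table above. Without
 * HDR10+ metadata, the knee comes from st2094_pick_knee() and the anchors
 * default to a near-clip curve whose first anchor matches the knee slope.
 */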
PL_CLAMP(params->hdr.ootf.target_luma, params->input_min, params->input_max); N = params->hdr.ootf.num_anchors + 1; pl_assert(N < PL_ARRAY_SIZE(P)); memcpy(P + 1, params->hdr.ootf.anchors, (N - 1) * sizeof(*P)); P[0] = 0.0f; P[N] = 1.0f; } else { // Missing metadata, default to simple brightness matching float src_knee, dst_knee; st2094_pick_knee(&src_knee, &dst_knee, params); Kx = src_knee / params->input_max; Ky = dst_knee / params->output_max; // Solve spline to match slope at knee intercept const float slope = Ky / Kx * (1 - Kx) / (1 - Ky); N = PL_CLAMP((int) ceilf(slope), 2, PL_ARRAY_SIZE(P) - 1); P[0] = 0.0f; P[1] = st2094_intercept(N, Kx, Ky); for (int i = 2; i <= N; i++) P[i] = 1.0f; T = D; } if (D < T) { // Output display darker than OOTF target, make brighter const float Dmin = 0.0f, u = fmaxf(0.0f, (D - Dmin) / (T - Dmin)); // Scale down the knee point to make more room for the OOTF Kx *= u; Ky *= u; // Make the slope of the knee more closely approximate a clip(), // constrained to avoid exploding P[1] const float beta = N * Kx / (1 - Kx); const float Kxy = fminf(Kx * params->input_max / D, beta / (beta + 1)); Ky = PL_MIX(Kxy, Ky, u); for (int p = 2; p <= N; p++) P[p] = PL_MIX(1.0f, P[p], u); // Make the OOTF intercept linear as D -> Dmin P[1] = PL_MIX(st2094_intercept(N, Kx, Ky), P[1], u); } else if (D > T) { // Output display brighter than OOTF target, make more linear pl_assert(params->input_max > T); const float w = powf(1 - (D - T) / (params->input_max - T), 1.4f); // Constrain the slope of the input knee to prevent it from // exploding and making the picture way too bright Ky *= T / D; // Make the slope of the knee more linear by solving for f(Kx) = Kx float Kxy = Kx * D / params->input_max; Ky = PL_MIX(Kxy, Ky, w); for (int p = 2; p < N; p++) { float anchor_lin = (float) p / N; P[p] = PL_MIX(anchor_lin, P[p], w); } // Make the OOTF intercept linear as D -> input_max P[1] = PL_MIX(st2094_intercept(N, Kx, Ky), P[1], w); } pl_assert(Kx >= 0 && Kx <= 1); pl_assert(Ky >= 0 && Ky <= 1); FOREACH_LUT(lut, x) { x = bt1886_oetf(x, params->input_min, params->input_max); x = bt1886_eotf(x, 0.0f, 1.0f); if (x <= Kx && Kx) { // Linear section x *= Ky / Kx; } else { // Bezier section const float t = (x - Kx) / (1 - Kx); x = 0; // Bn for (uint8_t p = 0; p <= N; p++) x += binom[N][p] * powf(t, p) * powf(1 - t, N - p) * P[p]; x = Ky + (1 - Ky) * x; } x = bt1886_oetf(x, 0.0f, 1.0f); x = bt1886_eotf(x, params->output_min, params->output_max); } } const struct pl_tone_map_function pl_tone_map_st2094_40 = { .name = "st2094-40", .description = "SMPTE ST 2094-40 Annex B", .param_desc = "Knee point target", .param_min = 0.00f, .param_def = 0.70f, .param_max = 1.00f, .scaling = PL_HDR_NITS, .map = st2094_40, }; static void st2094_10(float *lut, const struct pl_tone_map_params *params) { float src_knee, dst_knee; st2094_pick_knee(&src_knee, &dst_knee, params); const float x1 = params->input_min; const float x3 = params->input_max; const float x2 = src_knee; const float y1 = params->output_min; const float y3 = params->output_max; const float y2 = dst_knee; const pl_matrix3x3 cmat = {{ { x2*x3*(y2 - y3), x1*x3*(y3 - y1), x1*x2*(y1 - y2) }, { x3*y3 - x2*y2, x1*y1 - x3*y3, x2*y2 - x1*y1 }, { x3 - x2, x1 - x3, x2 - x1 }, }}; float coeffs[3] = { y1, y2, y3 }; pl_matrix3x3_apply(&cmat, coeffs); const float k = 1.0 / (x3*y3*(x1 - x2) + x2*y2*(x3 - x1) + x1*y1*(x2 - x3)); const float c1 = k * coeffs[0]; const float c2 = k * coeffs[1]; const float c3 = k * coeffs[2]; FOREACH_LUT(lut, x) x = (c1 + c2 * x) / 
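/* The ST 2094-10 tone curve evaluated here is the rational function
 *
 *     y(x) = (c1 + c2 x) / (1 + c3 x)
 *
 * with c1, c2, c3 obtained from the 3x3 solve above so that the curve passes
 * exactly through (x1, y1) = (input_min, output_min),
 * (x2, y2) = (src_knee, dst_knee) and (x3, y3) = (input_max, output_max).
 */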
(1 + c3 * x); } const struct pl_tone_map_function pl_tone_map_st2094_10 = { .name = "st2094-10", .description = "SMPTE ST 2094-10 Annex B.2", .param_desc = "Knee point target", .param_min = 0.00f, .param_def = 0.70f, .param_max = 1.00f, .scaling = PL_HDR_NITS, .map = st2094_10, }; static void bt2390(float *lut, const struct pl_tone_map_params *params) { const float minLum = rescale_in(params->output_min, params); const float maxLum = rescale_in(params->output_max, params); const float offset = params->constants.knee_offset; const float ks = (1 + offset) * maxLum - offset; const float bp = minLum > 0 ? fminf(1 / minLum, 4) : 4; const float gain_inv = 1 + minLum / maxLum * powf(1 - maxLum, bp); const float gain = maxLum < 1 ? 1 / gain_inv : 1; FOREACH_LUT(lut, x) { x = rescale_in(x, params); // Piece-wise hermite spline if (ks < 1) { float tb = (x - ks) / (1 - ks); float tb2 = tb * tb; float tb3 = tb2 * tb; float pb = (2 * tb3 - 3 * tb2 + 1) * ks + (tb3 - 2 * tb2 + tb) * (1 - ks) + (-2 * tb3 + 3 * tb2) * maxLum; x = x < ks ? x : pb; } // Black point adaptation if (x < 1) { x += minLum * powf(1 - x, bp); x = gain * (x - minLum) + minLum; } x = x * (params->input_max - params->input_min) + params->input_min; } } const struct pl_tone_map_function pl_tone_map_bt2390 = { .name = "bt2390", .description = "ITU-R BT.2390 EETF", .scaling = PL_HDR_PQ, .param_desc = "Knee offset", .param_min = 0.50, .param_def = 1.00, .param_max = 2.00, .map = bt2390, }; static void bt2446a(float *lut, const struct pl_tone_map_params *params) { const float phdr = 1 + 32 * powf(params->input_max / 10000, 1/2.4f); const float psdr = 1 + 32 * powf(params->output_max / 10000, 1/2.4f); FOREACH_LUT(lut, x) { x = powf(rescale_in(x, params), 1/2.4f); x = logf(1 + (phdr - 1) * x) / logf(phdr); if (x <= 0.7399f) { x = 1.0770f * x; } else if (x < 0.9909f) { x = (-1.1510f * x + 2.7811f) * x - 0.6302f; } else { x = 0.5f * x + 0.5f; } x = (powf(psdr, x) - 1) / (psdr - 1); x = bt1886_eotf(x, params->output_min, params->output_max); } } static void bt2446a_inv(float *lut, const struct pl_tone_map_params *params) { FOREACH_LUT(lut, x) { x = bt1886_oetf(x, params->input_min, params->input_max); x *= 255.0; if (x > 70) { x = powf(x, (2.8305e-6f * x - 7.4622e-4f) * x + 1.2528f); } else { x = powf(x, (1.8712e-5f * x - 2.7334e-3f) * x + 1.3141f); } x = powf(x / 1000, 2.4f); x = rescale_out(x, params); } } const struct pl_tone_map_function pl_tone_map_bt2446a = { .name = "bt2446a", .description = "ITU-R BT.2446 Method A", .scaling = PL_HDR_NITS, .map = bt2446a, .map_inverse = bt2446a_inv, }; static void spline(float *lut, const struct pl_tone_map_params *params) { float src_pivot, dst_pivot; st2094_pick_knee(&src_pivot, &dst_pivot, params); // Solve for linear knee (Pa = 0) float slope = (dst_pivot - params->output_min) / (src_pivot - params->input_min); // Tune the slope at the knee point slightly: raise it to a user-provided // gamma exponent, multiplied by an extra tuning coefficient designed to // make the slope closer to 1.0 when the difference in peaks is low, and // closer to linear when the difference between peaks is high. 
float ratio = params->input_max / params->output_max - 1.0f; ratio = fclampf(params->constants.slope_tuning * ratio, params->constants.slope_offset, 1.0f + params->constants.slope_offset); slope = powf(slope, (1.0f - params->constants.spline_contrast) * ratio); // Normalize everything the pivot to make the math easier const float in_min = params->input_min - src_pivot; const float in_max = params->input_max - src_pivot; const float out_min = params->output_min - dst_pivot; const float out_max = params->output_max - dst_pivot; // Solve P of order 2 for: // P(in_min) = out_min // P'(0.0) = slope // P(0.0) = 0.0 const float Pa = (out_min - slope * in_min) / (in_min * in_min); const float Pb = slope; // Solve Q of order 3 for: // Q(in_max) = out_max // Q''(in_max) = 0.0 // Q(0.0) = 0.0 // Q'(0.0) = slope const float t = 2 * in_max * in_max; const float Qa = (slope * in_max - out_max) / (in_max * t); const float Qb = -3 * (slope * in_max - out_max) / t; const float Qc = slope; FOREACH_LUT(lut, x) { x -= src_pivot; x = x > 0 ? ((Qa * x + Qb) * x + Qc) * x : (Pa * x + Pb) * x; x += dst_pivot; } } const struct pl_tone_map_function pl_tone_map_spline = { .name = "spline", .description = "Single-pivot polynomial spline", .param_desc = "Contrast", .param_min = 0.00f, .param_def = 0.50f, .param_max = 1.50f, .scaling = PL_HDR_PQ, .map = spline, .map_inverse = spline, }; static void reinhard(float *lut, const struct pl_tone_map_params *params) { const float peak = rescale(params->input_max, params), contrast = params->constants.reinhard_contrast, offset = (1.0 - contrast) / contrast, scale = (peak + offset) / peak; FOREACH_LUT(lut, x) { x = rescale(x, params); x = x / (x + offset); x *= scale; x = rescale_out(x, params); } } const struct pl_tone_map_function pl_tone_map_reinhard = { .name = "reinhard", .description = "Reinhard", .param_desc = "Contrast", .param_min = 0.001, .param_def = 0.50, .param_max = 0.99, .map = reinhard, }; static void mobius(float *lut, const struct pl_tone_map_params *params) { const float peak = rescale(params->input_max, params), j = params->constants.linear_knee; // Solve for M(j) = j; M(peak) = 1.0; M'(j) = 1.0 // where M(x) = scale * (x+a)/(x+b) const float a = -j*j * (peak - 1.0f) / (j*j - 2.0f * j + peak); const float b = (j*j - 2.0f * j * peak + peak) / fmaxf(1e-6f, peak - 1.0f); const float scale = (b*b + 2.0f * b*j + j*j) / (b - a); FOREACH_LUT(lut, x) { x = rescale(x, params); x = x <= j ? 
x : scale * (x + a) / (x + b); x = rescale_out(x, params); } } const struct pl_tone_map_function pl_tone_map_mobius = { .name = "mobius", .description = "Mobius", .param_desc = "Knee point", .param_min = 0.00, .param_def = 0.30, .param_max = 0.99, .map = mobius, }; static inline float hable(float x) { const float A = 0.15, B = 0.50, C = 0.10, D = 0.20, E = 0.02, F = 0.30; return ((x * (A*x + C*B) + D*E) / (x * (A*x + B) + D*F)) - E/F; } static void hable_map(float *lut, const struct pl_tone_map_params *params) { const float peak = params->input_max / params->output_max, scale = 1.0f / hable(peak); FOREACH_LUT(lut, x) { x = bt1886_oetf(x, params->input_min, params->input_max); x = bt1886_eotf(x, 0, peak); x = scale * hable(x); x = bt1886_oetf(x, 0, 1); x = bt1886_eotf(x, params->output_min, params->output_max); } } const struct pl_tone_map_function pl_tone_map_hable = { .name = "hable", .description = "Filmic tone-mapping (Hable)", .map = hable_map, }; static void gamma_map(float *lut, const struct pl_tone_map_params *params) { const float peak = rescale(params->input_max, params), cutoff = params->constants.linear_knee, gamma = logf(cutoff) / logf(cutoff / peak); FOREACH_LUT(lut, x) { x = rescale(x, params); x = x > cutoff ? powf(x / peak, gamma) : x; x = rescale_out(x, params); } } const struct pl_tone_map_function pl_tone_map_gamma = { .name = "gamma", .description = "Gamma function with knee", .param_desc = "Knee point", .param_min = 0.001, .param_def = 0.30, .param_max = 1.00, .map = gamma_map, }; static void linear(float *lut, const struct pl_tone_map_params *params) { const float gain = params->constants.exposure; FOREACH_LUT(lut, x) { x = rescale_in(x, params); x *= gain; x = rescale_out(x, params); } } const struct pl_tone_map_function pl_tone_map_linear = { .name = "linear", .description = "Perceptually linear stretch", .param_desc = "Exposure", .param_min = 0.001, .param_def = 1.00, .param_max = 10.0, .scaling = PL_HDR_PQ, .map = linear, .map_inverse = linear, }; const struct pl_tone_map_function pl_tone_map_linear_light = { .name = "linearlight", .description = "Linear light stretch", .param_desc = "Exposure", .param_min = 0.001, .param_def = 1.00, .param_max = 10.0, .scaling = PL_HDR_NORM, .map = linear, .map_inverse = linear, }; const struct pl_tone_map_function * const pl_tone_map_functions[] = { &pl_tone_map_clip, &pl_tone_map_st2094_40, &pl_tone_map_st2094_10, &pl_tone_map_bt2390, &pl_tone_map_bt2446a, &pl_tone_map_spline, &pl_tone_map_reinhard, &pl_tone_map_mobius, &pl_tone_map_hable, &pl_tone_map_gamma, &pl_tone_map_linear, &pl_tone_map_linear_light, NULL }; const int pl_num_tone_map_functions = PL_ARRAY_SIZE(pl_tone_map_functions) - 1; const struct pl_tone_map_function *pl_find_tone_map_function(const char *name) { for (int i = 0; i < pl_num_tone_map_functions; i++) { if (strcmp(name, pl_tone_map_functions[i]->name) == 0) return pl_tone_map_functions[i]; } return NULL; } libplacebo-v7.349.0/src/ucrt_math.def000066400000000000000000000040221463457750100174270ustar00rootroot00000000000000LIBRARY api-ms-win-crt-math-l1-1-0 EXPORTS _Cbuild _Cmulcc _Cmulcr _FCbuild _FCmulcc _FCmulcr _LCbuild _LCmulcc _LCmulcr __setusermatherr _cabs _chgsign _chgsignf _copysign _copysignf _d_int _dclass _dexp _dlog _dnorm _dpcomp _dpoly _dscale _dsign _dsin _dtest _dunscale _except1 _fd_int _fdclass _fdexp _fdlog _fdnorm _fdopen _fdpcomp _fdpoly _fdscale _fdsign _fdsin _fdtest _fdunscale _finite _finitef _fpclass _fpclassf _get_FMA3_enable _hypot _hypotf _isnan _isnanf _j0 _j1 _jn _ld_int 
_ldclass _ldexp _ldlog _ldpcomp _ldpoly _ldscale _ldsign _ldsin _ldtest _ldunscale _logb _logbf _nextafter _nextafterf _scalb _scalbf _set_FMA3_enable _y0 _y1 _yn acos acosf acosh acoshf acoshl asin asinf asinh asinhf asinhl atan atan2 atan2f atanf atanh atanhf atanhl cabs cabsf cabsl cacos cacosf cacosh cacoshf cacoshl cacosl carg cargf cargl casin casinf casinh casinhf casinhl casinl catan catanf catanh catanhf catanhl catanl cbrt cbrtf cbrtl ccos ccosf ccosh ccoshf ccoshl ccosl ceil ceilf cexp cexpf cexpl cimag cimagf cimagl clog clog10 clog10f clog10l clogf clogl conj conjf conjl copysign copysignf copysignl cos cosf cosh coshf cpow cpowf cpowl cproj cprojf cprojl creal crealf creall csin csinf csinh csinhf csinhl csinl csqrt csqrtf csqrtl ctan ctanf ctanh ctanhf ctanhl ctanl erf erfc erfcf erfcl erff erfl exp exp2 exp2f exp2l expf expm1 expm1f expm1l fabs fdim fdimf fdiml floor floorf fma fmaf fmal fmax fmaxf fmaxl fmin fminf fminl fmod fmodf frexp hypot ilogb ilogbf ilogbl ldexp lgamma lgammaf lgammal llrint llrintf llrintl llround llroundf llroundl log log10 log10f log1p log1pf log1pl log2 log2f log2l logb logbf logbl logf lrint lrintf lrintl lround lroundf lroundl modf modff nan nanf nanl nearbyint nearbyintf nearbyintl nextafter nextafterf nextafterl nexttoward nexttowardf nexttowardl norm normf norml pow powf remainder remainderf remainderl remquo remquof remquol rint rintf rintl round roundf roundl scalbln scalblnf scalblnl scalbn scalbnf scalbnl sin sinf sinh sinhf sqrt sqrtf tan tanf tanh tanhf tgamma tgammaf tgammal trunc truncf truncl libplacebo-v7.349.0/src/utils/000077500000000000000000000000001463457750100161235ustar00rootroot00000000000000libplacebo-v7.349.0/src/utils/dolbyvision.c000066400000000000000000000045161463457750100206360ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #include "common.h" #include #ifdef PL_HAVE_LIBDOVI #include #include #endif void pl_hdr_metadata_from_dovi_rpu(struct pl_hdr_metadata *out, const uint8_t *buf, size_t size) { #ifdef PL_HAVE_LIBDOVI if (buf && size) { DoviRpuOpaque *rpu = dovi_parse_unspec62_nalu(buf, size); const DoviRpuDataHeader *header = dovi_rpu_get_header(rpu); if (header && header->vdr_dm_metadata_present_flag) { // Profile 4 reshaping isn't done as it is a dual layer format. // However there are still unknowns on its EOTF, so it cannot be enabled. // // For profile 7, the brightness metadata can still be used as most // titles are going to have accurate metadata<->image brightness, // with the exception of some titles that require the enhancement layer // to be processed to restore the intended brightness, which would then // match the metadata values. 
if (header->guessed_profile == 4) { goto done; } const DoviVdrDmData *vdr_dm_data = dovi_rpu_get_vdr_dm_data(rpu); if (vdr_dm_data->dm_data.level1) { const DoviExtMetadataBlockLevel1 *l1 = vdr_dm_data->dm_data.level1; out->max_pq_y = l1->max_pq / 4095.0f; out->avg_pq_y = l1->avg_pq / 4095.0f; } dovi_rpu_free_vdr_dm_data(vdr_dm_data); } done: dovi_rpu_free_header(header); dovi_rpu_free(rpu); } #endif } libplacebo-v7.349.0/src/utils/frame_queue.c000066400000000000000000001010731463457750100205670ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #include #include #include "common.h" #include "log.h" #include "pl_thread.h" #include struct cache_entry { pl_tex tex[4]; }; struct entry { pl_rc_t rc; double pts; struct cache_entry cache; struct pl_source_frame src; struct pl_frame frame; uint64_t signature; bool mapped; bool ok; // for interlaced frames enum pl_field field; struct entry *primary; struct entry *prev, *next; bool dirty; }; // Hard limits for vsync timing validity #define MIN_FPS 10 #define MAX_FPS 400 // Limits for FPS estimation state #define MAX_SAMPLES 32 #define MIN_SAMPLES 4 // Stickiness to prevent `interpolation_threshold` oscillation #define THRESHOLD_MAX_RATIO 0.3 #define THRESHOLD_FRAMES 5 // Maximum number of not-yet-mapped frames to allow queueing in advance #define PREFETCH_FRAMES 2 struct pool { float samples[MAX_SAMPLES]; float estimate; float sum; int idx; int num; int total; }; struct pl_queue_t { pl_gpu gpu; pl_log log; // For multi-threading, we use two locks. The `lock_weak` guards the queue // state itself. The `lock_strong` has a bigger scope and should be held // for the duration of any functions that expect the queue state to // remain more or less valid (with the exception of adding new members). // // In particular, `pl_queue_reset` and `pl_queue_update` will take // the strong lock, while `pl_queue_push_*` will only take the weak // lock. 
pl_mutex lock_strong; pl_mutex lock_weak; pl_cond wakeup; // Frame queue and state PL_ARRAY(struct entry *) queue; uint64_t signature; int threshold_frames; bool want_frame; bool eof; // Average vsync/frame fps estimation state struct pool vps, fps; float reported_vps; float reported_fps; double prev_pts; double pts_offset; // Storage for temporary arrays PL_ARRAY(uint64_t) tmp_sig; PL_ARRAY(float) tmp_ts; PL_ARRAY(const struct pl_frame *) tmp_frame; // Queue of GPU objects to reuse PL_ARRAY(struct cache_entry) cache; }; pl_queue pl_queue_create(pl_gpu gpu) { pl_queue p = pl_alloc_ptr(NULL, p); *p = (struct pl_queue_t) { .gpu = gpu, .log = gpu->log, }; pl_mutex_init(&p->lock_strong); pl_mutex_init(&p->lock_weak); int ret = pl_cond_init(&p->wakeup); if (ret) { PL_ERR(p, "Failed to init conditional variable: %d", ret); return NULL; } return p; } static void recycle_cache(pl_queue p, struct cache_entry *cache, bool recycle) { bool has_textures = false; for (int i = 0; i < PL_ARRAY_SIZE(cache->tex); i++) { if (!cache->tex[i]) continue; has_textures = true; if (recycle) { pl_tex_invalidate(p->gpu, cache->tex[i]); } else { pl_tex_destroy(p->gpu, &cache->tex[i]); } } if (recycle && has_textures) PL_ARRAY_APPEND(p, p->cache, *cache); memset(cache, 0, sizeof(*cache)); // sanity } static void entry_deref(pl_queue p, struct entry **pentry, bool recycle) { struct entry *entry = *pentry; *pentry = NULL; if (!entry || !pl_rc_deref(&entry->rc)) return; if (!entry->mapped && entry->src.discard) { PL_TRACE(p, "Discarding unused frame id %"PRIu64" with PTS %f", entry->signature, entry->src.pts); entry->src.discard(&entry->src); } if (entry->mapped && entry->ok && entry->src.unmap) { PL_TRACE(p, "Unmapping frame id %"PRIu64" with PTS %f", entry->signature, entry->src.pts); entry->src.unmap(p->gpu, &entry->frame, &entry->src); } recycle_cache(p, &entry->cache, recycle); pl_free(entry); } static struct entry *entry_ref(struct entry *entry) { pl_rc_ref(&entry->rc); return entry; } static void entry_cull(pl_queue p, struct entry *entry, bool recycle) { // Forcibly clean up references to prev/next frames, even if `entry` has // remaining refs pointing at it. This is to prevent cyclic references. 
entry_deref(p, &entry->primary, recycle); entry_deref(p, &entry->prev, recycle); entry_deref(p, &entry->next, recycle); entry_deref(p, &entry, recycle); } void pl_queue_destroy(pl_queue *queue) { pl_queue p = *queue; if (!p) return; for (int n = 0; n < p->queue.num; n++) entry_cull(p, p->queue.elem[n], false); for (int n = 0; n < p->cache.num; n++) { for (int i = 0; i < PL_ARRAY_SIZE(p->cache.elem[n].tex); i++) pl_tex_destroy(p->gpu, &p->cache.elem[n].tex[i]); } pl_cond_destroy(&p->wakeup); pl_mutex_destroy(&p->lock_weak); pl_mutex_destroy(&p->lock_strong); pl_free(p); *queue = NULL; } void pl_queue_reset(pl_queue p) { pl_mutex_lock(&p->lock_strong); pl_mutex_lock(&p->lock_weak); for (int i = 0; i < p->queue.num; i++) entry_cull(p, p->queue.elem[i], false); *p = (struct pl_queue_t) { .gpu = p->gpu, .log = p->log, // Reuse lock objects .lock_strong = p->lock_strong, .lock_weak = p->lock_weak, .wakeup = p->wakeup, // Explicitly preserve allocations .queue.elem = p->queue.elem, .tmp_sig.elem = p->tmp_sig.elem, .tmp_ts.elem = p->tmp_ts.elem, .tmp_frame.elem = p->tmp_frame.elem, // Reuse GPU object cache entirely .cache = p->cache, }; pl_cond_signal(&p->wakeup); pl_mutex_unlock(&p->lock_weak); pl_mutex_unlock(&p->lock_strong); } static inline float delta(float old, float new) { return fabsf((new - old) / PL_MIN(new, old)); } static inline void default_estimate(struct pool *pool, float val) { if (!pool->estimate && isnormal(val) && val > 0.0) pool->estimate = val; } static inline void update_estimate(struct pool *pool, float cur) { if (pool->num) { static const float max_delta = 0.3; if (delta(pool->sum / pool->num, cur) > max_delta) { pool->sum = 0.0; pool->num = pool->idx = 0; } } if (pool->num++ == MAX_SAMPLES) { pool->sum -= pool->samples[pool->idx]; pool->num--; } pool->sum += pool->samples[pool->idx] = cur; pool->idx = (pool->idx + 1) % MAX_SAMPLES; pool->total++; if (pool->total < MIN_SAMPLES || pool->num >= MIN_SAMPLES) pool->estimate = pool->sum / pool->num; } static void queue_push(pl_queue p, const struct pl_source_frame *src) { if (p->eof && !src) return; // ignore duplicate EOF if (p->eof && src) { PL_INFO(p, "Received frame after EOF signaled... discarding frame!"); if (src->discard) src->discard(src); return; } pl_cond_signal(&p->wakeup); if (!src) { PL_TRACE(p, "Received EOF, draining frame queue..."); p->eof = true; p->want_frame = false; return; } // Update FPS estimates if possible/reasonable default_estimate(&p->fps, src->first_field ? 
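/* Frame rate bookkeeping (informal): interlaced pushes (src->first_field set)
 * produce two fields per frame, so the initial duration-based guess is
 * halved. After that, PTS deltas between consecutive pushes feed
 * update_estimate(), which keeps a sliding window of up to MAX_SAMPLES
 * deltas and discards the window whenever a new delta deviates from the
 * running average by more than 30%, so the estimate re-converges quickly
 * after a frame rate change.
 */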
src->duration / 2 : src->duration); if (p->queue.num) { double last_pts = p->queue.elem[p->queue.num - 1]->pts; float delta = src->pts - last_pts; if (delta <= 0.0f) { PL_DEBUG(p, "Non monotonically increasing PTS %f -> %f", last_pts, src->pts); } else if (p->fps.estimate && delta > 10.0 * p->fps.estimate) { PL_DEBUG(p, "Discontinuous source PTS jump %f -> %f", last_pts, src->pts); } else { update_estimate(&p->fps, delta); } } else if (src->pts != 0) { PL_DEBUG(p, "First frame received with non-zero PTS %f", src->pts); } struct entry *entry = pl_alloc_ptr(NULL, entry); *entry = (struct entry) { .signature = p->signature++, .pts = src->pts, .src = *src, }; pl_rc_init(&entry->rc); PL_ARRAY_POP(p->cache, &entry->cache); PL_TRACE(p, "Added new frame id %"PRIu64" with PTS %f", entry->signature, entry->pts); // Insert new entry into the correct spot in the queue, sorted by PTS for (int i = p->queue.num;; i--) { if (i == 0 || p->queue.elem[i - 1]->pts <= entry->pts) { if (src->first_field == PL_FIELD_NONE) { // Progressive PL_ARRAY_INSERT_AT(p, p->queue, i, entry); break; } else { // Interlaced struct entry *prev = i > 0 ? p->queue.elem[i - 1] : NULL; struct entry *next = i < p->queue.num ? p->queue.elem[i] : NULL; struct entry *entry2 = pl_zalloc_ptr(NULL, entry2); pl_rc_init(&entry2->rc); if (next) { entry2->pts = (entry->pts + next->pts) / 2; } else if (src->duration) { entry2->pts = entry->pts + src->duration / 2; } else if (p->fps.estimate) { entry2->pts = entry->pts + p->fps.estimate; } else { PL_ERR(p, "Frame with PTS %f specified as interlaced, but " "no FPS information known yet! Please specify a " "valid `pl_source_frame.duration`. Treating as " "progressive...", src->pts); PL_ARRAY_INSERT_AT(p, p->queue, i, entry); pl_free(entry2); break; } entry->field = src->first_field; entry2->primary = entry_ref(entry); entry2->field = pl_field_other(entry->field); entry2->signature = p->signature++; PL_TRACE(p, "Added second field id %"PRIu64" with PTS %f", entry2->signature, entry2->pts); // Link previous/next frames if (prev) { entry->prev = entry_ref(PL_DEF(prev->primary, prev)); entry2->prev = entry_ref(PL_DEF(prev->primary, prev)); // Retroactively re-link the previous frames that should // be referencing this frame for (int j = i - 1; j >= 0; --j) { struct entry *e = p->queue.elem[j]; if (e != prev && e != prev->primary) break; entry_deref(p, &e->next, true); e->next = entry_ref(entry); if (e->dirty) { // reset signature to signal change e->signature = p->signature++; e->dirty = false; } } } if (next) { entry->next = entry_ref(PL_DEF(next->primary, next)); entry2->next = entry_ref(PL_DEF(next->primary, next)); for (int j = i; j < p->queue.num; j++) { struct entry *e = p->queue.elem[j]; if (e != next && e != next->primary) break; entry_deref(p, &e->prev, true); e->prev = entry_ref(entry); if (e->dirty) { e->signature = p->signature++; e->dirty = false; } } } PL_ARRAY_INSERT_AT(p, p->queue, i, entry); PL_ARRAY_INSERT_AT(p, p->queue, i+1, entry2); break; } } } p->want_frame = false; } void pl_queue_push(pl_queue p, const struct pl_source_frame *frame) { pl_mutex_lock(&p->lock_weak); queue_push(p, frame); pl_mutex_unlock(&p->lock_weak); } static inline bool entry_mapped(struct entry *entry) { return entry->mapped || (entry->primary && entry->primary->mapped); } static bool queue_has_room(pl_queue p) { if (p->want_frame) return true; int wanted_frames = PREFETCH_FRAMES; if (p->fps.estimate && p->vps.estimate && p->vps.estimate <= 1.0f / MIN_FPS) wanted_frames += ceilf(p->vps.estimate / 
p->fps.estimate) - 1; // Examine the queue tail for (int i = p->queue.num - 1; i >= 0; i--) { if (entry_mapped(p->queue.elem[i])) return true; if (p->queue.num - i >= wanted_frames) return false; } return true; } bool pl_queue_push_block(pl_queue p, uint64_t timeout, const struct pl_source_frame *frame) { pl_mutex_lock(&p->lock_weak); if (!timeout || !frame || p->eof) goto skip_blocking; while (!queue_has_room(p) && !p->eof) { if (pl_cond_timedwait(&p->wakeup, &p->lock_weak, timeout) == ETIMEDOUT) { pl_mutex_unlock(&p->lock_weak); return false; } } skip_blocking: queue_push(p, frame); pl_mutex_unlock(&p->lock_weak); return true; } static void report_estimates(pl_queue p) { if (p->fps.total >= MIN_SAMPLES && p->vps.total >= MIN_SAMPLES) { if (p->reported_fps && p->reported_vps) { // Only re-report the estimates if they've changed considerably // from the previously reported values static const float report_delta = 0.3f; float delta_fps = delta(p->reported_fps, p->fps.estimate); float delta_vps = delta(p->reported_vps, p->vps.estimate); if (delta_fps < report_delta && delta_vps < report_delta) return; } PL_INFO(p, "Estimated source FPS: %.3f, display FPS: %.3f", 1.0 / p->fps.estimate, 1.0 / p->vps.estimate); p->reported_fps = p->fps.estimate; p->reported_vps = p->vps.estimate; } } // note: may add more than one frame, since it releases the lock static enum pl_queue_status get_frame(pl_queue p, const struct pl_queue_params *params) { if (p->eof) return PL_QUEUE_EOF; if (!params->get_frame) { if (!params->timeout) return PL_QUEUE_MORE; p->want_frame = true; pl_cond_signal(&p->wakeup); while (p->want_frame) { if (pl_cond_timedwait(&p->wakeup, &p->lock_weak, params->timeout) == ETIMEDOUT) return PL_QUEUE_MORE; } return p->eof ? PL_QUEUE_EOF : PL_QUEUE_OK; } // Don't hold the weak mutex while calling into `get_frame`, to allow // `pl_queue_push` to run concurrently while we're waiting for frames pl_mutex_unlock(&p->lock_weak); struct pl_source_frame src; enum pl_queue_status ret; switch ((ret = params->get_frame(&src, params))) { case PL_QUEUE_OK: pl_queue_push(p, &src); break; case PL_QUEUE_EOF: pl_queue_push(p, NULL); break; case PL_QUEUE_MORE: case PL_QUEUE_ERR: break; } pl_mutex_lock(&p->lock_weak); return ret; } static inline bool map_frame(pl_queue p, struct entry *entry) { if (!entry->mapped) { PL_TRACE(p, "Mapping frame id %"PRIu64" with PTS %f", entry->signature, entry->pts); entry->mapped = true; entry->ok = entry->src.map(p->gpu, entry->cache.tex, &entry->src, &entry->frame); if (!entry->ok) PL_ERR(p, "Failed mapping frame id %"PRIu64" with PTS %f", entry->signature, entry->pts); } return entry->ok; } static bool map_entry(pl_queue p, struct entry *entry) { bool ok = map_frame(p, entry->primary ? entry->primary : entry); if (entry->prev) ok &= map_frame(p, entry->prev); if (entry->next) ok &= map_frame(p, entry->next); if (!ok) return false; if (entry->primary) entry->frame = entry->primary->frame; if (entry->field) { entry->frame.field = entry->field; entry->frame.first_field = PL_DEF(entry->primary, entry)->src.first_field; entry->frame.prev = entry->prev ? &entry->prev->frame : NULL; entry->frame.next = entry->next ? &entry->next->frame : NULL; entry->dirty = true; } return true; } static bool entry_complete(struct entry *entry) { return entry->field ? !!entry->next : true; } // Advance the queue as needed to make sure idx 0 is the last frame before // `pts`, and idx 1 is the first frame after `pts` (unless this is the last). 
// // Returns PL_QUEUE_OK only if idx 0 is still legal under ZOH semantics. static enum pl_queue_status advance(pl_queue p, double pts, const struct pl_queue_params *params) { // Cull all frames except the last frame before `pts` int culled = 0; for (int i = 1; i < p->queue.num; i++) { if (p->queue.elem[i]->pts <= pts) { entry_cull(p, p->queue.elem[i - 1], true); culled++; } } PL_ARRAY_REMOVE_RANGE(p->queue, 0, culled); // Keep adding new frames until we find one in the future, or EOF enum pl_queue_status ret = PL_QUEUE_OK; while (p->queue.num < 2) { switch ((ret = get_frame(p, params))) { case PL_QUEUE_ERR: return ret; case PL_QUEUE_EOF: if (!p->queue.num) return ret; goto done; case PL_QUEUE_MORE: case PL_QUEUE_OK: while (p->queue.num > 1 && p->queue.elem[1]->pts <= pts) { entry_cull(p, p->queue.elem[0], true); PL_ARRAY_REMOVE_AT(p->queue, 0); } if (ret == PL_QUEUE_MORE) return ret; continue; } } if (!entry_complete(p->queue.elem[1])) { switch (get_frame(p, params)) { case PL_QUEUE_ERR: return PL_QUEUE_ERR; case PL_QUEUE_MORE: ret = PL_QUEUE_MORE; // fall through case PL_QUEUE_EOF: case PL_QUEUE_OK: goto done; } } done: if (p->eof && p->queue.num == 1) { if (p->queue.elem[0]->pts == 0.0 || !p->fps.estimate) { // If the last frame has PTS 0.0, or we have no FPS estimate, then // this is probably a single-frame file, in which case we want to // extend the ZOH to infinity, rather than returning. Not a perfect // heuristic, but w/e return PL_QUEUE_OK; } // Last frame is held for an extra `p->fps.estimate` duration, // afterwards this function just returns EOF. if (pts < p->queue.elem[0]->pts + p->fps.estimate) { ret = PL_QUEUE_OK; } else { entry_cull(p, p->queue.elem[0], true); p->queue.num = 0; return PL_QUEUE_EOF; } } pl_assert(p->queue.num); return ret; } static inline enum pl_queue_status point(pl_queue p, struct pl_frame_mix *mix, const struct pl_queue_params *params) { if (!p->queue.num) { *mix = (struct pl_frame_mix) {0}; return PL_QUEUE_MORE; } // Find closest frame (nearest neighbour semantics) struct entry *entry = p->queue.elem[0]; if (entry->pts > params->pts) { // first frame not visible yet *mix = (struct pl_frame_mix) {0}; return PL_QUEUE_OK; } double best = fabs(entry->pts - params->pts); for (int i = 1; i < p->queue.num; i++) { double dist = fabs(p->queue.elem[i]->pts - params->pts); if (dist < best) { entry = p->queue.elem[i]; best = dist; continue; } else { break; } } if (!map_entry(p, entry)) return PL_QUEUE_ERR; // Return a mix containing only this single frame p->tmp_sig.num = p->tmp_ts.num = p->tmp_frame.num = 0; PL_ARRAY_APPEND(p, p->tmp_sig, entry->signature); PL_ARRAY_APPEND(p, p->tmp_frame, &entry->frame); PL_ARRAY_APPEND(p, p->tmp_ts, 0.0); *mix = (struct pl_frame_mix) { .num_frames = 1, .frames = p->tmp_frame.elem, .signatures = p->tmp_sig.elem, .timestamps = p->tmp_ts.elem, .vsync_duration = 1.0, }; PL_TRACE(p, "Showing single frame id %"PRIu64" with PTS %f for target PTS %f", entry->signature, entry->pts, params->pts); report_estimates(p); return PL_QUEUE_OK; } // Present a single frame as appropriate for `pts` static enum pl_queue_status nearest(pl_queue p, struct pl_frame_mix *mix, const struct pl_queue_params *params) { enum pl_queue_status ret; switch ((ret = advance(p, params->pts, params))) { case PL_QUEUE_ERR: case PL_QUEUE_EOF: return ret; case PL_QUEUE_OK: case PL_QUEUE_MORE: if (mix && point(p, mix, params) == PL_QUEUE_ERR) return PL_QUEUE_ERR; return ret; } pl_unreachable(); } // Special case of `interpolate` for radius = 0, in which case we need 
exactly // the previous frame and the following frame static enum pl_queue_status oversample(pl_queue p, struct pl_frame_mix *mix, const struct pl_queue_params *params) { enum pl_queue_status ret; switch ((ret = advance(p, params->pts, params))) { case PL_QUEUE_ERR: case PL_QUEUE_EOF: return ret; case PL_QUEUE_OK: break; case PL_QUEUE_MORE: if (!p->queue.num) { if (mix) *mix = (struct pl_frame_mix) {0}; return ret; } break; } if (!mix) return PL_QUEUE_OK; // Can't oversample with only a single frame, fall back to point sampling if (p->queue.num < 2 || p->queue.elem[0]->pts > params->pts) { if (point(p, mix, params) != PL_QUEUE_OK) return PL_QUEUE_ERR; return ret; } struct entry *entries[2] = { p->queue.elem[0], p->queue.elem[1] }; pl_assert(entries[0]->pts <= params->pts); pl_assert(entries[1]->pts >= params->pts); // Returning a mix containing both of these two frames p->tmp_sig.num = p->tmp_ts.num = p->tmp_frame.num = 0; for (int i = 0; i < 2; i++) { if (!map_entry(p, entries[i])) return PL_QUEUE_ERR; float ts = (entries[i]->pts - params->pts) / p->fps.estimate; PL_ARRAY_APPEND(p, p->tmp_sig, entries[i]->signature); PL_ARRAY_APPEND(p, p->tmp_frame, &entries[i]->frame); PL_ARRAY_APPEND(p, p->tmp_ts, ts); } *mix = (struct pl_frame_mix) { .num_frames = 2, .frames = p->tmp_frame.elem, .signatures = p->tmp_sig.elem, .timestamps = p->tmp_ts.elem, .vsync_duration = p->vps.estimate / p->fps.estimate, }; PL_TRACE(p, "Oversampling 2 frames for target PTS %f:", params->pts); for (int i = 0; i < mix->num_frames; i++) PL_TRACE(p, " id %"PRIu64" ts %f", mix->signatures[i], mix->timestamps[i]); report_estimates(p); return ret; } // Present a mixture of frames, relative to the vsync ratio static enum pl_queue_status interpolate(pl_queue p, struct pl_frame_mix *mix, const struct pl_queue_params *params) { // No FPS estimate available, possibly source contains only a single frame, // or this is the first frame to be rendered. Fall back to point sampling. if (!p->fps.estimate) return nearest(p, mix, params); // Silently disable interpolation if the ratio dips lower than the // configured threshold float ratio = fabs(p->fps.estimate / p->vps.estimate - 1.0); if (ratio < params->interpolation_threshold) { if (!p->threshold_frames) { PL_INFO(p, "Detected fps ratio %.4f below threshold %.4f, " "disabling interpolation", ratio, params->interpolation_threshold); } p->threshold_frames = THRESHOLD_FRAMES + 1; return nearest(p, mix, params); } else if (ratio < THRESHOLD_MAX_RATIO && p->threshold_frames > 1) { p->threshold_frames--; return nearest(p, mix, params); } else { if (p->threshold_frames) { PL_INFO(p, "Detected fps ratio %.4f exceeds threshold %.4f, " "re-enabling interpolation", ratio, params->interpolation_threshold); } p->threshold_frames = 0; } // No radius information, special case in which we only need the previous // and next frames. 
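    // For illustration (numbers are approximate): 24 fps content on a 60 Hz
    // display gives fps.estimate ~= 1/24 and vps.estimate ~= 1/60, so
    // `oversample` returns the two surrounding frames with
    // vsync_duration ~= (1/60) / (1/24) = 0.4 and timestamps of roughly
    // -0.24 and +0.76 when the target PTS lies 10 ms past the earlier frame.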
if (!params->radius) return oversample(p, mix, params); pl_assert(p->fps.estimate && p->vps.estimate); float radius = params->radius * fmaxf(1.0f, p->vps.estimate / p->fps.estimate); double min_pts = params->pts - radius * p->fps.estimate, max_pts = params->pts + radius * p->fps.estimate; enum pl_queue_status ret; switch ((ret = advance(p, min_pts, params))) { case PL_QUEUE_ERR: case PL_QUEUE_EOF: return ret; case PL_QUEUE_MORE: goto done; case PL_QUEUE_OK: break; } // Keep adding new frames until we've covered the range we care about pl_assert(p->queue.num); while (p->queue.elem[p->queue.num - 1]->pts < max_pts) { switch ((ret = get_frame(p, params))) { case PL_QUEUE_ERR: return ret; case PL_QUEUE_MORE: goto done; case PL_QUEUE_EOF:; // Don't forward EOF until we've held the last frame for the // desired ZOH hold duration double last_pts = p->queue.elem[p->queue.num - 1]->pts; if (last_pts && params->pts >= last_pts + p->fps.estimate) return ret; ret = PL_QUEUE_OK; goto done; case PL_QUEUE_OK: continue; } } if (!entry_complete(p->queue.elem[p->queue.num - 1])) { switch ((ret = get_frame(p, params))) { case PL_QUEUE_MORE: case PL_QUEUE_OK: break; case PL_QUEUE_ERR: case PL_QUEUE_EOF: return ret; } } done: ; if (!mix) return PL_QUEUE_OK; // Construct a mix object representing the current queue state, starting at // the last frame before `min_pts` to make sure there's a fallback frame // available for ZOH semantics. p->tmp_sig.num = p->tmp_ts.num = p->tmp_frame.num = 0; for (int i = 0; i < p->queue.num; i++) { struct entry *entry = p->queue.elem[i]; if (entry->pts > max_pts) break; if (!map_entry(p, entry)) return PL_QUEUE_ERR; float ts = (entry->pts - params->pts) / p->fps.estimate; PL_ARRAY_APPEND(p, p->tmp_sig, entry->signature); PL_ARRAY_APPEND(p, p->tmp_frame, &entry->frame); PL_ARRAY_APPEND(p, p->tmp_ts, ts); } *mix = (struct pl_frame_mix) { .num_frames = p->tmp_frame.num, .frames = p->tmp_frame.elem, .signatures = p->tmp_sig.elem, .timestamps = p->tmp_ts.elem, .vsync_duration = p->vps.estimate / p->fps.estimate, }; PL_TRACE(p, "Showing mix of %d frames for target PTS %f:", mix->num_frames, params->pts); for (int i = 0; i < mix->num_frames; i++) PL_TRACE(p, " id %"PRIu64" ts %f", mix->signatures[i], mix->timestamps[i]); report_estimates(p); return ret; } static bool prefill(pl_queue p, const struct pl_queue_params *params) { int min_frames = 2 * ceilf(params->radius); if (p->fps.estimate && p->vps.estimate && p->vps.estimate <= 1.0f / MIN_FPS) min_frames *= ceilf(p->vps.estimate / p->fps.estimate); min_frames = PL_MAX(min_frames, PREFETCH_FRAMES); while (p->queue.num < min_frames) { switch (get_frame(p, params)) { case PL_QUEUE_ERR: return false; case PL_QUEUE_EOF: case PL_QUEUE_MORE: return true; case PL_QUEUE_OK: continue; } } // In the most likely case, the first few frames will all be required. So // force-map them all to initialize GPU state on initial rendering. This is // better than the alternative of missing the cache later, when timing is // more relevant. for (int i = 0; i < min_frames; i++) { if (!map_entry(p, p->queue.elem[i])) return false; } return true; } enum pl_queue_status pl_queue_update(pl_queue p, struct pl_frame_mix *out_mix, const struct pl_queue_params *params) { struct pl_queue_params fixed; pl_mutex_lock(&p->lock_strong); pl_mutex_lock(&p->lock_weak); default_estimate(&p->vps, params->vsync_duration); float delta = params->pts - p->prev_pts; if (delta < 0.0f) { // This is a backwards PTS jump. 
This is something we can handle // semi-gracefully, but only if we haven't culled past the current // frame yet. if (p->queue.num && p->queue.elem[0]->pts > params->pts) { PL_ERR(p, "Requested PTS %f is lower than the oldest frame " "PTS %f. This is not supported, PTS must be monotonically " "increasing! Please use `pl_queue_reset` to reset the frame " "queue on discontinuous PTS jumps.", params->pts, p->queue.elem[0]->pts); pl_mutex_unlock(&p->lock_weak); pl_mutex_unlock(&p->lock_strong); return PL_QUEUE_ERR; } } else if (delta > 1.0f) { // A jump of more than a second is probably the result of a // discontinuous jump after a suspend. To prevent this from exploding // the FPS estimate, treat this as a new frame. PL_TRACE(p, "Discontinuous target PTS jump %f -> %f, ignoring...", p->prev_pts, params->pts); p->pts_offset = 0.0; } else if (delta > 0) { update_estimate(&p->vps, params->pts - p->prev_pts); } p->prev_pts = params->pts; if (params->drift_compensation > 0.0f) { // Adjust PTS offset if PTS is near-match for existing frame double pts = params->pts + p->pts_offset; for (int i = 0; i < p->queue.num; i++) { if (fabs(p->queue.elem[i]->pts - pts) < params->drift_compensation) { p->pts_offset = p->queue.elem[i]->pts - params->pts; pts = p->queue.elem[i]->pts; break; } } fixed = *params; fixed.pts = pts; params = &fixed; } // As a special case, prefill the queue if this is the first frame if (!params->pts && !p->queue.num) { if (!prefill(p, params)) { pl_mutex_unlock(&p->lock_weak); pl_mutex_unlock(&p->lock_strong); return PL_QUEUE_ERR; } } // Ignore unrealistically high or low FPS, common near start of playback static const float max_vsync = 1.0 / MIN_FPS; static const float min_vsync = 1.0 / MAX_FPS; bool estimation_ok = p->vps.estimate > min_vsync && p->vps.estimate < max_vsync; enum pl_queue_status ret; if (estimation_ok || params->vsync_duration > 0) { // We know the vsync duration, so construct an interpolation mix ret = interpolate(p, out_mix, params); } else { // We don't know the vsync duration (yet), so just point-sample ret = nearest(p, out_mix, params); } pl_cond_signal(&p->wakeup); pl_mutex_unlock(&p->lock_weak); pl_mutex_unlock(&p->lock_strong); return ret; } float pl_queue_estimate_fps(pl_queue p) { pl_mutex_lock(&p->lock_weak); float estimate = p->fps.estimate; pl_mutex_unlock(&p->lock_weak); return estimate ? 1.0f / estimate : 0.0f; } float pl_queue_estimate_vps(pl_queue p) { pl_mutex_lock(&p->lock_weak); float estimate = p->vps.estimate; pl_mutex_unlock(&p->lock_weak); return estimate ? 1.0f / estimate : 0.0f; } int pl_queue_num_frames(pl_queue p) { pl_mutex_lock(&p->lock_weak); int count = p->queue.num; pl_mutex_unlock(&p->lock_weak); return count; } double pl_queue_pts_offset(pl_queue p) { pl_mutex_lock(&p->lock_weak); double offset = p->pts_offset; pl_mutex_unlock(&p->lock_weak); return offset; } bool pl_queue_peek(pl_queue p, int idx, struct pl_source_frame *out) { pl_mutex_lock(&p->lock_weak); bool ok = idx >= 0 && idx < p->queue.num; if (ok) *out = p->queue.elem[idx]->src; pl_mutex_unlock(&p->lock_weak); return ok; } libplacebo-v7.349.0/src/utils/upload.c000066400000000000000000000312621463457750100175570ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. 
* * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #include "log.h" #include "common.h" #include "gpu.h" #include #define MAX_COMPS 4 struct comp { int order; // e.g. 0, 1, 2, 3 for RGBA int size; // size in bits int shift; // bit-shift / offset in bits }; static int compare_comp(const void *pa, const void *pb) { const struct comp *a = pa, *b = pb; // Move all of the components with a size of 0 to the end, so they can // be ignored outright if (a->size && !b->size) return -1; if (b->size && !a->size) return 1; // Otherwise, just compare based on the shift return PL_CMP(a->shift, b->shift); } void pl_plane_data_from_comps(struct pl_plane_data *data, int size[4], int shift[4]) { struct comp comps[MAX_COMPS]; for (int i = 0; i < PL_ARRAY_SIZE(comps); i++) { comps[i].order = i; comps[i].size = size[i]; comps[i].shift = shift[i]; } // Sort the components by shift qsort(comps, MAX_COMPS, sizeof(struct comp), compare_comp); // Generate the resulting component size/pad/map int offset = 0; for (int i = 0; i < MAX_COMPS; i++) { if (comps[i].size) { assert(comps[i].shift >= offset); data->component_size[i] = comps[i].size; data->component_pad[i] = comps[i].shift - offset; data->component_map[i] = comps[i].order; offset += data->component_size[i] + data->component_pad[i]; } else { // Clear the superfluous entries for sanity data->component_size[i] = 0; data->component_pad[i] = 0; data->component_map[i] = 0; } } } void pl_plane_data_from_mask(struct pl_plane_data *data, uint64_t mask[4]) { int size[4]; int shift[4]; for (int i = 0; i < PL_ARRAY_SIZE(size); i++) { size[i] = __builtin_popcountll(mask[i]); shift[i] = PL_MAX(0, __builtin_ffsll(mask[i]) - 1); // Sanity checking uint64_t mask_reconstructed = (1LLU << size[i]) - 1; mask_reconstructed <<= shift[i]; pl_assert(mask_reconstructed == mask[i]); } pl_plane_data_from_comps(data, size, shift); } bool pl_plane_data_align(struct pl_plane_data *data, struct pl_bit_encoding *out_bits) { struct pl_plane_data aligned = *data; struct pl_bit_encoding bits = {0}; int offset = 0; #define SET_TEST(var, value) \ do { \ if (offset == 0) { \ (var) = (value); \ } else if ((var) != (value)) { \ goto misaligned; \ } \ } while (0) for (int i = 0; i < MAX_COMPS; i++) { if (!aligned.component_size[i]) break; // Can't meaningfully align alpha channel, so just skip it. This is a // limitation of the fact that `pl_bit_encoding` only applies to the // main color channels, and changing this would be very nontrivial. if (aligned.component_map[i] == PL_CHANNEL_A) continue; // Color depth is the original component size, before alignment SET_TEST(bits.color_depth, aligned.component_size[i]); // Try consuming padding of the current component to align down. This // corresponds to an extra bit shift to the left. int comp_start = offset + aligned.component_pad[i]; int left_delta = comp_start - PL_ALIGN2(comp_start - 7, 8); left_delta = PL_MIN(left_delta, aligned.component_pad[i]); aligned.component_pad[i] -= left_delta; aligned.component_size[i] += left_delta; SET_TEST(bits.bit_shift, left_delta); // Try consuming padding of the next component to align up. This // corresponds to simply ignoring some extra 0s on the end. 
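        // Worked example (illustrative): a P010-style component with its 10
        // significant bits in the top of a 16-bit word arrives here as
        // component_size = 10, component_pad = 6. The left-align step above
        // consumes those 6 padding bits as bits.bit_shift = 6 and widens the
        // component, so the final result is color_depth = 10,
        // sample_depth = 16, bit_shift = 6.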
int comp_end = comp_start + aligned.component_size[i] - left_delta; int right_delta = PL_ALIGN2(comp_end, 8) - comp_end; if (i+1 == MAX_COMPS || !aligned.component_size[i+1]) { // This is the last component, so we can be greedy aligned.component_size[i] += right_delta; } else { right_delta = PL_MIN(right_delta, aligned.component_pad[i+1]); aligned.component_pad[i+1] -= right_delta; aligned.component_size[i] += right_delta; } // Sample depth is the new total component size, including padding SET_TEST(bits.sample_depth, aligned.component_size[i]); offset += aligned.component_pad[i] + aligned.component_size[i]; } // Easy sanity check, to make sure that we don't exceed the known stride if (aligned.pixel_stride && offset > aligned.pixel_stride * 8) goto misaligned; *data = aligned; if (out_bits) *out_bits = bits; return true; misaligned: // Can't properly align anything, so just do a no-op if (out_bits) *out_bits = (struct pl_bit_encoding) {0}; return false; } pl_fmt pl_plane_find_fmt(pl_gpu gpu, int out_map[4], const struct pl_plane_data *data) { int dummy[4] = {0}; out_map = PL_DEF(out_map, dummy); // Endian swapping requires compute shaders (currently) if (data->swapped && !gpu->limits.max_ssbo_size) return NULL; // Count the number of components and initialize out_map int num = 0; for (int i = 0; i < PL_ARRAY_SIZE(data->component_size); i++) { out_map[i] = -1; if (data->component_size[i]) num = i+1; } for (int n = 0; n < gpu->num_formats; n++) { pl_fmt fmt = gpu->formats[n]; if (fmt->opaque || fmt->num_components < num) continue; if (fmt->type != data->type || fmt->texel_size != data->pixel_stride) continue; if (!(fmt->caps & PL_FMT_CAP_SAMPLEABLE)) continue; int idx = 0; // Try mapping all pl_plane_data components to texture components for (int i = 0; i < num; i++) { // If there's padding we have to map it to an unused physical // component first int pad = data->component_pad[i]; if (pad && (idx >= 4 || fmt->host_bits[idx++] != pad)) goto next_fmt; // Otherwise, try and match this component int size = data->component_size[i]; if (size && (idx >= 4 || fmt->host_bits[idx] != size)) goto next_fmt; out_map[idx++] = data->component_map[i]; } // Reject misaligned formats, check this last to only log such errors // if this is the only thing preventing a format from being used, as // this is likely an issue in the API usage. if (data->row_stride % fmt->texel_align) { PL_WARN(gpu, "Rejecting texture format '%s' due to misalignment: " "Row stride %zu is not a clean multiple of texel size %zu! " "This is likely an API usage bug.", fmt->name, data->row_stride, fmt->texel_align); continue; } return fmt; next_fmt: ; // acts as `continue` } return NULL; } bool pl_upload_plane(pl_gpu gpu, struct pl_plane *out_plane, pl_tex *tex, const struct pl_plane_data *data) { pl_assert(!data->buf ^ !data->pixels); // exactly one int out_map[4]; pl_fmt fmt = pl_plane_find_fmt(gpu, out_map, data); if (!fmt) { PL_ERR(gpu, "Failed picking any compatible texture format for a plane!"); return false; // TODO: try soft-converting to a supported format using e.g zimg? 
} bool ok = pl_tex_recreate(gpu, tex, pl_tex_params( .w = data->width, .h = data->height, .format = fmt, .sampleable = true, .host_writable = true, .blit_src = fmt->caps & PL_FMT_CAP_BLITTABLE, )); if (!ok) { PL_ERR(gpu, "Failed initializing plane texture!"); return false; } if (out_plane) { out_plane->texture = *tex; out_plane->components = 0; for (int i = 0; i < PL_ARRAY_SIZE(out_map); i++) { out_plane->component_mapping[i] = out_map[i]; if (out_map[i] >= 0) out_plane->components = i+1; } } struct pl_tex_transfer_params params = { .tex = *tex, .rc.x1 = data->width, // set these for `pl_tex_transfer_size` .rc.y1 = data->height, .rc.z1 = 1, .row_pitch = PL_DEF(data->row_stride, data->width * fmt->texel_size), .ptr = (void *) data->pixels, .buf = data->buf, .buf_offset = data->buf_offset, .callback = data->callback, .priv = data->priv, }; pl_buf swapbuf = NULL; if (data->swapped) { const size_t aligned = PL_ALIGN2(pl_tex_transfer_size(¶ms), 4); swapbuf = pl_buf_create(gpu, pl_buf_params( .size = aligned, .storable = true, .initial_data = params.ptr, // Note: This may over-read from `ptr` if `ptr` is not aligned to a // word boundary, but the extra texels will be ignored by // `pl_tex_upload` so this UB should be a non-issue in practice. )); if (!swapbuf) { PL_ERR(gpu, "Failed creating endian swapping buffer!"); return false; } struct pl_buf_copy_swap_params swap_params = { .src = swapbuf, .dst = swapbuf, .size = aligned, .wordsize = fmt->texel_size / fmt->num_components, }; bool can_reuse = params.buf && params.buf->params.storable && params.buf_offset % 4 == 0 && params.buf_offset + aligned <= params.buf->params.size; if (params.ptr) { // Data is already uploaded (no-op), can swap in-place } else if (can_reuse) { // We can sample directly from the source buffer swap_params.src = params.buf; swap_params.src_offset = params.buf_offset; } else { // We sadly need to do a second memcpy assert(params.buf); PL_TRACE(gpu, "Double-slow path! 
pl_buf_copy -> pl_buf_copy_swap..."); pl_buf_copy(gpu, swapbuf, 0, params.buf, params.buf_offset, PL_MIN(aligned, params.buf->params.size - params.buf_offset)); } if (!pl_buf_copy_swap(gpu, &swap_params)) { PL_ERR(gpu, "Failed swapping endianness!"); pl_buf_destroy(gpu, &swapbuf); return false; } params.ptr = NULL; params.buf = swapbuf; params.buf_offset = 0; } ok = pl_tex_upload(gpu, ¶ms); pl_buf_destroy(gpu, &swapbuf); return ok; } bool pl_recreate_plane(pl_gpu gpu, struct pl_plane *out_plane, pl_tex *tex, const struct pl_plane_data *data) { if (data->swapped) { PL_ERR(gpu, "Cannot call pl_recreate_plane on non-native endian plane " "data, this is only supported for `pl_upload_plane`!"); return false; } int out_map[4]; pl_fmt fmt = pl_plane_find_fmt(gpu, out_map, data); if (!fmt) { PL_ERR(gpu, "Failed picking any compatible texture format for a plane!"); return false; } bool ok = pl_tex_recreate(gpu, tex, pl_tex_params( .w = data->width, .h = data->height, .format = fmt, .renderable = true, .host_readable = fmt->caps & PL_FMT_CAP_HOST_READABLE, .blit_dst = fmt->caps & PL_FMT_CAP_BLITTABLE, .storable = fmt->caps & PL_FMT_CAP_STORABLE, )); if (!ok) { PL_ERR(gpu, "Failed initializing plane texture!"); return false; } if (out_plane) { out_plane->texture = *tex; out_plane->components = 0; for (int i = 0; i < PL_ARRAY_SIZE(out_map); i++) { out_plane->component_mapping[i] = out_map[i]; if (out_map[i] >= 0) out_plane->components = i+1; } } return true; } libplacebo-v7.349.0/src/version.h.in000066400000000000000000000000431463457750100172230ustar00rootroot00000000000000#define BUILD_VERSION "@buildver@" libplacebo-v7.349.0/src/version.py000077500000000000000000000016531463457750100170320ustar00rootroot00000000000000#!/usr/bin/env python3 import sys import subprocess infilename, outfilename, source_dir, project_version_pretty = sys.argv[1:] try: proc = subprocess.run(['git', 'describe', '--dirty'], cwd=source_dir, capture_output=True, text=True) proc.check_returncode() except (FileNotFoundError, subprocess.CalledProcessError): # No git or no repo. Hopefully a release tarball. version = project_version_pretty else: version = '{} ({})'.format(project_version_pretty, proc.stdout.strip()) with open(infilename, 'r') as infile: output = infile.read().replace('@buildver@', version) # Avoid touching file (triggering recompilation) if it's already up to date. try: with open(outfilename, 'r') as outfile: write_output = outfile.read() != output except FileNotFoundError: write_output = True if write_output: with open(outfilename, 'w') as outfile: outfile.write(output) libplacebo-v7.349.0/src/vulkan/000077500000000000000000000000001463457750100162635ustar00rootroot00000000000000libplacebo-v7.349.0/src/vulkan/command.c000066400000000000000000000460511463457750100200530ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . 
*/ #include "command.h" #include "utils.h" // returns VK_SUCCESS (completed), VK_TIMEOUT (not yet completed) or an error static VkResult vk_cmd_poll(struct vk_cmd *cmd, uint64_t timeout) { struct vk_ctx *vk = cmd->pool->vk; return vk->WaitSemaphores(vk->dev, &(VkSemaphoreWaitInfo) { .sType = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO, .semaphoreCount = 1, .pSemaphores = &cmd->sync.sem, .pValues = &cmd->sync.value, }, timeout); } static void flush_callbacks(struct vk_ctx *vk) { while (vk->num_pending_callbacks) { const struct vk_callback *cb = vk->pending_callbacks++; vk->num_pending_callbacks--; cb->run(cb->priv, cb->arg); } } static void vk_cmd_reset(struct vk_cmd *cmd) { struct vk_ctx *vk = cmd->pool->vk; // Flush possible callbacks left over from a previous command still in the // process of being reset, whose callback triggered this command being // reset. flush_callbacks(vk); vk->pending_callbacks = cmd->callbacks.elem; vk->num_pending_callbacks = cmd->callbacks.num; flush_callbacks(vk); cmd->callbacks.num = 0; cmd->deps.num = 0; cmd->sigs.num = 0; } static void vk_cmd_destroy(struct vk_cmd *cmd) { if (!cmd) return; struct vk_ctx *vk = cmd->pool->vk; vk_cmd_poll(cmd, UINT64_MAX); vk_cmd_reset(cmd); vk->DestroySemaphore(vk->dev, cmd->sync.sem, PL_VK_ALLOC); vk->FreeCommandBuffers(vk->dev, cmd->pool->pool, 1, &cmd->buf); pl_free(cmd); } static struct vk_cmd *vk_cmd_create(struct vk_cmdpool *pool) { struct vk_ctx *vk = pool->vk; struct vk_cmd *cmd = pl_zalloc_ptr(NULL, cmd); cmd->pool = pool; VkCommandBufferAllocateInfo ainfo = { .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, .commandPool = pool->pool, .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY, .commandBufferCount = 1, }; VK(vk->AllocateCommandBuffers(vk->dev, &ainfo, &cmd->buf)); static const VkSemaphoreTypeCreateInfo stinfo = { .sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO, .semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE, .initialValue = 0, }; static const VkSemaphoreCreateInfo sinfo = { .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, .pNext = &stinfo, }; VK(vk->CreateSemaphore(vk->dev, &sinfo, PL_VK_ALLOC, &cmd->sync.sem)); PL_VK_NAME(SEMAPHORE, cmd->sync.sem, "cmd"); return cmd; error: vk_cmd_destroy(cmd); vk->failed = true; return NULL; } void vk_dev_callback(struct vk_ctx *vk, vk_cb callback, const void *priv, const void *arg) { pl_mutex_lock(&vk->lock); if (vk->cmds_pending.num > 0) { struct vk_cmd *last_cmd = vk->cmds_pending.elem[vk->cmds_pending.num - 1]; vk_cmd_callback(last_cmd, callback, priv, arg); } else { // The device was already idle, so we can just immediately call it callback((void *) priv, (void *) arg); } pl_mutex_unlock(&vk->lock); } void vk_cmd_callback(struct vk_cmd *cmd, vk_cb callback, const void *priv, const void *arg) { PL_ARRAY_APPEND(cmd, cmd->callbacks, (struct vk_callback) { .run = callback, .priv = (void *) priv, .arg = (void *) arg, }); } void vk_cmd_dep(struct vk_cmd *cmd, VkPipelineStageFlags2 stage, pl_vulkan_sem dep) { PL_ARRAY_APPEND(cmd, cmd->deps, (VkSemaphoreSubmitInfo) { .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO, .semaphore = dep.sem, .value = dep.value, .stageMask = stage, }); } void vk_cmd_sig(struct vk_cmd *cmd, VkPipelineStageFlags2 stage, pl_vulkan_sem sig) { VkSemaphoreSubmitInfo sinfo = { .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO, .semaphore = sig.sem, .value = sig.value, .stageMask = stage, }; // Try updating existing semaphore signal operations in-place for (int i = 0; i < cmd->sigs.num; i++) { if (cmd->sigs.elem[i].semaphore == sig.sem) { pl_assert(sig.value > 
cmd->sigs.elem[i].value); cmd->sigs.elem[i] = sinfo; return; } } PL_ARRAY_APPEND(cmd, cmd->sigs, sinfo); } #define SET(FLAG, CHECK) \ if (flags2 & (CHECK)) \ flags |= FLAG static VkAccessFlags lower_access2(VkAccessFlags2 flags2) { VkAccessFlags flags = flags2 & VK_ACCESS_FLAG_BITS_MAX_ENUM; SET(VK_ACCESS_SHADER_READ_BIT, VK_ACCESS_2_SHADER_SAMPLED_READ_BIT | VK_ACCESS_2_SHADER_STORAGE_READ_BIT); SET(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT); return flags; } static VkPipelineStageFlags lower_stage2(VkPipelineStageFlags2 flags2) { VkPipelineStageFlags flags = flags2 & VK_PIPELINE_STAGE_FLAG_BITS_MAX_ENUM; SET(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_2_COPY_BIT | VK_PIPELINE_STAGE_2_RESOLVE_BIT | VK_PIPELINE_STAGE_2_BLIT_BIT | VK_PIPELINE_STAGE_2_CLEAR_BIT); SET(VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, VK_PIPELINE_STAGE_2_INDEX_INPUT_BIT | VK_PIPELINE_STAGE_2_VERTEX_ATTRIBUTE_INPUT_BIT); return flags; } #undef SET void vk_cmd_barrier(struct vk_cmd *cmd, const VkDependencyInfo *info) { struct vk_ctx *vk = cmd->pool->vk; if (vk->CmdPipelineBarrier2KHR) { vk->CmdPipelineBarrier2KHR(cmd->buf, info); return; } pl_assert(!info->pNext); pl_assert(info->memoryBarrierCount == 0); pl_assert(info->bufferMemoryBarrierCount + info->imageMemoryBarrierCount == 1); if (info->bufferMemoryBarrierCount) { const VkBufferMemoryBarrier2 *barr2 = info->pBufferMemoryBarriers; const VkBufferMemoryBarrier barr = { .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, .pNext = barr2->pNext, .srcAccessMask = lower_access2(barr2->srcAccessMask), .dstAccessMask = lower_access2(barr2->dstAccessMask), .srcQueueFamilyIndex = barr2->srcQueueFamilyIndex, .dstQueueFamilyIndex = barr2->dstQueueFamilyIndex, .buffer = barr2->buffer, .offset = barr2->offset, .size = barr2->size, }; vk->CmdPipelineBarrier(cmd->buf, lower_stage2(barr2->srcStageMask), lower_stage2(barr2->dstStageMask), info->dependencyFlags, 0, NULL, 1, &barr, 0, NULL); } else { const VkImageMemoryBarrier2 *barr2 = info->pImageMemoryBarriers; const VkImageMemoryBarrier barr = { .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, .pNext = barr2->pNext, .srcAccessMask = lower_access2(barr2->srcAccessMask), .dstAccessMask = lower_access2(barr2->dstAccessMask), .oldLayout = barr2->oldLayout, .newLayout = barr2->newLayout, .srcQueueFamilyIndex = barr2->srcQueueFamilyIndex, .dstQueueFamilyIndex = barr2->dstQueueFamilyIndex, .image = barr2->image, .subresourceRange = barr2->subresourceRange, }; vk->CmdPipelineBarrier(cmd->buf, lower_stage2(barr2->srcStageMask), lower_stage2(barr2->dstStageMask), info->dependencyFlags, 0, NULL, 0, NULL, 1, &barr); } } struct vk_sync_scope vk_sem_barrier(struct vk_cmd *cmd, struct vk_sem *sem, VkPipelineStageFlags2 stage, VkAccessFlags2 access, bool is_trans) { bool is_write = (access & vk_access_write) || is_trans; // Writes need to be synchronized against the last *read* (which is // transitively synchronized against the last write), reads only // need to be synchronized against the last write. 
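    // Illustrative consequences of the rules above: a read following a write
    // made on a *different* queue only picks up a semaphore dependency (the
    // returned scope's access mask is cleared, so the caller emits no
    // pipeline barrier), and a read that repeats an already-covered
    // same-queue read with the same stage/access mask needs neither.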
struct vk_sync_scope last = sem->write; if (is_write && sem->read.access) last = sem->read; if (last.queue != cmd->queue) { if (!is_write && sem->read.queue == cmd->queue) { // No semaphore needed in this case because the implicit submission // order execution dependencies already transitively imply a wait // for the previous write } else if (last.sync.sem) { // Image barrier still needs to depend on this stage for implicit // ordering guarantees to apply properly vk_cmd_dep(cmd, stage, last.sync); last.stage = stage; } // Last access is on different queue, so no pipeline barrier needed last.access = 0; } if (!is_write && sem->read.queue == cmd->queue && (sem->read.stage & stage) == stage && (sem->read.access & access) == access) { // A past pipeline barrier already covers this access transitively, so // we don't need to emit another pipeline barrier at all last.access = 0; } if (is_write) { sem->write = (struct vk_sync_scope) { .sync = cmd->sync, .queue = cmd->queue, .stage = stage, .access = access, }; sem->read = (struct vk_sync_scope) { .sync = cmd->sync, .queue = cmd->queue, // no stage or access scope, because no reads happened yet }; } else if (sem->read.queue == cmd->queue) { // Coalesce multiple same-queue reads into a single access scope sem->read.sync = cmd->sync; sem->read.stage |= stage; sem->read.access |= access; } else { sem->read = (struct vk_sync_scope) { .sync = cmd->sync, .queue = cmd->queue, .stage = stage, .access = access, }; } // We never need to include pipeline barriers for reads, only writes last.access &= vk_access_write; return last; } struct vk_cmdpool *vk_cmdpool_create(struct vk_ctx *vk, int qf, int qnum, VkQueueFamilyProperties props) { struct vk_cmdpool *pool = pl_alloc_ptr(NULL, pool); *pool = (struct vk_cmdpool) { .vk = vk, .props = props, .qf = qf, .queues = pl_calloc(pool, qnum, sizeof(VkQueue)), .num_queues = qnum, }; for (int n = 0; n < qnum; n++) vk->GetDeviceQueue(vk->dev, qf, n, &pool->queues[n]); VkCommandPoolCreateInfo cinfo = { .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, .flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT | VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, .queueFamilyIndex = qf, }; VK(vk->CreateCommandPool(vk->dev, &cinfo, PL_VK_ALLOC, &pool->pool)); return pool; error: vk_cmdpool_destroy(pool); vk->failed = true; return NULL; } void vk_cmdpool_destroy(struct vk_cmdpool *pool) { if (!pool) return; for (int i = 0; i < pool->cmds.num; i++) vk_cmd_destroy(pool->cmds.elem[i]); struct vk_ctx *vk = pool->vk; vk->DestroyCommandPool(vk->dev, pool->pool, PL_VK_ALLOC); pl_free(pool); } struct vk_cmd *vk_cmd_begin(struct vk_cmdpool *pool, pl_debug_tag debug_tag) { struct vk_ctx *vk = pool->vk; // Garbage collect the cmdpool first, to increase the chances of getting // an already-available command buffer. 
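    // (A timeout of 0 makes this a pure non-blocking poll, see the comment
    // on `vk_poll_commands` in command.h.)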
vk_poll_commands(vk, 0); struct vk_cmd *cmd = NULL; pl_mutex_lock(&vk->lock); if (!PL_ARRAY_POP(pool->cmds, &cmd)) { cmd = vk_cmd_create(pool); if (!cmd) { pl_mutex_unlock(&vk->lock); goto error; } } cmd->qindex = pool->idx_queues; cmd->queue = pool->queues[cmd->qindex]; pl_mutex_unlock(&vk->lock); VkCommandBufferBeginInfo binfo = { .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, }; VK(vk->BeginCommandBuffer(cmd->buf, &binfo)); debug_tag = PL_DEF(debug_tag, "vk_cmd"); PL_VK_NAME_HANDLE(COMMAND_BUFFER, cmd->buf, debug_tag); PL_VK_NAME(SEMAPHORE, cmd->sync.sem, debug_tag); cmd->sync.value++; vk_cmd_sig(cmd, VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, cmd->sync); return cmd; error: // Something has to be seriously messed up if we get to this point vk_cmd_destroy(cmd); vk->failed = true; return NULL; } static VkResult vk_queue_submit2(struct vk_ctx *vk, VkQueue queue, const VkSubmitInfo2 *info2, VkFence fence) { if (vk->QueueSubmit2KHR) return vk->QueueSubmit2KHR(queue, 1, info2, fence); const uint32_t num_deps = info2->waitSemaphoreInfoCount; const uint32_t num_sigs = info2->signalSemaphoreInfoCount; const uint32_t num_cmds = info2->commandBufferInfoCount; void *tmp = pl_tmp(NULL); VkSemaphore *deps = pl_calloc_ptr(tmp, num_deps, deps); VkPipelineStageFlags *masks = pl_calloc_ptr(tmp, num_deps, masks); uint64_t *depvals = pl_calloc_ptr(tmp, num_deps, depvals); VkSemaphore *sigs = pl_calloc_ptr(tmp, num_sigs, sigs); uint64_t *sigvals = pl_calloc_ptr(tmp, num_sigs, sigvals); VkCommandBuffer *cmds = pl_calloc_ptr(tmp, num_cmds, cmds); for (int i = 0; i < num_deps; i++) { deps[i] = info2->pWaitSemaphoreInfos[i].semaphore; masks[i] = info2->pWaitSemaphoreInfos[i].stageMask; depvals[i] = info2->pWaitSemaphoreInfos[i].value; } for (int i = 0; i < num_sigs; i++) { sigs[i] = info2->pSignalSemaphoreInfos[i].semaphore; sigvals[i] = info2->pSignalSemaphoreInfos[i].value; } for (int i = 0; i < num_cmds; i++) cmds[i] = info2->pCommandBufferInfos[i].commandBuffer; const VkTimelineSemaphoreSubmitInfo tinfo = { .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO, .pNext = info2->pNext, .waitSemaphoreValueCount = num_deps, .pWaitSemaphoreValues = depvals, .signalSemaphoreValueCount = num_sigs, .pSignalSemaphoreValues = sigvals, }; const VkSubmitInfo info = { .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, .pNext = &tinfo, .waitSemaphoreCount = num_deps, .pWaitSemaphores = deps, .pWaitDstStageMask = masks, .commandBufferCount = num_cmds, .pCommandBuffers = cmds, .signalSemaphoreCount = num_sigs, .pSignalSemaphores = sigs, }; VkResult res = vk->QueueSubmit(queue, 1, &info, fence); pl_free(tmp); return res; } bool vk_cmd_submit(struct vk_cmd **pcmd) { struct vk_cmd *cmd = *pcmd; if (!cmd) return true; *pcmd = NULL; struct vk_cmdpool *pool = cmd->pool; struct vk_ctx *vk = pool->vk; VK(vk->EndCommandBuffer(cmd->buf)); VkSubmitInfo2 sinfo = { .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO_2, .waitSemaphoreInfoCount = cmd->deps.num, .pWaitSemaphoreInfos = cmd->deps.elem, .signalSemaphoreInfoCount = cmd->sigs.num, .pSignalSemaphoreInfos = cmd->sigs.elem, .commandBufferInfoCount = 1, .pCommandBufferInfos = &(VkCommandBufferSubmitInfo) { .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO, .commandBuffer = cmd->buf, }, }; if (pl_msg_test(vk->log, PL_LOG_TRACE)) { PL_TRACE(vk, "Submitting command %p on queue %p (QF %d):", (void *) cmd->buf, (void *) cmd->queue, pool->qf); for (int n = 0; n < cmd->deps.num; n++) { PL_TRACE(vk, " waits on semaphore 0x%"PRIx64" = %"PRIu64, 
(uint64_t) cmd->deps.elem[n].semaphore, cmd->deps.elem[n].value); } for (int n = 0; n < cmd->sigs.num; n++) { PL_TRACE(vk, " signals semaphore 0x%"PRIx64" = %"PRIu64, (uint64_t) cmd->sigs.elem[n].semaphore, cmd->sigs.elem[n].value); } if (cmd->callbacks.num) PL_TRACE(vk, " signals %d callbacks", cmd->callbacks.num); } vk->lock_queue(vk->queue_ctx, pool->qf, cmd->qindex); VkResult res = vk_queue_submit2(vk, cmd->queue, &sinfo, VK_NULL_HANDLE); vk->unlock_queue(vk->queue_ctx, pool->qf, cmd->qindex); PL_VK_ASSERT(res, "vkQueueSubmit2"); pl_mutex_lock(&vk->lock); PL_ARRAY_APPEND(vk->alloc, vk->cmds_pending, cmd); pl_mutex_unlock(&vk->lock); return true; error: vk_cmd_reset(cmd); pl_mutex_lock(&vk->lock); PL_ARRAY_APPEND(pool, pool->cmds, cmd); pl_mutex_unlock(&vk->lock); vk->failed = true; return false; } bool vk_poll_commands(struct vk_ctx *vk, uint64_t timeout) { bool ret = false; pl_mutex_lock(&vk->lock); while (vk->cmds_pending.num) { struct vk_cmd *cmd = vk->cmds_pending.elem[0]; struct vk_cmdpool *pool = cmd->pool; pl_mutex_unlock(&vk->lock); // don't hold mutex while blocking if (vk_cmd_poll(cmd, timeout) == VK_TIMEOUT) return ret; pl_mutex_lock(&vk->lock); if (!vk->cmds_pending.num || vk->cmds_pending.elem[0] != cmd) continue; // another thread modified this state while blocking PL_TRACE(vk, "VkSemaphore signalled: 0x%"PRIx64" = %"PRIu64, (uint64_t) cmd->sync.sem, cmd->sync.value); PL_ARRAY_REMOVE_AT(vk->cmds_pending, 0); // remove before callbacks vk_cmd_reset(cmd); PL_ARRAY_APPEND(pool, pool->cmds, cmd); ret = true; // If we've successfully spent some time waiting for at least one // command, disable the timeout. This has the dual purpose of both // making sure we don't over-wait due to repeat timeout application, // but also makes sure we don't block on future commands if we've // already spend time waiting for one. timeout = 0; } pl_mutex_unlock(&vk->lock); return ret; } void vk_rotate_queues(struct vk_ctx *vk) { pl_mutex_lock(&vk->lock); // Rotate the queues to ensure good parallelism across frames for (int i = 0; i < vk->pools.num; i++) { struct vk_cmdpool *pool = vk->pools.elem[i]; pool->idx_queues = (pool->idx_queues + 1) % pool->num_queues; PL_TRACE(vk, "QF %d: %d/%d", pool->qf, pool->idx_queues, pool->num_queues); } pl_mutex_unlock(&vk->lock); } void vk_wait_idle(struct vk_ctx *vk) { while (vk_poll_commands(vk, UINT64_MAX)) ; } libplacebo-v7.349.0/src/vulkan/command.h000066400000000000000000000144601463457750100200570ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #pragma once #include "common.h" // Since lots of vulkan operations need to be done lazily once the affected // resources are no longer in use, provide an abstraction for tracking these. // In practice, these are only checked and run when submitting new commands, so // the actual execution may be delayed by a frame. 
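// A rough usage sketch (hypothetical caller code, the names are invented
// here): free a host allocation only once the command reading from it has
// finished executing on the GPU.
//
//     static void free_staging(void *priv, void *arg)
//     {
//         pl_free(arg); // `arg` carries the staging allocation
//     }
//
//     // ... while recording into `cmd`:
//     vk_cmd_callback(cmd, free_staging, NULL, staging);
//     // `free_staging` later runs from vk_poll_commands(), once `cmd` has
//     // completed execution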
typedef void (*vk_cb)(void *priv, void *arg); #define VK_CB_FUNC(name) name ## _cb #define VK_CB_FUNC_DEF(name) \ static void VK_CB_FUNC(name)(void *priv, void *arg) { \ name(priv, arg); \ } struct vk_callback { vk_cb run; void *priv; void *arg; }; // Associate a callback with the completion of all currently pending commands. // This will essentially run once the device is completely idle. void vk_dev_callback(struct vk_ctx *vk, vk_cb callback, const void *priv, const void *arg); // Helper wrapper around command buffers that also track dependencies, // callbacks and synchronization primitives // // Thread-safety: Unsafe struct vk_cmd { struct vk_cmdpool *pool; // pool it was allocated from pl_vulkan_sem sync; // pending execution, tied to lifetime of device VkQueue queue; // the submission queue (for recording/pending) int qindex; // the index of `queue` in `pool` VkCommandBuffer buf; // the command buffer itself // Command dependencies and signals. Not owned by the vk_cmd. PL_ARRAY(VkSemaphoreSubmitInfo) deps; PL_ARRAY(VkSemaphoreSubmitInfo) sigs; // "Callbacks" to fire once a command completes. These are used for // multiple purposes, ranging from resource deallocation to fencing. PL_ARRAY(struct vk_callback) callbacks; }; // Associate a callback with the completion of the current command. This // function will be run once the command completes, or shortly thereafter. void vk_cmd_callback(struct vk_cmd *cmd, vk_cb callback, const void *priv, const void *arg); // Associate a raw dependency for the current command. This semaphore must // signal by the corresponding stage before the command may execute. void vk_cmd_dep(struct vk_cmd *cmd, VkPipelineStageFlags2 stage, pl_vulkan_sem dep); // Associate a raw signal with the current command. This semaphore will signal // after the given stage completes. void vk_cmd_sig(struct vk_cmd *cmd, VkPipelineStageFlags2 stage, pl_vulkan_sem sig); // Compatibility wrappers for vkCmdPipelineBarrier2 (works with pre-1.3) void vk_cmd_barrier(struct vk_cmd *cmd, const VkDependencyInfo *info); // Synchronization scope struct vk_sync_scope { pl_vulkan_sem sync; // semaphore of last access VkQueue queue; // source queue of last access VkPipelineStageFlags2 stage;// stage bitmask of last access VkAccessFlags2 access; // access type bitmask }; // Synchronization primitive struct vk_sem { struct vk_sync_scope read, write; }; // Updates the `vk_sem` state for a given access. If `is_trans` is set, this // access is treated as a write (since it alters the resource's state). // // Returns a struct describing the previous access to a resource. A pipeline // barrier is only required if the previous access scope is nonzero. struct vk_sync_scope vk_sem_barrier(struct vk_cmd *cmd, struct vk_sem *sem, VkPipelineStageFlags2 stage, VkAccessFlags2 access, bool is_trans); // Command pool / queue family hybrid abstraction struct vk_cmdpool { struct vk_ctx *vk; VkQueueFamilyProperties props; int qf; // queue family index VkCommandPool pool; VkQueue *queues; int num_queues; int idx_queues; // Command buffers associated with this queue. These are available for // re-recording PL_ARRAY(struct vk_cmd *) cmds; }; // Set up a vk_cmdpool corresponding to a queue family. `qnum` may be less than // `props.queueCount`, to restrict the number of queues in this queue family. 
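// The pool's queues are later cycled once per frame by `vk_rotate_queues`
// (see below) to spread work across them, so `qnum` also effectively caps
// that per-frame parallelism.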
struct vk_cmdpool *vk_cmdpool_create(struct vk_ctx *vk, int qf, int qnum, VkQueueFamilyProperties props); void vk_cmdpool_destroy(struct vk_cmdpool *pool); // Fetch a command buffer from a command pool and begin recording to it. // Returns NULL on failure. struct vk_cmd *vk_cmd_begin(struct vk_cmdpool *pool, pl_debug_tag debug_tag); // Finish recording a command buffer and submit it for execution. This function // takes over ownership of **cmd, and sets *cmd to NULL in doing so. bool vk_cmd_submit(struct vk_cmd **cmd); // Block until some commands complete executing. This is the only function that // actually processes the callbacks. Will wait at most `timeout` nanoseconds // for the completion of any command. The timeout may also be passed as 0, in // which case this function will not block, but only poll for completed // commands. Returns whether any forward progress was made. // // This does *not* flush any queued commands, forgetting to do so may result // in infinite loops if waiting for the completion of callbacks that were // never flushed! bool vk_poll_commands(struct vk_ctx *vk, uint64_t timeout); // Rotate through queues in each command pool. Call this once per frame, after // submitting all of the command buffers for that frame. Calling this more // often than that is possible but bad for performance. void vk_rotate_queues(struct vk_ctx *vk); // Wait until all commands are complete, i.e. the device is idle. This is // basically equivalent to calling `vk_poll_commands` with a timeout of // UINT64_MAX until it returns `false`. void vk_wait_idle(struct vk_ctx *vk); libplacebo-v7.349.0/src/vulkan/common.h000066400000000000000000000176301463457750100177330ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #pragma once #define VK_NO_PROTOTYPES #define VK_ENABLE_BETA_EXTENSIONS // for VK_KHR_portability_subset #ifdef __APPLE__ #define VK_USE_PLATFORM_METAL_EXT #endif #include "../common.h" #include "../log.h" #include "../pl_thread.h" #include #ifdef PL_HAVE_WIN32 #include #include #endif // Vulkan allows the optional use of a custom allocator. We don't need one but // mark this parameter with a better name in case we ever decide to change this // in the future. 
(And to make the code more readable) #define PL_VK_ALLOC NULL // Type of a vulkan function that needs to be loaded #define PL_VK_FUN(name) PFN_vk##name name // Load a vulkan instance-level extension function directly (on the stack) #define PL_VK_LOAD_FUN(inst, name, get_addr) \ PL_VK_FUN(name) = (PFN_vk##name) get_addr(inst, "vk" #name); #ifndef VK_VENDOR_ID_NVIDIA #define VK_VENDOR_ID_NVIDIA 0x10DE #endif // Shared struct used to hold vulkan context information struct vk_ctx { pl_mutex lock; pl_vulkan vulkan; void *alloc; // host allocations bound to the lifetime of this vk_ctx struct vk_malloc *ma; // VRAM malloc layer pl_vk_inst internal_instance; pl_log log; VkInstance inst; VkPhysicalDevice physd; VkPhysicalDeviceProperties props; VkPhysicalDeviceFeatures2 features; uint32_t api_ver; // device API version VkDevice dev; bool imported; // device was not created by us // Generic error flag for catching "failed" devices bool failed; // Enabled extensions PL_ARRAY(const char *) exts; // Command pools (one per queue family) PL_ARRAY(struct vk_cmdpool *) pools; // Pointers into `pools` (always set) struct vk_cmdpool *pool_graphics; struct vk_cmdpool *pool_compute; struct vk_cmdpool *pool_transfer; // Queue locking functions PL_ARRAY(PL_ARRAY(pl_mutex)) queue_locks; void (*lock_queue)(void *queue_ctx, uint32_t qf, uint32_t idx); void (*unlock_queue)(void *queue_ctx, uint32_t qf, uint32_t idx); void *queue_ctx; // Pending commands. These are shared for the entire mpvk_ctx to ensure // submission and callbacks are FIFO PL_ARRAY(struct vk_cmd *) cmds_pending; // submitted but not completed // Pending callbacks that still need to be drained before processing // callbacks for the next command (in case commands are recursively being // polled from another callback) const struct vk_callback *pending_callbacks; int num_pending_callbacks; // Instance-level function pointers PL_VK_FUN(CreateDevice); PL_VK_FUN(EnumerateDeviceExtensionProperties); PL_VK_FUN(GetDeviceProcAddr); PL_VK_FUN(GetInstanceProcAddr); PL_VK_FUN(GetPhysicalDeviceExternalBufferProperties); PL_VK_FUN(GetPhysicalDeviceExternalSemaphoreProperties); PL_VK_FUN(GetPhysicalDeviceFeatures2KHR); PL_VK_FUN(GetPhysicalDeviceFormatProperties); PL_VK_FUN(GetPhysicalDeviceFormatProperties2KHR); PL_VK_FUN(GetPhysicalDeviceImageFormatProperties2KHR); PL_VK_FUN(GetPhysicalDeviceMemoryProperties); PL_VK_FUN(GetPhysicalDeviceProperties); PL_VK_FUN(GetPhysicalDeviceProperties2); PL_VK_FUN(GetPhysicalDeviceQueueFamilyProperties); PL_VK_FUN(GetPhysicalDeviceSurfaceCapabilitiesKHR); PL_VK_FUN(GetPhysicalDeviceSurfaceFormatsKHR); PL_VK_FUN(GetPhysicalDeviceSurfacePresentModesKHR); PL_VK_FUN(GetPhysicalDeviceSurfaceSupportKHR); // Device-level function pointers PL_VK_FUN(AcquireNextImageKHR); PL_VK_FUN(AllocateCommandBuffers); PL_VK_FUN(AllocateDescriptorSets); PL_VK_FUN(AllocateMemory); PL_VK_FUN(BeginCommandBuffer); PL_VK_FUN(BindBufferMemory); PL_VK_FUN(BindImageMemory); PL_VK_FUN(CmdBeginDebugUtilsLabelEXT); PL_VK_FUN(CmdBeginRenderPass); PL_VK_FUN(CmdBindDescriptorSets); PL_VK_FUN(CmdBindIndexBuffer); PL_VK_FUN(CmdBindPipeline); PL_VK_FUN(CmdBindVertexBuffers); PL_VK_FUN(CmdBlitImage); PL_VK_FUN(CmdClearColorImage); PL_VK_FUN(CmdCopyBuffer); PL_VK_FUN(CmdCopyBufferToImage); PL_VK_FUN(CmdCopyImage); PL_VK_FUN(CmdCopyImageToBuffer); PL_VK_FUN(CmdDispatch); PL_VK_FUN(CmdDraw); PL_VK_FUN(CmdDrawIndexed); PL_VK_FUN(CmdEndDebugUtilsLabelEXT); PL_VK_FUN(CmdEndRenderPass); PL_VK_FUN(CmdPipelineBarrier); PL_VK_FUN(CmdPipelineBarrier2KHR); 
PL_VK_FUN(CmdPushConstants); PL_VK_FUN(CmdPushDescriptorSetKHR); PL_VK_FUN(CmdResetQueryPool); PL_VK_FUN(CmdSetScissor); PL_VK_FUN(CmdSetViewport); PL_VK_FUN(CmdUpdateBuffer); PL_VK_FUN(CmdWriteTimestamp); PL_VK_FUN(CreateBuffer); PL_VK_FUN(CreateBufferView); PL_VK_FUN(CreateCommandPool); PL_VK_FUN(CreateComputePipelines); PL_VK_FUN(CreateDebugReportCallbackEXT); PL_VK_FUN(CreateDescriptorPool); PL_VK_FUN(CreateDescriptorSetLayout); PL_VK_FUN(CreateFence); PL_VK_FUN(CreateFramebuffer); PL_VK_FUN(CreateGraphicsPipelines); PL_VK_FUN(CreateImage); PL_VK_FUN(CreateImageView); PL_VK_FUN(CreatePipelineCache); PL_VK_FUN(CreatePipelineLayout); PL_VK_FUN(CreateQueryPool); PL_VK_FUN(CreateRenderPass); PL_VK_FUN(CreateSampler); PL_VK_FUN(CreateSemaphore); PL_VK_FUN(CreateShaderModule); PL_VK_FUN(CreateSwapchainKHR); PL_VK_FUN(DestroyBuffer); PL_VK_FUN(DestroyBufferView); PL_VK_FUN(DestroyCommandPool); PL_VK_FUN(DestroyDebugReportCallbackEXT); PL_VK_FUN(DestroyDescriptorPool); PL_VK_FUN(DestroyDescriptorSetLayout); PL_VK_FUN(DestroyDevice); PL_VK_FUN(DestroyFence); PL_VK_FUN(DestroyFramebuffer); PL_VK_FUN(DestroyImage); PL_VK_FUN(DestroyImageView); PL_VK_FUN(DestroyPipeline); PL_VK_FUN(DestroyPipelineCache); PL_VK_FUN(DestroyPipelineLayout); PL_VK_FUN(DestroyQueryPool); PL_VK_FUN(DestroyRenderPass); PL_VK_FUN(DestroySampler); PL_VK_FUN(DestroySemaphore); PL_VK_FUN(DestroyShaderModule); PL_VK_FUN(DestroySwapchainKHR); PL_VK_FUN(DeviceWaitIdle); PL_VK_FUN(EndCommandBuffer); PL_VK_FUN(FlushMappedMemoryRanges); PL_VK_FUN(FreeCommandBuffers); PL_VK_FUN(FreeMemory); PL_VK_FUN(GetBufferMemoryRequirements); PL_VK_FUN(GetDeviceQueue); PL_VK_FUN(GetImageDrmFormatModifierPropertiesEXT); PL_VK_FUN(GetImageMemoryRequirements2); PL_VK_FUN(GetImageSubresourceLayout); PL_VK_FUN(GetMemoryFdKHR); PL_VK_FUN(GetMemoryFdPropertiesKHR); PL_VK_FUN(GetMemoryHostPointerPropertiesEXT); PL_VK_FUN(GetPipelineCacheData); PL_VK_FUN(GetQueryPoolResults); PL_VK_FUN(GetSemaphoreFdKHR); PL_VK_FUN(GetSwapchainImagesKHR); PL_VK_FUN(InvalidateMappedMemoryRanges); PL_VK_FUN(MapMemory); PL_VK_FUN(QueuePresentKHR); PL_VK_FUN(QueueSubmit); PL_VK_FUN(QueueSubmit2KHR); PL_VK_FUN(QueueWaitIdle); PL_VK_FUN(ResetFences); PL_VK_FUN(ResetQueryPool); PL_VK_FUN(SetDebugUtilsObjectNameEXT); PL_VK_FUN(SetHdrMetadataEXT); PL_VK_FUN(UpdateDescriptorSets); PL_VK_FUN(WaitForFences); PL_VK_FUN(WaitSemaphores); #ifdef PL_HAVE_WIN32 PL_VK_FUN(GetMemoryWin32HandleKHR); PL_VK_FUN(GetSemaphoreWin32HandleKHR); #endif #ifdef VK_EXT_metal_objects PL_VK_FUN(ExportMetalObjectsEXT); #endif #ifdef VK_EXT_full_screen_exclusive PL_VK_FUN(AcquireFullScreenExclusiveModeEXT); #endif }; libplacebo-v7.349.0/src/vulkan/context.c000066400000000000000000001726421463457750100201270ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . 
*/ #include "common.h" #include "command.h" #include "utils.h" #include "gpu.h" #ifdef PL_HAVE_VK_PROC_ADDR VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vkGetInstanceProcAddr( VkInstance instance, const char* pName); #endif const struct pl_vk_inst_params pl_vk_inst_default_params = {0}; struct vk_fun { const char *name; size_t offset; bool device_level; }; struct vk_ext { const char *name; const struct vk_fun *funs; }; #define PL_VK_INST_FUN(N) \ { .name = "vk" #N, \ .offset = offsetof(struct vk_ctx, N), \ } #define PL_VK_DEV_FUN(N) \ { .name = "vk" #N, \ .offset = offsetof(struct vk_ctx, N), \ .device_level = true, \ } // Table of optional vulkan instance extensions static const char *vk_instance_extensions[] = { VK_KHR_SURFACE_EXTENSION_NAME, VK_EXT_SWAPCHAIN_COLOR_SPACE_EXTENSION_NAME, VK_KHR_EXTERNAL_MEMORY_CAPABILITIES_EXTENSION_NAME, VK_KHR_EXTERNAL_SEMAPHORE_CAPABILITIES_EXTENSION_NAME, VK_KHR_GET_SURFACE_CAPABILITIES_2_EXTENSION_NAME, }; // List of mandatory instance-level function pointers, including functions // associated with mandatory instance extensions static const struct vk_fun vk_inst_funs[] = { PL_VK_INST_FUN(CreateDevice), PL_VK_INST_FUN(EnumerateDeviceExtensionProperties), PL_VK_INST_FUN(GetDeviceProcAddr), PL_VK_INST_FUN(GetPhysicalDeviceExternalBufferProperties), PL_VK_INST_FUN(GetPhysicalDeviceExternalSemaphoreProperties), PL_VK_INST_FUN(GetPhysicalDeviceFeatures2KHR), PL_VK_INST_FUN(GetPhysicalDeviceFormatProperties), PL_VK_INST_FUN(GetPhysicalDeviceFormatProperties2KHR), PL_VK_INST_FUN(GetPhysicalDeviceImageFormatProperties2KHR), PL_VK_INST_FUN(GetPhysicalDeviceMemoryProperties), PL_VK_INST_FUN(GetPhysicalDeviceProperties), PL_VK_INST_FUN(GetPhysicalDeviceProperties2), PL_VK_INST_FUN(GetPhysicalDeviceQueueFamilyProperties), // These are not actually mandatory, but they're universal enough that we // just load them unconditionally (in lieu of not having proper support for // loading arbitrary instance extensions). Their use is generally guarded // behind various VkSurfaceKHR values already being provided by the API // user (implying this extension is loaded). 
PL_VK_INST_FUN(GetPhysicalDeviceSurfaceCapabilitiesKHR), PL_VK_INST_FUN(GetPhysicalDeviceSurfaceFormatsKHR), PL_VK_INST_FUN(GetPhysicalDeviceSurfacePresentModesKHR), PL_VK_INST_FUN(GetPhysicalDeviceSurfaceSupportKHR), }; // Table of vulkan device extensions and functions they load, including // functions exported by dependent instance-level extensions static const struct vk_ext vk_device_extensions[] = { { .name = VK_KHR_SWAPCHAIN_EXTENSION_NAME, .funs = (const struct vk_fun[]) { PL_VK_DEV_FUN(AcquireNextImageKHR), PL_VK_DEV_FUN(CreateSwapchainKHR), PL_VK_DEV_FUN(DestroySwapchainKHR), PL_VK_DEV_FUN(GetSwapchainImagesKHR), PL_VK_DEV_FUN(QueuePresentKHR), {0} }, }, { .name = VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME, .funs = (const struct vk_fun[]) { PL_VK_DEV_FUN(CmdPushDescriptorSetKHR), {0} }, }, { .name = VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME, .funs = (const struct vk_fun[]) { PL_VK_DEV_FUN(GetMemoryFdKHR), {0} }, }, { .name = VK_EXT_EXTERNAL_MEMORY_DMA_BUF_EXTENSION_NAME, .funs = (const struct vk_fun[]) { PL_VK_DEV_FUN(GetMemoryFdPropertiesKHR), {0} }, #ifdef PL_HAVE_WIN32 }, { .name = VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME, .funs = (const struct vk_fun[]) { PL_VK_DEV_FUN(GetMemoryWin32HandleKHR), {0} }, #endif }, { .name = VK_EXT_EXTERNAL_MEMORY_HOST_EXTENSION_NAME, .funs = (const struct vk_fun[]) { PL_VK_DEV_FUN(GetMemoryHostPointerPropertiesEXT), {0} }, }, { .name = VK_KHR_EXTERNAL_SEMAPHORE_FD_EXTENSION_NAME, .funs = (const struct vk_fun[]) { PL_VK_DEV_FUN(GetSemaphoreFdKHR), {0} }, #ifdef PL_HAVE_WIN32 }, { .name = VK_KHR_EXTERNAL_SEMAPHORE_WIN32_EXTENSION_NAME, .funs = (const struct vk_fun[]) { PL_VK_DEV_FUN(GetSemaphoreWin32HandleKHR), {0} }, #endif }, { .name = VK_EXT_PCI_BUS_INFO_EXTENSION_NAME, }, { .name = VK_EXT_HDR_METADATA_EXTENSION_NAME, .funs = (const struct vk_fun[]) { PL_VK_DEV_FUN(SetHdrMetadataEXT), {0} }, }, { .name = VK_EXT_IMAGE_DRM_FORMAT_MODIFIER_EXTENSION_NAME, .funs = (const struct vk_fun[]) { PL_VK_DEV_FUN(GetImageDrmFormatModifierPropertiesEXT), {0} }, #ifdef VK_KHR_portability_subset }, { .name = VK_KHR_PORTABILITY_SUBSET_EXTENSION_NAME, #endif #ifdef VK_EXT_metal_objects }, { .name = VK_EXT_METAL_OBJECTS_EXTENSION_NAME, .funs = (const struct vk_fun[]) { PL_VK_DEV_FUN(ExportMetalObjectsEXT), {0} }, #endif #ifdef VK_EXT_full_screen_exclusive }, { .name = VK_EXT_FULL_SCREEN_EXCLUSIVE_EXTENSION_NAME, .funs = (const struct vk_fun[]) { PL_VK_DEV_FUN(AcquireFullScreenExclusiveModeEXT), {0} }, #endif }, { .name = VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME, .funs = (const struct vk_fun[]) { PL_VK_DEV_FUN(CmdPipelineBarrier2KHR), PL_VK_DEV_FUN(QueueSubmit2KHR), {0} }, }, }; // Make sure to keep this in sync with the above! 
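//
// Illustrative sketch (an assumption about typical usage, not code consumed by
// this file): an API user who creates the VkDevice themselves, e.g. in order
// to call pl_vulkan_import(), can forward the list below directly into their
// VkDeviceCreateInfo, provided they first filter it against what
// vkEnumerateDeviceExtensionProperties() reports as available (plus
// VK_KHR_SWAPCHAIN_EXTENSION_NAME if they intend to present to a surface):
//
//     VkDeviceCreateInfo dinfo = {
//         .sType                   = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,
//         .ppEnabledExtensionNames = pl_vulkan_recommended_extensions,
//         .enabledExtensionCount   = pl_vulkan_num_recommended_extensions,
//         // ... queue create infos, enabled features, etc.
//     };
//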
const char * const pl_vulkan_recommended_extensions[] = { VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME, VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME, VK_EXT_EXTERNAL_MEMORY_HOST_EXTENSION_NAME, VK_EXT_EXTERNAL_MEMORY_DMA_BUF_EXTENSION_NAME, VK_KHR_EXTERNAL_SEMAPHORE_FD_EXTENSION_NAME, #ifdef PL_HAVE_WIN32 VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME, VK_KHR_EXTERNAL_SEMAPHORE_WIN32_EXTENSION_NAME, #endif VK_EXT_PCI_BUS_INFO_EXTENSION_NAME, VK_EXT_HDR_METADATA_EXTENSION_NAME, VK_EXT_IMAGE_DRM_FORMAT_MODIFIER_EXTENSION_NAME, #ifdef VK_KHR_portability_subset VK_KHR_PORTABILITY_SUBSET_EXTENSION_NAME, #endif #ifdef VK_EXT_metal_objects VK_EXT_METAL_OBJECTS_EXTENSION_NAME, #endif #ifdef VK_EXT_full_screen_exclusive VK_EXT_FULL_SCREEN_EXCLUSIVE_EXTENSION_NAME, #endif VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME, }; const int pl_vulkan_num_recommended_extensions = PL_ARRAY_SIZE(pl_vulkan_recommended_extensions); // +1 because VK_KHR_swapchain is not automatically pulled in static_assert(PL_ARRAY_SIZE(pl_vulkan_recommended_extensions) + 1 == PL_ARRAY_SIZE(vk_device_extensions), "pl_vulkan_recommended_extensions out of sync with " "vk_device_extensions?"); // Recommended features; keep in sync with libavutil vulkan hwcontext static const VkPhysicalDeviceVulkan13Features recommended_vk13 = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_FEATURES, .computeFullSubgroups = true, .maintenance4 = true, .shaderZeroInitializeWorkgroupMemory = true, .synchronization2 = true, }; static const VkPhysicalDeviceVulkan12Features recommended_vk12 = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES, .pNext = (void *) &recommended_vk13, .bufferDeviceAddress = true, .storagePushConstant8 = true, .shaderInt8 = true, .shaderFloat16 = true, .shaderSharedInt64Atomics = true, .storageBuffer8BitAccess = true, .uniformAndStorageBuffer8BitAccess = true, .vulkanMemoryModel = true, .vulkanMemoryModelDeviceScope = true, }; static const VkPhysicalDeviceVulkan11Features recommended_vk11 = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES, .pNext = (void *) &recommended_vk12, .samplerYcbcrConversion = true, .storagePushConstant16 = true, }; const VkPhysicalDeviceFeatures2 pl_vulkan_recommended_features = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2, .pNext = (void *) &recommended_vk11, .features = { .shaderImageGatherExtended = true, .shaderStorageImageReadWithoutFormat = true, .shaderStorageImageWriteWithoutFormat = true, // Needed for GPU-assisted validation, but not harmful to enable .fragmentStoresAndAtomics = true, .vertexPipelineStoresAndAtomics = true, .shaderInt64 = true, } }; // Required features static const VkPhysicalDeviceVulkan12Features required_vk12 = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES, .hostQueryReset = true, .timelineSemaphore = true, }; static const VkPhysicalDeviceVulkan11Features required_vk11 = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES, .pNext = (void *) &required_vk12, }; const VkPhysicalDeviceFeatures2 pl_vulkan_required_features = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2, .pNext = (void *) &required_vk11, }; static bool check_required_features(struct vk_ctx *vk) { #define CHECK_FEATURE(maj, min, feat) do { \ const VkPhysicalDeviceVulkan##maj##min##Features *f; \ f = vk_find_struct(&vk->features, \ VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_##maj##_##min##_FEATURES); \ if (!f || !f->feat) { \ PL_ERR(vk, "Missing device feature: " #feat); \ return false; \ } \ } while (0) CHECK_FEATURE(1, 2, hostQueryReset); 
CHECK_FEATURE(1, 2, timelineSemaphore); #undef CHECK_FEATURE return true; } // List of mandatory device-level functions // // Note: Also includes VK_EXT_debug_utils functions, even though they aren't // mandatory, simply because we load that extension in a special way. static const struct vk_fun vk_dev_funs[] = { PL_VK_DEV_FUN(AllocateCommandBuffers), PL_VK_DEV_FUN(AllocateDescriptorSets), PL_VK_DEV_FUN(AllocateMemory), PL_VK_DEV_FUN(BeginCommandBuffer), PL_VK_DEV_FUN(BindBufferMemory), PL_VK_DEV_FUN(BindImageMemory), PL_VK_DEV_FUN(CmdBeginDebugUtilsLabelEXT), PL_VK_DEV_FUN(CmdBeginRenderPass), PL_VK_DEV_FUN(CmdBindDescriptorSets), PL_VK_DEV_FUN(CmdBindIndexBuffer), PL_VK_DEV_FUN(CmdBindPipeline), PL_VK_DEV_FUN(CmdBindVertexBuffers), PL_VK_DEV_FUN(CmdBlitImage), PL_VK_DEV_FUN(CmdClearColorImage), PL_VK_DEV_FUN(CmdCopyBuffer), PL_VK_DEV_FUN(CmdCopyBufferToImage), PL_VK_DEV_FUN(CmdCopyImage), PL_VK_DEV_FUN(CmdCopyImageToBuffer), PL_VK_DEV_FUN(CmdDispatch), PL_VK_DEV_FUN(CmdDraw), PL_VK_DEV_FUN(CmdDrawIndexed), PL_VK_DEV_FUN(CmdEndDebugUtilsLabelEXT), PL_VK_DEV_FUN(CmdEndRenderPass), PL_VK_DEV_FUN(CmdPipelineBarrier), PL_VK_DEV_FUN(CmdPushConstants), PL_VK_DEV_FUN(CmdResetQueryPool), PL_VK_DEV_FUN(CmdSetScissor), PL_VK_DEV_FUN(CmdSetViewport), PL_VK_DEV_FUN(CmdUpdateBuffer), PL_VK_DEV_FUN(CmdWriteTimestamp), PL_VK_DEV_FUN(CreateBuffer), PL_VK_DEV_FUN(CreateBufferView), PL_VK_DEV_FUN(CreateCommandPool), PL_VK_DEV_FUN(CreateComputePipelines), PL_VK_DEV_FUN(CreateDescriptorPool), PL_VK_DEV_FUN(CreateDescriptorSetLayout), PL_VK_DEV_FUN(CreateFence), PL_VK_DEV_FUN(CreateFramebuffer), PL_VK_DEV_FUN(CreateGraphicsPipelines), PL_VK_DEV_FUN(CreateImage), PL_VK_DEV_FUN(CreateImageView), PL_VK_DEV_FUN(CreatePipelineCache), PL_VK_DEV_FUN(CreatePipelineLayout), PL_VK_DEV_FUN(CreateQueryPool), PL_VK_DEV_FUN(CreateRenderPass), PL_VK_DEV_FUN(CreateSampler), PL_VK_DEV_FUN(CreateSemaphore), PL_VK_DEV_FUN(CreateShaderModule), PL_VK_DEV_FUN(DestroyBuffer), PL_VK_DEV_FUN(DestroyBufferView), PL_VK_DEV_FUN(DestroyCommandPool), PL_VK_DEV_FUN(DestroyDescriptorPool), PL_VK_DEV_FUN(DestroyDescriptorSetLayout), PL_VK_DEV_FUN(DestroyDevice), PL_VK_DEV_FUN(DestroyFence), PL_VK_DEV_FUN(DestroyFramebuffer), PL_VK_DEV_FUN(DestroyImage), PL_VK_DEV_FUN(DestroyImageView), PL_VK_DEV_FUN(DestroyPipeline), PL_VK_DEV_FUN(DestroyPipelineCache), PL_VK_DEV_FUN(DestroyPipelineLayout), PL_VK_DEV_FUN(DestroyQueryPool), PL_VK_DEV_FUN(DestroyRenderPass), PL_VK_DEV_FUN(DestroySampler), PL_VK_DEV_FUN(DestroySemaphore), PL_VK_DEV_FUN(DestroyShaderModule), PL_VK_DEV_FUN(DeviceWaitIdle), PL_VK_DEV_FUN(EndCommandBuffer), PL_VK_DEV_FUN(FlushMappedMemoryRanges), PL_VK_DEV_FUN(FreeCommandBuffers), PL_VK_DEV_FUN(FreeMemory), PL_VK_DEV_FUN(GetBufferMemoryRequirements), PL_VK_DEV_FUN(GetDeviceQueue), PL_VK_DEV_FUN(GetImageMemoryRequirements2), PL_VK_DEV_FUN(GetImageSubresourceLayout), PL_VK_DEV_FUN(GetPipelineCacheData), PL_VK_DEV_FUN(GetQueryPoolResults), PL_VK_DEV_FUN(InvalidateMappedMemoryRanges), PL_VK_DEV_FUN(MapMemory), PL_VK_DEV_FUN(QueueSubmit), PL_VK_DEV_FUN(QueueWaitIdle), PL_VK_DEV_FUN(ResetFences), PL_VK_DEV_FUN(ResetQueryPool), PL_VK_DEV_FUN(SetDebugUtilsObjectNameEXT), PL_VK_DEV_FUN(UpdateDescriptorSets), PL_VK_DEV_FUN(WaitForFences), PL_VK_DEV_FUN(WaitSemaphores), }; static void load_vk_fun(struct vk_ctx *vk, const struct vk_fun *fun) { PFN_vkVoidFunction *pfn = (void *) ((uintptr_t) vk + (ptrdiff_t) fun->offset); if (fun->device_level) { *pfn = vk->GetDeviceProcAddr(vk->dev, fun->name); } else { *pfn = vk->GetInstanceProcAddr(vk->inst, 
fun->name); }; if (!*pfn) { // Some functions get their extension suffix stripped when promoted // to core. As a very simple work-around to this, try loading the // function a second time with the reserved suffixes stripped. static const char *ext_suffixes[] = { "KHR", "EXT" }; pl_str fun_name = pl_str0(fun->name); char buf[64]; for (int i = 0; i < PL_ARRAY_SIZE(ext_suffixes); i++) { if (!pl_str_eatend0(&fun_name, ext_suffixes[i])) continue; pl_assert(sizeof(buf) > fun_name.len); snprintf(buf, sizeof(buf), "%.*s", PL_STR_FMT(fun_name)); if (fun->device_level) { *pfn = vk->GetDeviceProcAddr(vk->dev, buf); } else { *pfn = vk->GetInstanceProcAddr(vk->inst, buf); } return; } } } // Private struct for pl_vk_inst struct priv { VkDebugUtilsMessengerEXT debug_utils_cb; }; void pl_vk_inst_destroy(pl_vk_inst *inst_ptr) { pl_vk_inst inst = *inst_ptr; if (!inst) return; struct priv *p = PL_PRIV(inst); if (p->debug_utils_cb) { PL_VK_LOAD_FUN(inst->instance, DestroyDebugUtilsMessengerEXT, inst->get_proc_addr); DestroyDebugUtilsMessengerEXT(inst->instance, p->debug_utils_cb, PL_VK_ALLOC); } PL_VK_LOAD_FUN(inst->instance, DestroyInstance, inst->get_proc_addr); DestroyInstance(inst->instance, PL_VK_ALLOC); pl_free_ptr((void **) inst_ptr); } static VkBool32 VKAPI_PTR vk_dbg_utils_cb(VkDebugUtilsMessageSeverityFlagBitsEXT sev, VkDebugUtilsMessageTypeFlagsEXT msgType, const VkDebugUtilsMessengerCallbackDataEXT *data, void *priv) { pl_log log = priv; // Ignore errors for messages that we consider false positives switch (data->messageIdNumber) { case 0x7cd0911d: // VUID-VkSwapchainCreateInfoKHR-imageExtent-01274 case 0x8928392f: // UNASSIGNED-BestPractices-NonSuccess-Result case 0xdc18ad6b: // UNASSIGNED-BestPractices-vkAllocateMemory-small-allocation case 0xb3d4346b: // UNASSIGNED-BestPractices-vkBindMemory-small-dedicated-allocation case 0x6cfe18a5: // UNASSIGNED-BestPractices-SemaphoreCount case 0x48a09f6c: // UNASSIGNED-BestPractices-pipeline-stage-flags // profile chain expectations case 0x30f4ac70: // VUID-VkImageCreateInfo-pNext-06811 return false; case 0x5f379b89: // UNASSIGNED-BestPractices-Error-Result if (strstr(data->pMessage, "VK_ERROR_FORMAT_NOT_SUPPORTED")) return false; break; case 0xf6a37cfa: // VUID-vkGetImageSubresourceLayout-format-04461 // Work around https://github.com/KhronosGroup/Vulkan-Docs/issues/2109 return false; } enum pl_log_level lev; switch (sev) { case VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT: lev = PL_LOG_ERR; break; case VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT: lev = PL_LOG_WARN; break; case VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT: lev = PL_LOG_DEBUG; break; case VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT: lev = PL_LOG_TRACE; break; default: lev = PL_LOG_INFO; break; } pl_msg(log, lev, "vk %s", data->pMessage); for (int i = 0; i < data->queueLabelCount; i++) pl_msg(log, lev, " during %s", data->pQueueLabels[i].pLabelName); for (int i = 0; i < data->cmdBufLabelCount; i++) pl_msg(log, lev, " inside %s", data->pCmdBufLabels[i].pLabelName); for (int i = 0; i < data->objectCount; i++) { const VkDebugUtilsObjectNameInfoEXT *obj = &data->pObjects[i]; pl_msg(log, lev, " using %s: %s (0x%llx)", vk_obj_type(obj->objectType), obj->pObjectName ? obj->pObjectName : "anon", (unsigned long long) obj->objectHandle); } // The return value of this function determines whether the call will // be explicitly aborted (to prevent GPU errors) or not. In this case, // we generally want this to be on for the validation errors, but nothing // else (e.g. 
performance warnings) bool is_error = (sev & VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT) && (msgType & VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT); if (is_error) { pl_log_stack_trace(log, lev); pl_debug_abort(); return true; } return false; } static PFN_vkGetInstanceProcAddr get_proc_addr_fallback(pl_log log, PFN_vkGetInstanceProcAddr get_proc_addr) { if (get_proc_addr) return get_proc_addr; #ifdef PL_HAVE_VK_PROC_ADDR return vkGetInstanceProcAddr; #else pl_fatal(log, "No `vkGetInstanceProcAddr` function provided, and " "libplacebo built without linking against this function!"); return NULL; #endif } #define PRINTF_VER(ver) \ (int) VK_API_VERSION_MAJOR(ver), \ (int) VK_API_VERSION_MINOR(ver), \ (int) VK_API_VERSION_PATCH(ver) pl_vk_inst pl_vk_inst_create(pl_log log, const struct pl_vk_inst_params *params) { void *tmp = pl_tmp(NULL); params = PL_DEF(params, &pl_vk_inst_default_params); VkInstance inst = NULL; pl_clock_t start; PL_ARRAY(const char *) exts = {0}; PFN_vkGetInstanceProcAddr get_addr; if (!(get_addr = get_proc_addr_fallback(log, params->get_proc_addr))) goto error; // Query instance version support uint32_t api_ver = VK_API_VERSION_1_0; PL_VK_LOAD_FUN(NULL, EnumerateInstanceVersion, get_addr); if (EnumerateInstanceVersion && EnumerateInstanceVersion(&api_ver) != VK_SUCCESS) goto error; pl_debug(log, "Available instance version: %d.%d.%d", PRINTF_VER(api_ver)); if (params->max_api_version) { api_ver = PL_MIN(api_ver, params->max_api_version); pl_info(log, "Restricting API version to %d.%d.%d... new version %d.%d.%d", PRINTF_VER(params->max_api_version), PRINTF_VER(api_ver)); } if (api_ver < PL_VK_MIN_VERSION) { pl_fatal(log, "Instance API version %d.%d.%d is lower than the minimum " "required version of %d.%d.%d, cannot proceed!", PRINTF_VER(api_ver), PRINTF_VER(PL_VK_MIN_VERSION)); goto error; } VkInstanceCreateInfo info = { .sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO, .pApplicationInfo = &(VkApplicationInfo) { .sType = VK_STRUCTURE_TYPE_APPLICATION_INFO, .apiVersion = api_ver, }, }; // Enumerate all supported layers start = pl_clock_now(); PL_VK_LOAD_FUN(NULL, EnumerateInstanceLayerProperties, get_addr); uint32_t num_layers_avail = 0; EnumerateInstanceLayerProperties(&num_layers_avail, NULL); VkLayerProperties *layers_avail = pl_calloc_ptr(tmp, num_layers_avail, layers_avail); EnumerateInstanceLayerProperties(&num_layers_avail, layers_avail); pl_log_cpu_time(log, start, pl_clock_now(), "enumerating instance layers"); pl_debug(log, "Available layers:"); for (int i = 0; i < num_layers_avail; i++) { pl_debug(log, " %s (v%d.%d.%d)", layers_avail[i].layerName, PRINTF_VER(layers_avail[i].specVersion)); } PL_ARRAY(const char *) layers = {0}; // Sorted by priority static const char *debug_layers[] = { "VK_LAYER_KHRONOS_validation", "VK_LAYER_LUNARG_standard_validation", }; // This layer has to be initialized first, otherwise all sorts of weirdness // happens (random segfaults, yum) bool debug = params->debug; uint32_t debug_layer = 0; // layer idx of debug layer uint32_t debug_layer_version = 0; if (debug) { for (int i = 0; i < PL_ARRAY_SIZE(debug_layers); i++) { for (int n = 0; n < num_layers_avail; n++) { if (strcmp(debug_layers[i], layers_avail[n].layerName) != 0) continue; debug_layer = n; debug_layer_version = layers_avail[n].specVersion; pl_info(log, "Enabling debug meta layer: %s (v%d.%d.%d)", debug_layers[i], PRINTF_VER(debug_layer_version)); PL_ARRAY_APPEND(tmp, layers, debug_layers[i]); goto debug_layers_done; } } // No layer found.. 
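// (The Khronos validation layer is normally provided by the Vulkan SDK or a
// distro package such as vulkan-validationlayers; when it is missing, API
// debugging is simply disabled below, with a warning. The loader can typically
// also inject it externally, e.g. via
// VK_INSTANCE_LAYERS=VK_LAYER_KHRONOS_validation.)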
pl_warn(log, "API debugging requested but no debug meta layers present... ignoring"); debug = false; } debug_layers_done: ; for (int i = 0; i < params->num_layers; i++) PL_ARRAY_APPEND(tmp, layers, params->layers[i]); for (int i = 0; i < params->num_opt_layers; i++) { const char *layer = params->opt_layers[i]; for (int n = 0; n < num_layers_avail; n++) { if (strcmp(layer, layers_avail[n].layerName) == 0) { PL_ARRAY_APPEND(tmp, layers, layer); break; } } } // Enumerate all supported extensions start = pl_clock_now(); PL_VK_LOAD_FUN(NULL, EnumerateInstanceExtensionProperties, get_addr); uint32_t num_exts_avail = 0; EnumerateInstanceExtensionProperties(NULL, &num_exts_avail, NULL); VkExtensionProperties *exts_avail = pl_calloc_ptr(tmp, num_exts_avail, exts_avail); EnumerateInstanceExtensionProperties(NULL, &num_exts_avail, exts_avail); struct { VkExtensionProperties *exts; uint32_t num_exts; } *layer_exts = pl_calloc_ptr(tmp, num_layers_avail, layer_exts); // Enumerate extensions from layers for (int i = 0; i < num_layers_avail; i++) { VkExtensionProperties **lexts = &layer_exts[i].exts; uint32_t *num = &layer_exts[i].num_exts; EnumerateInstanceExtensionProperties(layers_avail[i].layerName, num, NULL); *lexts = pl_calloc_ptr(tmp, *num, *lexts); EnumerateInstanceExtensionProperties(layers_avail[i].layerName, num, *lexts); // Replace all extensions that are already available globally by {0} for (int j = 0; j < *num; j++) { for (int k = 0; k < num_exts_avail; k++) { if (strcmp((*lexts)[j].extensionName, exts_avail[k].extensionName) == 0) (*lexts)[j] = (VkExtensionProperties) {0}; } } } pl_log_cpu_time(log, start, pl_clock_now(), "enumerating instance extensions"); pl_debug(log, "Available instance extensions:"); for (int i = 0; i < num_exts_avail; i++) pl_debug(log, " %s", exts_avail[i].extensionName); for (int i = 0; i < num_layers_avail; i++) { for (int j = 0; j < layer_exts[i].num_exts; j++) { if (!layer_exts[i].exts[j].extensionName[0]) continue; pl_debug(log, " %s (via %s)", layer_exts[i].exts[j].extensionName, layers_avail[i].layerName); } } // Add mandatory extensions PL_ARRAY_APPEND(tmp, exts, VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME); // Add optional extensions for (int i = 0; i < PL_ARRAY_SIZE(vk_instance_extensions); i++) { const char *ext = vk_instance_extensions[i]; for (int n = 0; n < num_exts_avail; n++) { if (strcmp(ext, exts_avail[n].extensionName) == 0) { PL_ARRAY_APPEND(tmp, exts, ext); break; } } } #ifdef VK_KHR_portability_enumeration // Required for macOS ( MoltenVK ) compatibility for (int n = 0; n < num_exts_avail; n++) { if (strcmp(VK_KHR_PORTABILITY_ENUMERATION_EXTENSION_NAME, exts_avail[n].extensionName) == 0) { PL_ARRAY_APPEND(tmp, exts, VK_KHR_PORTABILITY_ENUMERATION_EXTENSION_NAME); info.flags |= VK_INSTANCE_CREATE_ENUMERATE_PORTABILITY_BIT_KHR; break; } } #endif // Add extra user extensions for (int i = 0; i < params->num_extensions; i++) { const char *ext = params->extensions[i]; PL_ARRAY_APPEND(tmp, exts, ext); // Enable any additional layers that are required for this extension for (int n = 0; n < num_layers_avail; n++) { for (int j = 0; j < layer_exts[n].num_exts; j++) { if (!layer_exts[n].exts[j].extensionName[0]) continue; if (strcmp(ext, layer_exts[n].exts[j].extensionName) == 0) { PL_ARRAY_APPEND(tmp, layers, layers_avail[n].layerName); goto next_user_ext; } } } next_user_ext: ; } // Add extra optional user extensions for (int i = 0; i < params->num_opt_extensions; i++) { const char *ext = params->opt_extensions[i]; for (int n = 0; n < 
num_exts_avail; n++) { if (strcmp(ext, exts_avail[n].extensionName) == 0) { PL_ARRAY_APPEND(tmp, exts, ext); goto next_opt_user_ext; } } for (int n = 0; n < num_layers_avail; n++) { for (int j = 0; j < layer_exts[n].num_exts; j++) { if (!layer_exts[n].exts[j].extensionName[0]) continue; if (strcmp(ext, layer_exts[n].exts[j].extensionName) == 0) { PL_ARRAY_APPEND(tmp, exts, ext); PL_ARRAY_APPEND(tmp, layers, layers_avail[n].layerName); goto next_opt_user_ext; } } } next_opt_user_ext: ; } // If debugging is enabled, load the necessary debug utils extension if (debug) { const char * const ext = VK_EXT_DEBUG_UTILS_EXTENSION_NAME; for (int n = 0; n < num_exts_avail; n++) { if (strcmp(ext, exts_avail[n].extensionName) == 0) { PL_ARRAY_APPEND(tmp, exts, ext); goto debug_ext_done; } } for (int n = 0; n < layer_exts[debug_layer].num_exts; n++) { if (strcmp(ext, layer_exts[debug_layer].exts[n].extensionName) == 0) { PL_ARRAY_APPEND(tmp, exts, ext); goto debug_ext_done; } } // No extension found pl_warn(log, "API debug layers enabled but no debug report extension " "found... ignoring. Debug messages may be spilling to " "stdout/stderr!"); debug = false; } debug_ext_done: ; // Limit this to 1.3.250+ because of bugs in older versions. if (debug && params->debug_extra && debug_layer_version >= VK_MAKE_API_VERSION(0, 1, 3, 259)) { // Try enabling as many validation features as possible static const VkValidationFeatureEnableEXT validation_features[] = { VK_VALIDATION_FEATURE_ENABLE_GPU_ASSISTED_EXT, VK_VALIDATION_FEATURE_ENABLE_GPU_ASSISTED_RESERVE_BINDING_SLOT_EXT, VK_VALIDATION_FEATURE_ENABLE_BEST_PRACTICES_EXT, // Depends on timeline semaphores being implemented: // See https://github.com/KhronosGroup/Vulkan-ValidationLayers/issues/7600 //VK_VALIDATION_FEATURE_ENABLE_SYNCHRONIZATION_VALIDATION_EXT, }; static const VkValidationFeaturesEXT vinfo = { .sType = VK_STRUCTURE_TYPE_VALIDATION_FEATURES_EXT, .pEnabledValidationFeatures = validation_features, .enabledValidationFeatureCount = PL_ARRAY_SIZE(validation_features), }; const char * const ext = VK_EXT_VALIDATION_FEATURES_EXTENSION_NAME; for (int n = 0; n < num_exts_avail; n++) { if (strcmp(ext, exts_avail[n].extensionName) == 0) { PL_ARRAY_APPEND(tmp, exts, ext); vk_link_struct(&info, &vinfo); goto debug_extra_ext_done; } } for (int n = 0; n < layer_exts[debug_layer].num_exts; n++) { if (strcmp(ext, layer_exts[debug_layer].exts[n].extensionName) == 0) { PL_ARRAY_APPEND(tmp, exts, ext); vk_link_struct(&info, &vinfo); goto debug_extra_ext_done; } } pl_warn(log, "GPU-assisted validation enabled but not supported by " "instance, disabling..."); } debug_extra_ext_done: ; info.ppEnabledExtensionNames = exts.elem; info.enabledExtensionCount = exts.num; info.ppEnabledLayerNames = layers.elem; info.enabledLayerCount = layers.num; pl_info(log, "Creating vulkan instance%s", exts.num ? 
" with extensions:" : ""); for (int i = 0; i < exts.num; i++) pl_info(log, " %s", exts.elem[i]); if (layers.num) { pl_info(log, " and layers:"); for (int i = 0; i < layers.num; i++) pl_info(log, " %s", layers.elem[i]); } start = pl_clock_now(); PL_VK_LOAD_FUN(NULL, CreateInstance, get_addr); VkResult res = CreateInstance(&info, PL_VK_ALLOC, &inst); pl_log_cpu_time(log, start, pl_clock_now(), "creating vulkan instance"); if (res != VK_SUCCESS) { pl_fatal(log, "Failed creating instance: %s", vk_res_str(res)); goto error; } struct pl_vk_inst_t *pl_vk = pl_zalloc_obj(NULL, pl_vk, struct priv); struct priv *p = PL_PRIV(pl_vk); *pl_vk = (struct pl_vk_inst_t) { .instance = inst, .api_version = api_ver, .get_proc_addr = get_addr, .extensions = pl_steal(pl_vk, exts.elem), .num_extensions = exts.num, .layers = pl_steal(pl_vk, layers.elem), .num_layers = layers.num, }; // Set up a debug callback to catch validation messages if (debug) { VkDebugUtilsMessengerCreateInfoEXT dinfo = { .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT, .messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT, .messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT, .pfnUserCallback = vk_dbg_utils_cb, .pUserData = (void *) log, }; PL_VK_LOAD_FUN(inst, CreateDebugUtilsMessengerEXT, get_addr); CreateDebugUtilsMessengerEXT(inst, &dinfo, PL_VK_ALLOC, &p->debug_utils_cb); } pl_free(tmp); return pl_vk; error: pl_fatal(log, "Failed initializing vulkan instance"); if (inst) { PL_VK_LOAD_FUN(inst, DestroyInstance, get_addr); DestroyInstance(inst, PL_VK_ALLOC); } pl_free(tmp); return NULL; } const struct pl_vulkan_params pl_vulkan_default_params = { PL_VULKAN_DEFAULTS }; void pl_vulkan_destroy(pl_vulkan *pl_vk) { if (!*pl_vk) return; struct vk_ctx *vk = PL_PRIV(*pl_vk); if (vk->dev) { if ((*pl_vk)->gpu) { PL_DEBUG(vk, "Waiting for remaining commands..."); pl_gpu_finish((*pl_vk)->gpu); pl_assert(vk->cmds_pending.num == 0); pl_gpu_destroy((*pl_vk)->gpu); } vk_malloc_destroy(&vk->ma); for (int i = 0; i < vk->pools.num; i++) vk_cmdpool_destroy(vk->pools.elem[i]); if (!vk->imported) vk->DestroyDevice(vk->dev, PL_VK_ALLOC); } for (int i = 0; i < vk->queue_locks.num; i++) { for (int n = 0; n < vk->queue_locks.elem[i].num; n++) pl_mutex_destroy(&vk->queue_locks.elem[i].elem[n]); } pl_vk_inst_destroy(&vk->internal_instance); pl_mutex_destroy(&vk->lock); pl_free_ptr((void **) pl_vk); } static bool supports_surf(pl_log log, VkInstance inst, PFN_vkGetInstanceProcAddr get_addr, VkPhysicalDevice physd, VkSurfaceKHR surf) { // Hack for the VK macro's logging to work struct { pl_log log; } *vk = (void *) &log; PL_VK_LOAD_FUN(inst, GetPhysicalDeviceQueueFamilyProperties, get_addr); PL_VK_LOAD_FUN(inst, GetPhysicalDeviceSurfaceSupportKHR, get_addr); uint32_t qfnum = 0; GetPhysicalDeviceQueueFamilyProperties(physd, &qfnum, NULL); for (int i = 0; i < qfnum; i++) { VkBool32 sup = false; VK(GetPhysicalDeviceSurfaceSupportKHR(physd, i, surf, &sup)); if (sup) return true; } error: return false; } VkPhysicalDevice pl_vulkan_choose_device(pl_log log, const struct pl_vulkan_device_params *params) { // Hack for the VK macro's logging to work struct { pl_log log; } *vk = (void *) &log; PL_INFO(vk, "Probing for vulkan devices:"); pl_assert(params->instance); VkInstance inst = params->instance; 
VkPhysicalDevice dev = VK_NULL_HANDLE; PFN_vkGetInstanceProcAddr get_addr; if (!(get_addr = get_proc_addr_fallback(log, params->get_proc_addr))) return NULL; PL_VK_LOAD_FUN(inst, EnumeratePhysicalDevices, get_addr); PL_VK_LOAD_FUN(inst, GetPhysicalDeviceProperties2, get_addr); pl_assert(GetPhysicalDeviceProperties2); pl_clock_t start = pl_clock_now(); VkPhysicalDevice *devices = NULL; uint32_t num = 0; VK(EnumeratePhysicalDevices(inst, &num, NULL)); devices = pl_calloc_ptr(NULL, num, devices); VK(EnumeratePhysicalDevices(inst, &num, devices)); pl_log_cpu_time(log, start, pl_clock_now(), "enumerating physical devices"); static const struct { const char *name; int priority; } types[] = { [VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU] = {"discrete", 5}, [VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU] = {"integrated", 4}, [VK_PHYSICAL_DEVICE_TYPE_VIRTUAL_GPU] = {"virtual", 3}, [VK_PHYSICAL_DEVICE_TYPE_CPU] = {"software", 2}, [VK_PHYSICAL_DEVICE_TYPE_OTHER] = {"other", 1}, }; static const uint8_t nil[VK_UUID_SIZE] = {0}; bool uuid_set = memcmp(params->device_uuid, nil, VK_UUID_SIZE) != 0; int best = -1; for (int i = 0; i < num; i++) { VkPhysicalDeviceIDPropertiesKHR id_props = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES_KHR, }; VkPhysicalDeviceProperties2 prop = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR, .pNext = &id_props, }; GetPhysicalDeviceProperties2(devices[i], &prop); VkPhysicalDeviceType t = prop.properties.deviceType; const char *dtype = t < PL_ARRAY_SIZE(types) ? types[t].name : "unknown?"; PL_INFO(vk, " GPU %d: %s v%d.%d.%d (%s)", i, prop.properties.deviceName, PRINTF_VER(prop.properties.apiVersion), dtype); PL_INFO(vk, " uuid: %s", PRINT_UUID(id_props.deviceUUID)); if (params->surface) { if (!supports_surf(log, inst, get_addr, devices[i], params->surface)) { PL_DEBUG(vk, " -> excluding due to lack of surface support"); continue; } } if (uuid_set) { if (memcmp(id_props.deviceUUID, params->device_uuid, VK_UUID_SIZE) == 0) { dev = devices[i]; continue; } else { PL_DEBUG(vk, " -> excluding due to UUID mismatch"); continue; } } else if (params->device_name && params->device_name[0] != '\0') { if (strcmp(params->device_name, prop.properties.deviceName) == 0) { dev = devices[i]; continue; } else { PL_DEBUG(vk, " -> excluding due to name mismatch"); continue; } } if (!params->allow_software && t == VK_PHYSICAL_DEVICE_TYPE_CPU) { PL_DEBUG(vk, " -> excluding due to !params->allow_software"); continue; } if (prop.properties.apiVersion < PL_VK_MIN_VERSION) { PL_DEBUG(vk, " -> excluding due to too low API version"); continue; } int priority = t < PL_ARRAY_SIZE(types) ? 
types[t].priority : 0; if (priority > best) { dev = devices[i]; best = priority; } } error: pl_free(devices); return dev; } static void lock_queue_internal(void *priv, uint32_t qf, uint32_t qidx) { struct vk_ctx *vk = priv; pl_mutex_lock(&vk->queue_locks.elem[qf].elem[qidx]); } static void unlock_queue_internal(void *priv, uint32_t qf, uint32_t qidx) { struct vk_ctx *vk = priv; pl_mutex_unlock(&vk->queue_locks.elem[qf].elem[qidx]); } static void init_queue_locks(struct vk_ctx *vk, uint32_t qfnum, const VkQueueFamilyProperties *qfs) { vk->queue_locks.elem = pl_calloc_ptr(vk->alloc, qfnum, vk->queue_locks.elem); vk->queue_locks.num = qfnum; for (int i = 0; i < qfnum; i++) { const uint32_t qnum = qfs[i].queueCount; vk->queue_locks.elem[i].elem = pl_calloc(vk->alloc, qnum, sizeof(pl_mutex)); vk->queue_locks.elem[i].num = qnum; for (int n = 0; n < qnum; n++) pl_mutex_init(&vk->queue_locks.elem[i].elem[n]); } vk->lock_queue = lock_queue_internal; vk->unlock_queue = unlock_queue_internal; vk->queue_ctx = vk; } // Find the most specialized queue supported a combination of flags. In cases // where there are multiple queue families at the same specialization level, // this finds the one with the most queues. Returns -1 if no queue was found. static int find_qf(VkQueueFamilyProperties *qfs, int qfnum, VkQueueFlags flags) { int idx = -1; for (int i = 0; i < qfnum; i++) { if ((qfs[i].queueFlags & flags) != flags) continue; // QF is more specialized. Since we don't care about other bits like // SPARSE_BIT, mask the ones we're interestew in const VkQueueFlags mask = VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_TRANSFER_BIT | VK_QUEUE_COMPUTE_BIT; if (idx < 0 || (qfs[i].queueFlags & mask) < (qfs[idx].queueFlags & mask)) idx = i; // QF has more queues (at the same specialization level) if (qfs[i].queueFlags == qfs[idx].queueFlags && qfs[i].queueCount > qfs[idx].queueCount) idx = i; } return idx; } static bool device_init(struct vk_ctx *vk, const struct pl_vulkan_params *params) { pl_assert(vk->physd); void *tmp = pl_tmp(NULL); // Enumerate the queue families and find suitable families for each task uint32_t qfnum = 0; vk->GetPhysicalDeviceQueueFamilyProperties(vk->physd, &qfnum, NULL); VkQueueFamilyProperties *qfs = pl_calloc_ptr(tmp, qfnum, qfs); vk->GetPhysicalDeviceQueueFamilyProperties(vk->physd, &qfnum, qfs); init_queue_locks(vk, qfnum, qfs); PL_DEBUG(vk, "Queue families supported by device:"); for (int i = 0; i < qfnum; i++) { PL_DEBUG(vk, " %d: flags 0x%"PRIx32" num %"PRIu32, i, qfs[i].queueFlags, qfs[i].queueCount); } VkQueueFlagBits gfx_flags = VK_QUEUE_GRAPHICS_BIT; if (!params->async_compute) gfx_flags |= VK_QUEUE_COMPUTE_BIT; int idx_gfx = find_qf(qfs, qfnum, gfx_flags); int idx_comp = find_qf(qfs, qfnum, VK_QUEUE_COMPUTE_BIT); int idx_tf = find_qf(qfs, qfnum, VK_QUEUE_TRANSFER_BIT); if (idx_tf < 0) idx_tf = idx_comp; if (!params->async_compute) idx_comp = idx_gfx; if (!params->async_transfer) idx_tf = idx_gfx; PL_DEBUG(vk, "Using graphics queue %d", idx_gfx); if (idx_tf != idx_gfx) PL_INFO(vk, "Using async transfer (queue %d)", idx_tf); if (idx_comp != idx_gfx) PL_INFO(vk, "Using async compute (queue %d)", idx_comp); // Vulkan requires at least one GRAPHICS+COMPUTE queue, so if this fails // something is horribly wrong. 
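//
// Illustrative example on a hypothetical device: given
//   QF 0: GRAPHICS|COMPUTE|TRANSFER, 1 queue
//   QF 1: TRANSFER,                  2 queues
//   QF 2: COMPUTE|TRANSFER,          8 queues
// find_qf() picks QF 0 for graphics, QF 2 for compute and QF 1 for transfer
// (the most specialized family in each case), so with params->async_compute
// and params->async_transfer enabled, all three pools land on distinct
// queue families.
//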
pl_assert(idx_gfx >= 0 && idx_comp >= 0 && idx_tf >= 0); // If needed, ensure we can actually present to the surface using this queue if (params->surface) { VkBool32 sup = false; VK(vk->GetPhysicalDeviceSurfaceSupportKHR(vk->physd, idx_gfx, params->surface, &sup)); if (!sup) { PL_FATAL(vk, "Queue family does not support surface presentation!"); goto error; } } // Enumerate all supported extensions pl_clock_t start = pl_clock_now(); uint32_t num_exts_avail = 0; VK(vk->EnumerateDeviceExtensionProperties(vk->physd, NULL, &num_exts_avail, NULL)); VkExtensionProperties *exts_avail = pl_calloc_ptr(tmp, num_exts_avail, exts_avail); VK(vk->EnumerateDeviceExtensionProperties(vk->physd, NULL, &num_exts_avail, exts_avail)); pl_log_cpu_time(vk->log, start, pl_clock_now(), "enumerating device extensions"); PL_DEBUG(vk, "Available device extensions:"); for (int i = 0; i < num_exts_avail; i++) PL_DEBUG(vk, " %s", exts_avail[i].extensionName); // Add all extensions we need if (params->surface) PL_ARRAY_APPEND(vk->alloc, vk->exts, VK_KHR_SWAPCHAIN_EXTENSION_NAME); // Keep track of all optional function pointers associated with extensions PL_ARRAY(const struct vk_fun *) ext_funs = {0}; // Add all optional device-level extensions extensions for (int i = 0; i < PL_ARRAY_SIZE(vk_device_extensions); i++) { const struct vk_ext *ext = &vk_device_extensions[i]; uint32_t core_ver = vk_ext_promoted_ver(ext->name); if (core_ver && vk->api_ver >= core_ver) { // Layer is already implicitly enabled by the API version for (const struct vk_fun *f = ext->funs; f && f->name; f++) PL_ARRAY_APPEND(tmp, ext_funs, f); continue; } for (int n = 0; n < num_exts_avail; n++) { if (strcmp(ext->name, exts_avail[n].extensionName) == 0) { PL_ARRAY_APPEND(vk->alloc, vk->exts, ext->name); for (const struct vk_fun *f = ext->funs; f && f->name; f++) PL_ARRAY_APPEND(tmp, ext_funs, f); break; } } } // Add extra user extensions for (int i = 0; i < params->num_extensions; i++) PL_ARRAY_APPEND(vk->alloc, vk->exts, params->extensions[i]); // Add optional extra user extensions for (int i = 0; i < params->num_opt_extensions; i++) { const char *ext = params->opt_extensions[i]; for (int n = 0; n < num_exts_avail; n++) { if (strcmp(ext, exts_avail[n].extensionName) == 0) { PL_ARRAY_APPEND(vk->alloc, vk->exts, ext); break; } } } VkPhysicalDeviceFeatures2 features = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2_KHR }; vk_features_normalize(tmp, &pl_vulkan_required_features, vk->api_ver, &features); vk_features_normalize(tmp, &pl_vulkan_recommended_features, vk->api_ver, &features); vk_features_normalize(tmp, params->features, vk->api_ver, &features); // Explicitly clear the features struct before querying feature support // from the driver. This way, we don't mistakenly mark as supported // features coming from structs the driver doesn't have support for. 
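// (Net effect of the filtering below: any *optional* feature that was
// requested but is not advertised by the driver simply gets masked off,
// rather than failing device creation; only the features listed in
// pl_vulkan_required_features are hard-checked afterwards, by
// check_required_features().)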
VkPhysicalDeviceFeatures2 *features_sup = vk_chain_memdup(tmp, &features);; for (VkBaseOutStructure *out = (void *) features_sup; out; out = out->pNext) { const size_t size = vk_struct_size(out->sType); memset(&out[1], 0, size - sizeof(out[0])); } vk->GetPhysicalDeviceFeatures2KHR(vk->physd, features_sup); // Filter out unsupported features for (VkBaseOutStructure *f = (VkBaseOutStructure *) &features; f; f = f->pNext) { const VkBaseInStructure *sup = vk_find_struct(features_sup, f->sType); VkBool32 *flags = (VkBool32 *) &f[1]; const VkBool32 *flags_sup = (const VkBool32 *) &sup[1]; const size_t size = vk_struct_size(f->sType) - sizeof(VkBaseOutStructure); for (int i = 0; i < size / sizeof(VkBool32); i++) flags[i] &= flags_sup[i]; } // Construct normalized output chain vk->features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; vk_features_normalize(vk->alloc, &features, 0, &vk->features); if (!check_required_features(vk)) { PL_FATAL(vk, "Vulkan device does not support all required features!"); goto error; } // Enable all queues at device creation time, to maximize compatibility // with other API users (e.g. FFmpeg) PL_ARRAY(VkDeviceQueueCreateInfo) qinfos = {0}; for (int i = 0; i < qfnum; i++) { bool use_qf = i == idx_gfx || i == idx_comp || i == idx_tf; use_qf |= qfs[i].queueFlags & params->extra_queues; if (!use_qf) continue; PL_ARRAY_APPEND(tmp, qinfos, (VkDeviceQueueCreateInfo) { .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, .queueFamilyIndex = i, .queueCount = qfs[i].queueCount, .pQueuePriorities = pl_calloc(tmp, qfs[i].queueCount, sizeof(float)), }); } VkDeviceCreateInfo dinfo = { .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO, .pNext = &features, .pQueueCreateInfos = qinfos.elem, .queueCreateInfoCount = qinfos.num, .ppEnabledExtensionNames = vk->exts.elem, .enabledExtensionCount = vk->exts.num, }; PL_INFO(vk, "Creating vulkan device%s", vk->exts.num ? 
" with extensions:" : ""); for (int i = 0; i < vk->exts.num; i++) PL_INFO(vk, " %s", vk->exts.elem[i]); start = pl_clock_now(); VK(vk->CreateDevice(vk->physd, &dinfo, PL_VK_ALLOC, &vk->dev)); pl_log_cpu_time(vk->log, start, pl_clock_now(), "creating vulkan device"); // Load all mandatory device-level functions for (int i = 0; i < PL_ARRAY_SIZE(vk_dev_funs); i++) load_vk_fun(vk, &vk_dev_funs[i]); // Load all of the optional functions from the extensions we enabled for (int i = 0; i < ext_funs.num; i++) load_vk_fun(vk, ext_funs.elem[i]); // Create the command pools for the queues we care about const uint32_t qmax = PL_DEF(params->queue_count, UINT32_MAX); for (int i = 0; i < qfnum; i++) { if (i != idx_gfx && i != idx_tf && i != idx_comp) continue; // ignore QFs not used internally int qnum = qfs[i].queueCount; if (qmax < qnum) { PL_DEBUG(vk, "Restricting QF %d from %d queues to %d", i, qnum, qmax); qnum = qmax; } struct vk_cmdpool *pool = vk_cmdpool_create(vk, i, qnum, qfs[i]); if (!pool) goto error; PL_ARRAY_APPEND(vk->alloc, vk->pools, pool); // Update the pool_* pointers based on the corresponding index const char *qf_name = NULL; if (i == idx_tf) { vk->pool_transfer = pool; qf_name = "transfer"; } if (i == idx_comp) { vk->pool_compute = pool; qf_name = "compute"; } if (i == idx_gfx) { vk->pool_graphics = pool; qf_name = "graphics"; } for (int n = 0; n < pool->num_queues; n++) PL_VK_NAME_HANDLE(QUEUE, pool->queues[n], qf_name); } pl_free(tmp); return true; error: PL_FATAL(vk, "Failed creating logical device!"); pl_free(tmp); vk->failed = true; return false; } static void lock_queue(pl_vulkan pl_vk, uint32_t qf, uint32_t qidx) { struct vk_ctx *vk = PL_PRIV(pl_vk); vk->lock_queue(vk->queue_ctx, qf, qidx); } static void unlock_queue(pl_vulkan pl_vk, uint32_t qf, uint32_t qidx) { struct vk_ctx *vk = PL_PRIV(pl_vk); vk->unlock_queue(vk->queue_ctx, qf, qidx); } static bool finalize_context(struct pl_vulkan_t *pl_vk, int max_glsl_version) { struct vk_ctx *vk = PL_PRIV(pl_vk); pl_assert(vk->pool_graphics); pl_assert(vk->pool_compute); pl_assert(vk->pool_transfer); vk->ma = vk_malloc_create(vk); if (!vk->ma) return false; pl_vk->gpu = pl_gpu_create_vk(vk); if (!pl_vk->gpu) return false; // Blacklist / restrict features if (max_glsl_version) { struct pl_glsl_version *glsl = (struct pl_glsl_version *) &pl_vk->gpu->glsl; glsl->version = PL_MIN(glsl->version, max_glsl_version); glsl->version = PL_MAX(glsl->version, 140); // required for GL_KHR_vulkan_glsl PL_INFO(vk, "Restricting GLSL version to %d... 
new version is %d", max_glsl_version, glsl->version); } // Expose the resulting vulkan objects pl_vk->instance = vk->inst; pl_vk->phys_device = vk->physd; pl_vk->device = vk->dev; pl_vk->get_proc_addr = vk->GetInstanceProcAddr; pl_vk->api_version = vk->api_ver; pl_vk->extensions = vk->exts.elem; pl_vk->num_extensions = vk->exts.num; pl_vk->features = &vk->features; pl_vk->num_queues = vk->pools.num; pl_vk->queues = pl_calloc_ptr(vk->alloc, vk->pools.num, pl_vk->queues); pl_vk->lock_queue = lock_queue; pl_vk->unlock_queue = unlock_queue; for (int i = 0; i < vk->pools.num; i++) { struct pl_vulkan_queue *queues = (struct pl_vulkan_queue *) pl_vk->queues; queues[i] = (struct pl_vulkan_queue) { .index = vk->pools.elem[i]->qf, .count = vk->pools.elem[i]->num_queues, }; if (vk->pools.elem[i] == vk->pool_graphics) pl_vk->queue_graphics = queues[i]; if (vk->pools.elem[i] == vk->pool_compute) pl_vk->queue_compute = queues[i]; if (vk->pools.elem[i] == vk->pool_transfer) pl_vk->queue_transfer = queues[i]; } pl_assert(vk->lock_queue); pl_assert(vk->unlock_queue); return true; } pl_vulkan pl_vulkan_create(pl_log log, const struct pl_vulkan_params *params) { params = PL_DEF(params, &pl_vulkan_default_params); struct pl_vulkan_t *pl_vk = pl_zalloc_obj(NULL, pl_vk, struct vk_ctx); struct vk_ctx *vk = PL_PRIV(pl_vk); *vk = (struct vk_ctx) { .vulkan = pl_vk, .alloc = pl_vk, .log = log, .inst = params->instance, .GetInstanceProcAddr = get_proc_addr_fallback(log, params->get_proc_addr), }; pl_mutex_init_type(&vk->lock, PL_MUTEX_RECURSIVE); if (!vk->GetInstanceProcAddr) goto error; if (!vk->inst) { pl_assert(!params->surface); pl_assert(!params->device); PL_DEBUG(vk, "No VkInstance provided, creating one..."); // Mirror the instance params here to set `get_proc_addr` correctly struct pl_vk_inst_params iparams; iparams = *PL_DEF(params->instance_params, &pl_vk_inst_default_params); iparams.get_proc_addr = params->get_proc_addr; vk->internal_instance = pl_vk_inst_create(log, &iparams); if (!vk->internal_instance) goto error; vk->inst = vk->internal_instance->instance; } // Directly load all mandatory instance-level function pointers, since // these will be required for all further device creation logic for (int i = 0; i < PL_ARRAY_SIZE(vk_inst_funs); i++) load_vk_fun(vk, &vk_inst_funs[i]); // Choose the physical device if (params->device) { PL_DEBUG(vk, "Using specified VkPhysicalDevice"); vk->physd = params->device; } else { struct pl_vulkan_device_params dparams = { .instance = vk->inst, .get_proc_addr = params->get_proc_addr, .surface = params->surface, .device_name = params->device_name, .allow_software = params->allow_software, }; memcpy(dparams.device_uuid, params->device_uuid, VK_UUID_SIZE); vk->physd = pl_vulkan_choose_device(log, &dparams); if (!vk->physd) { PL_FATAL(vk, "Found no suitable device, giving up."); goto error; } } VkPhysicalDeviceIDPropertiesKHR id_props = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES_KHR, }; VkPhysicalDeviceProperties2KHR prop = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR, .pNext = &id_props, }; vk->GetPhysicalDeviceProperties2(vk->physd, &prop); vk->props = prop.properties; PL_INFO(vk, "Vulkan device properties:"); PL_INFO(vk, " Device Name: %s", prop.properties.deviceName); PL_INFO(vk, " Device ID: %"PRIx32":%"PRIx32, prop.properties.vendorID, prop.properties.deviceID); PL_INFO(vk, " Device UUID: %s", PRINT_UUID(id_props.deviceUUID)); PL_INFO(vk, " Driver version: %"PRIx32, prop.properties.driverVersion); PL_INFO(vk, " API version: 
%d.%d.%d", PRINTF_VER(prop.properties.apiVersion)); // Needed by device_init vk->api_ver = prop.properties.apiVersion; if (params->max_api_version) { vk->api_ver = PL_MIN(vk->api_ver, params->max_api_version); PL_INFO(vk, "Restricting API version to %d.%d.%d... new version %d.%d.%d", PRINTF_VER(params->max_api_version), PRINTF_VER(vk->api_ver)); } if (vk->api_ver < PL_VK_MIN_VERSION) { PL_FATAL(vk, "Device API version %d.%d.%d is lower than the minimum " "required version of %d.%d.%d, cannot proceed!", PRINTF_VER(vk->api_ver), PRINTF_VER(PL_VK_MIN_VERSION)); goto error; } // Finally, initialize the logical device and the rest of the vk_ctx if (!device_init(vk, params)) goto error; if (!finalize_context(pl_vk, params->max_glsl_version)) goto error; return pl_vk; error: PL_FATAL(vk, "Failed initializing vulkan device"); pl_vulkan_destroy((pl_vulkan *) &pl_vk); return NULL; } pl_vulkan pl_vulkan_import(pl_log log, const struct pl_vulkan_import_params *params) { void *tmp = pl_tmp(NULL); struct pl_vulkan_t *pl_vk = pl_zalloc_obj(NULL, pl_vk, struct vk_ctx); struct vk_ctx *vk = PL_PRIV(pl_vk); *vk = (struct vk_ctx) { .vulkan = pl_vk, .alloc = pl_vk, .log = log, .imported = true, .inst = params->instance, .physd = params->phys_device, .dev = params->device, .GetInstanceProcAddr = get_proc_addr_fallback(log, params->get_proc_addr), .lock_queue = params->lock_queue, .unlock_queue = params->unlock_queue, .queue_ctx = params->queue_ctx, }; pl_mutex_init_type(&vk->lock, PL_MUTEX_RECURSIVE); if (!vk->GetInstanceProcAddr) goto error; for (int i = 0; i < PL_ARRAY_SIZE(vk_inst_funs); i++) load_vk_fun(vk, &vk_inst_funs[i]); VkPhysicalDeviceIDPropertiesKHR id_props = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES_KHR, }; VkPhysicalDeviceProperties2KHR prop = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR, .pNext = &id_props, }; pl_assert(vk->GetPhysicalDeviceProperties2); vk->GetPhysicalDeviceProperties2(vk->physd, &prop); vk->props = prop.properties; PL_INFO(vk, "Imported vulkan device properties:"); PL_INFO(vk, " Device Name: %s", prop.properties.deviceName); PL_INFO(vk, " Device ID: %"PRIx32":%"PRIx32, prop.properties.vendorID, prop.properties.deviceID); PL_INFO(vk, " Device UUID: %s", PRINT_UUID(id_props.deviceUUID)); PL_INFO(vk, " Driver version: %"PRIx32, prop.properties.driverVersion); PL_INFO(vk, " API version: %d.%d.%d", PRINTF_VER(prop.properties.apiVersion)); vk->api_ver = prop.properties.apiVersion; if (params->max_api_version) { vk->api_ver = PL_MIN(vk->api_ver, params->max_api_version); PL_INFO(vk, "Restricting API version to %d.%d.%d... 
new version %d.%d.%d", PRINTF_VER(params->max_api_version), PRINTF_VER(vk->api_ver)); } if (vk->api_ver < PL_VK_MIN_VERSION) { PL_FATAL(vk, "Device API version %d.%d.%d is lower than the minimum " "required version of %d.%d.%d, cannot proceed!", PRINTF_VER(vk->api_ver), PRINTF_VER(PL_VK_MIN_VERSION)); goto error; } vk->features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; vk_features_normalize(vk->alloc, params->features, 0, &vk->features); if (!check_required_features(vk)) { PL_FATAL(vk, "Imported Vulkan device was not created with all required " "features!"); goto error; } // Load all mandatory device-level functions for (int i = 0; i < PL_ARRAY_SIZE(vk_dev_funs); i++) load_vk_fun(vk, &vk_dev_funs[i]); // Load all of the optional functions from the extensions enabled for (int i = 0; i < PL_ARRAY_SIZE(vk_device_extensions); i++) { const struct vk_ext *ext = &vk_device_extensions[i]; uint32_t core_ver = vk_ext_promoted_ver(ext->name); if (core_ver && vk->api_ver >= core_ver) { for (const struct vk_fun *f = ext->funs; f && f->name; f++) load_vk_fun(vk, f); continue; } for (int n = 0; n < params->num_extensions; n++) { if (strcmp(ext->name, params->extensions[n]) == 0) { for (const struct vk_fun *f = ext->funs; f && f->name; f++) load_vk_fun(vk, f); break; } } } uint32_t qfnum = 0; vk->GetPhysicalDeviceQueueFamilyProperties(vk->physd, &qfnum, NULL); VkQueueFamilyProperties *qfs = pl_calloc_ptr(tmp, qfnum, qfs); vk->GetPhysicalDeviceQueueFamilyProperties(vk->physd, &qfnum, qfs); if (!params->lock_queue) init_queue_locks(vk, qfnum, qfs); // Create the command pools for each unique qf that exists struct { const struct pl_vulkan_queue *info; struct vk_cmdpool **pool; VkQueueFlagBits flags; // *any* of these flags provide the cap } qinfos[] = { { .info = ¶ms->queue_graphics, .pool = &vk->pool_graphics, .flags = VK_QUEUE_GRAPHICS_BIT, }, { .info = ¶ms->queue_compute, .pool = &vk->pool_compute, .flags = VK_QUEUE_COMPUTE_BIT, }, { .info = ¶ms->queue_transfer, .pool = &vk->pool_transfer, .flags = VK_QUEUE_TRANSFER_BIT | VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT, } }; for (int i = 0; i < PL_ARRAY_SIZE(qinfos); i++) { int qf = qinfos[i].info->index; struct vk_cmdpool **pool = qinfos[i].pool; if (!qinfos[i].info->count) continue; // API sanity check pl_assert(qfs[qf].queueFlags & qinfos[i].flags); // See if we already created a pool for this queue family for (int j = 0; j < i; j++) { if (qinfos[j].info->count && qinfos[j].info->index == qf) { *pool = *qinfos[j].pool; goto next_qf; } } *pool = vk_cmdpool_create(vk, qf, qinfos[i].info->count, qfs[qf]); if (!*pool) goto error; PL_ARRAY_APPEND(vk->alloc, vk->pools, *pool); // Pre-emptively set "lower priority" pools as well for (int j = i+1; j < PL_ARRAY_SIZE(qinfos); j++) { if (qfs[qf].queueFlags & qinfos[j].flags) *qinfos[j].pool = *pool; } next_qf: ; } if (!vk->pool_graphics) { PL_ERR(vk, "No valid queues provided?"); goto error; } if (!finalize_context(pl_vk, params->max_glsl_version)) goto error; pl_free(tmp); return pl_vk; error: PL_FATAL(vk, "Failed importing vulkan device"); pl_vulkan_destroy((pl_vulkan *) &pl_vk); pl_free(tmp); return NULL; } libplacebo-v7.349.0/src/vulkan/formats.c000066400000000000000000000647251463457750100201200ustar00rootroot00000000000000/* * This file is part of libplacebo. 
* * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #include "formats.h" #define FMT(_name, num, size, ftype, bits, idx) \ (struct pl_fmt_t) { \ .name = _name, \ .type = PL_FMT_##ftype, \ .num_components = num, \ .component_depth = bits, \ .internal_size = size, \ .opaque = false, \ .texel_size = size, \ .texel_align = size, \ .host_bits = bits, \ .sample_order = idx, \ } #define IDX(...) {__VA_ARGS__} #define BITS(...) {__VA_ARGS__} #define REGFMT(name, num, bits, type) \ FMT(name, num, (num) * (bits) / 8, type, \ BITS(bits, bits, bits, bits), \ IDX(0, 1, 2, 3)) #define EMUFMT(_name, in, en, ib, eb, ftype) \ (struct pl_fmt_t) { \ .name = _name, \ .type = PL_FMT_##ftype, \ .num_components = en, \ .component_depth = BITS(ib, ib, ib, ib),\ .internal_size = (in) * (ib) / 8, \ .opaque = false, \ .emulated = true, \ .texel_size = (en) * (eb) / 8, \ .texel_align = (eb) / 8, \ .host_bits = BITS(eb, eb, eb, eb),\ .sample_order = IDX(0, 1, 2, 3), \ } #define PACKED16FMT(_name, num, b) \ (struct pl_fmt_t) { \ .name = _name, \ .type = PL_FMT_UNORM, \ .num_components = num, \ .component_depth = BITS(b, b, b, b), \ .internal_size = (num) * 2, \ .texel_size = (num) * 2, \ .texel_align = (num) * 2, \ .host_bits = BITS(16, 16, 16, 16),\ .sample_order = IDX(0, 1, 2, 3), \ } #define PLANARFMT(_name, planes, size, bits) \ (struct pl_fmt_t) { \ .name = _name, \ .type = PL_FMT_UNORM, \ .num_planes = planes, \ .num_components = 3, \ .component_depth = {bits, bits, bits}, \ .internal_size = size, \ .opaque = true, \ } static const struct vk_format rgb8e = { .tfmt = VK_FORMAT_R8G8B8A8_UNORM, .bfmt = VK_FORMAT_R8G8B8_UNORM, .icomps = 4, .fmt = EMUFMT("rgb8", 4, 3, 8, 8, UNORM), }; static const struct vk_format rgb16e = { .tfmt = VK_FORMAT_R16G16B16A16_UNORM, .bfmt = VK_FORMAT_R16G16B16_UNORM, .icomps = 4, .fmt = EMUFMT("rgb16", 4, 3, 16, 16, UNORM), }; static const struct vk_format vk_formats[] = { // Regular, byte-aligned integer formats {VK_FORMAT_R8_UNORM, REGFMT("r8", 1, 8, UNORM)}, {VK_FORMAT_R8G8_UNORM, REGFMT("rg8", 2, 8, UNORM)}, {VK_FORMAT_R8G8B8_UNORM, REGFMT("rgb8", 3, 8, UNORM), .emufmt = &rgb8e}, {VK_FORMAT_R8G8B8A8_UNORM, REGFMT("rgba8", 4, 8, UNORM)}, {VK_FORMAT_R16_UNORM, REGFMT("r16", 1, 16, UNORM)}, {VK_FORMAT_R16G16_UNORM, REGFMT("rg16", 2, 16, UNORM)}, {VK_FORMAT_R16G16B16_UNORM, REGFMT("rgb16", 3, 16, UNORM), .emufmt = &rgb16e}, {VK_FORMAT_R16G16B16A16_UNORM, REGFMT("rgba16", 4, 16, UNORM)}, {VK_FORMAT_R8_SNORM, REGFMT("r8s", 1, 8, SNORM)}, {VK_FORMAT_R8G8_SNORM, REGFMT("rg8s", 2, 8, SNORM)}, {VK_FORMAT_R8G8B8_SNORM, REGFMT("rgb8s", 3, 8, SNORM)}, {VK_FORMAT_R8G8B8A8_SNORM, REGFMT("rgba8s", 4, 8, SNORM)}, {VK_FORMAT_R16_SNORM, REGFMT("r16s", 1, 16, SNORM)}, {VK_FORMAT_R16G16_SNORM, REGFMT("rg16s", 2, 16, SNORM)}, {VK_FORMAT_R16G16B16_SNORM, REGFMT("rgb16s", 3, 16, SNORM)}, {VK_FORMAT_R16G16B16A16_SNORM, REGFMT("rgba16s", 4, 16, SNORM)}, // Float formats (native formats: hf = half float, df = double float) {VK_FORMAT_R16_SFLOAT, 
REGFMT("r16hf", 1, 16, FLOAT)}, {VK_FORMAT_R16G16_SFLOAT, REGFMT("rg16hf", 2, 16, FLOAT)}, {VK_FORMAT_R16G16B16_SFLOAT, REGFMT("rgb16hf", 3, 16, FLOAT)}, {VK_FORMAT_R16G16B16A16_SFLOAT, REGFMT("rgba16hf", 4, 16, FLOAT)}, {VK_FORMAT_R32_SFLOAT, REGFMT("r32f", 1, 32, FLOAT)}, {VK_FORMAT_R32G32_SFLOAT, REGFMT("rg32f", 2, 32, FLOAT)}, {VK_FORMAT_R32G32B32_SFLOAT, REGFMT("rgb32f", 3, 32, FLOAT)}, {VK_FORMAT_R32G32B32A32_SFLOAT, REGFMT("rgba32f", 4, 32, FLOAT)}, // Float formats (emulated upload/download) {VK_FORMAT_R16_SFLOAT, EMUFMT("r16f", 1, 1, 16, 32, FLOAT)}, {VK_FORMAT_R16G16_SFLOAT, EMUFMT("rg16f", 2, 2, 16, 32, FLOAT)}, {VK_FORMAT_R16G16B16_SFLOAT, EMUFMT("rgb16f", 3, 3, 16, 32, FLOAT)}, {VK_FORMAT_R16G16B16A16_SFLOAT, EMUFMT("rgba16f", 4, 4, 16, 32, FLOAT)}, // Integer-sampled formats {VK_FORMAT_R8_UINT, REGFMT("r8u", 1, 8, UINT)}, {VK_FORMAT_R8G8_UINT, REGFMT("rg8u", 2, 8, UINT)}, {VK_FORMAT_R8G8B8_UINT, REGFMT("rgb8u", 3, 8, UINT)}, {VK_FORMAT_R8G8B8A8_UINT, REGFMT("rgba8u", 4, 8, UINT)}, {VK_FORMAT_R16_UINT, REGFMT("r16u", 1, 16, UINT)}, {VK_FORMAT_R16G16_UINT, REGFMT("rg16u", 2, 16, UINT)}, {VK_FORMAT_R16G16B16_UINT, REGFMT("rgb16u", 3, 16, UINT)}, {VK_FORMAT_R16G16B16A16_UINT, REGFMT("rgba16u", 4, 16, UINT)}, {VK_FORMAT_R32_UINT, REGFMT("r32u", 1, 32, UINT)}, {VK_FORMAT_R32G32_UINT, REGFMT("rg32u", 2, 32, UINT)}, {VK_FORMAT_R32G32B32_UINT, REGFMT("rgb32u", 3, 32, UINT)}, {VK_FORMAT_R32G32B32A32_UINT, REGFMT("rgba32u", 4, 32, UINT)}, {VK_FORMAT_R8_SINT, REGFMT("r8i", 1, 8, SINT)}, {VK_FORMAT_R8G8_SINT, REGFMT("rg8i", 2, 8, SINT)}, {VK_FORMAT_R8G8B8_SINT, REGFMT("rgb8i", 3, 8, SINT)}, {VK_FORMAT_R8G8B8A8_SINT, REGFMT("rgba8i", 4, 8, SINT)}, {VK_FORMAT_R16_SINT, REGFMT("r16i", 1, 16, SINT)}, {VK_FORMAT_R16G16_SINT, REGFMT("rg16i", 2, 16, SINT)}, {VK_FORMAT_R16G16B16_SINT, REGFMT("rgb16i", 3, 16, SINT)}, {VK_FORMAT_R16G16B16A16_SINT, REGFMT("rgba16i", 4, 16, SINT)}, {VK_FORMAT_R32_SINT, REGFMT("r32i", 1, 32, SINT)}, {VK_FORMAT_R32G32_SINT, REGFMT("rg32i", 2, 32, SINT)}, {VK_FORMAT_R32G32B32_SINT, REGFMT("rgb32i", 3, 32, SINT)}, {VK_FORMAT_R32G32B32A32_SINT, REGFMT("rgba32i", 4, 32, SINT)}, // "Swapped" component order formats {VK_FORMAT_B8G8R8_UNORM, FMT("bgr8", 3, 3, UNORM, BITS(8, 8, 8), IDX(2, 1, 0))}, {VK_FORMAT_B8G8R8A8_UNORM, FMT("bgra8", 4, 4, UNORM, BITS(8, 8, 8, 8), IDX(2, 1, 0, 3))}, {VK_FORMAT_B8G8R8_UINT, FMT("bgr8u", 3, 3, UINT, BITS(8, 8, 8), IDX(2, 1, 0))}, {VK_FORMAT_B8G8R8A8_UINT, FMT("bgra8u", 4, 4, UINT, BITS(8, 8, 8, 8), IDX(2, 1, 0, 3))}, {VK_FORMAT_B8G8R8_SINT, FMT("bgr8i", 3, 3, SINT, BITS(8, 8, 8), IDX(2, 1, 0))}, {VK_FORMAT_B8G8R8A8_SINT, FMT("bgra8i", 4, 4, SINT, BITS(8, 8, 8, 8), IDX(2, 1, 0, 3))}, // "Packed" integer formats // // Note: These have the component order reversed from what the vulkan name // implies, because we order our IDX from LSB to MSB (consistent with the // usual ordering from lowest byte to highest byte, on little endian // platforms), but Vulkan names them from MSB to LSB. 
{VK_FORMAT_R4G4_UNORM_PACK8, FMT("gr4", 2, 1, UNORM, BITS(4, 4), IDX(1, 0))}, {VK_FORMAT_B4G4R4A4_UNORM_PACK16, FMT("argb4", 4, 2, UNORM, BITS(4, 4, 4, 4), IDX(3, 0, 1, 2))}, {VK_FORMAT_R4G4B4A4_UNORM_PACK16, FMT("abgr4", 4, 2, UNORM, BITS(4, 4, 4, 4), IDX(3, 2, 1, 0))}, {VK_FORMAT_R5G6B5_UNORM_PACK16, FMT("bgr565", 3, 2, UNORM, BITS(5, 6, 5), IDX(2, 1, 0))}, {VK_FORMAT_B5G6R5_UNORM_PACK16, FMT("rgb565", 3, 2, UNORM, BITS(5, 6, 5), IDX(0, 1, 2))}, {VK_FORMAT_R5G5B5A1_UNORM_PACK16, FMT("a1bgr5", 4, 2, UNORM, BITS(1, 5, 5, 5), IDX(3, 2, 1, 0))}, {VK_FORMAT_B5G5R5A1_UNORM_PACK16, FMT("a1rgb5", 4, 2, UNORM, BITS(1, 5, 5, 5), IDX(3, 0, 1, 2))}, {VK_FORMAT_A1R5G5B5_UNORM_PACK16, FMT("bgr5a1", 4, 2, UNORM, BITS(5, 5, 5, 1), IDX(2, 1, 0, 3))}, {VK_FORMAT_A2B10G10R10_UNORM_PACK32, FMT("rgb10a2", 4, 4, UNORM, BITS(10, 10, 10, 2), IDX(0, 1, 2, 3))}, {VK_FORMAT_A2R10G10B10_UNORM_PACK32, FMT("bgr10a2", 4, 4, UNORM, BITS(10, 10, 10, 2), IDX(2, 1, 0, 3))}, {VK_FORMAT_A2B10G10R10_SNORM_PACK32, FMT("rgb10a2s", 4, 4, SNORM, BITS(10, 10, 10, 2), IDX(0, 1, 2, 3))}, {VK_FORMAT_A2R10G10B10_SNORM_PACK32, FMT("bgr10a2s", 4, 4, SNORM, BITS(10, 10, 10, 2), IDX(2, 1, 0, 3))}, {VK_FORMAT_A2B10G10R10_UINT_PACK32, FMT("rgb10a2u", 4, 4, UINT, BITS(10, 10, 10, 2), IDX(0, 1, 2, 3))}, {VK_FORMAT_A2R10G10B10_UINT_PACK32, FMT("bgr10a2u", 4, 4, UINT, BITS(10, 10, 10, 2), IDX(2, 1, 0, 3))}, {VK_FORMAT_A2B10G10R10_SINT_PACK32, FMT("rgb10a2i", 4, 4, SINT, BITS(10, 10, 10, 2), IDX(0, 1, 2, 3))}, {VK_FORMAT_A2R10G10B10_SINT_PACK32, FMT("bgr10a2i", 4, 4, SINT, BITS(10, 10, 10, 2), IDX(2, 1, 0, 3))}, // Packed 16 bit formats {VK_FORMAT_R10X6_UNORM_PACK16, PACKED16FMT("rx10", 1, 10)}, {VK_FORMAT_R10X6G10X6_UNORM_2PACK16, PACKED16FMT("rxgx10", 2, 10)}, {VK_FORMAT_R12X4_UNORM_PACK16, PACKED16FMT("rx12", 1, 12)}, {VK_FORMAT_R12X4G12X4_UNORM_2PACK16, PACKED16FMT("rxgx12", 2, 12)}, // FIXME: enabling these requires VK_EXT_rgba10x6_formats or equivalent // {VK_FORMAT_R10X6G10X6B10X6A10X6_UNORM_4PACK16, PACKED16FMT("rxgxbxax10", 4, 10)}, // {VK_FORMAT_R12X4G12X4B12X4A12X4_UNORM_4PACK16, PACKED16FMT("rxgxbxax12", 4, 12)}, // Planar formats {VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM, PLANARFMT("g8_b8_r8_420", 3, 12, 8), .pfmt = { {VK_FORMAT_R8_UNORM}, {VK_FORMAT_R8_UNORM, .sx = 1, .sy = 1}, {VK_FORMAT_R8_UNORM, .sx = 1, .sy = 1}, }, }, {VK_FORMAT_G8_B8_R8_3PLANE_422_UNORM, PLANARFMT("g8_b8_r8_422", 3, 16, 8), .pfmt = { {VK_FORMAT_R8_UNORM}, {VK_FORMAT_R8_UNORM, .sx = 1}, {VK_FORMAT_R8_UNORM, .sx = 1}, }, }, {VK_FORMAT_G8_B8_R8_3PLANE_444_UNORM, PLANARFMT("g8_b8_r8_444", 3, 24, 8), .pfmt = { {VK_FORMAT_R8_UNORM}, {VK_FORMAT_R8_UNORM}, {VK_FORMAT_R8_UNORM}, }, }, {VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM, PLANARFMT("g16_b16_r16_420", 3, 24, 16), .pfmt = { {VK_FORMAT_R16_UNORM}, {VK_FORMAT_R16_UNORM, .sx = 1, .sy = 1}, {VK_FORMAT_R16_UNORM, .sx = 1, .sy = 1}, }, }, {VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM, PLANARFMT("g16_b16_r16_422", 3, 32, 16), .pfmt = { {VK_FORMAT_R16_UNORM}, {VK_FORMAT_R16_UNORM, .sx = 1}, {VK_FORMAT_R16_UNORM, .sx = 1}, }, }, {VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM, PLANARFMT("g16_b16_r16_444", 3, 48, 16), .pfmt = { {VK_FORMAT_R16_UNORM}, {VK_FORMAT_R16_UNORM}, {VK_FORMAT_R16_UNORM}, }, }, {VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_420_UNORM_3PACK16, PLANARFMT("gx10_bx10_rx10_420", 3, 24, 10), .pfmt = { {VK_FORMAT_R10X6_UNORM_PACK16}, {VK_FORMAT_R10X6_UNORM_PACK16, .sx = 1, .sy = 1}, {VK_FORMAT_R10X6_UNORM_PACK16, .sx = 1, .sy = 1}, }, }, {VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_422_UNORM_3PACK16, PLANARFMT("gx10_bx10_rx10_422", 3, 32, 10), 
.pfmt = { {VK_FORMAT_R10X6_UNORM_PACK16}, {VK_FORMAT_R10X6_UNORM_PACK16, .sx = 1}, {VK_FORMAT_R10X6_UNORM_PACK16, .sx = 1}, }, }, {VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_444_UNORM_3PACK16, PLANARFMT("gx10_bx10_rx10_444", 3, 48, 10), .pfmt = { {VK_FORMAT_R10X6_UNORM_PACK16}, {VK_FORMAT_R10X6_UNORM_PACK16}, {VK_FORMAT_R10X6_UNORM_PACK16}, }, }, {VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_420_UNORM_3PACK16, PLANARFMT("gx12_bx12_rx12_420", 3, 24, 12), .pfmt = { {VK_FORMAT_R12X4_UNORM_PACK16}, {VK_FORMAT_R12X4_UNORM_PACK16, .sx = 1, .sy = 1}, {VK_FORMAT_R12X4_UNORM_PACK16, .sx = 1, .sy = 1}, }, }, {VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_422_UNORM_3PACK16, PLANARFMT("gx12_bx12_rx12_422", 3, 32, 12), .pfmt = { {VK_FORMAT_R12X4_UNORM_PACK16}, {VK_FORMAT_R12X4_UNORM_PACK16, .sx = 1}, {VK_FORMAT_R12X4_UNORM_PACK16, .sx = 1}, }, }, {VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_444_UNORM_3PACK16, PLANARFMT("gx12_bx12_rx12_444", 3, 48, 12), .pfmt = { {VK_FORMAT_R12X4_UNORM_PACK16}, {VK_FORMAT_R12X4_UNORM_PACK16}, {VK_FORMAT_R12X4_UNORM_PACK16}, }, }, {VK_FORMAT_G8_B8R8_2PLANE_420_UNORM, PLANARFMT("g8_br8_420", 2, 12, 8), .pfmt = { {VK_FORMAT_R8_UNORM}, {VK_FORMAT_R8G8_UNORM, .sx = 1, .sy = 1}, }, }, {VK_FORMAT_G8_B8R8_2PLANE_422_UNORM, PLANARFMT("g8_br8_422", 2, 16, 8), .pfmt = { {VK_FORMAT_R8_UNORM}, {VK_FORMAT_R8G8_UNORM, .sx = 1}, }, }, {VK_FORMAT_G8_B8R8_2PLANE_444_UNORM, PLANARFMT("g8_br8_444", 2, 24, 8), .min_ver = VK_API_VERSION_1_3, .pfmt = { {VK_FORMAT_R8_UNORM}, {VK_FORMAT_R8G8_UNORM}, }, }, {VK_FORMAT_G16_B16R16_2PLANE_420_UNORM, PLANARFMT("g16_br16_420", 2, 24, 16), .pfmt = { {VK_FORMAT_R16_UNORM}, {VK_FORMAT_R16G16_UNORM, .sx = 1, .sy = 1}, }, }, {VK_FORMAT_G16_B16R16_2PLANE_422_UNORM, PLANARFMT("g16_br16_422", 2, 32, 16), .pfmt = { {VK_FORMAT_R16_UNORM}, {VK_FORMAT_R16G16_UNORM, .sx = 1}, }, }, {VK_FORMAT_G16_B16R16_2PLANE_444_UNORM, PLANARFMT("g16_br16_444", 2, 48, 16), .min_ver = VK_API_VERSION_1_3, .pfmt = { {VK_FORMAT_R16_UNORM}, {VK_FORMAT_R16G16_UNORM}, }, }, {VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16, PLANARFMT("gx10_bxrx10_420", 2, 24, 10), .pfmt = { {VK_FORMAT_R10X6_UNORM_PACK16}, {VK_FORMAT_R10X6G10X6_UNORM_2PACK16, .sx = 1, .sy = 1}, }, }, {VK_FORMAT_G10X6_B10X6R10X6_2PLANE_422_UNORM_3PACK16, PLANARFMT("gx10_bxrx10_422", 2, 32, 10), .pfmt = { {VK_FORMAT_R10X6_UNORM_PACK16}, {VK_FORMAT_R10X6G10X6_UNORM_2PACK16, .sx = 1}, }, }, {VK_FORMAT_G10X6_B10X6R10X6_2PLANE_444_UNORM_3PACK16, PLANARFMT("gx10_bxrx10_444", 2, 48, 10), .min_ver = VK_API_VERSION_1_3, .pfmt = { {VK_FORMAT_R10X6_UNORM_PACK16}, {VK_FORMAT_R10X6G10X6_UNORM_2PACK16}, }, }, {VK_FORMAT_G12X4_B12X4R12X4_2PLANE_420_UNORM_3PACK16, PLANARFMT("gx12_bxrx12_420", 2, 24, 12), .pfmt = { {VK_FORMAT_R12X4_UNORM_PACK16}, {VK_FORMAT_R12X4G12X4_UNORM_2PACK16, .sx = 1, .sy = 1}, }, }, {VK_FORMAT_G12X4_B12X4R12X4_2PLANE_422_UNORM_3PACK16, PLANARFMT("gx12_bxrx12_422", 2, 32, 12), .pfmt = { {VK_FORMAT_R12X4_UNORM_PACK16}, {VK_FORMAT_R12X4G12X4_UNORM_2PACK16, .sx = 1}, }, }, {VK_FORMAT_G12X4_B12X4R12X4_2PLANE_444_UNORM_3PACK16, PLANARFMT("gx12_bxrx12_444", 2, 48, 12), .min_ver = VK_API_VERSION_1_3, .pfmt = { {VK_FORMAT_R12X4_UNORM_PACK16}, {VK_FORMAT_R12X4G12X4_UNORM_2PACK16}, }, }, {0} }; #undef BITS #undef IDX #undef REGFMT #undef FMT void vk_setup_formats(struct pl_gpu_t *gpu) { struct pl_vk *p = PL_PRIV(gpu); struct vk_ctx *vk = p->vk; PL_ARRAY(pl_fmt) formats = {0}; // Texture format emulation requires at least support for texel buffers bool has_emu = gpu->glsl.compute && gpu->limits.max_buffer_texels; for (const struct vk_format *pvk_fmt = 
vk_formats; pvk_fmt->tfmt; pvk_fmt++) { const struct vk_format *vk_fmt = pvk_fmt; // Skip formats that require a too new version of Vulkan if (vk_fmt->min_ver > vk->api_ver) continue; // Skip formats with innately emulated representation if unsupported if (vk_fmt->fmt.emulated && !has_emu) continue; // Suppress some errors/warnings spit out by the format probing code pl_log_level_cap(vk->log, PL_LOG_INFO); bool has_drm_mods = vk->GetImageDrmFormatModifierPropertiesEXT; VkDrmFormatModifierPropertiesEXT modifiers[16] = {0}; VkDrmFormatModifierPropertiesListEXT drm_props = { .sType = VK_STRUCTURE_TYPE_DRM_FORMAT_MODIFIER_PROPERTIES_LIST_EXT, .drmFormatModifierCount = PL_ARRAY_SIZE(modifiers), .pDrmFormatModifierProperties = modifiers, }; VkFormatProperties2KHR prop2 = { .sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2, .pNext = has_drm_mods ? &drm_props : NULL, }; vk->GetPhysicalDeviceFormatProperties2KHR(vk->physd, vk_fmt->tfmt, &prop2); // If wholly unsupported, try falling back to the emulation formats // for texture operations VkFormatProperties *prop = &prop2.formatProperties; while (has_emu && !prop->optimalTilingFeatures && vk_fmt->emufmt) { vk_fmt = vk_fmt->emufmt; vk->GetPhysicalDeviceFormatProperties2KHR(vk->physd, vk_fmt->tfmt, &prop2); } VkFormatFeatureFlags texflags = prop->optimalTilingFeatures; VkFormatFeatureFlags bufflags = prop->bufferFeatures; if (vk_fmt->fmt.emulated) { // Emulated formats might have a different buffer representation // than their texture representation. If they don't, assume their // buffer representation is nonsensical (e.g. r16f) if (vk_fmt->bfmt) { vk->GetPhysicalDeviceFormatProperties(vk->physd, vk_fmt->bfmt, prop); bufflags = prop->bufferFeatures; } else { bufflags = 0; } } else if (vk_fmt->fmt.num_planes) { // Planar textures cannot be used directly texflags = bufflags = 0; } pl_log_level_cap(vk->log, PL_LOG_NONE); struct pl_fmt_t *fmt = pl_alloc_obj(gpu, fmt, struct pl_fmt_vk); struct pl_fmt_vk *fmtp = PL_PRIV(fmt); *fmt = vk_fmt->fmt; *fmtp = (struct pl_fmt_vk) { .vk_fmt = vk_fmt }; // Always set the signature to the actual texture format, so we can use // it to guarantee renderpass compatibility. 
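        // (The plane-resolution loop at the end of this function relies on the
        //  same signature: it matches each vk_fmt->pfmt[n].fmt against the
        //  signatures of previously added formats to fill in fmt->planes[n].)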
fmt->signature = (uint64_t) vk_fmt->tfmt; // For sanity, clear the superfluous fields for (int i = fmt->num_components; i < 4; i++) { fmt->component_depth[i] = 0; fmt->sample_order[i] = 0; fmt->host_bits[i] = 0; } // We can set this universally fmt->fourcc = pl_fmt_fourcc(fmt); if (has_drm_mods) { if (drm_props.drmFormatModifierCount == PL_ARRAY_SIZE(modifiers)) { PL_WARN(gpu, "DRM modifier list for format %s possibly truncated", fmt->name); } // Query the list of supported DRM modifiers from the driver PL_ARRAY(uint64_t) modlist = {0}; for (int i = 0; i < drm_props.drmFormatModifierCount; i++) { if (modifiers[i].drmFormatModifierPlaneCount > 1) { PL_TRACE(gpu, "Ignoring format modifier %s of " "format %s because its plane count %d > 1", PRINT_DRM_MOD(modifiers[i].drmFormatModifier), fmt->name, modifiers[i].drmFormatModifierPlaneCount); continue; } // Only warn about texture format features relevant to us const VkFormatFeatureFlags flag_mask = VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BLEND_BIT | VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT | VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT | VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT | VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT | VK_FORMAT_FEATURE_BLIT_SRC_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT; VkFormatFeatureFlags flags = modifiers[i].drmFormatModifierTilingFeatures; if ((flags & flag_mask) != (texflags & flag_mask)) { PL_DEBUG(gpu, "DRM format modifier %s of format %s " "supports fewer caps (0x%"PRIx32") than optimal tiling " "(0x%"PRIx32"), may result in limited capability!", PRINT_DRM_MOD(modifiers[i].drmFormatModifier), fmt->name, flags, texflags); } PL_ARRAY_APPEND(fmt, modlist, modifiers[i].drmFormatModifier); } fmt->num_modifiers = modlist.num; fmt->modifiers = modlist.elem; } else if (gpu->export_caps.tex & PL_HANDLE_DMA_BUF) { // Hard-code a list of static mods that we're likely to support static const uint64_t static_mods[2] = { DRM_FORMAT_MOD_INVALID, DRM_FORMAT_MOD_LINEAR, }; fmt->num_modifiers = PL_ARRAY_SIZE(static_mods); fmt->modifiers = static_mods; } struct { VkFormatFeatureFlags flags; enum pl_fmt_caps caps; } bufbits[] = { {VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT, PL_FMT_CAP_VERTEX}, {VK_FORMAT_FEATURE_UNIFORM_TEXEL_BUFFER_BIT, PL_FMT_CAP_TEXEL_UNIFORM}, {VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_BIT, PL_FMT_CAP_TEXEL_STORAGE}, }; for (int i = 0; i < PL_ARRAY_SIZE(bufbits); i++) { if ((bufflags & bufbits[i].flags) == bufbits[i].flags) fmt->caps |= bufbits[i].caps; } if (fmt->caps) { fmt->glsl_type = pl_var_glsl_type_name(pl_var_from_fmt(fmt, "")); pl_assert(fmt->glsl_type); } struct { VkFormatFeatureFlags flags; enum pl_fmt_caps caps; } bits[] = { {VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BLEND_BIT, PL_FMT_CAP_BLENDABLE}, {VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT, PL_FMT_CAP_LINEAR}, {VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT, PL_FMT_CAP_SAMPLEABLE}, {VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT, PL_FMT_CAP_STORABLE}, {VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT, PL_FMT_CAP_RENDERABLE}, // We don't distinguish between the two blit modes for pl_fmt_caps {VK_FORMAT_FEATURE_BLIT_SRC_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT, PL_FMT_CAP_BLITTABLE}, }; for (int i = 0; i < PL_ARRAY_SIZE(bits); i++) { if ((texflags & bits[i].flags) == bits[i].flags) fmt->caps |= bits[i].caps; } // For blit emulation via compute shaders if (!(fmt->caps & PL_FMT_CAP_BLITTABLE) && (fmt->caps & PL_FMT_CAP_STORABLE)) { fmt->caps |= PL_FMT_CAP_BLITTABLE; fmtp->blit_emulated = true; } // This is technically supported for all textures, but the semantics // of pl_gpu require it only be listed for 
non-opaque ones if (!fmt->opaque) fmt->caps |= PL_FMT_CAP_HOST_READABLE; // Vulkan requires a minimum GLSL version that supports textureGather() if (fmt->caps & PL_FMT_CAP_SAMPLEABLE) fmt->gatherable = true; // Disable implied capabilities where the dependencies are unavailable enum pl_fmt_caps storable = PL_FMT_CAP_STORABLE | PL_FMT_CAP_TEXEL_STORAGE; if (!(fmt->caps & PL_FMT_CAP_SAMPLEABLE)) fmt->caps &= ~PL_FMT_CAP_LINEAR; if (!gpu->glsl.compute) fmt->caps &= ~storable; bool has_nofmt = vk->features.features.shaderStorageImageReadWithoutFormat && vk->features.features.shaderStorageImageWriteWithoutFormat; if (fmt->caps & storable) { int real_comps = PL_DEF(vk_fmt->icomps, fmt->num_components); fmt->glsl_format = pl_fmt_glsl_format(fmt, real_comps); if (!fmt->glsl_format && !has_nofmt) { PL_DEBUG(gpu, "Storable format '%s' has no matching GLSL " "format qualifier but read/write without format " "is not supported.. disabling", fmt->name); fmt->caps &= ~storable; } } if (fmt->caps & storable) fmt->caps |= PL_FMT_CAP_READWRITE; // Pick sub-plane formats for planar formats for (int n = 0; n < fmt->num_planes; n++) { for (int i = 0; i < formats.num; i++) { if (formats.elem[i]->signature == vk_fmt->pfmt[n].fmt) { fmt->planes[n].format = formats.elem[i]; fmt->planes[n].shift_x = vk_fmt->pfmt[n].sx; fmt->planes[n].shift_y = vk_fmt->pfmt[n].sy; break; } } pl_assert(fmt->planes[n].format); } PL_ARRAY_APPEND(gpu, formats, fmt); } gpu->formats = formats.elem; gpu->num_formats = formats.num; } libplacebo-v7.349.0/src/vulkan/formats.h000066400000000000000000000026231463457750100201120ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #pragma once #include "common.h" #include "gpu.h" struct vk_format { VkFormat tfmt; // internal vulkan format enum (textures) struct pl_fmt_t fmt;// pl_fmt template (features will be auto-detected) int icomps; // internal component count (or 0 to infer from `fmt`) VkFormat bfmt; // vulkan format for use as buffers (or 0 to use `tfmt`) const struct vk_format *emufmt; // alternate format for emulation uint32_t min_ver; // minimum vulkan API version for this format to exist struct { VkFormat fmt; int sx, sy; } pfmt[4]; // plane formats (for planar textures) }; // Add all supported formats to the `pl_gpu` format list void vk_setup_formats(struct pl_gpu_t *gpu); libplacebo-v7.349.0/src/vulkan/gpu.c000066400000000000000000000631611463457750100172310ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #include "gpu.h" #include "formats.h" #include "glsl/spirv.h" #ifdef PL_HAVE_UNIX #include #endif // Gives us enough queries for 8 results #define QUERY_POOL_SIZE 16 struct pl_timer_t { VkQueryPool qpool; // even=start, odd=stop int index_write; // next index to write to int index_read; // next index to read from uint64_t pending; // bitmask of queries that are still running }; static inline uint64_t timer_bit(int index) { return 1llu << (index / 2); } static void timer_destroy_cb(pl_gpu gpu, pl_timer timer) { struct pl_vk *p = PL_PRIV(gpu); struct vk_ctx *vk = p->vk; pl_assert(!timer->pending); vk->DestroyQueryPool(vk->dev, timer->qpool, PL_VK_ALLOC); pl_free(timer); } VK_CB_FUNC_DEF(timer_destroy_cb); static pl_timer vk_timer_create(pl_gpu gpu) { struct pl_vk *p = PL_PRIV(gpu); struct vk_ctx *vk = p->vk; pl_timer timer = pl_alloc_ptr(NULL, timer); *timer = (struct pl_timer_t) {0}; struct VkQueryPoolCreateInfo qinfo = { .sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO, .queryType = VK_QUERY_TYPE_TIMESTAMP, .queryCount = QUERY_POOL_SIZE, }; VK(vk->CreateQueryPool(vk->dev, &qinfo, PL_VK_ALLOC, &timer->qpool)); return timer; error: timer_destroy_cb(gpu, timer); return NULL; } static void vk_timer_destroy(pl_gpu gpu, pl_timer timer) { vk_gpu_idle_callback(gpu, VK_CB_FUNC(timer_destroy_cb), gpu, timer); } static uint64_t vk_timer_query(pl_gpu gpu, pl_timer timer) { struct pl_vk *p = PL_PRIV(gpu); struct vk_ctx *vk = p->vk; if (timer->index_read == timer->index_write) return 0; // no more unprocessed results vk_poll_commands(vk, 0); if (timer->pending & timer_bit(timer->index_read)) return 0; // still waiting for results VkResult res; uint64_t ts[2] = {0}; res = vk->GetQueryPoolResults(vk->dev, timer->qpool, timer->index_read, 2, sizeof(ts), &ts[0], sizeof(uint64_t), VK_QUERY_RESULT_64_BIT); switch (res) { case VK_SUCCESS: timer->index_read = (timer->index_read + 2) % QUERY_POOL_SIZE; return (ts[1] - ts[0]) * vk->props.limits.timestampPeriod; case VK_NOT_READY: return 0; default: PL_VK_ASSERT(res, "Retrieving query pool results"); } error: return 0; } static void timer_begin(pl_gpu gpu, struct vk_cmd *cmd, pl_timer timer) { struct pl_vk *p = PL_PRIV(gpu); struct vk_ctx *vk = p->vk; if (!timer) return; if (!cmd->pool->props.timestampValidBits) { PL_TRACE(gpu, "QF %d does not support timestamp queries", cmd->pool->qf); return; } vk_poll_commands(vk, 0); if (timer->pending & timer_bit(timer->index_write)) return; // next query is still running, skip this timer VkQueueFlags reset_flags = VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT; if (cmd->pool->props.queueFlags & reset_flags) { // Use direct command buffer resets vk->CmdResetQueryPool(cmd->buf, timer->qpool, timer->index_write, 2); } else { // Use host query reset vk->ResetQueryPool(vk->dev, timer->qpool, timer->index_write, 2); } vk->CmdWriteTimestamp(cmd->buf, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, timer->qpool, timer->index_write); p->cmd_timer = timer; } static inline bool supports_marks(struct vk_cmd *cmd) { // Spec says debug markers are only available on graphics/compute queues VkQueueFlags flags = cmd->pool->props.queueFlags; return flags & (VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT); } struct vk_cmd *_begin_cmd(pl_gpu gpu, enum queue_type type, const char *label, pl_timer timer) { struct pl_vk *p = PL_PRIV(gpu); struct vk_ctx *vk = p->vk; 
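    // Command recording is funneled through a single "currently recording"
    // command buffer (p->cmd), guarded by p->recording. It is submitted and
    // replaced below whenever a different queue family (command pool) is
    // required than the one it was started on.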
pl_mutex_lock(&p->recording); struct vk_cmdpool *pool; switch (type) { case ANY: pool = p->cmd ? p->cmd->pool : vk->pool_graphics; break; case GRAPHICS: pool = vk->pool_graphics; break; case COMPUTE: pool = vk->pool_compute; break; case TRANSFER: pool = vk->pool_transfer; break; default: pl_unreachable(); } if (!p->cmd || p->cmd->pool != pool) { vk_cmd_submit(&p->cmd); p->cmd = vk_cmd_begin(pool, label); if (!p->cmd) { pl_mutex_unlock(&p->recording); return NULL; } } if (vk->CmdBeginDebugUtilsLabelEXT && supports_marks(p->cmd)) { vk->CmdBeginDebugUtilsLabelEXT(p->cmd->buf, &(VkDebugUtilsLabelEXT) { .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT, .pLabelName = label, }); } timer_begin(gpu, p->cmd, timer); return p->cmd; } static void timer_end_cb(void *ptimer, void *pindex) { pl_timer timer = ptimer; int index = (uintptr_t) pindex; timer->pending &= ~timer_bit(index); } bool _end_cmd(pl_gpu gpu, struct vk_cmd **pcmd, bool submit) { struct pl_vk *p = PL_PRIV(gpu); struct vk_ctx *vk = p->vk; bool ret = true; if (!pcmd) { if (submit) { pl_mutex_lock(&p->recording); ret = vk_cmd_submit(&p->cmd); pl_mutex_unlock(&p->recording); } return ret; } struct vk_cmd *cmd = *pcmd; pl_assert(p->cmd == cmd); if (p->cmd_timer) { pl_timer timer = p->cmd_timer; vk->CmdWriteTimestamp(cmd->buf, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, timer->qpool, timer->index_write + 1); timer->pending |= timer_bit(timer->index_write); vk_cmd_callback(cmd, timer_end_cb, timer, (void *) (uintptr_t) timer->index_write); timer->index_write = (timer->index_write + 2) % QUERY_POOL_SIZE; if (timer->index_write == timer->index_read) { // forcibly drop the least recent result to make space timer->index_read = (timer->index_read + 2) % QUERY_POOL_SIZE; } p->cmd_timer = NULL; } if (vk->CmdEndDebugUtilsLabelEXT && supports_marks(cmd)) vk->CmdEndDebugUtilsLabelEXT(cmd->buf); if (submit) ret = vk_cmd_submit(&p->cmd); pl_mutex_unlock(&p->recording); return ret; } void vk_gpu_idle_callback(pl_gpu gpu, vk_cb cb, const void *priv, const void *arg) { struct pl_vk *p = PL_PRIV(gpu); struct vk_ctx *vk = p->vk; pl_mutex_lock(&p->recording); if (p->cmd) { vk_cmd_callback(p->cmd, cb, priv, arg); } else { vk_dev_callback(vk, cb, priv, arg); } pl_mutex_unlock(&p->recording); } static void vk_gpu_destroy(pl_gpu gpu) { struct pl_vk *p = PL_PRIV(gpu); struct vk_ctx *vk = p->vk; vk_cmd_submit(&p->cmd); vk_wait_idle(vk); for (enum pl_tex_sample_mode s = 0; s < PL_TEX_SAMPLE_MODE_COUNT; s++) { for (enum pl_tex_address_mode a = 0; a < PL_TEX_ADDRESS_MODE_COUNT; a++) vk->DestroySampler(vk->dev, p->samplers[s][a], PL_VK_ALLOC); } pl_spirv_destroy(&p->spirv); pl_mutex_destroy(&p->recording); pl_free((void *) gpu); } pl_vulkan pl_vulkan_get(pl_gpu gpu) { const struct pl_gpu_fns *impl = PL_PRIV(gpu); if (impl->destroy == vk_gpu_destroy) { struct pl_vk *p = (struct pl_vk *) impl; return p->vk->vulkan; } return NULL; } static pl_handle_caps vk_sync_handle_caps(struct vk_ctx *vk) { pl_handle_caps caps = 0; for (int i = 0; vk_sync_handle_list[i]; i++) { enum pl_handle_type type = vk_sync_handle_list[i]; VkPhysicalDeviceExternalSemaphoreInfo info = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_SEMAPHORE_INFO_KHR, .handleType = vk_sync_handle_type(type), }; VkExternalSemaphoreProperties props = { .sType = VK_STRUCTURE_TYPE_EXTERNAL_SEMAPHORE_PROPERTIES_KHR, }; vk->GetPhysicalDeviceExternalSemaphoreProperties(vk->physd, &info, &props); VkExternalSemaphoreFeatureFlags flags = props.externalSemaphoreFeatures; if ((props.compatibleHandleTypes & info.handleType) && 
(flags & VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT_KHR)) { caps |= type; } } return caps; } static pl_handle_caps vk_tex_handle_caps(struct vk_ctx *vk, bool import) { pl_handle_caps caps = 0; for (int i = 0; vk_mem_handle_list[i]; i++) { enum pl_handle_type handle_type = vk_mem_handle_list[i]; if (handle_type == PL_HANDLE_DMA_BUF && !vk->GetImageDrmFormatModifierPropertiesEXT) { PL_DEBUG(vk, "Tex caps for %s (0x%x) unsupported: no DRM modifiers", vk_handle_name(vk_mem_handle_type(PL_HANDLE_DMA_BUF)), (unsigned int) PL_HANDLE_DMA_BUF); continue; } // Query whether creation of a "basic" dummy texture would work VkPhysicalDeviceImageDrmFormatModifierInfoEXT drm_pinfo = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_DRM_FORMAT_MODIFIER_INFO_EXT, .drmFormatModifier = DRM_FORMAT_MOD_LINEAR, .sharingMode = VK_SHARING_MODE_EXCLUSIVE, }; VkPhysicalDeviceExternalImageFormatInfoKHR ext_pinfo = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_IMAGE_FORMAT_INFO_KHR, .handleType = vk_mem_handle_type(handle_type), }; VkPhysicalDeviceImageFormatInfo2KHR pinfo = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2_KHR, .pNext = &ext_pinfo, .format = VK_FORMAT_R8_UNORM, .type = VK_IMAGE_TYPE_2D, .tiling = VK_IMAGE_TILING_OPTIMAL, .usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT, }; if (handle_type == PL_HANDLE_DMA_BUF) { vk_link_struct(&pinfo, &drm_pinfo); pinfo.tiling = VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT; } VkExternalImageFormatPropertiesKHR ext_props = { .sType = VK_STRUCTURE_TYPE_EXTERNAL_IMAGE_FORMAT_PROPERTIES_KHR, }; VkImageFormatProperties2KHR props = { .sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2_KHR, .pNext = &ext_props, }; VkResult res; res = vk->GetPhysicalDeviceImageFormatProperties2KHR(vk->physd, &pinfo, &props); if (res != VK_SUCCESS) { PL_DEBUG(vk, "Tex caps for %s (0x%x) unsupported: %s", vk_handle_name(ext_pinfo.handleType), (unsigned int) handle_type, vk_res_str(res)); continue; } if (vk_external_mem_check(vk, &ext_props.externalMemoryProperties, handle_type, import)) { caps |= handle_type; } } #ifdef VK_EXT_metal_objects if (vk->ExportMetalObjectsEXT && import) caps |= PL_HANDLE_MTL_TEX | PL_HANDLE_IOSURFACE; #endif return caps; } static const VkFilter filters[PL_TEX_SAMPLE_MODE_COUNT] = { [PL_TEX_SAMPLE_NEAREST] = VK_FILTER_NEAREST, [PL_TEX_SAMPLE_LINEAR] = VK_FILTER_LINEAR, }; static inline struct pl_spirv_version get_spirv_version(const struct vk_ctx *vk) { if (vk->api_ver >= VK_API_VERSION_1_3) { const VkPhysicalDeviceMaintenance4Features *device_maintenance4; device_maintenance4 = vk_find_struct(&vk->features, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_4_FEATURES); if (device_maintenance4 && device_maintenance4->maintenance4) { return (struct pl_spirv_version) { .env_version = VK_API_VERSION_1_3, .spv_version = PL_SPV_VERSION(1, 6), }; } } pl_assert(vk->api_ver >= VK_API_VERSION_1_2); return (struct pl_spirv_version) { .env_version = VK_API_VERSION_1_2, .spv_version = PL_SPV_VERSION(1, 5), }; } static const struct pl_gpu_fns pl_fns_vk; pl_gpu pl_gpu_create_vk(struct vk_ctx *vk) { pl_assert(vk->dev); struct pl_gpu_t *gpu = pl_zalloc_obj(NULL, gpu, struct pl_vk); gpu->log = vk->log; struct pl_vk *p = PL_PRIV(gpu); pl_mutex_init(&p->recording); p->vk = vk; p->impl = pl_fns_vk; p->spirv = pl_spirv_create(vk->log, get_spirv_version(vk)); if (!p->spirv) goto error; // Query all device properties VkPhysicalDevicePCIBusInfoPropertiesEXT pci_props = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PCI_BUS_INFO_PROPERTIES_EXT, }; VkPhysicalDeviceIDPropertiesKHR 
id_props = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES_KHR, .pNext = &pci_props, }; VkPhysicalDevicePushDescriptorPropertiesKHR pushd_props = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR, .pNext = &id_props, }; VkPhysicalDeviceSubgroupProperties group_props = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES, .pNext = &pushd_props, }; VkPhysicalDeviceExternalMemoryHostPropertiesEXT host_props = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_MEMORY_HOST_PROPERTIES_EXT, .pNext = &group_props, }; VkPhysicalDeviceProperties2KHR props = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR, .pNext = &host_props, }; bool is_portability = false; #ifdef VK_KHR_portability_subset VkPhysicalDevicePortabilitySubsetPropertiesKHR port_props = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PORTABILITY_SUBSET_PROPERTIES_KHR, .minVertexInputBindingStrideAlignment = 1, }; for (int i = 0; i < vk->exts.num; i++) { if (!strcmp(vk->exts.elem[i], VK_KHR_PORTABILITY_SUBSET_EXTENSION_NAME)) { vk_link_struct(&props, &port_props); is_portability = true; break; } } #endif vk->GetPhysicalDeviceProperties2(vk->physd, &props); VkPhysicalDeviceLimits limits = props.properties.limits; // Determine GLSL features and limits gpu->glsl = (struct pl_glsl_version) { .version = 450, .vulkan = true, .compute = true, .max_shmem_size = limits.maxComputeSharedMemorySize, .max_group_threads = limits.maxComputeWorkGroupInvocations, .max_group_size = { limits.maxComputeWorkGroupSize[0], limits.maxComputeWorkGroupSize[1], limits.maxComputeWorkGroupSize[2], }, }; VkShaderStageFlags req_stages = VK_SHADER_STAGE_FRAGMENT_BIT | VK_SHADER_STAGE_COMPUTE_BIT; VkSubgroupFeatureFlags req_flags = VK_SUBGROUP_FEATURE_BASIC_BIT | VK_SUBGROUP_FEATURE_VOTE_BIT | VK_SUBGROUP_FEATURE_ARITHMETIC_BIT | VK_SUBGROUP_FEATURE_BALLOT_BIT | VK_SUBGROUP_FEATURE_SHUFFLE_BIT; if ((group_props.supportedStages & req_stages) == req_stages && (group_props.supportedOperations & req_flags) == req_flags) { gpu->glsl.subgroup_size = group_props.subgroupSize; } if (vk->features.features.shaderImageGatherExtended) { gpu->glsl.min_gather_offset = limits.minTexelGatherOffset; gpu->glsl.max_gather_offset = limits.maxTexelGatherOffset; } const size_t max_size = vk_malloc_avail(vk->ma, 0); gpu->limits = (struct pl_gpu_limits) { // pl_gpu .thread_safe = true, .callbacks = true, // pl_buf .max_buf_size = max_size, .max_ubo_size = PL_MIN(limits.maxUniformBufferRange, max_size), .max_ssbo_size = PL_MIN(limits.maxStorageBufferRange, max_size), .max_vbo_size = vk_malloc_avail(vk->ma, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT), .max_mapped_size = vk_malloc_avail(vk->ma, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT), .max_mapped_vram = vk_malloc_avail(vk->ma, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT), .max_buffer_texels = PL_MIN(limits.maxTexelBufferElements, max_size), .align_host_ptr = host_props.minImportedHostPointerAlignment, .host_cached = vk_malloc_avail(vk->ma, VK_MEMORY_PROPERTY_HOST_CACHED_BIT), // pl_tex .max_tex_1d_dim = limits.maxImageDimension1D, .max_tex_2d_dim = limits.maxImageDimension2D, .max_tex_3d_dim = limits.maxImageDimension3D, .blittable_1d_3d = true, .buf_transfer = true, .align_tex_xfer_pitch = limits.optimalBufferCopyRowPitchAlignment, .align_tex_xfer_offset = pl_lcm(limits.optimalBufferCopyOffsetAlignment, 4), // pl_pass .max_variable_comps = 0, // vulkan doesn't support these at all .max_constants = SIZE_MAX, .array_size_constants = !is_portability, .max_pushc_size 
= limits.maxPushConstantsSize, #ifdef VK_KHR_portability_subset .align_vertex_stride = port_props.minVertexInputBindingStrideAlignment, #else .align_vertex_stride = 1, #endif .max_dispatch = { limits.maxComputeWorkGroupCount[0], limits.maxComputeWorkGroupCount[1], limits.maxComputeWorkGroupCount[2], }, .fragment_queues = vk->pool_graphics->num_queues, .compute_queues = vk->pool_compute->num_queues, }; gpu->export_caps.buf = vk_malloc_handle_caps(vk->ma, false); gpu->import_caps.buf = vk_malloc_handle_caps(vk->ma, true); gpu->export_caps.tex = vk_tex_handle_caps(vk, false); gpu->import_caps.tex = vk_tex_handle_caps(vk, true); gpu->export_caps.sync = vk_sync_handle_caps(vk); gpu->import_caps.sync = 0; // Not supported yet if (pl_gpu_supports_interop(gpu)) { pl_static_assert(sizeof(gpu->uuid) == VK_UUID_SIZE); memcpy(gpu->uuid, id_props.deviceUUID, sizeof(gpu->uuid)); gpu->pci.domain = pci_props.pciDomain; gpu->pci.bus = pci_props.pciBus; gpu->pci.device = pci_props.pciDevice; gpu->pci.function = pci_props.pciFunction; } if (vk->CmdPushDescriptorSetKHR) p->max_push_descriptors = pushd_props.maxPushDescriptors; vk_setup_formats(gpu); // Compute the correct minimum texture alignment p->min_texel_alignment = 1; for (int i = 0; i < gpu->num_formats; i++) { if (gpu->formats[i]->emulated || gpu->formats[i]->opaque) continue; size_t texel_size = gpu->formats[i]->texel_size; p->min_texel_alignment = pl_lcm(p->min_texel_alignment, texel_size); } PL_DEBUG(gpu, "Minimum texel alignment: %zu", p->min_texel_alignment); // Initialize the samplers for (enum pl_tex_sample_mode s = 0; s < PL_TEX_SAMPLE_MODE_COUNT; s++) { for (enum pl_tex_address_mode a = 0; a < PL_TEX_ADDRESS_MODE_COUNT; a++) { static const VkSamplerAddressMode modes[PL_TEX_ADDRESS_MODE_COUNT] = { [PL_TEX_ADDRESS_CLAMP] = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, [PL_TEX_ADDRESS_REPEAT] = VK_SAMPLER_ADDRESS_MODE_REPEAT, [PL_TEX_ADDRESS_MIRROR] = VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT, }; VkSamplerCreateInfo sinfo = { .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, .magFilter = filters[s], .minFilter = filters[s], .addressModeU = modes[a], .addressModeV = modes[a], .addressModeW = modes[a], .maxAnisotropy = 1.0, }; VK(vk->CreateSampler(vk->dev, &sinfo, PL_VK_ALLOC, &p->samplers[s][a])); } } return pl_gpu_finalize(gpu); error: vk_gpu_destroy(gpu); return NULL; } void pl_vulkan_sem_destroy(pl_gpu gpu, VkSemaphore *semaphore) { VkSemaphore sem = *semaphore; if (!sem) return; struct pl_vk *p = PL_PRIV(gpu); struct vk_ctx *vk = p->vk; vk->DestroySemaphore(vk->dev, sem, PL_VK_ALLOC); *semaphore = VK_NULL_HANDLE; } VkSemaphore pl_vulkan_sem_create(pl_gpu gpu, const struct pl_vulkan_sem_params *params) { struct pl_vk *p = PL_PRIV(gpu); struct vk_ctx *vk = p->vk; pl_assert(PL_ISPOT(params->export_handle)); if ((params->export_handle & gpu->export_caps.sync) != params->export_handle) { PL_ERR(gpu, "Invalid handle type 0x%"PRIx64" specified for " "`pl_vulkan_sem_create`!", (uint64_t) params->export_handle); return VK_NULL_HANDLE; } switch (params->export_handle) { case PL_HANDLE_FD: params->out_handle->fd = -1; break; case PL_HANDLE_WIN32: case PL_HANDLE_WIN32_KMT: params->out_handle->handle = NULL; break; case PL_HANDLE_DMA_BUF: case PL_HANDLE_HOST_PTR: case PL_HANDLE_MTL_TEX: case PL_HANDLE_IOSURFACE: pl_unreachable(); } const VkExportSemaphoreCreateInfoKHR einfo = { .sType = VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_CREATE_INFO_KHR, .handleTypes = vk_sync_handle_type(params->export_handle), }; const VkSemaphoreTypeCreateInfo stinfo = { .sType = 
VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO, .pNext = params->export_handle ? &einfo : NULL, .semaphoreType = params->type, .initialValue = params->initial_value, }; const VkSemaphoreCreateInfo sinfo = { .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, .pNext = &stinfo, }; VkSemaphore sem = VK_NULL_HANDLE; VK(vk->CreateSemaphore(vk->dev, &sinfo, PL_VK_ALLOC, &sem)); PL_VK_NAME(SEMAPHORE, sem, PL_DEF(params->debug_tag, "pl_vulkan_sem")); #ifdef PL_HAVE_UNIX if (params->export_handle == PL_HANDLE_FD) { VkSemaphoreGetFdInfoKHR finfo = { .sType = VK_STRUCTURE_TYPE_SEMAPHORE_GET_FD_INFO_KHR, .handleType = einfo.handleTypes, .semaphore = sem, }; VK(vk->GetSemaphoreFdKHR(vk->dev, &finfo, ¶ms->out_handle->fd)); } #endif #ifdef PL_HAVE_WIN32 if (params->export_handle == PL_HANDLE_WIN32 || params->export_handle == PL_HANDLE_WIN32_KMT) { VkSemaphoreGetWin32HandleInfoKHR handle_info = { .sType = VK_STRUCTURE_TYPE_SEMAPHORE_GET_WIN32_HANDLE_INFO_KHR, .handleType = einfo.handleTypes, .semaphore = sem, }; VK(vk->GetSemaphoreWin32HandleKHR(vk->dev, &handle_info, ¶ms->out_handle->handle)); } #endif return sem; error: #ifdef PL_HAVE_UNIX if (params->export_handle == PL_HANDLE_FD) { if (params->out_handle->fd > -1) close(params->out_handle->fd); } #endif #ifdef PL_HAVE_WIN32 if (params->export_handle == PL_HANDLE_WIN32) { if (params->out_handle->handle != NULL) CloseHandle(params->out_handle->handle); } // PL_HANDLE_WIN32_KMT is just an identifier. It doesn't get closed. #endif vk->DestroySemaphore(vk->dev, sem, PL_VK_ALLOC); return VK_NULL_HANDLE; } static void vk_gpu_flush(pl_gpu gpu) { struct pl_vk *p = PL_PRIV(gpu); struct vk_ctx *vk = p->vk; CMD_SUBMIT(NULL); vk_rotate_queues(vk); vk_malloc_garbage_collect(vk->ma); } static void vk_gpu_finish(pl_gpu gpu) { struct pl_vk *p = PL_PRIV(gpu); struct vk_ctx *vk = p->vk; CMD_SUBMIT(NULL); vk_wait_idle(vk); } static bool vk_gpu_is_failed(pl_gpu gpu) { struct pl_vk *p = PL_PRIV(gpu); struct vk_ctx *vk = p->vk; return vk->failed; } struct vk_cmd *pl_vk_steal_cmd(pl_gpu gpu) { struct pl_vk *p = PL_PRIV(gpu); struct vk_ctx *vk = p->vk; pl_mutex_lock(&p->recording); struct vk_cmd *cmd = p->cmd; p->cmd = NULL; pl_mutex_unlock(&p->recording); struct vk_cmdpool *pool = vk->pool_graphics; if (!cmd || cmd->pool != pool) { vk_cmd_submit(&cmd); cmd = vk_cmd_begin(pool, NULL); } return cmd; } void pl_vk_print_heap(pl_gpu gpu, enum pl_log_level lev) { struct pl_vk *p = PL_PRIV(gpu); struct vk_ctx *vk = p->vk; vk_malloc_print_stats(vk->ma, lev); } static const struct pl_gpu_fns pl_fns_vk = { .destroy = vk_gpu_destroy, .tex_create = vk_tex_create, .tex_destroy = vk_tex_deref, .tex_invalidate = vk_tex_invalidate, .tex_clear_ex = vk_tex_clear_ex, .tex_blit = vk_tex_blit, .tex_upload = vk_tex_upload, .tex_download = vk_tex_download, .tex_poll = vk_tex_poll, .buf_create = vk_buf_create, .buf_destroy = vk_buf_deref, .buf_write = vk_buf_write, .buf_read = vk_buf_read, .buf_copy = vk_buf_copy, .buf_export = vk_buf_export, .buf_poll = vk_buf_poll, .desc_namespace = vk_desc_namespace, .pass_create = vk_pass_create, .pass_destroy = vk_pass_destroy, .pass_run = vk_pass_run, .timer_create = vk_timer_create, .timer_destroy = vk_timer_destroy, .timer_query = vk_timer_query, .gpu_flush = vk_gpu_flush, .gpu_finish = vk_gpu_finish, .gpu_is_failed = vk_gpu_is_failed, }; libplacebo-v7.349.0/src/vulkan/gpu.h000066400000000000000000000127011463457750100172300ustar00rootroot00000000000000/* * This file is part of libplacebo. 
* * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #pragma once #include "common.h" #include "command.h" #include "formats.h" #include "malloc.h" #include "utils.h" #include "../gpu.h" #include "../glsl/spirv.h" #include "../pl_thread.h" pl_gpu pl_gpu_create_vk(struct vk_ctx *vk); // This function takes the current graphics command and steals it from the // GPU, so the caller can do custom vk_cmd_ calls on it. The caller should // submit it as well. struct vk_cmd *pl_vk_steal_cmd(pl_gpu gpu); // Print memory usage statistics void pl_vk_print_heap(pl_gpu, enum pl_log_level); // --- pl_gpu internal structs and helpers struct pl_fmt_vk { const struct vk_format *vk_fmt; bool blit_emulated; }; enum queue_type { GRAPHICS, COMPUTE, TRANSFER, ANY, }; struct pl_vk { struct pl_gpu_fns impl; struct vk_ctx *vk; pl_spirv spirv; // Some additional cached device limits and features checks uint32_t max_push_descriptors; size_t min_texel_alignment; // The "currently recording" command. This will be queued and replaced by // a new command every time we need to "switch" between queue families. pl_mutex recording; struct vk_cmd *cmd; pl_timer cmd_timer; // Array of VkSamplers for every combination of sample/address modes VkSampler samplers[PL_TEX_SAMPLE_MODE_COUNT][PL_TEX_ADDRESS_MODE_COUNT]; // To avoid spamming warnings bool warned_modless; }; struct vk_cmd *_begin_cmd(pl_gpu, enum queue_type, const char *label, pl_timer); bool _end_cmd(pl_gpu, struct vk_cmd **, bool submit); #define CMD_BEGIN(type) _begin_cmd(gpu, type, __func__, NULL) #define CMD_BEGIN_TIMED(type, timer) _begin_cmd(gpu, type, __func__, timer) #define CMD_FINISH(cmd) _end_cmd(gpu, cmd, false) #define CMD_SUBMIT(cmd) _end_cmd(gpu, cmd, true) // Helper to fire a callback the next time the `pl_gpu` is in an idle state // // Use this instead of `vk_dev_callback` when you need to clean up after // resources that might possibly still be in use by the `pl_gpu` at the time of // creating the callback. 
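//
// Example from gpu.c: vk_timer_destroy() defers destruction of the timer's
// VkQueryPool until the GPU is idle via
//
//   vk_gpu_idle_callback(gpu, VK_CB_FUNC(timer_destroy_cb), gpu, timer);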
void vk_gpu_idle_callback(pl_gpu, vk_cb, const void *priv, const void *arg); struct pl_tex_vk { pl_rc_t rc; bool external_img; enum queue_type transfer_queue; VkImageType type; VkImage img; VkImageAspectFlags aspect; struct vk_memslice mem; // cached properties VkFormat img_fmt; VkImageUsageFlags usage_flags; // for sampling VkImageView view; // for rendering VkFramebuffer framebuffer; // for vk_tex_upload/download fallback code pl_fmt texel_fmt; // for planar textures (as a convenience) int num_planes; struct pl_tex_vk *planes[4]; // synchronization and current state (planes only) struct vk_sem sem; VkImageLayout layout; PL_ARRAY(pl_vulkan_sem) ext_deps; // external semaphore, not owned by the pl_tex uint32_t qf; // last queue family to access this texture (for barriers) bool may_invalidate; bool held; }; pl_tex vk_tex_create(pl_gpu, const struct pl_tex_params *); void vk_tex_deref(pl_gpu, pl_tex); void vk_tex_invalidate(pl_gpu, pl_tex); void vk_tex_clear_ex(pl_gpu, pl_tex, const union pl_clear_color); void vk_tex_blit(pl_gpu, const struct pl_tex_blit_params *); bool vk_tex_upload(pl_gpu, const struct pl_tex_transfer_params *); bool vk_tex_download(pl_gpu, const struct pl_tex_transfer_params *); bool vk_tex_poll(pl_gpu, pl_tex, uint64_t timeout); void vk_tex_barrier(pl_gpu, struct vk_cmd *, pl_tex, VkPipelineStageFlags2, VkAccessFlags2, VkImageLayout, uint32_t qf); struct pl_buf_vk { pl_rc_t rc; struct vk_memslice mem; enum queue_type update_queue; VkBufferView view; // for texel buffers // synchronization and current state struct vk_sem sem; bool exported; bool needs_flush; }; pl_buf vk_buf_create(pl_gpu, const struct pl_buf_params *); void vk_buf_deref(pl_gpu, pl_buf); void vk_buf_write(pl_gpu, pl_buf, size_t offset, const void *src, size_t size); bool vk_buf_read(pl_gpu, pl_buf, size_t offset, void *dst, size_t size); void vk_buf_copy(pl_gpu, pl_buf dst, size_t dst_offset, pl_buf src, size_t src_offset, size_t size); bool vk_buf_export(pl_gpu, pl_buf); bool vk_buf_poll(pl_gpu, pl_buf, uint64_t timeout); // Helper to ease buffer barrier creation. (`offset` is relative to pl_buf) void vk_buf_barrier(pl_gpu, struct vk_cmd *, pl_buf, VkPipelineStageFlags2, VkAccessFlags2, size_t offset, size_t size, bool export); // Flush visible writes to a buffer made by the API void vk_buf_flush(pl_gpu, struct vk_cmd *, pl_buf, size_t offset, size_t size); struct pl_pass_vk; int vk_desc_namespace(pl_gpu, enum pl_desc_type); pl_pass vk_pass_create(pl_gpu, const struct pl_pass_params *); void vk_pass_destroy(pl_gpu, pl_pass); void vk_pass_run(pl_gpu, const struct pl_pass_run_params *); libplacebo-v7.349.0/src/vulkan/gpu_buf.c000066400000000000000000000413241463457750100200620ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . 
*/ #include "gpu.h" VK_CB_FUNC_DEF(vk_buf_deref); void vk_buf_barrier(pl_gpu gpu, struct vk_cmd *cmd, pl_buf buf, VkPipelineStageFlags2 stage, VkAccessFlags2 access, size_t offset, size_t size, bool export) { struct pl_vk *p = PL_PRIV(gpu); struct vk_ctx *vk = p->vk; struct pl_buf_vk *buf_vk = PL_PRIV(buf); pl_assert(!export || !buf_vk->exported); // can't re-export exported buffers pl_rc_ref(&buf_vk->rc); bool needs_flush = buf_vk->needs_flush || buf->params.host_mapped || buf->params.import_handle == PL_HANDLE_HOST_PTR; bool noncoherent = buf_vk->mem.data && !buf_vk->mem.coherent; if (needs_flush && noncoherent) { VK(vk->FlushMappedMemoryRanges(vk->dev, 1, &(struct VkMappedMemoryRange) { .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE, .memory = buf_vk->mem.vkmem, .offset = buf_vk->mem.map_offset, .size = buf_vk->mem.map_size, })); // Just ignore errors, not much we can do about them other than // logging them and moving on... error: ; } struct vk_sync_scope last; last = vk_sem_barrier(cmd, &buf_vk->sem, stage, access, export); // CONCURRENT buffers require transitioning to/from IGNORED, EXCLUSIVE // buffers require transitioning to/from the concrete QF index uint32_t qf = vk->pools.num > 1 ? VK_QUEUE_FAMILY_IGNORED : cmd->pool->qf; uint32_t src_qf = buf_vk->exported ? VK_QUEUE_FAMILY_EXTERNAL_KHR : qf; uint32_t dst_qf = export ? VK_QUEUE_FAMILY_EXTERNAL_KHR : qf; if (last.access || src_qf != dst_qf) { vk_cmd_barrier(cmd, &(VkDependencyInfo) { .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, .bufferMemoryBarrierCount = 1, .pBufferMemoryBarriers = &(VkBufferMemoryBarrier2) { .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2, .srcStageMask = last.stage, .srcAccessMask = last.access, .dstStageMask = stage, .dstAccessMask = access, .srcQueueFamilyIndex = src_qf, .dstQueueFamilyIndex = dst_qf, .buffer = buf_vk->mem.buf, .offset = buf_vk->mem.offset + offset, .size = size, }, }); } buf_vk->needs_flush = false; buf_vk->exported = export; vk_cmd_callback(cmd, VK_CB_FUNC(vk_buf_deref), gpu, buf); } void vk_buf_deref(pl_gpu gpu, pl_buf buf) { if (!buf) return; struct pl_vk *p = PL_PRIV(gpu); struct vk_ctx *vk = p->vk; struct pl_buf_vk *buf_vk = PL_PRIV(buf); if (pl_rc_deref(&buf_vk->rc)) { vk->DestroyBufferView(vk->dev, buf_vk->view, PL_VK_ALLOC); vk_malloc_free(vk->ma, &buf_vk->mem); pl_free((void *) buf); } } pl_buf vk_buf_create(pl_gpu gpu, const struct pl_buf_params *params) { struct pl_vk *p = PL_PRIV(gpu); struct vk_ctx *vk = p->vk; struct pl_buf_t *buf = pl_zalloc_obj(NULL, buf, struct pl_buf_vk); buf->params = *params; buf->params.initial_data = NULL; struct pl_buf_vk *buf_vk = PL_PRIV(buf); pl_rc_init(&buf_vk->rc); struct vk_malloc_params mparams = { .reqs = { .size = PL_ALIGN2(params->size, 4), // for vk_buf_write .memoryTypeBits = UINT32_MAX, .alignment = 1, }, // these are always set, because `vk_buf_copy` can always be used .buf_usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, .export_handle = params->export_handle, .import_handle = params->import_handle, .shared_mem = params->shared_mem, .debug_tag = params->debug_tag, }; // Mandatory/optimal buffer offset alignment VkDeviceSize *align = &mparams.reqs.alignment; VkDeviceSize extra_align = vk->props.limits.optimalBufferCopyOffsetAlignment; // Try and align all buffers to the minimum texel alignment, to make sure // tex_upload/tex_download always gets aligned buffer copies if possible extra_align = pl_lcm(extra_align, p->min_texel_alignment); enum pl_buf_mem_type mem_type = params->memory_type; bool is_texel = 
false; if (params->uniform) { mparams.buf_usage |= VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT; *align = pl_lcm(*align, vk->props.limits.minUniformBufferOffsetAlignment); mem_type = PL_BUF_MEM_DEVICE; if (params->format) { mparams.buf_usage |= VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT; is_texel = true; } } if (params->storable) { mparams.buf_usage |= VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; *align = pl_lcm(*align, vk->props.limits.minStorageBufferOffsetAlignment); buf_vk->update_queue = COMPUTE; mem_type = PL_BUF_MEM_DEVICE; if (params->format) { mparams.buf_usage |= VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT; is_texel = true; } } if (is_texel) { *align = pl_lcm(*align, vk->props.limits.minTexelBufferOffsetAlignment); *align = pl_lcm(*align, params->format->texel_size); } if (params->drawable) { mparams.buf_usage |= VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT; mem_type = PL_BUF_MEM_DEVICE; } if (params->host_writable || params->initial_data) { // Buffers should be written using mapped memory if possible mparams.optimal = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; // Use the transfer queue for updates on very large buffers (1 MB) if (params->size > 1024*1024) buf_vk->update_queue = TRANSFER; } if (params->host_mapped || params->host_readable) { mparams.required |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; if (params->size > 1024) { // Prefer cached memory for large buffers (1 kB) which may be read // from, because uncached reads are extremely slow mparams.optimal |= VK_MEMORY_PROPERTY_HOST_CACHED_BIT; } } switch (mem_type) { case PL_BUF_MEM_AUTO: // We generally prefer VRAM since it's faster than RAM, but any number // of other requirements could potentially exclude it, so just mark it // as optimal by default. Additionally, don't do this if the available // VRAM size is very small. if (!(mparams.optimal & VK_MEMORY_PROPERTY_HOST_CACHED_BIT) && params->size * MAPPED_VRAM_THRESHOLD <= gpu->limits.max_mapped_vram) { mparams.optimal |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; } break; case PL_BUF_MEM_DEVICE: // Force device local memory. mparams.required |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; break; case PL_BUF_MEM_HOST: // This isn't a true guarantee, but actually trying to restrict the // device-local bit locks out all memory heaps on iGPUs. Requiring // the memory be host-mapped is the easiest compromise. 
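        // (As the PL_BUF_MEM_AUTO and PL_BUF_MEM_DEVICE cases above suggest,
        //  flags in `mparams.required` are hard requirements for the memory
        //  type chosen by vk_malloc_slice(), while `mparams.optimal` flags
        //  are only preferences.)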
mparams.required |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; mparams.optimal |= VK_MEMORY_PROPERTY_HOST_CACHED_BIT; break; case PL_BUF_MEM_TYPE_COUNT: pl_unreachable(); } if (params->import_handle) { size_t offset = params->shared_mem.offset; if (PL_ALIGN(offset, *align) != offset) { PL_ERR(gpu, "Imported memory offset %zu violates minimum alignment " "requirement of enabled usage flags (%zu)!", offset, (size_t) *align); goto error; } } else { *align = pl_lcm(*align, extra_align); } if (!vk_malloc_slice(vk->ma, &buf_vk->mem, &mparams)) goto error; if (params->host_mapped) buf->data = buf_vk->mem.data; if (params->export_handle) { buf->shared_mem = buf_vk->mem.shared_mem; buf->shared_mem.drm_format_mod = DRM_FORMAT_MOD_LINEAR; buf_vk->exported = true; } if (is_texel) { struct pl_fmt_vk *fmtp = PL_PRIV(params->format); VkBufferViewCreateInfo vinfo = { .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO, .buffer = buf_vk->mem.buf, .format = PL_DEF(fmtp->vk_fmt->bfmt, fmtp->vk_fmt->tfmt), .offset = buf_vk->mem.offset, .range = buf_vk->mem.size, }; VK(vk->CreateBufferView(vk->dev, &vinfo, PL_VK_ALLOC, &buf_vk->view)); PL_VK_NAME(BUFFER_VIEW, buf_vk->view, PL_DEF(params->debug_tag, "texel")); } if (params->initial_data) vk_buf_write(gpu, buf, 0, params->initial_data, params->size); return buf; error: vk_buf_deref(gpu, buf); return NULL; } static void invalidate_buf(pl_gpu gpu, pl_buf buf) { struct pl_vk *p = PL_PRIV(gpu); struct vk_ctx *vk = p->vk; struct pl_buf_vk *buf_vk = PL_PRIV(buf); if (buf_vk->mem.data && !buf_vk->mem.coherent) { VK(vk->InvalidateMappedMemoryRanges(vk->dev, 1, &(VkMappedMemoryRange) { .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE, .memory = buf_vk->mem.vkmem, .offset = buf_vk->mem.map_offset, .size = buf_vk->mem.map_size, })); } // Ignore errors (after logging), nothing useful we can do anyway error: ; vk_buf_deref(gpu, buf); } VK_CB_FUNC_DEF(invalidate_buf); void vk_buf_flush(pl_gpu gpu, struct vk_cmd *cmd, pl_buf buf, size_t offset, size_t size) { struct pl_buf_vk *buf_vk = PL_PRIV(buf); // We need to perform a flush if the host is capable of reading back from // the buffer, or if we intend to overwrite it using mapped memory bool can_read = buf->params.host_readable; bool can_write = buf_vk->mem.data && buf->params.host_writable; if (buf->params.host_mapped || buf->params.import_handle == PL_HANDLE_HOST_PTR) can_read = can_write = true; if (!can_read && !can_write) return; vk_cmd_barrier(cmd, &(VkDependencyInfo) { .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, .bufferMemoryBarrierCount = 1, .pBufferMemoryBarriers = &(VkBufferMemoryBarrier2) { .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2, .srcStageMask = buf_vk->sem.write.stage, .srcAccessMask = buf_vk->sem.write.access, .dstStageMask = VK_PIPELINE_STAGE_2_HOST_BIT, .dstAccessMask = (can_read ? VK_ACCESS_2_HOST_READ_BIT : 0) | (can_write ? 
VK_ACCESS_2_HOST_WRITE_BIT : 0), .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .buffer = buf_vk->mem.buf, .offset = buf_vk->mem.offset + offset, .size = size, }, }); // We need to hold on to the buffer until this barrier completes vk_cmd_callback(cmd, VK_CB_FUNC(invalidate_buf), gpu, buf); pl_rc_ref(&buf_vk->rc); } bool vk_buf_poll(pl_gpu gpu, pl_buf buf, uint64_t timeout) { struct pl_vk *p = PL_PRIV(gpu); struct vk_ctx *vk = p->vk; struct pl_buf_vk *buf_vk = PL_PRIV(buf); // Opportunistically check if we can re-use this buffer without flush vk_poll_commands(vk, 0); if (pl_rc_count(&buf_vk->rc) == 1) return false; // Otherwise, we're force to submit any queued command so that the // user is guaranteed to see progress eventually, even if they call // this in a tight loop CMD_SUBMIT(NULL); vk_poll_commands(vk, timeout); return pl_rc_count(&buf_vk->rc) > 1; } void vk_buf_write(pl_gpu gpu, pl_buf buf, size_t offset, const void *data, size_t size) { struct pl_vk *p = PL_PRIV(gpu); struct vk_ctx *vk = p->vk; struct pl_buf_vk *buf_vk = PL_PRIV(buf); // For host-mapped buffers, we can just directly memcpy the buffer contents. // Otherwise, we can update the buffer from the GPU using a command buffer. if (buf_vk->mem.data) { // ensure no queued operations while (vk_buf_poll(gpu, buf, UINT64_MAX)) ; // do nothing uintptr_t addr = (uintptr_t) buf_vk->mem.data + offset; memcpy((void *) addr, data, size); buf_vk->needs_flush = true; } else { struct vk_cmd *cmd = CMD_BEGIN(buf_vk->update_queue); if (!cmd) { PL_ERR(gpu, "Failed updating buffer!"); return; } vk_buf_barrier(gpu, cmd, buf, VK_PIPELINE_STAGE_2_COPY_BIT, VK_ACCESS_2_TRANSFER_WRITE_BIT, offset, size, false); // Vulkan requires `size` to be a multiple of 4, so we need to make // sure to handle the end separately if the original data is not const size_t max_transfer = 64 * 1024; size_t size_rem = size % 4; size_t size_base = size - size_rem; VkDeviceSize buf_offset = buf_vk->mem.offset + offset; if (size_base > max_transfer) { PL_TRACE(gpu, "Using multiple vkCmdUpdateBuffer calls to upload " "large buffer. 
Consider using buffer-buffer transfers " "instead!"); } for (size_t xfer = 0; xfer < size_base; xfer += max_transfer) { vk->CmdUpdateBuffer(cmd->buf, buf_vk->mem.buf, buf_offset + xfer, PL_MIN(size_base - xfer, max_transfer), (void *) ((uint8_t *) data + xfer)); } if (size_rem) { uint8_t tail[4] = {0}; memcpy(tail, data, size_rem); vk->CmdUpdateBuffer(cmd->buf, buf_vk->mem.buf, buf_offset + size_base, sizeof(tail), tail); } pl_assert(!buf->params.host_readable); // no flush needed due to this CMD_FINISH(&cmd); } } bool vk_buf_read(pl_gpu gpu, pl_buf buf, size_t offset, void *dest, size_t size) { struct pl_vk *p = PL_PRIV(gpu); struct vk_ctx *vk = p->vk; struct pl_buf_vk *buf_vk = PL_PRIV(buf); pl_assert(buf_vk->mem.data); if (vk_buf_poll(gpu, buf, 0) && buf_vk->sem.write.sync.sem) { // ensure no more queued writes VK(vk->WaitSemaphores(vk->dev, &(VkSemaphoreWaitInfo) { .sType = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO, .semaphoreCount = 1, .pSemaphores = &buf_vk->sem.write.sync.sem, .pValues = &buf_vk->sem.write.sync.value, }, UINT64_MAX)); // process callbacks vk_poll_commands(vk, 0); } uintptr_t addr = (uintptr_t) buf_vk->mem.data + (size_t) offset; memcpy(dest, (void *) addr, size); return true; error: return false; } void vk_buf_copy(pl_gpu gpu, pl_buf dst, size_t dst_offset, pl_buf src, size_t src_offset, size_t size) { struct pl_vk *p = PL_PRIV(gpu); struct vk_ctx *vk = p->vk; struct pl_buf_vk *dst_vk = PL_PRIV(dst); struct pl_buf_vk *src_vk = PL_PRIV(src); struct vk_cmd *cmd = CMD_BEGIN(dst_vk->update_queue); if (!cmd) { PL_ERR(gpu, "Failed copying buffer!"); return; } vk_buf_barrier(gpu, cmd, dst, VK_PIPELINE_STAGE_2_COPY_BIT, VK_ACCESS_2_TRANSFER_WRITE_BIT, dst_offset, size, false); vk_buf_barrier(gpu, cmd, src, VK_PIPELINE_STAGE_2_COPY_BIT, VK_ACCESS_2_TRANSFER_READ_BIT, src_offset, size, false); VkBufferCopy region = { .srcOffset = src_vk->mem.offset + src_offset, .dstOffset = dst_vk->mem.offset + dst_offset, .size = size, }; vk->CmdCopyBuffer(cmd->buf, src_vk->mem.buf, dst_vk->mem.buf, 1, ®ion); vk_buf_flush(gpu, cmd, dst, dst_offset, size); CMD_FINISH(&cmd); } bool vk_buf_export(pl_gpu gpu, pl_buf buf) { struct pl_buf_vk *buf_vk = PL_PRIV(buf); if (buf_vk->exported) return true; struct vk_cmd *cmd = CMD_BEGIN(ANY); if (!cmd) { PL_ERR(gpu, "Failed exporting buffer!"); return false; } // For the queue family ownership transfer, we can ignore all pipeline // stages since the synchronization via fences/semaphores is required vk_buf_barrier(gpu, cmd, buf, VK_PIPELINE_STAGE_2_NONE, 0, 0, buf->params.size, true); return CMD_SUBMIT(&cmd); } libplacebo-v7.349.0/src/vulkan/gpu_pass.c000066400000000000000000001065011463457750100202530ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . 
*/ #include "gpu.h" #include "cache.h" #include "glsl/spirv.h" // For pl_pass.priv struct pl_pass_vk { // Pipeline / render pass VkPipeline base; VkPipeline pipe; VkPipelineLayout pipeLayout; VkRenderPass renderPass; // Descriptor set (bindings) bool use_pushd; VkDescriptorSetLayout dsLayout; VkDescriptorPool dsPool; // To keep track of which descriptor sets are and aren't available, we // allocate a fixed number and use a bitmask of all available sets. VkDescriptorSet dss[16]; uint16_t dmask; // For recompilation VkVertexInputAttributeDescription *attrs; VkPipelineCache cache; VkShaderModule vert; VkShaderModule shader; // For updating VkWriteDescriptorSet *dswrite; VkDescriptorImageInfo *dsiinfo; VkDescriptorBufferInfo *dsbinfo; VkSpecializationInfo specInfo; size_t spec_size; }; int vk_desc_namespace(pl_gpu gpu, enum pl_desc_type type) { return 0; } static void pass_destroy_cb(pl_gpu gpu, pl_pass pass) { struct pl_vk *p = PL_PRIV(gpu); struct vk_ctx *vk = p->vk; struct pl_pass_vk *pass_vk = PL_PRIV(pass); vk->DestroyPipeline(vk->dev, pass_vk->pipe, PL_VK_ALLOC); vk->DestroyPipeline(vk->dev, pass_vk->base, PL_VK_ALLOC); vk->DestroyRenderPass(vk->dev, pass_vk->renderPass, PL_VK_ALLOC); vk->DestroyPipelineLayout(vk->dev, pass_vk->pipeLayout, PL_VK_ALLOC); vk->DestroyPipelineCache(vk->dev, pass_vk->cache, PL_VK_ALLOC); vk->DestroyDescriptorPool(vk->dev, pass_vk->dsPool, PL_VK_ALLOC); vk->DestroyDescriptorSetLayout(vk->dev, pass_vk->dsLayout, PL_VK_ALLOC); vk->DestroyShaderModule(vk->dev, pass_vk->vert, PL_VK_ALLOC); vk->DestroyShaderModule(vk->dev, pass_vk->shader, PL_VK_ALLOC); pl_free((void *) pass); } VK_CB_FUNC_DEF(pass_destroy_cb); void vk_pass_destroy(pl_gpu gpu, pl_pass pass) { vk_gpu_idle_callback(gpu, VK_CB_FUNC(pass_destroy_cb), gpu, pass); } static const VkDescriptorType dsType[] = { [PL_DESC_SAMPLED_TEX] = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, [PL_DESC_STORAGE_IMG] = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, [PL_DESC_BUF_UNIFORM] = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, [PL_DESC_BUF_STORAGE] = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, [PL_DESC_BUF_TEXEL_UNIFORM] = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, [PL_DESC_BUF_TEXEL_STORAGE] = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, }; static VkResult vk_compile_glsl(pl_gpu gpu, void *alloc, enum glsl_shader_stage stage, const char *shader, pl_cache_obj *out_spirv) { struct pl_vk *p = PL_PRIV(gpu); pl_cache cache = pl_gpu_cache(gpu); uint64_t key = CACHE_KEY_SPIRV; if (cache) { // skip computing key if `cache pl_hash_merge(&key, p->spirv->signature); pl_hash_merge(&key, pl_str0_hash(shader)); out_spirv->key = key; if (pl_cache_get(cache, out_spirv)) { PL_DEBUG(gpu, "Re-using cached SPIR-V object 0x%"PRIx64, key); return VK_SUCCESS; } } pl_clock_t start = pl_clock_now(); pl_str spirv = pl_spirv_compile_glsl(p->spirv, alloc, gpu->glsl, stage, shader); pl_log_cpu_time(gpu->log, start, pl_clock_now(), "translating SPIR-V"); out_spirv->data = spirv.buf; out_spirv->size = spirv.len; out_spirv->free = pl_free; return spirv.len ? 
VK_SUCCESS : VK_ERROR_INITIALIZATION_FAILED; } static const VkShaderStageFlags stageFlags[] = { [PL_PASS_RASTER] = VK_SHADER_STAGE_FRAGMENT_BIT | VK_SHADER_STAGE_VERTEX_BIT, [PL_PASS_COMPUTE] = VK_SHADER_STAGE_COMPUTE_BIT, }; static inline void destroy_pipeline(struct vk_ctx *vk, void *pipeline) { vk->DestroyPipeline(vk->dev, vk_unwrap_handle(pipeline), PL_VK_ALLOC); } VK_CB_FUNC_DEF(destroy_pipeline); static VkResult vk_recreate_pipelines(struct vk_ctx *vk, pl_pass pass, bool derivable, VkPipeline base, VkPipeline *out_pipe) { struct pl_pass_vk *pass_vk = PL_PRIV(pass); const struct pl_pass_params *params = &pass->params; // The old pipeline might still be in use, so we have to destroy it // asynchronously with a device idle callback if (*out_pipe) { // We don't need to use `vk_gpu_idle_callback` because the only command // that can access a VkPipeline, `vk_pass_run`, always flushes `p->cmd`. vk_dev_callback(vk, VK_CB_FUNC(destroy_pipeline), vk, vk_wrap_handle(*out_pipe)); *out_pipe = VK_NULL_HANDLE; } VkPipelineCreateFlags flags = 0; if (derivable) flags |= VK_PIPELINE_CREATE_ALLOW_DERIVATIVES_BIT; if (base) flags |= VK_PIPELINE_CREATE_DERIVATIVE_BIT; const VkSpecializationInfo *specInfo = &pass_vk->specInfo; if (!specInfo->dataSize) specInfo = NULL; switch (params->type) { case PL_PASS_RASTER: { static const VkBlendFactor blendFactors[] = { [PL_BLEND_ZERO] = VK_BLEND_FACTOR_ZERO, [PL_BLEND_ONE] = VK_BLEND_FACTOR_ONE, [PL_BLEND_SRC_ALPHA] = VK_BLEND_FACTOR_SRC_ALPHA, [PL_BLEND_ONE_MINUS_SRC_ALPHA] = VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA, }; VkPipelineColorBlendAttachmentState blendState = { .colorBlendOp = VK_BLEND_OP_ADD, .alphaBlendOp = VK_BLEND_OP_ADD, .colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT, }; const struct pl_blend_params *blend = params->blend_params; if (blend) { blendState.blendEnable = true; blendState.srcColorBlendFactor = blendFactors[blend->src_rgb]; blendState.dstColorBlendFactor = blendFactors[blend->dst_rgb]; blendState.srcAlphaBlendFactor = blendFactors[blend->src_alpha]; blendState.dstAlphaBlendFactor = blendFactors[blend->dst_alpha]; } static const VkPrimitiveTopology topologies[PL_PRIM_TYPE_COUNT] = { [PL_PRIM_TRIANGLE_LIST] = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, [PL_PRIM_TRIANGLE_STRIP] = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, }; VkGraphicsPipelineCreateInfo cinfo = { .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, .flags = flags, .stageCount = 2, .pStages = (VkPipelineShaderStageCreateInfo[]) { { .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, .stage = VK_SHADER_STAGE_VERTEX_BIT, .module = pass_vk->vert, .pName = "main", }, { .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, .stage = VK_SHADER_STAGE_FRAGMENT_BIT, .module = pass_vk->shader, .pName = "main", .pSpecializationInfo = specInfo, } }, .pVertexInputState = &(VkPipelineVertexInputStateCreateInfo) { .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, .vertexBindingDescriptionCount = 1, .pVertexBindingDescriptions = &(VkVertexInputBindingDescription) { .binding = 0, .stride = params->vertex_stride, .inputRate = VK_VERTEX_INPUT_RATE_VERTEX, }, .vertexAttributeDescriptionCount = params->num_vertex_attribs, .pVertexAttributeDescriptions = pass_vk->attrs, }, .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) { .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, .topology = topologies[params->vertex_type], }, .pViewportState = 
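/* only the counts matter here; the actual viewport and scissor are
 * dynamic state, set per draw in vk_pass_run */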
&(VkPipelineViewportStateCreateInfo) { .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, .viewportCount = 1, .scissorCount = 1, }, .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) { .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, .polygonMode = VK_POLYGON_MODE_FILL, .cullMode = VK_CULL_MODE_NONE, .lineWidth = 1.0f, }, .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) { .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, .rasterizationSamples = VK_SAMPLE_COUNT_1_BIT, }, .pColorBlendState = &(VkPipelineColorBlendStateCreateInfo) { .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, .attachmentCount = 1, .pAttachments = &blendState, }, .pDynamicState = &(VkPipelineDynamicStateCreateInfo) { .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, .dynamicStateCount = 2, .pDynamicStates = (VkDynamicState[]){ VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR, }, }, .layout = pass_vk->pipeLayout, .renderPass = pass_vk->renderPass, .basePipelineHandle = base, .basePipelineIndex = -1, }; return vk->CreateGraphicsPipelines(vk->dev, pass_vk->cache, 1, &cinfo, PL_VK_ALLOC, out_pipe); } case PL_PASS_COMPUTE: { VkComputePipelineCreateInfo cinfo = { .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, .flags = flags, .stage = { .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, .stage = VK_SHADER_STAGE_COMPUTE_BIT, .module = pass_vk->shader, .pName = "main", .pSpecializationInfo = specInfo, }, .layout = pass_vk->pipeLayout, .basePipelineHandle = base, .basePipelineIndex = -1, }; return vk->CreateComputePipelines(vk->dev, pass_vk->cache, 1, &cinfo, PL_VK_ALLOC, out_pipe); } case PL_PASS_INVALID: case PL_PASS_TYPE_COUNT: break; } pl_unreachable(); } pl_pass vk_pass_create(pl_gpu gpu, const struct pl_pass_params *params) { struct pl_vk *p = PL_PRIV(gpu); struct vk_ctx *vk = p->vk; bool success = false; struct pl_pass_t *pass = pl_zalloc_obj(NULL, pass, struct pl_pass_vk); pass->params = pl_pass_params_copy(pass, params); struct pl_pass_vk *pass_vk = PL_PRIV(pass); pass_vk->dmask = -1; // all descriptors available // temporary allocations void *tmp = pl_tmp(NULL); int num_desc = params->num_descriptors; if (!num_desc) goto no_descriptors; if (num_desc > vk->props.limits.maxPerStageResources) { PL_ERR(gpu, "Pass with %d descriptors exceeds the maximum number of " "per-stage resources %" PRIu32"!", num_desc, vk->props.limits.maxPerStageResources); goto error; } pass_vk->dswrite = pl_calloc(pass, num_desc, sizeof(VkWriteDescriptorSet)); pass_vk->dsiinfo = pl_calloc(pass, num_desc, sizeof(VkDescriptorImageInfo)); pass_vk->dsbinfo = pl_calloc(pass, num_desc, sizeof(VkDescriptorBufferInfo)); #define NUM_DS (PL_ARRAY_SIZE(pass_vk->dss)) int dsSize[PL_DESC_TYPE_COUNT] = {0}; VkDescriptorSetLayoutBinding *bindings = pl_calloc_ptr(tmp, num_desc, bindings); uint32_t max_tex = vk->props.limits.maxPerStageDescriptorSampledImages, max_img = vk->props.limits.maxPerStageDescriptorStorageImages, max_ubo = vk->props.limits.maxPerStageDescriptorUniformBuffers, max_ssbo = vk->props.limits.maxPerStageDescriptorStorageBuffers; uint32_t *dsLimits[PL_DESC_TYPE_COUNT] = { [PL_DESC_SAMPLED_TEX] = &max_tex, [PL_DESC_STORAGE_IMG] = &max_img, [PL_DESC_BUF_UNIFORM] = &max_ubo, [PL_DESC_BUF_STORAGE] = &max_ssbo, [PL_DESC_BUF_TEXEL_UNIFORM] = &max_tex, [PL_DESC_BUF_TEXEL_STORAGE] = &max_img, }; for (int i = 0; i < num_desc; i++) { struct pl_desc *desc = ¶ms->descriptors[i]; if (!(*dsLimits[desc->type])--) { PL_ERR(gpu, "Pass 
exceeds the maximum number of per-stage " "descriptors of type %u!", (unsigned) desc->type); goto error; } dsSize[desc->type]++; bindings[i] = (VkDescriptorSetLayoutBinding) { .binding = desc->binding, .descriptorType = dsType[desc->type], .descriptorCount = 1, .stageFlags = stageFlags[params->type], }; } VkDescriptorSetLayoutCreateInfo dinfo = { .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, .pBindings = bindings, .bindingCount = num_desc, }; if (p->max_push_descriptors && num_desc <= p->max_push_descriptors) { dinfo.flags |= VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR; pass_vk->use_pushd = true; } else if (p->max_push_descriptors) { PL_INFO(gpu, "Pass with %d descriptors exceeds the maximum push " "descriptor count (%d). Falling back to descriptor sets!", num_desc, p->max_push_descriptors); } VK(vk->CreateDescriptorSetLayout(vk->dev, &dinfo, PL_VK_ALLOC, &pass_vk->dsLayout)); if (!pass_vk->use_pushd) { PL_ARRAY(VkDescriptorPoolSize) dsPoolSizes = {0}; for (enum pl_desc_type t = 0; t < PL_DESC_TYPE_COUNT; t++) { if (dsSize[t] > 0) { PL_ARRAY_APPEND(tmp, dsPoolSizes, (VkDescriptorPoolSize) { .type = dsType[t], .descriptorCount = dsSize[t] * NUM_DS, }); } } if (dsPoolSizes.num) { VkDescriptorPoolCreateInfo pinfo = { .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, .maxSets = NUM_DS, .pPoolSizes = dsPoolSizes.elem, .poolSizeCount = dsPoolSizes.num, }; VK(vk->CreateDescriptorPool(vk->dev, &pinfo, PL_VK_ALLOC, &pass_vk->dsPool)); VkDescriptorSetLayout layouts[NUM_DS]; for (int i = 0; i < NUM_DS; i++) layouts[i] = pass_vk->dsLayout; VkDescriptorSetAllocateInfo ainfo = { .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, .descriptorPool = pass_vk->dsPool, .descriptorSetCount = NUM_DS, .pSetLayouts = layouts, }; VK(vk->AllocateDescriptorSets(vk->dev, &ainfo, pass_vk->dss)); } } no_descriptors: ; bool has_spec = params->num_constants; if (has_spec) { PL_ARRAY(VkSpecializationMapEntry) entries = {0}; PL_ARRAY_RESIZE(pass, entries, params->num_constants); size_t spec_size = 0; for (int i = 0; i < params->num_constants; i++) { const struct pl_constant *con = ¶ms->constants[i]; size_t con_size = pl_var_type_size(con->type); entries.elem[i] = (VkSpecializationMapEntry) { .constantID = con->id, .offset = con->offset, .size = con_size, }; size_t req_size = con->offset + con_size; spec_size = PL_MAX(spec_size, req_size); } pass_vk->spec_size = spec_size; pass_vk->specInfo = (VkSpecializationInfo) { .mapEntryCount = params->num_constants, .pMapEntries = entries.elem, }; if (params->constant_data) { pass_vk->specInfo.pData = pl_memdup(pass, params->constant_data, spec_size); pass_vk->specInfo.dataSize = spec_size; } } VkPipelineLayoutCreateInfo linfo = { .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, .setLayoutCount = num_desc ? 1 : 0, .pSetLayouts = &pass_vk->dsLayout, .pushConstantRangeCount = params->push_constants_size ? 
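/* a single push constant range covering every shader stage of this pass type */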
1 : 0, .pPushConstantRanges = &(VkPushConstantRange){ .stageFlags = stageFlags[params->type], .offset = 0, .size = params->push_constants_size, }, }; VK(vk->CreatePipelineLayout(vk->dev, &linfo, PL_VK_ALLOC, &pass_vk->pipeLayout)); pl_cache_obj vert = {0}, frag = {0}, comp = {0}; switch (params->type) { case PL_PASS_RASTER: ; VK(vk_compile_glsl(gpu, tmp, GLSL_SHADER_VERTEX, params->vertex_shader, &vert)); VK(vk_compile_glsl(gpu, tmp, GLSL_SHADER_FRAGMENT, params->glsl_shader, &frag)); break; case PL_PASS_COMPUTE: VK(vk_compile_glsl(gpu, tmp, GLSL_SHADER_COMPUTE, params->glsl_shader, &comp)); break; case PL_PASS_INVALID: case PL_PASS_TYPE_COUNT: pl_unreachable(); } // Use hash of generated SPIR-V as key for pipeline cache const pl_cache cache = pl_gpu_cache(gpu); pl_cache_obj pipecache = {0}; if (cache) { pipecache.key = CACHE_KEY_VK_PIPE; pl_hash_merge(&pipecache.key, pl_var_hash(vk->props.pipelineCacheUUID)); pl_hash_merge(&pipecache.key, pl_mem_hash(vert.data, vert.size)); pl_hash_merge(&pipecache.key, pl_mem_hash(frag.data, frag.size)); pl_hash_merge(&pipecache.key, pl_mem_hash(comp.data, comp.size)); pl_cache_get(cache, &pipecache); } if (cache || has_spec) { // Don't create pipeline cache unless we either plan on caching the // result of this shader to a pl_cache, or if we will possibly re-use // it due to the presence of specialization constants VkPipelineCacheCreateInfo pcinfo = { .sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO, .pInitialData = pipecache.data, .initialDataSize = pipecache.size, }; VK(vk->CreatePipelineCache(vk->dev, &pcinfo, PL_VK_ALLOC, &pass_vk->cache)); } VkShaderModuleCreateInfo sinfo = { .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, }; pl_clock_t start = pl_clock_now(); switch (params->type) { case PL_PASS_RASTER: { sinfo.pCode = (uint32_t *) vert.data; sinfo.codeSize = vert.size; VK(vk->CreateShaderModule(vk->dev, &sinfo, PL_VK_ALLOC, &pass_vk->vert)); PL_VK_NAME(SHADER_MODULE, pass_vk->vert, "vertex"); sinfo.pCode = (uint32_t *) frag.data; sinfo.codeSize = frag.size; VK(vk->CreateShaderModule(vk->dev, &sinfo, PL_VK_ALLOC, &pass_vk->shader)); PL_VK_NAME(SHADER_MODULE, pass_vk->shader, "fragment"); pass_vk->attrs = pl_calloc_ptr(pass, params->num_vertex_attribs, pass_vk->attrs); for (int i = 0; i < params->num_vertex_attribs; i++) { struct pl_vertex_attrib *va = ¶ms->vertex_attribs[i]; const struct vk_format **pfmt_vk = PL_PRIV(va->fmt); pass_vk->attrs[i] = (VkVertexInputAttributeDescription) { .binding = 0, .location = va->location, .offset = va->offset, .format = PL_DEF((*pfmt_vk)->bfmt, (*pfmt_vk)->tfmt), }; } VkRenderPassCreateInfo rinfo = { .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, .attachmentCount = 1, .pAttachments = &(VkAttachmentDescription) { .format = (VkFormat) params->target_format->signature, .samples = VK_SAMPLE_COUNT_1_BIT, .loadOp = pass->params.load_target ? 
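/* load the previous target contents only when the pass requires them */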
VK_ATTACHMENT_LOAD_OP_LOAD : VK_ATTACHMENT_LOAD_OP_DONT_CARE, .storeOp = VK_ATTACHMENT_STORE_OP_STORE, .initialLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, .finalLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, }, .subpassCount = 1, .pSubpasses = &(VkSubpassDescription) { .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, .colorAttachmentCount = 1, .pColorAttachments = &(VkAttachmentReference) { .attachment = 0, .layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, }, }, }; VK(vk->CreateRenderPass(vk->dev, &rinfo, PL_VK_ALLOC, &pass_vk->renderPass)); break; } case PL_PASS_COMPUTE: { sinfo.pCode = (uint32_t *) comp.data; sinfo.codeSize = comp.size; VK(vk->CreateShaderModule(vk->dev, &sinfo, PL_VK_ALLOC, &pass_vk->shader)); PL_VK_NAME(SHADER_MODULE, pass_vk->shader, "compute"); break; } case PL_PASS_INVALID: case PL_PASS_TYPE_COUNT: pl_unreachable(); } pl_clock_t after_compilation = pl_clock_now(); pl_log_cpu_time(gpu->log, start, after_compilation, "compiling shader"); // Update cache entries on successful compilation pl_cache_steal(cache, &vert); pl_cache_steal(cache, &frag); pl_cache_steal(cache, &comp); // Create the graphics/compute pipeline VkPipeline *pipe = has_spec ? &pass_vk->base : &pass_vk->pipe; VK(vk_recreate_pipelines(vk, pass, has_spec, VK_NULL_HANDLE, pipe)); pl_log_cpu_time(gpu->log, after_compilation, pl_clock_now(), "creating pipeline"); // Update pipeline cache if (cache) { size_t size = 0; VK(vk->GetPipelineCacheData(vk->dev, pass_vk->cache, &size, NULL)); pl_cache_obj_resize(tmp, &pipecache, size); VK(vk->GetPipelineCacheData(vk->dev, pass_vk->cache, &size, pipecache.data)); pl_cache_steal(cache, &pipecache); } if (!has_spec) { // We can free these if we no longer need them for specialization pl_free_ptr(&pass_vk->attrs); vk->DestroyShaderModule(vk->dev, pass_vk->vert, PL_VK_ALLOC); vk->DestroyShaderModule(vk->dev, pass_vk->shader, PL_VK_ALLOC); vk->DestroyPipelineCache(vk->dev, pass_vk->cache, PL_VK_ALLOC); pass_vk->vert = VK_NULL_HANDLE; pass_vk->shader = VK_NULL_HANDLE; pass_vk->cache = VK_NULL_HANDLE; } PL_DEBUG(vk, "Pass statistics: size %zu, SPIR-V: vert %zu frag %zu comp %zu", pipecache.size, vert.size, frag.size, comp.size); success = true; error: if (!success) { pass_destroy_cb(gpu, pass); pass = NULL; } #undef NUM_DS pl_free(tmp); return pass; } static const VkPipelineStageFlags2 shaderStages[] = { [PL_PASS_RASTER] = VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT, [PL_PASS_COMPUTE] = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, }; static void vk_update_descriptor(pl_gpu gpu, struct vk_cmd *cmd, pl_pass pass, struct pl_desc_binding db, VkDescriptorSet ds, int idx) { struct pl_vk *p = PL_PRIV(gpu); struct pl_pass_vk *pass_vk = PL_PRIV(pass); struct pl_desc *desc = &pass->params.descriptors[idx]; VkWriteDescriptorSet *wds = &pass_vk->dswrite[idx]; *wds = (VkWriteDescriptorSet) { .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, .dstSet = ds, .dstBinding = desc->binding, .descriptorCount = 1, .descriptorType = dsType[desc->type], }; static const VkAccessFlags2 storageAccess[PL_DESC_ACCESS_COUNT] = { [PL_DESC_ACCESS_READONLY] = VK_ACCESS_2_SHADER_STORAGE_READ_BIT, [PL_DESC_ACCESS_WRITEONLY] = VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT, [PL_DESC_ACCESS_READWRITE] = VK_ACCESS_2_SHADER_STORAGE_READ_BIT | VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT, }; switch (desc->type) { case PL_DESC_SAMPLED_TEX: { pl_tex tex = db.object; struct pl_tex_vk *tex_vk = PL_PRIV(tex); vk_tex_barrier(gpu, cmd, tex, shaderStages[pass->params.type], VK_ACCESS_2_SHADER_SAMPLED_READ_BIT, 
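/* transition sampled textures to the shader-read-only layout */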
VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_QUEUE_FAMILY_IGNORED); VkDescriptorImageInfo *iinfo = &pass_vk->dsiinfo[idx]; *iinfo = (VkDescriptorImageInfo) { .sampler = p->samplers[db.sample_mode][db.address_mode], .imageView = tex_vk->view, .imageLayout = tex_vk->layout, }; wds->pImageInfo = iinfo; return; } case PL_DESC_STORAGE_IMG: { pl_tex tex = db.object; struct pl_tex_vk *tex_vk = PL_PRIV(tex); vk_tex_barrier(gpu, cmd, tex, shaderStages[pass->params.type], storageAccess[desc->access], VK_IMAGE_LAYOUT_GENERAL, VK_QUEUE_FAMILY_IGNORED); VkDescriptorImageInfo *iinfo = &pass_vk->dsiinfo[idx]; *iinfo = (VkDescriptorImageInfo) { .imageView = tex_vk->view, .imageLayout = tex_vk->layout, }; wds->pImageInfo = iinfo; return; } case PL_DESC_BUF_UNIFORM: case PL_DESC_BUF_STORAGE: { pl_buf buf = db.object; struct pl_buf_vk *buf_vk = PL_PRIV(buf); VkAccessFlags2 access = VK_ACCESS_2_UNIFORM_READ_BIT; if (desc->type == PL_DESC_BUF_STORAGE) access = storageAccess[desc->access]; vk_buf_barrier(gpu, cmd, buf, shaderStages[pass->params.type], access, 0, buf->params.size, false); VkDescriptorBufferInfo *binfo = &pass_vk->dsbinfo[idx]; *binfo = (VkDescriptorBufferInfo) { .buffer = buf_vk->mem.buf, .offset = buf_vk->mem.offset, .range = buf->params.size, }; wds->pBufferInfo = binfo; return; } case PL_DESC_BUF_TEXEL_UNIFORM: case PL_DESC_BUF_TEXEL_STORAGE: { pl_buf buf = db.object; struct pl_buf_vk *buf_vk = PL_PRIV(buf); VkAccessFlags2 access = VK_ACCESS_2_SHADER_SAMPLED_READ_BIT; if (desc->type == PL_DESC_BUF_TEXEL_STORAGE) access = storageAccess[desc->access]; vk_buf_barrier(gpu, cmd, buf, shaderStages[pass->params.type], access, 0, buf->params.size, false); wds->pTexelBufferView = &buf_vk->view; return; } case PL_DESC_INVALID: case PL_DESC_TYPE_COUNT: break; } pl_unreachable(); } static void vk_release_descriptor(pl_gpu gpu, struct vk_cmd *cmd, pl_pass pass, struct pl_desc_binding db, int idx) { const struct pl_desc *desc = &pass->params.descriptors[idx]; switch (desc->type) { case PL_DESC_BUF_UNIFORM: case PL_DESC_BUF_STORAGE: case PL_DESC_BUF_TEXEL_UNIFORM: case PL_DESC_BUF_TEXEL_STORAGE: if (desc->access != PL_DESC_ACCESS_READONLY) { pl_buf buf = db.object; vk_buf_flush(gpu, cmd, buf, 0, buf->params.size); } return; case PL_DESC_SAMPLED_TEX: case PL_DESC_STORAGE_IMG: return; case PL_DESC_INVALID: case PL_DESC_TYPE_COUNT: break; } pl_unreachable(); } static void set_ds(struct pl_pass_vk *pass_vk, void *dsbit) { pass_vk->dmask |= (uintptr_t) dsbit; } VK_CB_FUNC_DEF(set_ds); static bool need_respec(pl_pass pass, const struct pl_pass_run_params *params) { struct pl_pass_vk *pass_vk = PL_PRIV(pass); if (!pass_vk->spec_size || !params->constant_data) return false; VkSpecializationInfo *specInfo = &pass_vk->specInfo; size_t size = pass_vk->spec_size; if (!specInfo->pData) { // Shader was never specialized before specInfo->pData = pl_memdup((void *) pass, params->constant_data, size); specInfo->dataSize = size; return true; } // Shader is being re-specialized with new values if (memcmp(specInfo->pData, params->constant_data, size) != 0) { memcpy((void *) specInfo->pData, params->constant_data, size); return true; } return false; } void vk_pass_run(pl_gpu gpu, const struct pl_pass_run_params *params) { struct pl_vk *p = PL_PRIV(gpu); struct vk_ctx *vk = p->vk; pl_pass pass = params->pass; struct pl_pass_vk *pass_vk = PL_PRIV(pass); if (params->vertex_data || params->index_data) return pl_pass_run_vbo(gpu, params); // Check if we need to re-specialize this pipeline if (need_respec(pass, params)) { pl_clock_t 
start = pl_clock_now(); VK(vk_recreate_pipelines(vk, pass, false, pass_vk->base, &pass_vk->pipe)); pl_log_cpu_time(gpu->log, start, pl_clock_now(), "re-specializing shader"); } if (!pass_vk->use_pushd) { // Wait for a free descriptor set while (!pass_vk->dmask) { PL_TRACE(gpu, "No free descriptor sets! ...blocking (slow path)"); vk_poll_commands(vk, 10000000); // 10 ms } } static const enum queue_type types[] = { [PL_PASS_RASTER] = GRAPHICS, [PL_PASS_COMPUTE] = COMPUTE, }; struct vk_cmd *cmd = CMD_BEGIN_TIMED(types[pass->params.type], params->timer); if (!cmd) goto error; // Find a descriptor set to use VkDescriptorSet ds = VK_NULL_HANDLE; if (!pass_vk->use_pushd) { for (int i = 0; i < PL_ARRAY_SIZE(pass_vk->dss); i++) { uint16_t dsbit = 1u << i; if (pass_vk->dmask & dsbit) { ds = pass_vk->dss[i]; pass_vk->dmask &= ~dsbit; // unset vk_cmd_callback(cmd, VK_CB_FUNC(set_ds), pass_vk, (void *)(uintptr_t) dsbit); break; } } } // Update the dswrite structure with all of the new values for (int i = 0; i < pass->params.num_descriptors; i++) vk_update_descriptor(gpu, cmd, pass, params->desc_bindings[i], ds, i); if (!pass_vk->use_pushd) { vk->UpdateDescriptorSets(vk->dev, pass->params.num_descriptors, pass_vk->dswrite, 0, NULL); } // Bind the pipeline, descriptor set, etc. static const VkPipelineBindPoint bindPoint[] = { [PL_PASS_RASTER] = VK_PIPELINE_BIND_POINT_GRAPHICS, [PL_PASS_COMPUTE] = VK_PIPELINE_BIND_POINT_COMPUTE, }; vk->CmdBindPipeline(cmd->buf, bindPoint[pass->params.type], PL_DEF(pass_vk->pipe, pass_vk->base)); if (ds) { vk->CmdBindDescriptorSets(cmd->buf, bindPoint[pass->params.type], pass_vk->pipeLayout, 0, 1, &ds, 0, NULL); } if (pass_vk->use_pushd) { vk->CmdPushDescriptorSetKHR(cmd->buf, bindPoint[pass->params.type], pass_vk->pipeLayout, 0, pass->params.num_descriptors, pass_vk->dswrite); } if (pass->params.push_constants_size) { vk->CmdPushConstants(cmd->buf, pass_vk->pipeLayout, stageFlags[pass->params.type], 0, pass->params.push_constants_size, params->push_constants); } switch (pass->params.type) { case PL_PASS_RASTER: { pl_tex tex = params->target; struct pl_tex_vk *tex_vk = PL_PRIV(tex); pl_buf vert = params->vertex_buf; struct pl_buf_vk *vert_vk = PL_PRIV(vert); pl_buf index = params->index_buf; struct pl_buf_vk *index_vk = index ? 
PL_PRIV(index) : NULL; pl_assert(vert); // In the edge case that vert = index buffer, we need to synchronize // for both flags simultaneously VkPipelineStageFlags2 vbo_stage = VK_PIPELINE_STAGE_2_VERTEX_ATTRIBUTE_INPUT_BIT; VkAccessFlags2 vbo_flags = VK_ACCESS_2_VERTEX_ATTRIBUTE_READ_BIT; if (index == vert) { vbo_stage |= VK_PIPELINE_STAGE_2_INDEX_INPUT_BIT; vbo_flags |= VK_ACCESS_2_INDEX_READ_BIT; } vk_buf_barrier(gpu, cmd, vert, vbo_stage, vbo_flags, 0, vert->params.size, false); VkDeviceSize offset = vert_vk->mem.offset + params->buf_offset; vk->CmdBindVertexBuffers(cmd->buf, 0, 1, &vert_vk->mem.buf, &offset); if (index) { if (index != vert) { vk_buf_barrier(gpu, cmd, index, VK_PIPELINE_STAGE_2_INDEX_INPUT_BIT, VK_ACCESS_2_INDEX_READ_BIT, 0, index->params.size, false); } static const VkIndexType index_fmts[PL_INDEX_FORMAT_COUNT] = { [PL_INDEX_UINT16] = VK_INDEX_TYPE_UINT16, [PL_INDEX_UINT32] = VK_INDEX_TYPE_UINT32, }; vk->CmdBindIndexBuffer(cmd->buf, index_vk->mem.buf, index_vk->mem.offset + params->index_offset, index_fmts[params->index_fmt]); } VkAccessFlags2 fbo_access = VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT; if (pass->params.load_target) fbo_access |= VK_ACCESS_2_COLOR_ATTACHMENT_READ_BIT; vk_tex_barrier(gpu, cmd, tex, VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT, fbo_access, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, VK_QUEUE_FAMILY_IGNORED); VkViewport viewport = { .x = params->viewport.x0, .y = params->viewport.y0, .width = pl_rect_w(params->viewport), .height = pl_rect_h(params->viewport), }; VkRect2D scissor = { .offset = {params->scissors.x0, params->scissors.y0}, .extent = {pl_rect_w(params->scissors), pl_rect_h(params->scissors)}, }; vk->CmdSetViewport(cmd->buf, 0, 1, &viewport); vk->CmdSetScissor(cmd->buf, 0, 1, &scissor); VkRenderPassBeginInfo binfo = { .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, .renderPass = pass_vk->renderPass, .framebuffer = tex_vk->framebuffer, .renderArea.extent = {tex->params.w, tex->params.h}, }; vk->CmdBeginRenderPass(cmd->buf, &binfo, VK_SUBPASS_CONTENTS_INLINE); if (index) { vk->CmdDrawIndexed(cmd->buf, params->vertex_count, 1, 0, 0, 0); } else { vk->CmdDraw(cmd->buf, params->vertex_count, 1, 0, 0); } vk->CmdEndRenderPass(cmd->buf); break; } case PL_PASS_COMPUTE: vk->CmdDispatch(cmd->buf, params->compute_groups[0], params->compute_groups[1], params->compute_groups[2]); break; case PL_PASS_INVALID: case PL_PASS_TYPE_COUNT: pl_unreachable(); }; for (int i = 0; i < pass->params.num_descriptors; i++) vk_release_descriptor(gpu, cmd, pass, params->desc_bindings[i], i); // submit this command buffer for better intra-frame granularity CMD_SUBMIT(&cmd); error: return; } libplacebo-v7.349.0/src/vulkan/gpu_tex.c000066400000000000000000001424441463457750100201130ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . 
*/ #include "gpu.h" VK_CB_FUNC_DEF(vk_tex_deref); void vk_tex_barrier(pl_gpu gpu, struct vk_cmd *cmd, pl_tex tex, VkPipelineStageFlags2 stage, VkAccessFlags2 access, VkImageLayout layout, uint32_t qf) { struct pl_vk *p = PL_PRIV(gpu); struct vk_ctx *vk = p->vk; struct pl_tex_vk *tex_vk = PL_PRIV(tex); pl_rc_ref(&tex_vk->rc); pl_assert(!tex_vk->held); pl_assert(!tex_vk->num_planes); // CONCURRENT images require transitioning to/from IGNORED, EXCLUSIVE // images require transitioning to/from the concrete QF index if (vk->pools.num == 1) { if (tex_vk->qf == VK_QUEUE_FAMILY_IGNORED) tex_vk->qf = cmd->pool->qf; if (qf == VK_QUEUE_FAMILY_IGNORED) qf = cmd->pool->qf; } struct vk_sync_scope last; bool is_trans = layout != tex_vk->layout, is_xfer = qf != tex_vk->qf; last = vk_sem_barrier(cmd, &tex_vk->sem, stage, access, is_trans || is_xfer); VkImageMemoryBarrier2 barr = { .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2, .srcStageMask = last.stage, .srcAccessMask = last.access, .dstStageMask = stage, .dstAccessMask = access, .oldLayout = tex_vk->layout, .newLayout = layout, .srcQueueFamilyIndex = tex_vk->qf, .dstQueueFamilyIndex = qf, .image = tex_vk->img, .subresourceRange = { .aspectMask = tex_vk->aspect, .levelCount = 1, .layerCount = 1, }, }; if (tex_vk->may_invalidate) { tex_vk->may_invalidate = false; barr.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; } if (last.access || is_trans || is_xfer) { vk_cmd_barrier(cmd, &(VkDependencyInfo) { .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, .imageMemoryBarrierCount = 1, .pImageMemoryBarriers = &barr, }); } tex_vk->qf = qf; tex_vk->layout = layout; vk_cmd_callback(cmd, VK_CB_FUNC(vk_tex_deref), gpu, tex); for (int i = 0; i < tex_vk->ext_deps.num; i++) vk_cmd_dep(cmd, stage, tex_vk->ext_deps.elem[i]); tex_vk->ext_deps.num = 0; } static void vk_tex_destroy(pl_gpu gpu, struct pl_tex_t *tex) { if (!tex) return; struct pl_vk *p = PL_PRIV(gpu); struct vk_ctx *vk = p->vk; struct pl_tex_vk *tex_vk = PL_PRIV(tex); vk->DestroyFramebuffer(vk->dev, tex_vk->framebuffer, PL_VK_ALLOC); vk->DestroyImageView(vk->dev, tex_vk->view, PL_VK_ALLOC); for (int i = 0; i < tex_vk->num_planes; i++) vk_tex_deref(gpu, tex->planes[i]); if (!tex_vk->external_img) { vk->DestroyImage(vk->dev, tex_vk->img, PL_VK_ALLOC); vk_malloc_free(vk->ma, &tex_vk->mem); } pl_free(tex); } void vk_tex_deref(pl_gpu gpu, pl_tex tex) { if (!tex) return; struct pl_tex_vk *tex_vk = PL_PRIV(tex); if (pl_rc_deref(&tex_vk->rc)) vk_tex_destroy(gpu, (struct pl_tex_t *) tex); } // Initializes non-VkImage values like the image view, framebuffers, etc. static bool vk_init_image(pl_gpu gpu, pl_tex tex, pl_debug_tag debug_tag) { struct pl_vk *p = PL_PRIV(gpu); struct vk_ctx *vk = p->vk; const struct pl_tex_params *params = &tex->params; struct pl_tex_vk *tex_vk = PL_PRIV(tex); pl_assert(tex_vk->img); PL_VK_NAME(IMAGE, tex_vk->img, debug_tag); pl_rc_init(&tex_vk->rc); if (tex_vk->num_planes) return true; tex_vk->layout = VK_IMAGE_LAYOUT_UNDEFINED; tex_vk->transfer_queue = GRAPHICS; tex_vk->qf = VK_QUEUE_FAMILY_IGNORED; // will be set on first use, if needed // Always use the transfer pool if available, for efficiency if ((params->host_writable || params->host_readable) && vk->pool_transfer) tex_vk->transfer_queue = TRANSFER; // For emulated formats: force usage of the compute queue, because we // can't properly track cross-queue dependencies for buffers (yet?) 
if (params->format->emulated) tex_vk->transfer_queue = COMPUTE; bool ret = false; VkRenderPass dummyPass = VK_NULL_HANDLE; if (params->sampleable || params->renderable || params->storable) { static const VkImageViewType viewType[] = { [VK_IMAGE_TYPE_1D] = VK_IMAGE_VIEW_TYPE_1D, [VK_IMAGE_TYPE_2D] = VK_IMAGE_VIEW_TYPE_2D, [VK_IMAGE_TYPE_3D] = VK_IMAGE_VIEW_TYPE_3D, }; const VkImageViewCreateInfo vinfo = { .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, .image = tex_vk->img, .viewType = viewType[tex_vk->type], .format = tex_vk->img_fmt, .subresourceRange = { .aspectMask = tex_vk->aspect, .levelCount = 1, .layerCount = 1, }, }; VK(vk->CreateImageView(vk->dev, &vinfo, PL_VK_ALLOC, &tex_vk->view)); PL_VK_NAME(IMAGE_VIEW, tex_vk->view, debug_tag); } if (params->renderable) { // Framebuffers need to be created against a specific render pass // layout, so we need to temporarily create a skeleton/dummy render // pass for vulkan to figure out the compatibility VkRenderPassCreateInfo rinfo = { .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, .attachmentCount = 1, .pAttachments = &(VkAttachmentDescription) { .format = tex_vk->img_fmt, .samples = VK_SAMPLE_COUNT_1_BIT, .loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE, .storeOp = VK_ATTACHMENT_STORE_OP_STORE, .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, .finalLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, }, .subpassCount = 1, .pSubpasses = &(VkSubpassDescription) { .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, .colorAttachmentCount = 1, .pColorAttachments = &(VkAttachmentReference) { .attachment = 0, .layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, }, }, }; VK(vk->CreateRenderPass(vk->dev, &rinfo, PL_VK_ALLOC, &dummyPass)); VkFramebufferCreateInfo finfo = { .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, .renderPass = dummyPass, .attachmentCount = 1, .pAttachments = &tex_vk->view, .width = tex->params.w, .height = tex->params.h, .layers = 1, }; if (finfo.width > vk->props.limits.maxFramebufferWidth || finfo.height > vk->props.limits.maxFramebufferHeight) { PL_ERR(gpu, "Framebuffer of size %dx%d exceeds the maximum allowed " "dimensions: %dx%d", finfo.width, finfo.height, vk->props.limits.maxFramebufferWidth, vk->props.limits.maxFramebufferHeight); goto error; } VK(vk->CreateFramebuffer(vk->dev, &finfo, PL_VK_ALLOC, &tex_vk->framebuffer)); PL_VK_NAME(FRAMEBUFFER, tex_vk->framebuffer, debug_tag); } ret = true; error: vk->DestroyRenderPass(vk->dev, dummyPass, PL_VK_ALLOC); return ret; } pl_tex vk_tex_create(pl_gpu gpu, const struct pl_tex_params *params) { struct pl_vk *p = PL_PRIV(gpu); struct vk_ctx *vk = p->vk; enum pl_handle_type handle_type = params->export_handle | params->import_handle; VkExternalMemoryHandleTypeFlagBitsKHR vk_handle_type = vk_mem_handle_type(handle_type); struct pl_tex_t *tex = pl_zalloc_obj(NULL, tex, struct pl_tex_vk); pl_fmt fmt = params->format; tex->params = *params; tex->params.initial_data = NULL; tex->sampler_type = PL_SAMPLER_NORMAL; struct pl_tex_vk *tex_vk = PL_PRIV(tex); struct pl_fmt_vk *fmtp = PL_PRIV(fmt); tex_vk->img_fmt = fmtp->vk_fmt->tfmt; tex_vk->num_planes = fmt->num_planes; for (int i = 0; i < tex_vk->num_planes; i++) tex_vk->aspect |= VK_IMAGE_ASPECT_PLANE_0_BIT << i; tex_vk->aspect = PL_DEF(tex_vk->aspect, VK_IMAGE_ASPECT_COLOR_BIT); switch (pl_tex_params_dimension(*params)) { case 1: tex_vk->type = VK_IMAGE_TYPE_1D; break; case 2: tex_vk->type = VK_IMAGE_TYPE_2D; break; case 3: tex_vk->type = VK_IMAGE_TYPE_3D; break; } if (fmt->emulated) { tex_vk->texel_fmt = pl_find_fmt(gpu, fmt->type, 1, 
0, fmt->host_bits[0], PL_FMT_CAP_TEXEL_UNIFORM); if (!tex_vk->texel_fmt) { PL_ERR(gpu, "Failed picking texel format for emulated texture!"); goto error; } // Our format emulation requires storage image support. In order to // make a bunch of checks happy, just mark it off as storable (and also // enable VK_IMAGE_USAGE_STORAGE_BIT, which we do below) tex->params.storable = true; } if (fmtp->blit_emulated) { // Enable what's required for sampling tex->params.sampleable = fmt->caps & PL_FMT_CAP_SAMPLEABLE; tex->params.storable = true; } // Blit emulation on planar textures requires storage if ((params->blit_src || params->blit_dst) && tex_vk->num_planes) tex->params.storable = true; VkImageUsageFlags usage = 0; VkImageCreateFlags flags = 0; if (tex->params.sampleable) usage |= VK_IMAGE_USAGE_SAMPLED_BIT; if (tex->params.renderable) usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; if (tex->params.storable) usage |= VK_IMAGE_USAGE_STORAGE_BIT; if (tex->params.host_readable || tex->params.blit_src) usage |= VK_IMAGE_USAGE_TRANSFER_SRC_BIT; if (tex->params.host_writable || tex->params.blit_dst || params->initial_data) usage |= VK_IMAGE_USAGE_TRANSFER_DST_BIT; if (!usage) { // Vulkan requires images have at least *some* image usage set, but our // API is perfectly happy with a (useless) image. So just put // VK_IMAGE_USAGE_TRANSFER_DST_BIT since this harmless. usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT; } if (tex_vk->num_planes) { flags |= VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT | VK_IMAGE_CREATE_EXTENDED_USAGE_BIT; } // FIXME: Since we can't keep track of queue family ownership properly, // and we don't know in advance what types of queue families this image // will belong to, we're forced to share all of our images between all // command pools. uint32_t qfs[3] = {0}; pl_assert(vk->pools.num <= PL_ARRAY_SIZE(qfs)); for (int i = 0; i < vk->pools.num; i++) qfs[i] = vk->pools.elem[i]->qf; VkImageDrmFormatModifierExplicitCreateInfoEXT drm_explicit = { .sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_EXPLICIT_CREATE_INFO_EXT, .drmFormatModifier = params->shared_mem.drm_format_mod, .drmFormatModifierPlaneCount = 1, .pPlaneLayouts = &(VkSubresourceLayout) { .rowPitch = PL_DEF(params->shared_mem.stride_w, params->w), .depthPitch = params->d ? PL_DEF(params->shared_mem.stride_h, params->h) : 0, .offset = params->shared_mem.offset, }, }; #ifdef VK_EXT_metal_objects VkImportMetalTextureInfoEXT import_metal_tex = { .sType = VK_STRUCTURE_TYPE_IMPORT_METAL_TEXTURE_INFO_EXT, .plane = VK_IMAGE_ASPECT_PLANE_0_BIT << params->shared_mem.plane, }; VkImportMetalIOSurfaceInfoEXT import_iosurface = { .sType = VK_STRUCTURE_TYPE_IMPORT_METAL_IO_SURFACE_INFO_EXT, }; #endif VkImageDrmFormatModifierListCreateInfoEXT drm_list = { .sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_LIST_CREATE_INFO_EXT, .drmFormatModifierCount = fmt->num_modifiers, .pDrmFormatModifiers = fmt->modifiers, }; VkExternalMemoryImageCreateInfoKHR ext_info = { .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO_KHR, .handleTypes = vk_handle_type, }; VkImageCreateInfo iinfo = { .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, .pNext = vk_handle_type ? &ext_info : NULL, .imageType = tex_vk->type, .format = tex_vk->img_fmt, .extent = (VkExtent3D) { .width = params->w, .height = PL_MAX(1, params->h), .depth = PL_MAX(1, params->d) }, .mipLevels = 1, .arrayLayers = 1, .samples = VK_SAMPLE_COUNT_1_BIT, .tiling = VK_IMAGE_TILING_OPTIMAL, .usage = usage, .flags = flags, .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, .sharingMode = vk->pools.num > 1 ? 
VK_SHARING_MODE_CONCURRENT : VK_SHARING_MODE_EXCLUSIVE, .queueFamilyIndexCount = vk->pools.num, .pQueueFamilyIndices = qfs, }; struct vk_malloc_params mparams = { .optimal = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, .export_handle = params->export_handle, .import_handle = params->import_handle, .shared_mem = params->shared_mem, .debug_tag = params->debug_tag, }; if (params->import_handle == PL_HANDLE_DMA_BUF) { vk_link_struct(&iinfo, &drm_explicit); iinfo.tiling = VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT; mparams.shared_mem.offset = 0x0; // handled via plane offsets } #ifdef VK_EXT_metal_objects if (params->import_handle == PL_HANDLE_MTL_TEX) { vk_link_struct(&iinfo, &import_metal_tex); import_metal_tex.mtlTexture = params->shared_mem.handle.handle; } if (params->import_handle == PL_HANDLE_IOSURFACE) { vk_link_struct(&iinfo, &import_iosurface); import_iosurface.ioSurface = params->shared_mem.handle.handle; } #endif if (params->export_handle == PL_HANDLE_DMA_BUF) { pl_assert(drm_list.drmFormatModifierCount > 0); vk_link_struct(&iinfo, &drm_list); iinfo.tiling = VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT; } // Double-check physical image format limits and fail if invalid VkPhysicalDeviceImageDrmFormatModifierInfoEXT drm_pinfo = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_DRM_FORMAT_MODIFIER_INFO_EXT, .sharingMode = iinfo.sharingMode, .queueFamilyIndexCount = iinfo.queueFamilyIndexCount, .pQueueFamilyIndices = iinfo.pQueueFamilyIndices, }; VkPhysicalDeviceExternalImageFormatInfoKHR ext_pinfo = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_IMAGE_FORMAT_INFO_KHR, .handleType = ext_info.handleTypes, }; if (handle_type == PL_HANDLE_DMA_BUF) { if (params->import_handle) { // On import, we know exactly which format modifier to test drm_pinfo.drmFormatModifier = drm_explicit.drmFormatModifier; } else { // On export, the choice of format modifier is ambiguous, because // we offer the implementation a whole list to choose from. In // principle, we must check *all* supported drm format modifiers, // but in practice it should hopefully suffice to just check one drm_pinfo.drmFormatModifier = drm_list.pDrmFormatModifiers[0]; } vk_link_struct(&ext_pinfo, &drm_pinfo); } VkPhysicalDeviceImageFormatInfo2KHR pinfo = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2_KHR, .pNext = vk_handle_type ? &ext_pinfo : NULL, .format = iinfo.format, .type = iinfo.imageType, .tiling = iinfo.tiling, .usage = iinfo.usage, .flags = iinfo.flags, }; VkExternalImageFormatPropertiesKHR ext_props = { .sType = VK_STRUCTURE_TYPE_EXTERNAL_IMAGE_FORMAT_PROPERTIES_KHR, }; VkImageFormatProperties2KHR props = { .sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2_KHR, .pNext = vk_handle_type ? 
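/* also query external-memory capabilities when a handle type is involved */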
&ext_props : NULL, }; VkResult res; res = vk->GetPhysicalDeviceImageFormatProperties2KHR(vk->physd, &pinfo, &props); if (res == VK_ERROR_FORMAT_NOT_SUPPORTED) { PL_DEBUG(gpu, "Texture creation failed: not supported"); goto error; } else { PL_VK_ASSERT(res, "Querying image format properties"); } VkExtent3D max = props.imageFormatProperties.maxExtent; if (params->w > max.width || params->h > max.height || params->d > max.depth) { PL_ERR(gpu, "Requested image size %dx%dx%d exceeds the maximum allowed " "dimensions %dx%dx%d for vulkan image format %x", params->w, params->h, params->d, max.width, max.height, max.depth, (unsigned) iinfo.format); goto error; } // Ensure the handle type is supported if (vk_handle_type) { bool ok = vk_external_mem_check(vk, &ext_props.externalMemoryProperties, handle_type, params->import_handle); if (!ok) { PL_ERR(gpu, "Requested handle type is not compatible with the " "specified combination of image parameters. Possibly the " "handle type is unsupported altogether?"); goto error; } } VK(vk->CreateImage(vk->dev, &iinfo, PL_VK_ALLOC, &tex_vk->img)); tex_vk->usage_flags = iinfo.usage; VkMemoryDedicatedRequirements ded_reqs = { .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS_KHR, }; VkMemoryRequirements2 reqs = { .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2_KHR, .pNext = &ded_reqs, }; VkImageMemoryRequirementsInfo2 req_info = { .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2_KHR, .image = tex_vk->img, }; vk->GetImageMemoryRequirements2(vk->dev, &req_info, &reqs); mparams.reqs = reqs.memoryRequirements; if (ded_reqs.prefersDedicatedAllocation) { mparams.ded_image = tex_vk->img; if (vk_mem_handle_type(params->import_handle)) mparams.shared_mem.size = reqs.memoryRequirements.size; } const char *debug_tag = params->debug_tag ? params->debug_tag : params->import_handle ? 
"imported" : "created"; if (!params->import_handle || vk_mem_handle_type(params->import_handle)) { struct vk_memslice *mem = &tex_vk->mem; if (!vk_malloc_slice(vk->ma, mem, &mparams)) goto error; VK(vk->BindImageMemory(vk->dev, tex_vk->img, mem->vkmem, mem->offset)); } static const char * const plane_names[4] = { "plane 0", "plane 1", "plane 2", "plane 3", }; if (tex_vk->num_planes) { for (int i = 0; i < tex_vk->num_planes; i++) { struct pl_tex_t *plane; pl_assert(tex_vk->type == VK_IMAGE_TYPE_2D); plane = (struct pl_tex_t *) pl_vulkan_wrap(gpu, pl_vulkan_wrap_params( .image = tex_vk->img, .aspect = VK_IMAGE_ASPECT_PLANE_0_BIT << i, .width = PL_RSHIFT_UP(tex->params.w, fmt->planes[i].shift_x), .height = PL_RSHIFT_UP(tex->params.h, fmt->planes[i].shift_y), .format = fmtp->vk_fmt->pfmt[i].fmt, .usage = usage, .user_data = params->user_data, .debug_tag = PL_DEF(params->debug_tag, plane_names[i]), )); if (!plane) goto error; plane->parent = tex; tex->planes[i] = plane; tex_vk->planes[i] = PL_PRIV(plane); tex_vk->planes[i]->held = false; tex_vk->planes[i]->layout = tex_vk->layout; } // Explicitly mask out all usage flags from planar parent images pl_assert(!fmt->caps); tex->params.sampleable = false; tex->params.renderable = false; tex->params.storable = false; tex->params.blit_src = false; tex->params.blit_dst = false; tex->params.host_writable = false; tex->params.host_readable = false; } if (!vk_init_image(gpu, tex, debug_tag)) goto error; if (params->export_handle) tex->shared_mem = tex_vk->mem.shared_mem; if (params->export_handle == PL_HANDLE_DMA_BUF) { if (vk->GetImageDrmFormatModifierPropertiesEXT) { // Query the DRM format modifier and plane layout from the driver VkImageDrmFormatModifierPropertiesEXT mod_props = { .sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_PROPERTIES_EXT, }; VK(vk->GetImageDrmFormatModifierPropertiesEXT(vk->dev, tex_vk->img, &mod_props)); tex->shared_mem.drm_format_mod = mod_props.drmFormatModifier; VkSubresourceLayout layout = {0}; VkImageSubresource plane = { .aspectMask = VK_IMAGE_ASPECT_MEMORY_PLANE_0_BIT_EXT, }; vk->GetImageSubresourceLayout(vk->dev, tex_vk->img, &plane, &layout); if (layout.offset != 0) { PL_ERR(gpu, "Exported DRM plane 0 has nonzero offset %zu, " "this should never happen! Erroring for safety...", (size_t) layout.offset); goto error; } tex->shared_mem.stride_w = layout.rowPitch; tex->shared_mem.stride_h = layout.depthPitch; } else { // Fallback for no modifiers, just do something stupid. 
tex->shared_mem.drm_format_mod = DRM_FORMAT_MOD_INVALID; tex->shared_mem.stride_w = params->w; tex->shared_mem.stride_h = params->h; } } if (params->initial_data) { struct pl_tex_transfer_params ul_params = { .tex = tex, .ptr = (void *) params->initial_data, .rc = { 0, 0, 0, params->w, params->h, params->d }, }; // Since we re-use GPU helpers which require writable images, just fake it bool writable = tex->params.host_writable; tex->params.host_writable = true; if (!pl_tex_upload(gpu, &ul_params)) goto error; tex->params.host_writable = writable; } return tex; error: vk_tex_destroy(gpu, tex); return NULL; } void vk_tex_invalidate(pl_gpu gpu, pl_tex tex) { struct pl_tex_vk *tex_vk = PL_PRIV(tex); tex_vk->may_invalidate = true; for (int i = 0; i < tex_vk->num_planes; i++) tex_vk->planes[i]->may_invalidate = true; } static bool tex_clear_fallback(pl_gpu gpu, pl_tex tex, const union pl_clear_color color) { pl_tex pixel = pl_tex_create(gpu, pl_tex_params( .w = 1, .h = 1, .format = tex->params.format, .storable = true, .blit_src = true, .blit_dst = true, )); if (!pixel) return false; pl_tex_clear_ex(gpu, pixel, color); pl_assert(tex->params.storable); pl_tex_blit(gpu, pl_tex_blit_params( .src = pixel, .dst = tex, .sample_mode = PL_TEX_SAMPLE_NEAREST, )); pl_tex_destroy(gpu, &pixel); return true; } void vk_tex_clear_ex(pl_gpu gpu, pl_tex tex, const union pl_clear_color color) { struct pl_vk *p = PL_PRIV(gpu); struct vk_ctx *vk = p->vk; struct pl_tex_vk *tex_vk = PL_PRIV(tex); if (tex_vk->aspect != VK_IMAGE_ASPECT_COLOR_BIT) { if (!tex_clear_fallback(gpu, tex, color)) { PL_ERR(gpu, "Failed clearing imported planar image: color aspect " "clears disallowed by spec and no shader fallback " "available"); } return; } struct vk_cmd *cmd = CMD_BEGIN(GRAPHICS); if (!cmd) return; vk_tex_barrier(gpu, cmd, tex, VK_PIPELINE_STAGE_2_CLEAR_BIT, VK_ACCESS_2_TRANSFER_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_QUEUE_FAMILY_IGNORED); pl_static_assert(sizeof(VkClearColorValue) == sizeof(union pl_clear_color)); const VkClearColorValue *clearColor = (const VkClearColorValue *) &color; pl_assert(tex_vk->aspect == VK_IMAGE_ASPECT_COLOR_BIT); static const VkImageSubresourceRange range = { .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, .levelCount = 1, .layerCount = 1, }; vk->CmdClearColorImage(cmd->buf, tex_vk->img, tex_vk->layout, clearColor, 1, &range); CMD_FINISH(&cmd); } void vk_tex_blit(pl_gpu gpu, const struct pl_tex_blit_params *params) { struct pl_vk *p = PL_PRIV(gpu); struct vk_ctx *vk = p->vk; struct pl_tex_vk *src_vk = PL_PRIV(params->src); struct pl_tex_vk *dst_vk = PL_PRIV(params->dst); struct pl_fmt_vk *src_fmtp = PL_PRIV(params->src->params.format); struct pl_fmt_vk *dst_fmtp = PL_PRIV(params->dst->params.format); bool blit_emulated = src_fmtp->blit_emulated || dst_fmtp->blit_emulated; bool planar_fallback = src_vk->aspect != VK_IMAGE_ASPECT_COLOR_BIT || dst_vk->aspect != VK_IMAGE_ASPECT_COLOR_BIT; pl_rect3d src_rc = params->src_rc, dst_rc = params->dst_rc; bool requires_scaling = !pl_rect3d_eq(src_rc, dst_rc); if ((requires_scaling && blit_emulated) || planar_fallback) { if (!pl_tex_blit_compute(gpu, params)) PL_ERR(gpu, "Failed emulating texture blit, incompatible textures?"); return; } struct vk_cmd *cmd = CMD_BEGIN(GRAPHICS); if (!cmd) return; // When the blit operation doesn't require scaling, we can use the more // efficient vkCmdCopyImage instead of vkCmdBlitImage if (!requires_scaling) { vk_tex_barrier(gpu, cmd, params->src, VK_PIPELINE_STAGE_2_COPY_BIT, VK_ACCESS_2_TRANSFER_READ_BIT, 
VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, VK_QUEUE_FAMILY_IGNORED); vk_tex_barrier(gpu, cmd, params->dst, VK_PIPELINE_STAGE_2_COPY_BIT, VK_ACCESS_2_TRANSFER_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_QUEUE_FAMILY_IGNORED); pl_rect3d_normalize(&src_rc); VkImageCopy region = { .srcSubresource = { .aspectMask = src_vk->aspect, .layerCount = 1, }, .dstSubresource = { .aspectMask = dst_vk->aspect, .layerCount = 1, }, .srcOffset = {src_rc.x0, src_rc.y0, src_rc.z0}, .dstOffset = {src_rc.x0, src_rc.y0, src_rc.z0}, .extent = { pl_rect_w(src_rc), pl_rect_h(src_rc), pl_rect_d(src_rc), }, }; vk->CmdCopyImage(cmd->buf, src_vk->img, src_vk->layout, dst_vk->img, dst_vk->layout, 1, ®ion); } else { vk_tex_barrier(gpu, cmd, params->src, VK_PIPELINE_STAGE_2_BLIT_BIT, VK_ACCESS_2_TRANSFER_READ_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, VK_QUEUE_FAMILY_IGNORED); vk_tex_barrier(gpu, cmd, params->dst, VK_PIPELINE_STAGE_2_BLIT_BIT, VK_ACCESS_2_TRANSFER_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_QUEUE_FAMILY_IGNORED); VkImageBlit region = { .srcSubresource = { .aspectMask = src_vk->aspect, .layerCount = 1, }, .dstSubresource = { .aspectMask = dst_vk->aspect, .layerCount = 1, }, .srcOffsets = {{src_rc.x0, src_rc.y0, src_rc.z0}, {src_rc.x1, src_rc.y1, src_rc.z1}}, .dstOffsets = {{dst_rc.x0, dst_rc.y0, dst_rc.z0}, {dst_rc.x1, dst_rc.y1, dst_rc.z1}}, }; static const VkFilter filters[PL_TEX_SAMPLE_MODE_COUNT] = { [PL_TEX_SAMPLE_NEAREST] = VK_FILTER_NEAREST, [PL_TEX_SAMPLE_LINEAR] = VK_FILTER_LINEAR, }; vk->CmdBlitImage(cmd->buf, src_vk->img, src_vk->layout, dst_vk->img, dst_vk->layout, 1, ®ion, filters[params->sample_mode]); } CMD_FINISH(&cmd); } // Determine the best queue type to perform a buffer<->image copy on static enum queue_type vk_img_copy_queue(pl_gpu gpu, pl_tex tex, const struct VkBufferImageCopy *region) { struct pl_vk *p = PL_PRIV(gpu); struct vk_ctx *vk = p->vk; const struct pl_tex_vk *tex_vk = PL_PRIV(tex); enum queue_type queue = tex_vk->transfer_queue; if (queue != TRANSFER) return queue; VkExtent3D alignment = vk->pool_transfer->props.minImageTransferGranularity; enum queue_type fallback = GRAPHICS; if (gpu->limits.compute_queues > gpu->limits.fragment_queues) fallback = COMPUTE; // prefer async compute queue int tex_w = PL_DEF(tex->params.w, 1), tex_h = PL_DEF(tex->params.h, 1), tex_d = PL_DEF(tex->params.d, 1); bool full_w = region->imageOffset.x + region->imageExtent.width == tex_w, full_h = region->imageOffset.y + region->imageExtent.height == tex_h, full_d = region->imageOffset.z + region->imageExtent.depth == tex_d; if (alignment.width) { bool unaligned = false; unaligned |= region->imageOffset.x % alignment.width; unaligned |= region->imageOffset.y % alignment.height; unaligned |= region->imageOffset.z % alignment.depth; unaligned |= (region->imageExtent.width % alignment.width) && !full_w; unaligned |= (region->imageExtent.height % alignment.height) && !full_h; unaligned |= (region->imageExtent.depth % alignment.depth) && !full_d; return unaligned ? fallback : queue; } else { // an alignment of {0} means the copy must span the entire image bool unaligned = false; unaligned |= region->imageOffset.x || !full_w; unaligned |= region->imageOffset.y || !full_h; unaligned |= region->imageOffset.z || !full_d; return unaligned ? 
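/* copies violating the transfer granularity must run on a general-purpose queue */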
fallback : queue; } } static void tex_xfer_cb(void *ctx, void *arg) { void (*fun)(void *priv) = ctx; fun(arg); } bool vk_tex_upload(pl_gpu gpu, const struct pl_tex_transfer_params *params) { struct pl_vk *p = PL_PRIV(gpu); struct vk_ctx *vk = p->vk; pl_tex tex = params->tex; pl_fmt fmt = tex->params.format; struct pl_tex_vk *tex_vk = PL_PRIV(tex); struct pl_tex_transfer_params *slices = NULL; int num_slices = 0; if (!params->buf) return pl_tex_upload_pbo(gpu, params); pl_buf buf = params->buf; struct pl_buf_vk *buf_vk = PL_PRIV(buf); pl_rect3d rc = params->rc; const size_t size = pl_tex_transfer_size(params); const size_t buf_offset = buf_vk->mem.offset + params->buf_offset; bool unaligned = buf_offset % fmt->texel_size; if (unaligned) PL_TRACE(gpu, "vk_tex_upload: unaligned transfer (slow path)"); if (fmt->emulated || unaligned) { // Create all slice buffers first, to early-fail if OOM, and to avoid // blocking unnecessarily on waiting for these buffers to get read from num_slices = pl_tex_transfer_slices(gpu, tex_vk->texel_fmt, params, &slices); for (int i = 0; i < num_slices; i++) { slices[i].buf = pl_buf_create(gpu, pl_buf_params( .memory_type = PL_BUF_MEM_DEVICE, .format = tex_vk->texel_fmt, .size = pl_tex_transfer_size(&slices[i]), .storable = fmt->emulated, )); if (!slices[i].buf) { PL_ERR(gpu, "Failed creating buffer for tex upload fallback!"); num_slices = i; // only clean up buffers up to here goto error; } } // All temporary buffers successfully created, begin copying source data struct vk_cmd *cmd = CMD_BEGIN_TIMED(tex_vk->transfer_queue, params->timer); if (!cmd) goto error; vk_buf_barrier(gpu, cmd, buf, VK_PIPELINE_STAGE_2_COPY_BIT, VK_ACCESS_2_TRANSFER_READ_BIT, params->buf_offset, size, false); for (int i = 0; i < num_slices; i++) { pl_buf slice = slices[i].buf; struct pl_buf_vk *slice_vk = PL_PRIV(slice); vk_buf_barrier(gpu, cmd, slice, VK_PIPELINE_STAGE_2_COPY_BIT, VK_ACCESS_2_TRANSFER_WRITE_BIT, 0, slice->params.size, false); vk->CmdCopyBuffer(cmd->buf, buf_vk->mem.buf, slice_vk->mem.buf, 1, &(VkBufferCopy) { .srcOffset = buf_vk->mem.offset + slices[i].buf_offset, .dstOffset = slice_vk->mem.offset, .size = slice->params.size, }); } if (params->callback) vk_cmd_callback(cmd, tex_xfer_cb, params->callback, params->priv); bool ok = CMD_FINISH(&cmd); // Finally, dispatch the (texel) upload asynchronously. We can fire // the callback already at the completion of previous command because // these temporary buffers already hold persistent copies of the data for (int i = 0; i < num_slices; i++) { if (ok) { slices[i].buf_offset = 0; ok = fmt->emulated ? 
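/* emulated formats take the texel-buffer copy path */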
pl_tex_upload_texel(gpu, &slices[i]) : pl_tex_upload(gpu, &slices[i]); } pl_buf_destroy(gpu, &slices[i].buf); } pl_free(slices); return ok; } else { pl_assert(fmt->texel_align == fmt->texel_size); const VkBufferImageCopy region = { .bufferOffset = buf_offset, .bufferRowLength = params->row_pitch / fmt->texel_size, .bufferImageHeight = params->depth_pitch / params->row_pitch, .imageOffset = { rc.x0, rc.y0, rc.z0 }, .imageExtent = { rc.x1, rc.y1, rc.z1 }, .imageSubresource = { .aspectMask = tex_vk->aspect, .layerCount = 1, }, }; enum queue_type queue = vk_img_copy_queue(gpu, tex, ®ion); struct vk_cmd *cmd = CMD_BEGIN_TIMED(queue, params->timer); if (!cmd) goto error; vk_buf_barrier(gpu, cmd, buf, VK_PIPELINE_STAGE_2_COPY_BIT, VK_ACCESS_2_TRANSFER_READ_BIT, params->buf_offset, size, false); vk_tex_barrier(gpu, cmd, tex, VK_PIPELINE_STAGE_2_COPY_BIT, VK_ACCESS_2_TRANSFER_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_QUEUE_FAMILY_IGNORED); vk->CmdCopyBufferToImage(cmd->buf, buf_vk->mem.buf, tex_vk->img, tex_vk->layout, 1, ®ion); if (params->callback) vk_cmd_callback(cmd, tex_xfer_cb, params->callback, params->priv); return CMD_FINISH(&cmd); } pl_unreachable(); error: for (int i = 0; i < num_slices; i++) pl_buf_destroy(gpu, &slices[i].buf); pl_free(slices); return false; } bool vk_tex_download(pl_gpu gpu, const struct pl_tex_transfer_params *params) { struct pl_vk *p = PL_PRIV(gpu); struct vk_ctx *vk = p->vk; pl_tex tex = params->tex; pl_fmt fmt = tex->params.format; struct pl_tex_vk *tex_vk = PL_PRIV(tex); struct pl_tex_transfer_params *slices = NULL; int num_slices = 0; if (!params->buf) return pl_tex_download_pbo(gpu, params); pl_buf buf = params->buf; struct pl_buf_vk *buf_vk = PL_PRIV(buf); pl_rect3d rc = params->rc; const size_t size = pl_tex_transfer_size(params); const size_t buf_offset = buf_vk->mem.offset + params->buf_offset; bool unaligned = buf_offset % fmt->texel_size; if (unaligned) PL_TRACE(gpu, "vk_tex_download: unaligned transfer (slow path)"); if (fmt->emulated || unaligned) { num_slices = pl_tex_transfer_slices(gpu, tex_vk->texel_fmt, params, &slices); for (int i = 0; i < num_slices; i++) { slices[i].buf = pl_buf_create(gpu, pl_buf_params( .memory_type = PL_BUF_MEM_DEVICE, .format = tex_vk->texel_fmt, .size = pl_tex_transfer_size(&slices[i]), .storable = fmt->emulated, )); if (!slices[i].buf) { PL_ERR(gpu, "Failed creating buffer for tex download fallback!"); num_slices = i; goto error; } } for (int i = 0; i < num_slices; i++) { // Restore buffer offset after downloading into temporary buffer, // because we still need to copy the data from the temporary buffer // into this offset in the original buffer const size_t tmp_offset = slices[i].buf_offset; slices[i].buf_offset = 0; bool ok = fmt->emulated ? 
pl_tex_download_texel(gpu, &slices[i]) : pl_tex_download(gpu, &slices[i]); slices[i].buf_offset = tmp_offset; if (!ok) goto error; } // Finally, download into the user buffer struct vk_cmd *cmd = CMD_BEGIN_TIMED(tex_vk->transfer_queue, params->timer); if (!cmd) goto error; vk_buf_barrier(gpu, cmd, buf, VK_PIPELINE_STAGE_2_COPY_BIT, VK_ACCESS_2_TRANSFER_WRITE_BIT, params->buf_offset, size, false); for (int i = 0; i < num_slices; i++) { pl_buf slice = slices[i].buf; struct pl_buf_vk *slice_vk = PL_PRIV(slice); vk_buf_barrier(gpu, cmd, slice, VK_PIPELINE_STAGE_2_COPY_BIT, VK_ACCESS_2_TRANSFER_READ_BIT, 0, slice->params.size, false); vk->CmdCopyBuffer(cmd->buf, slice_vk->mem.buf, buf_vk->mem.buf, 1, &(VkBufferCopy) { .srcOffset = slice_vk->mem.offset, .dstOffset = buf_vk->mem.offset + slices[i].buf_offset, .size = slice->params.size, }); pl_buf_destroy(gpu, &slices[i].buf); } vk_buf_flush(gpu, cmd, buf, params->buf_offset, size); if (params->callback) vk_cmd_callback(cmd, tex_xfer_cb, params->callback, params->priv); pl_free(slices); return CMD_FINISH(&cmd); } else { pl_assert(params->row_pitch % fmt->texel_size == 0); pl_assert(params->depth_pitch % params->row_pitch == 0); const VkBufferImageCopy region = { .bufferOffset = buf_offset, .bufferRowLength = params->row_pitch / fmt->texel_size, .bufferImageHeight = params->depth_pitch / params->row_pitch, .imageOffset = { rc.x0, rc.y0, rc.z0 }, .imageExtent = { rc.x1, rc.y1, rc.z1 }, .imageSubresource = { .aspectMask = tex_vk->aspect, .layerCount = 1, }, }; enum queue_type queue = vk_img_copy_queue(gpu, tex, ®ion); struct vk_cmd *cmd = CMD_BEGIN_TIMED(queue, params->timer); if (!cmd) goto error; vk_buf_barrier(gpu, cmd, buf, VK_PIPELINE_STAGE_2_COPY_BIT, VK_ACCESS_2_TRANSFER_WRITE_BIT, params->buf_offset, size, false); vk_tex_barrier(gpu, cmd, tex, VK_PIPELINE_STAGE_2_COPY_BIT, VK_ACCESS_2_TRANSFER_READ_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, VK_QUEUE_FAMILY_IGNORED); vk->CmdCopyImageToBuffer(cmd->buf, tex_vk->img, tex_vk->layout, buf_vk->mem.buf, 1, ®ion); vk_buf_flush(gpu, cmd, buf, params->buf_offset, size); if (params->callback) vk_cmd_callback(cmd, tex_xfer_cb, params->callback, params->priv); return CMD_FINISH(&cmd); } pl_unreachable(); error: for (int i = 0; i < num_slices; i++) pl_buf_destroy(gpu, &slices[i].buf); pl_free(slices); return false; } bool vk_tex_poll(pl_gpu gpu, pl_tex tex, uint64_t timeout) { struct pl_vk *p = PL_PRIV(gpu); struct vk_ctx *vk = p->vk; struct pl_tex_vk *tex_vk = PL_PRIV(tex); // Opportunistically check if we can re-use this texture without flush vk_poll_commands(vk, 0); if (pl_rc_count(&tex_vk->rc) == 1) goto skip_blocking; // Otherwise, we're force to submit any queued command so that the user is // guaranteed to see progress eventually, even if they call this in a loop CMD_SUBMIT(NULL); vk_poll_commands(vk, timeout); if (pl_rc_count(&tex_vk->rc) > 1) return true; // fall through skip_blocking: for (int i = 0; i < tex_vk->num_planes; i++) { if (vk_tex_poll(gpu, tex->planes[i], timeout)) return true; } return false; } pl_tex pl_vulkan_wrap(pl_gpu gpu, const struct pl_vulkan_wrap_params *params) { pl_fmt fmt = NULL; for (int i = 0; i < gpu->num_formats; i++) { const struct vk_format **vkfmt = PL_PRIV(gpu->formats[i]); if ((*vkfmt)->tfmt == params->format) { fmt = gpu->formats[i]; break; } } if (!fmt) { PL_ERR(gpu, "Could not find pl_fmt suitable for wrapped image " "with format %s", vk_fmt_name(params->format)); return NULL; } VkImageUsageFlags usage = params->usage; if (fmt->num_planes) usage = 0; // mask 
capabilities from the base texture struct pl_tex_t *tex = pl_zalloc_obj(NULL, tex, struct pl_tex_vk); tex->params = (struct pl_tex_params) { .format = fmt, .w = params->width, .h = params->height, .d = params->depth, .sampleable = !!(usage & VK_IMAGE_USAGE_SAMPLED_BIT), .renderable = !!(usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT), .storable = !!(usage & VK_IMAGE_USAGE_STORAGE_BIT), .blit_src = !!(usage & VK_IMAGE_USAGE_TRANSFER_SRC_BIT), .blit_dst = !!(usage & VK_IMAGE_USAGE_TRANSFER_DST_BIT), .host_writable = !!(usage & VK_IMAGE_USAGE_TRANSFER_DST_BIT), .host_readable = !!(usage & VK_IMAGE_USAGE_TRANSFER_SRC_BIT), .user_data = params->user_data, .debug_tag = params->debug_tag, }; // Mask out capabilities not permitted by the `pl_fmt` #define MASK(field, cap) \ do { \ if (tex->params.field && !(fmt->caps & cap)) { \ PL_WARN(gpu, "Masking `" #field "` from wrapped texture because " \ "the corresponding format '%s' does not support " #cap, \ fmt->name); \ tex->params.field = false; \ } \ } while (0) MASK(sampleable, PL_FMT_CAP_SAMPLEABLE); MASK(renderable, PL_FMT_CAP_RENDERABLE); MASK(storable, PL_FMT_CAP_STORABLE); MASK(blit_src, PL_FMT_CAP_BLITTABLE); MASK(blit_dst, PL_FMT_CAP_BLITTABLE); MASK(host_readable, PL_FMT_CAP_HOST_READABLE); #undef MASK // For simplicity, explicitly mask out blit emulation for wrapped textures struct pl_fmt_vk *fmtp = PL_PRIV(fmt); if (fmtp->blit_emulated) { tex->params.blit_src = false; tex->params.blit_dst = false; } struct pl_tex_vk *tex_vk = PL_PRIV(tex); switch (pl_tex_params_dimension(tex->params)) { case 1: tex_vk->type = VK_IMAGE_TYPE_1D; break; case 2: tex_vk->type = VK_IMAGE_TYPE_2D; break; case 3: tex_vk->type = VK_IMAGE_TYPE_3D; break; } tex_vk->external_img = true; tex_vk->held = !fmt->num_planes; tex_vk->img = params->image; tex_vk->img_fmt = params->format; tex_vk->num_planes = fmt->num_planes; tex_vk->usage_flags = usage; tex_vk->aspect = params->aspect; if (!tex_vk->aspect) { for (int i = 0; i < tex_vk->num_planes; i++) tex_vk->aspect |= VK_IMAGE_ASPECT_PLANE_0_BIT << i; tex_vk->aspect = PL_DEF(tex_vk->aspect, VK_IMAGE_ASPECT_COLOR_BIT); } // Blitting to planar images requires fallback via compute shaders if (tex_vk->aspect != VK_IMAGE_ASPECT_COLOR_BIT) { tex->params.blit_src &= tex->params.storable; tex->params.blit_dst &= tex->params.storable; } static const char * const wrapped_plane_names[4] = { "wrapped plane 0", "wrapped plane 1", "wrapped plane 2", "wrapped plane 3", }; for (int i = 0; i < tex_vk->num_planes; i++) { struct pl_tex_t *plane; VkImageAspectFlags aspect = VK_IMAGE_ASPECT_PLANE_0_BIT << i; if (!(aspect & tex_vk->aspect)) { PL_INFO(gpu, "Not wrapping plane %d due to aspect bit 0x%x not " "being contained in supplied params->aspect 0x%x!", i, (unsigned) aspect, (unsigned) tex_vk->aspect); continue; } pl_assert(tex_vk->type == VK_IMAGE_TYPE_2D); plane = (struct pl_tex_t *) pl_vulkan_wrap(gpu, pl_vulkan_wrap_params( .image = tex_vk->img, .aspect = aspect, .width = PL_RSHIFT_UP(tex->params.w, fmt->planes[i].shift_x), .height = PL_RSHIFT_UP(tex->params.h, fmt->planes[i].shift_y), .format = fmtp->vk_fmt->pfmt[i].fmt, .usage = params->usage, .user_data = params->user_data, .debug_tag = PL_DEF(params->debug_tag, wrapped_plane_names[i]), )); if (!plane) goto error; plane->parent = tex; tex->planes[i] = plane; tex_vk->planes[i] = PL_PRIV(plane); } if (!vk_init_image(gpu, tex, PL_DEF(params->debug_tag, "wrapped"))) goto error; return tex; error: vk_tex_destroy(gpu, tex); return NULL; } VkImage pl_vulkan_unwrap(pl_gpu gpu, pl_tex tex, 
VkFormat *out_format, VkImageUsageFlags *out_flags) { struct pl_tex_vk *tex_vk = PL_PRIV(tex); if (out_format) *out_format = tex_vk->img_fmt; if (out_flags) *out_flags = tex_vk->usage_flags; return tex_vk->img; } bool pl_vulkan_hold_ex(pl_gpu gpu, const struct pl_vulkan_hold_params *params) { struct pl_tex_vk *tex_vk = PL_PRIV(params->tex); pl_assert(params->semaphore.sem); bool held = tex_vk->held; for (int i = 0; i < tex_vk->num_planes; i++) held |= tex_vk->planes[i]->held; if (held) { PL_ERR(gpu, "Attempting to hold an already held image!"); return false; } struct vk_cmd *cmd = CMD_BEGIN(GRAPHICS); if (!cmd) { PL_ERR(gpu, "Failed holding external image!"); return false; } VkImageLayout layout = params->layout; if (params->out_layout) { // For planar images, arbitrarily pick the current image layout of the // first plane. This should be fine in practice, since all planes will // share the same usage capabilities. if (tex_vk->num_planes) { layout = tex_vk->planes[0]->layout; } else { layout = tex_vk->layout; } } bool may_invalidate = true; if (!tex_vk->num_planes) { may_invalidate &= tex_vk->may_invalidate; vk_tex_barrier(gpu, cmd, params->tex, VK_PIPELINE_STAGE_2_NONE, 0, layout, params->qf); } for (int i = 0; i < tex_vk->num_planes; i++) { may_invalidate &= tex_vk->planes[i]->may_invalidate; vk_tex_barrier(gpu, cmd, params->tex->planes[i], VK_PIPELINE_STAGE_2_NONE, 0, layout, params->qf); } vk_cmd_sig(cmd, VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, params->semaphore); bool ok = CMD_SUBMIT(&cmd); if (!tex_vk->num_planes) { tex_vk->sem.write.queue = tex_vk->sem.read.queue = NULL; tex_vk->held = ok; } for (int i = 0; i < tex_vk->num_planes; i++) { struct pl_tex_vk *plane_vk = tex_vk->planes[i]; plane_vk->sem.write.queue = plane_vk->sem.read.queue = NULL; plane_vk->held = ok; } if (ok && params->out_layout) *params->out_layout = may_invalidate ? VK_IMAGE_LAYOUT_UNDEFINED : layout; return ok; } void pl_vulkan_release_ex(pl_gpu gpu, const struct pl_vulkan_release_params *params) { struct pl_tex_vk *tex_vk = PL_PRIV(params->tex); if (tex_vk->num_planes) { struct pl_vulkan_release_params plane_pars = *params; for (int i = 0; i < tex_vk->num_planes; i++) { plane_pars.tex = params->tex->planes[i]; pl_vulkan_release_ex(gpu, &plane_pars); } return; } if (!tex_vk->held) { PL_ERR(gpu, "Attempting to release an unheld image?"); return; } if (params->semaphore.sem) PL_ARRAY_APPEND(params->tex, tex_vk->ext_deps, params->semaphore); tex_vk->qf = params->qf; tex_vk->layout = params->layout; tex_vk->held = false; } libplacebo-v7.349.0/src/vulkan/malloc.c000066400000000000000000001070721463457750100177050ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #include "malloc.h" #include "command.h" #include "utils.h" #include "pl_thread.h" #ifdef PL_HAVE_UNIX #include #include #endif // Controls the page size alignment, to help coalesce allocations into the same // slab. 
Pages are rounded up to multiples of this value. (Default: 4 KB) #define PAGE_SIZE_ALIGN (1LLU << 12) // Controls the minimum/maximum number of pages for new slabs. As slabs are // exhausted of memory, the number of pages per new slab grows exponentially, // starting with the minimum until the maximum is reached. // // Note: The maximum must never exceed the size of `vk_slab.spacemap`. #define MINIMUM_PAGE_COUNT 4 #define MAXIMUM_PAGE_COUNT (sizeof(uint64_t) * 8) // Controls the maximum page size. Any allocations above this threshold // (absolute size or fraction of VRAM, whichever is higher) will be served by // dedicated allocations. (Default: 64 MB or 1/16 of VRAM) #define MAXIMUM_PAGE_SIZE_ABSOLUTE (1LLU << 26) #define MAXIMUM_PAGE_SIZE_RELATIVE 16 // Controls the minimum slab size, to avoid excessive re-allocation of very // small slabs. (Default: 256 KB) #define MINIMUM_SLAB_SIZE (1LLU << 18) // How long to wait before garbage collecting empty slabs. Slabs older than // this many invocations of `vk_malloc_garbage_collect` will be released. #define MAXIMUM_SLAB_AGE 32 // A single slab represents a contiguous region of allocated memory. Actual // allocations are served as pages of this. Slabs are organized into pools, // each of which contains a list of slabs of differing page sizes. struct vk_slab { pl_mutex lock; pl_debug_tag debug_tag; // debug tag of the triggering allocation VkDeviceMemory mem; // underlying device allocation VkDeviceSize size; // total allocated size of `mem` VkMemoryType mtype; // underlying memory type bool dedicated; // slab is allocated specifically for one object bool imported; // slab represents an imported memory allocation // free space accounting (only for non-dedicated slabs) uint64_t spacemap; // bitset of available pages size_t pagesize; // size in bytes per page size_t used; // number of bytes actually in use uint64_t age; // timestamp of last use // optional, depends on the memory type: VkBuffer buffer; // buffer spanning the entire slab void *data; // mapped memory corresponding to `mem` bool coherent; // mapped memory is coherent union pl_handle handle; // handle associated with this device memory enum pl_handle_type handle_type; }; // Represents a single memory pool. We keep track of a vk_pool for each // combination of malloc parameters. This shouldn't actually be that many in // practice, because some combinations simply never occur, and others will // generally be the same for the same objects. // // Note: `vk_pool` addresses are not immutable, so we mustn't expose any // dangling references to a `vk_pool` from e.g. `vk_memslice.priv = vk_slab`. struct vk_pool { struct vk_malloc_params params; // allocation params (with some fields nulled) PL_ARRAY(struct vk_slab *) slabs; // array of slabs, unsorted int index; // running index in `vk_malloc.pools` }; // The overall state of the allocator, which keeps track of a vk_pool for each // memory type. struct vk_malloc { struct vk_ctx *vk; pl_mutex lock; VkPhysicalDeviceMemoryProperties props; size_t maximum_page_size; PL_ARRAY(struct vk_pool) pools; uint64_t age; }; static inline float efficiency(size_t used, size_t total) { if (!total) return 100.0; return 100.0f * used / total; } static const char *print_size(char buf[8], size_t size) { const char *suffixes = "\0KMG"; while (suffixes[1] && size > 9999) { size >>= 10; suffixes++; } int ret = *suffixes ? snprintf(buf, 8, "%4zu%c", size, *suffixes) : snprintf(buf, 8, "%5zu", size); return ret >= 0 ? 
buf : "(error)"; } #define PRINT_SIZE(x) (print_size((char[8]){0}, (size_t) (x))) void vk_malloc_print_stats(struct vk_malloc *ma, enum pl_log_level lev) { struct vk_ctx *vk = ma->vk; size_t total_size = 0; size_t total_used = 0; size_t total_res = 0; PL_MSG(vk, lev, "Memory heaps supported by device:"); for (int i = 0; i < ma->props.memoryHeapCount; i++) { VkMemoryHeap heap = ma->props.memoryHeaps[i]; PL_MSG(vk, lev, " %d: flags 0x%x size %s", i, (unsigned) heap.flags, PRINT_SIZE(heap.size)); } PL_DEBUG(vk, "Memory types supported by device:"); for (int i = 0; i < ma->props.memoryTypeCount; i++) { VkMemoryType type = ma->props.memoryTypes[i]; PL_DEBUG(vk, " %d: flags 0x%x heap %d", i, (unsigned) type.propertyFlags, (int) type.heapIndex); } pl_mutex_lock(&ma->lock); for (int i = 0; i < ma->pools.num; i++) { struct vk_pool *pool = &ma->pools.elem[i]; const struct vk_malloc_params *par = &pool->params; PL_MSG(vk, lev, "Memory pool %d:", i); PL_MSG(vk, lev, " Compatible types: 0x%"PRIx32, par->reqs.memoryTypeBits); if (par->required) PL_MSG(vk, lev, " Required flags: 0x%"PRIx32, par->required); if (par->optimal) PL_MSG(vk, lev, " Optimal flags: 0x%"PRIx32, par->optimal); if (par->buf_usage) PL_MSG(vk, lev, " Buffer flags: 0x%"PRIx32, par->buf_usage); if (par->export_handle) PL_MSG(vk, lev, " Export handle: 0x%x", par->export_handle); size_t pool_size = 0; size_t pool_used = 0; size_t pool_res = 0; for (int j = 0; j < pool->slabs.num; j++) { struct vk_slab *slab = pool->slabs.elem[j]; pl_mutex_lock(&slab->lock); size_t avail = __builtin_popcountll(slab->spacemap) * slab->pagesize; size_t slab_res = slab->size - avail; PL_MSG(vk, lev, " Slab %2d: %8"PRIx64" x %s: " "%s used %s res %s alloc from heap %d, efficiency %.2f%% [%s]", j, slab->spacemap, PRINT_SIZE(slab->pagesize), PRINT_SIZE(slab->used), PRINT_SIZE(slab_res), PRINT_SIZE(slab->size), (int) slab->mtype.heapIndex, efficiency(slab->used, slab_res), PL_DEF(slab->debug_tag, "unknown")); pool_size += slab->size; pool_used += slab->used; pool_res += slab_res; pl_mutex_unlock(&slab->lock); } PL_MSG(vk, lev, " Pool summary: %s used %s res %s alloc, " "efficiency %.2f%%, utilization %.2f%%", PRINT_SIZE(pool_used), PRINT_SIZE(pool_res), PRINT_SIZE(pool_size), efficiency(pool_used, pool_res), efficiency(pool_res, pool_size)); total_size += pool_size; total_used += pool_used; total_res += pool_res; } pl_mutex_unlock(&ma->lock); PL_MSG(vk, lev, "Memory summary: %s used %s res %s alloc, " "efficiency %.2f%%, utilization %.2f%%, max page: %s", PRINT_SIZE(total_used), PRINT_SIZE(total_res), PRINT_SIZE(total_size), efficiency(total_used, total_res), efficiency(total_res, total_size), PRINT_SIZE(ma->maximum_page_size)); } static void slab_free(struct vk_ctx *vk, struct vk_slab *slab) { if (!slab) return; #ifndef NDEBUG if (!slab->dedicated && slab->used > 0) { PL_WARN(vk, "Leaked %zu bytes of vulkan memory!", slab->used); PL_WARN(vk, "slab total size: %zu bytes, heap: %d, flags: 0x%"PRIX64, (size_t) slab->size, (int) slab->mtype.heapIndex, (uint64_t) slab->mtype.propertyFlags); if (slab->debug_tag) PL_WARN(vk, "last used for: %s", slab->debug_tag); pl_log_stack_trace(vk->log, PL_LOG_WARN); pl_debug_abort(); } #endif if (slab->imported) { switch (slab->handle_type) { case PL_HANDLE_FD: case PL_HANDLE_DMA_BUF: PL_TRACE(vk, "Unimporting slab of size %s from fd: %d", PRINT_SIZE(slab->size), slab->handle.fd); break; case PL_HANDLE_WIN32: case PL_HANDLE_WIN32_KMT: #ifdef PL_HAVE_WIN32 PL_TRACE(vk, "Unimporting slab of size %s from handle: %p", 
PRINT_SIZE(slab->size), (void *) slab->handle.handle); #endif break; case PL_HANDLE_HOST_PTR: PL_TRACE(vk, "Unimporting slab of size %s from ptr: %p", PRINT_SIZE(slab->size), (void *) slab->handle.ptr); break; case PL_HANDLE_IOSURFACE: case PL_HANDLE_MTL_TEX: pl_unreachable(); } } else { switch (slab->handle_type) { case PL_HANDLE_FD: case PL_HANDLE_DMA_BUF: #ifdef PL_HAVE_UNIX if (slab->handle.fd > -1) close(slab->handle.fd); #endif break; case PL_HANDLE_WIN32: #ifdef PL_HAVE_WIN32 if (slab->handle.handle != NULL) CloseHandle(slab->handle.handle); #endif break; case PL_HANDLE_WIN32_KMT: // PL_HANDLE_WIN32_KMT is just an identifier. It doesn't get closed. break; case PL_HANDLE_HOST_PTR: // Implicitly unmapped break; case PL_HANDLE_IOSURFACE: case PL_HANDLE_MTL_TEX: pl_unreachable(); } PL_DEBUG(vk, "Freeing slab of size %s", PRINT_SIZE(slab->size)); } vk->DestroyBuffer(vk->dev, slab->buffer, PL_VK_ALLOC); // also implicitly unmaps the memory if needed vk->FreeMemory(vk->dev, slab->mem, PL_VK_ALLOC); pl_mutex_destroy(&slab->lock); pl_free(slab); } // type_mask: optional // thread-safety: safe static bool find_best_memtype(const struct vk_malloc *ma, uint32_t type_mask, const struct vk_malloc_params *params, uint32_t *out_index) { struct vk_ctx *vk = ma->vk; int best = -1; // The vulkan spec requires memory types to be sorted in the "optimal" // order, so the first matching type we find will be the best/fastest one. // That being said, we still want to prioritize memory types that have // better optional flags. type_mask &= params->reqs.memoryTypeBits; for (int i = 0; i < ma->props.memoryTypeCount; i++) { const VkMemoryType *mtype = &ma->props.memoryTypes[i]; // The memory type flags must include our properties if ((mtype->propertyFlags & params->required) != params->required) continue; // The memory heap must be large enough for the allocation VkDeviceSize heapSize = ma->props.memoryHeaps[mtype->heapIndex].size; if (params->reqs.size > heapSize) continue; // The memory type must be supported by the type mask (bitfield) if (!(type_mask & (1LU << i))) continue; // Calculate the score as the number of optimal property flags matched int score = __builtin_popcountl(mtype->propertyFlags & params->optimal); if (score > best) { *out_index = i; best = score; } } if (best < 0) { PL_ERR(vk, "Found no memory type matching property flags 0x%x and type " "bits 0x%x!", (unsigned) params->required, (unsigned) type_mask); return false; } return true; } static bool buf_external_check(struct vk_ctx *vk, VkBufferUsageFlags usage, enum pl_handle_type handle_type, bool import) { if (!handle_type) return true; VkPhysicalDeviceExternalBufferInfo info = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_BUFFER_INFO_KHR, .usage = usage, .handleType = vk_mem_handle_type(handle_type), }; VkExternalBufferProperties props = { .sType = VK_STRUCTURE_TYPE_EXTERNAL_BUFFER_PROPERTIES_KHR, }; if (!info.handleType) return false; vk->GetPhysicalDeviceExternalBufferProperties(vk->physd, &info, &props); return vk_external_mem_check(vk, &props.externalMemoryProperties, handle_type, import); } // thread-safety: safe static struct vk_slab *slab_alloc(struct vk_malloc *ma, const struct vk_malloc_params *params) { struct vk_ctx *vk = ma->vk; struct vk_slab *slab = pl_alloc_ptr(NULL, slab); *slab = (struct vk_slab) { .age = ma->age, .size = params->reqs.size, .handle_type = params->export_handle, .debug_tag = params->debug_tag, }; pl_mutex_init(&slab->lock); switch (slab->handle_type) { case PL_HANDLE_FD: case PL_HANDLE_DMA_BUF: 
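// Pre-fill the handle union with an explicit "no handle" sentinel, so that slab_free() only closes or unmaps handles that were actually created.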
slab->handle.fd = -1; break; case PL_HANDLE_WIN32: case PL_HANDLE_WIN32_KMT: case PL_HANDLE_MTL_TEX: case PL_HANDLE_IOSURFACE: slab->handle.handle = NULL; break; case PL_HANDLE_HOST_PTR: slab->handle.ptr = NULL; break; } VkExportMemoryAllocateInfoKHR ext_info = { .sType = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO_KHR, .handleTypes = vk_mem_handle_type(slab->handle_type), }; uint32_t type_mask = UINT32_MAX; if (params->buf_usage) { // Queue family sharing modes don't matter for buffers, so we just // set them as concurrent and stop worrying about it. uint32_t qfs[3] = {0}; pl_assert(vk->pools.num <= PL_ARRAY_SIZE(qfs)); for (int i = 0; i < vk->pools.num; i++) qfs[i] = vk->pools.elem[i]->qf; VkExternalMemoryBufferCreateInfoKHR ext_buf_info = { .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO_KHR, .handleTypes = ext_info.handleTypes, }; VkBufferCreateInfo binfo = { .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, .pNext = slab->handle_type ? &ext_buf_info : NULL, .size = slab->size, .usage = params->buf_usage, .sharingMode = vk->pools.num > 1 ? VK_SHARING_MODE_CONCURRENT : VK_SHARING_MODE_EXCLUSIVE, .queueFamilyIndexCount = vk->pools.num, .pQueueFamilyIndices = qfs, }; if (!buf_external_check(vk, binfo.usage, slab->handle_type, false)) { PL_ERR(vk, "Failed allocating shared memory buffer: possibly " "the handle type is unsupported?"); goto error; } VK(vk->CreateBuffer(vk->dev, &binfo, PL_VK_ALLOC, &slab->buffer)); PL_VK_NAME(BUFFER, slab->buffer, "slab"); VkMemoryRequirements reqs = {0}; vk->GetBufferMemoryRequirements(vk->dev, slab->buffer, &reqs); slab->size = reqs.size; // this can be larger than `slab->size` type_mask = reqs.memoryTypeBits; // Note: we can ignore `reqs.align` because we always bind the buffer // memory to offset 0 } VkMemoryAllocateInfo minfo = { .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, .allocationSize = slab->size, }; if (params->export_handle) vk_link_struct(&minfo, &ext_info); VkMemoryDedicatedAllocateInfoKHR dinfo = { .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO_KHR, .image = params->ded_image, }; if (params->ded_image) vk_link_struct(&minfo, &dinfo); if (!find_best_memtype(ma, type_mask, params, &minfo.memoryTypeIndex)) goto error; const VkMemoryType *mtype = &ma->props.memoryTypes[minfo.memoryTypeIndex]; PL_DEBUG(vk, "Allocating %zu memory of type 0x%x (id %d) in heap %d: %s", (size_t) slab->size, (unsigned) mtype->propertyFlags, (int) minfo.memoryTypeIndex, (int) mtype->heapIndex, PL_DEF(params->debug_tag, "unknown")); pl_clock_t start = pl_clock_now(); VkResult res = vk->AllocateMemory(vk->dev, &minfo, PL_VK_ALLOC, &slab->mem); switch (res) { case VK_ERROR_OUT_OF_DEVICE_MEMORY: case VK_ERROR_OUT_OF_HOST_MEMORY: PL_ERR(vk, "Allocation of size %s failed: %s!", PRINT_SIZE(slab->size), vk_res_str(res)); vk_malloc_print_stats(ma, PL_LOG_ERR); pl_log_stack_trace(vk->log, PL_LOG_ERR); pl_debug_abort(); goto error; default: PL_VK_ASSERT(res, "vkAllocateMemory"); } slab->mtype = *mtype; if (mtype->propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) { VK(vk->MapMemory(vk->dev, slab->mem, 0, VK_WHOLE_SIZE, 0, &slab->data)); slab->coherent = mtype->propertyFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; } if (slab->buffer) VK(vk->BindBufferMemory(vk->dev, slab->buffer, slab->mem, 0)); #ifdef PL_HAVE_UNIX if (slab->handle_type == PL_HANDLE_FD || slab->handle_type == PL_HANDLE_DMA_BUF) { VkMemoryGetFdInfoKHR fd_info = { .sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR, .memory = slab->mem, .handleType = ext_info.handleTypes, }; 
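// Export the backing device memory as a POSIX file descriptor. The returned fd belongs to this slab and is close()d again in slab_free().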
VK(vk->GetMemoryFdKHR(vk->dev, &fd_info, &slab->handle.fd)); } #endif #ifdef PL_HAVE_WIN32 if (slab->handle_type == PL_HANDLE_WIN32 || slab->handle_type == PL_HANDLE_WIN32_KMT) { VkMemoryGetWin32HandleInfoKHR handle_info = { .sType = VK_STRUCTURE_TYPE_MEMORY_GET_WIN32_HANDLE_INFO_KHR, .memory = slab->mem, .handleType = ext_info.handleTypes, }; VK(vk->GetMemoryWin32HandleKHR(vk->dev, &handle_info, &slab->handle.handle)); } #endif pl_log_cpu_time(vk->log, start, pl_clock_now(), "allocating slab"); // free space accounting is done by the caller return slab; error: if (params->debug_tag) PL_ERR(vk, " for malloc: %s", params->debug_tag); slab_free(vk, slab); return NULL; } static void pool_uninit(struct vk_ctx *vk, struct vk_pool *pool) { for (int i = 0; i < pool->slabs.num; i++) slab_free(vk, pool->slabs.elem[i]); pl_free(pool->slabs.elem); *pool = (struct vk_pool) {0}; } struct vk_malloc *vk_malloc_create(struct vk_ctx *vk) { struct vk_malloc *ma = pl_zalloc_ptr(NULL, ma); pl_mutex_init(&ma->lock); vk->GetPhysicalDeviceMemoryProperties(vk->physd, &ma->props); ma->vk = vk; // Determine maximum page size ma->maximum_page_size = MAXIMUM_PAGE_SIZE_ABSOLUTE; for (int i = 0; i < ma->props.memoryHeapCount; i++) { VkMemoryHeap heap = ma->props.memoryHeaps[i]; if (heap.flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT) { size_t size_max = heap.size / MAXIMUM_PAGE_SIZE_RELATIVE; ma->maximum_page_size = PL_MAX(ma->maximum_page_size, size_max); } } vk_malloc_print_stats(ma, PL_LOG_INFO); return ma; } void vk_malloc_destroy(struct vk_malloc **ma_ptr) { struct vk_malloc *ma = *ma_ptr; if (!ma) return; vk_malloc_print_stats(ma, PL_LOG_DEBUG); for (int i = 0; i < ma->pools.num; i++) pool_uninit(ma->vk, &ma->pools.elem[i]); pl_mutex_destroy(&ma->lock); pl_free_ptr(ma_ptr); } void vk_malloc_garbage_collect(struct vk_malloc *ma) { struct vk_ctx *vk = ma->vk; pl_mutex_lock(&ma->lock); ma->age++; for (int i = 0; i < ma->pools.num; i++) { struct vk_pool *pool = &ma->pools.elem[i]; for (int n = 0; n < pool->slabs.num; n++) { struct vk_slab *slab = pool->slabs.elem[n]; pl_mutex_lock(&slab->lock); if (slab->used || (ma->age - slab->age) <= MAXIMUM_SLAB_AGE) { pl_mutex_unlock(&slab->lock); continue; } PL_DEBUG(vk, "Garbage collected slab of size %s from pool %d", PRINT_SIZE(slab->size), pool->index); pl_mutex_unlock(&slab->lock); slab_free(ma->vk, slab); PL_ARRAY_REMOVE_AT(pool->slabs, n--); } } pl_mutex_unlock(&ma->lock); } pl_handle_caps vk_malloc_handle_caps(const struct vk_malloc *ma, bool import) { struct vk_ctx *vk = ma->vk; pl_handle_caps caps = 0; for (int i = 0; vk_mem_handle_list[i]; i++) { // Try seeing if we could allocate a "basic" buffer using these // capabilities, with no fancy buffer usage. More specific checks will // happen down the line at VkBuffer creation time, but this should give // us a rough idea of what the driver supports. 
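// `pl_handle_caps` is a bitmask of `pl_handle_type` values, so every supported handle type is simply OR-ed into the result.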
enum pl_handle_type type = vk_mem_handle_list[i]; if (buf_external_check(vk, VK_BUFFER_USAGE_TRANSFER_DST_BIT, type, import)) caps |= type; } return caps; } void vk_malloc_free(struct vk_malloc *ma, struct vk_memslice *slice) { struct vk_ctx *vk = ma->vk; struct vk_slab *slab = slice->priv; if (!slab || slab->dedicated) { slab_free(vk, slab); goto done; } pl_mutex_lock(&slab->lock); int page_idx = slice->offset / slab->pagesize; slab->spacemap |= 0x1LLU << page_idx; slab->used -= slice->size; slab->age = ma->age; pl_assert(slab->used >= 0); pl_mutex_unlock(&slab->lock); done: *slice = (struct vk_memslice) {0}; } static inline bool pool_params_eq(const struct vk_malloc_params *a, const struct vk_malloc_params *b) { return a->reqs.size == b->reqs.size && a->reqs.alignment == b->reqs.alignment && a->reqs.memoryTypeBits == b->reqs.memoryTypeBits && a->required == b->required && a->optimal == b->optimal && a->buf_usage == b->buf_usage && a->export_handle == b->export_handle; } static struct vk_pool *find_pool(struct vk_malloc *ma, const struct vk_malloc_params *params) { pl_assert(!params->import_handle); pl_assert(!params->ded_image); struct vk_malloc_params fixed = *params; fixed.reqs.alignment = 0; fixed.reqs.size = 0; fixed.shared_mem = (struct pl_shared_mem) {0}; for (int i = 0; i < ma->pools.num; i++) { if (pool_params_eq(&ma->pools.elem[i].params, &fixed)) return &ma->pools.elem[i]; } // Not found => add it PL_ARRAY_GROW(ma, ma->pools); size_t idx = ma->pools.num++; ma->pools.elem[idx] = (struct vk_pool) { .params = fixed, .index = idx, }; return &ma->pools.elem[idx]; } // Returns a suitable memory page from the pool. A new slab will be allocated // under the hood, if necessary. // // Note: This locks the slab it returns static struct vk_slab *pool_get_page(struct vk_malloc *ma, struct vk_pool *pool, size_t size, size_t align, VkDeviceSize *offset) { struct vk_slab *slab = NULL; int slab_pages = MINIMUM_PAGE_COUNT; size = PL_ALIGN2(size, PAGE_SIZE_ALIGN); const size_t pagesize = PL_ALIGN(size, align); for (int i = 0; i < pool->slabs.num; i++) { slab = pool->slabs.elem[i]; if (slab->pagesize < size) continue; if (slab->pagesize > pagesize * MINIMUM_PAGE_COUNT) // rough heuristic continue; if (slab->pagesize % align) continue; pl_mutex_lock(&slab->lock); int page_idx = __builtin_ffsll(slab->spacemap); if (!page_idx--) { pl_mutex_unlock(&slab->lock); // Increase the number of slabs to allocate for new slabs the // more existing full slabs exist for this size range slab_pages = PL_MIN(slab_pages << 1, MAXIMUM_PAGE_COUNT); continue; } slab->spacemap ^= 0x1LLU << page_idx; *offset = page_idx * slab->pagesize; return slab; } // Otherwise, allocate a new vk_slab and append it to the list. VkDeviceSize slab_size = slab_pages * pagesize; pl_static_assert(MINIMUM_SLAB_SIZE <= PAGE_SIZE_ALIGN * MAXIMUM_PAGE_COUNT); const VkDeviceSize max_slab_size = ma->maximum_page_size * MINIMUM_PAGE_COUNT; pl_assert(pagesize <= ma->maximum_page_size); slab_size = PL_CLAMP(slab_size, MINIMUM_SLAB_SIZE, max_slab_size); slab_pages = slab_size / pagesize; slab_size = slab_pages * pagesize; // max_slab_size may be npot2, trim excess struct vk_malloc_params params = pool->params; params.reqs.size = slab_size; // Don't hold the lock while allocating the slab, because it can be a // potentially very costly operation. pl_mutex_unlock(&ma->lock); slab = slab_alloc(ma, ¶ms); pl_mutex_lock(&ma->lock); if (!slab) return NULL; pl_mutex_lock(&slab->lock); slab->spacemap = (slab_pages == sizeof(uint64_t) * 8) ? 
~0LLU : ~(~0LLU << slab_pages); slab->pagesize = pagesize; PL_ARRAY_APPEND(NULL, pool->slabs, slab); // Return the first page in this newly allocated slab slab->spacemap ^= 0x1; *offset = 0; return slab; } static bool vk_malloc_import(struct vk_malloc *ma, struct vk_memslice *out, const struct vk_malloc_params *params) { struct vk_ctx *vk = ma->vk; VkExternalMemoryHandleTypeFlagBitsKHR vk_handle_type; vk_handle_type = vk_mem_handle_type(params->import_handle); struct vk_slab *slab = NULL; const struct pl_shared_mem *shmem = ¶ms->shared_mem; VkMemoryDedicatedAllocateInfoKHR dinfo = { .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO_KHR, .image = params->ded_image, }; VkImportMemoryFdInfoKHR fdinfo = { .sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR, .handleType = vk_handle_type, .fd = -1, }; VkImportMemoryHostPointerInfoEXT ptrinfo = { .sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_HOST_POINTER_INFO_EXT, .handleType = vk_handle_type, }; VkMemoryAllocateInfo ainfo = { .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, .allocationSize = shmem->size, }; if (params->ded_image) vk_link_struct(&ainfo, &dinfo); VkBuffer buffer = VK_NULL_HANDLE; VkMemoryRequirements reqs = params->reqs; if (params->buf_usage) { uint32_t qfs[3] = {0}; pl_assert(vk->pools.num <= PL_ARRAY_SIZE(qfs)); for (int i = 0; i < vk->pools.num; i++) qfs[i] = vk->pools.elem[i]->qf; VkExternalMemoryBufferCreateInfoKHR ext_buf_info = { .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO_KHR, .handleTypes = vk_handle_type, }; VkBufferCreateInfo binfo = { .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, .pNext = &ext_buf_info, .size = shmem->size, .usage = params->buf_usage, .sharingMode = vk->pools.num > 1 ? VK_SHARING_MODE_CONCURRENT : VK_SHARING_MODE_EXCLUSIVE, .queueFamilyIndexCount = vk->pools.num, .pQueueFamilyIndices = qfs, }; VK(vk->CreateBuffer(vk->dev, &binfo, PL_VK_ALLOC, &buffer)); PL_VK_NAME(BUFFER, buffer, "imported"); vk->GetBufferMemoryRequirements(vk->dev, buffer, &reqs); } if (reqs.size > shmem->size) { PL_ERR(vk, "Imported object requires %zu bytes, larger than the " "provided size %zu!", (size_t) reqs.size, shmem->size); goto error; } if (shmem->offset % reqs.alignment || shmem->offset % params->reqs.alignment) { PL_ERR(vk, "Imported object offset %zu conflicts with alignment %zu!", shmem->offset, pl_lcm(reqs.alignment, params->reqs.alignment)); goto error; } switch (params->import_handle) { #ifdef PL_HAVE_UNIX case PL_HANDLE_DMA_BUF: { if (!vk->GetMemoryFdPropertiesKHR) { PL_ERR(vk, "Importing PL_HANDLE_DMA_BUF requires %s.", VK_EXT_EXTERNAL_MEMORY_DMA_BUF_EXTENSION_NAME); goto error; } VkMemoryFdPropertiesKHR fdprops = { .sType = VK_STRUCTURE_TYPE_MEMORY_FD_PROPERTIES_KHR, }; VK(vk->GetMemoryFdPropertiesKHR(vk->dev, vk_handle_type, shmem->handle.fd, &fdprops)); // We dup() the fd to make it safe to import the same original fd // multiple times. 
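// On success, the dup()ed fd is handed off to vkAllocateMemory (which assumes ownership of imported fds); on failure it is closed again in the error path below.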
fdinfo.fd = dup(shmem->handle.fd); if (fdinfo.fd == -1) { PL_ERR(vk, "Failed to dup() fd (%d) when importing memory: %s", fdinfo.fd, strerror(errno)); goto error; } reqs.memoryTypeBits &= fdprops.memoryTypeBits; vk_link_struct(&ainfo, &fdinfo); break; } #else // !PL_HAVE_UNIX case PL_HANDLE_DMA_BUF: PL_ERR(vk, "PL_HANDLE_DMA_BUF requires building with UNIX support!"); goto error; #endif case PL_HANDLE_HOST_PTR: { VkMemoryHostPointerPropertiesEXT ptrprops = { .sType = VK_STRUCTURE_TYPE_MEMORY_HOST_POINTER_PROPERTIES_EXT, }; VK(vk->GetMemoryHostPointerPropertiesEXT(vk->dev, vk_handle_type, shmem->handle.ptr, &ptrprops)); ptrinfo.pHostPointer = (void *) shmem->handle.ptr; reqs.memoryTypeBits &= ptrprops.memoryTypeBits; vk_link_struct(&ainfo, &ptrinfo); break; } case PL_HANDLE_FD: case PL_HANDLE_WIN32: case PL_HANDLE_WIN32_KMT: case PL_HANDLE_IOSURFACE: case PL_HANDLE_MTL_TEX: PL_ERR(vk, "vk_malloc_import: unsupported handle type %d", params->import_handle); goto error; } if (!find_best_memtype(ma, reqs.memoryTypeBits, params, &ainfo.memoryTypeIndex)) { PL_ERR(vk, "No compatible memory types offered for imported memory!"); goto error; } VkDeviceMemory vkmem = VK_NULL_HANDLE; VK(vk->AllocateMemory(vk->dev, &ainfo, PL_VK_ALLOC, &vkmem)); slab = pl_alloc_ptr(NULL, slab); *slab = (struct vk_slab) { .mem = vkmem, .dedicated = true, .imported = true, .buffer = buffer, .size = shmem->size, .handle_type = params->import_handle, }; pl_mutex_init(&slab->lock); *out = (struct vk_memslice) { .vkmem = vkmem, .buf = buffer, .size = shmem->size - shmem->offset, .offset = shmem->offset, .shared_mem = *shmem, .priv = slab, }; switch (params->import_handle) { case PL_HANDLE_DMA_BUF: case PL_HANDLE_FD: PL_TRACE(vk, "Imported %s bytes from fd: %d%s", PRINT_SIZE(slab->size), shmem->handle.fd, params->ded_image ? " (dedicated)" : ""); // fd ownership is transferred at this point. slab->handle.fd = fdinfo.fd; fdinfo.fd = -1; break; case PL_HANDLE_HOST_PTR: PL_TRACE(vk, "Imported %s bytes from ptr: %p%s", PRINT_SIZE(slab->size), shmem->handle.ptr, params->ded_image ? " (dedicated" : ""); slab->handle.ptr = ptrinfo.pHostPointer; break; case PL_HANDLE_WIN32: case PL_HANDLE_WIN32_KMT: case PL_HANDLE_IOSURFACE: case PL_HANDLE_MTL_TEX: break; } VkMemoryPropertyFlags flags = ma->props.memoryTypes[ainfo.memoryTypeIndex].propertyFlags; if (flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) { VK(vk->MapMemory(vk->dev, slab->mem, 0, VK_WHOLE_SIZE, 0, &slab->data)); slab->coherent = flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; out->data = (uint8_t *) slab->data + out->offset; out->coherent = slab->coherent; if (!slab->coherent) { // Use entire buffer range, since this is a dedicated memory // allocation. 
This avoids issues with noncoherent atomicity out->map_offset = 0; out->map_size = VK_WHOLE_SIZE; // Mapping does not implicitly invalidate mapped memory VK(vk->InvalidateMappedMemoryRanges(vk->dev, 1, &(VkMappedMemoryRange) { .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE, .memory = slab->mem, .offset = out->map_offset, .size = out->map_size, })); } } if (buffer) VK(vk->BindBufferMemory(vk->dev, buffer, vkmem, 0)); return true; error: if (params->debug_tag) PL_ERR(vk, " for malloc: %s", params->debug_tag); vk->DestroyBuffer(vk->dev, buffer, PL_VK_ALLOC); #ifdef PL_HAVE_UNIX if (fdinfo.fd > -1) close(fdinfo.fd); #endif pl_free(slab); *out = (struct vk_memslice) {0}; return false; } size_t vk_malloc_avail(struct vk_malloc *ma, VkMemoryPropertyFlags flags) { size_t avail = 0; for (int i = 0; i < ma->props.memoryTypeCount; i++) { const VkMemoryType *mtype = &ma->props.memoryTypes[i]; if ((mtype->propertyFlags & flags) != flags) continue; avail = PL_MAX(avail, ma->props.memoryHeaps[mtype->heapIndex].size); } return avail; } bool vk_malloc_slice(struct vk_malloc *ma, struct vk_memslice *out, const struct vk_malloc_params *params) { struct vk_ctx *vk = ma->vk; pl_assert(!params->import_handle || !params->export_handle); if (params->import_handle) return vk_malloc_import(ma, out, params); pl_assert(params->reqs.size); size_t size = params->reqs.size; size_t align = params->reqs.alignment; align = pl_lcm(align, vk->props.limits.bufferImageGranularity); align = pl_lcm(align, vk->props.limits.nonCoherentAtomSize); struct vk_slab *slab; VkDeviceSize offset; if (params->ded_image || size > ma->maximum_page_size) { slab = slab_alloc(ma, params); if (!slab) return false; slab->dedicated = true; offset = 0; } else { pl_mutex_lock(&ma->lock); struct vk_pool *pool = find_pool(ma, params); slab = pool_get_page(ma, pool, size, align, &offset); pl_mutex_unlock(&ma->lock); if (!slab) { PL_ERR(ma->vk, "No slab to serve request for %s bytes (with " "alignment 0x%zx) in pool %d!", PRINT_SIZE(size), align, pool->index); return false; } // For accounting, just treat the alignment as part of the used size. // Doing it this way makes sure that the sizes reported to vk_memslice // consumers are always aligned properly. size = PL_ALIGN(size, align); slab->used += size; slab->age = ma->age; if (params->debug_tag) slab->debug_tag = params->debug_tag; pl_mutex_unlock(&slab->lock); } pl_assert(offset % align == 0); *out = (struct vk_memslice) { .vkmem = slab->mem, .offset = offset, .size = size, .buf = slab->buffer, .data = slab->data ? (uint8_t *) slab->data + offset : 0x0, .coherent = slab->coherent, .map_offset = slab->data ? offset : 0, .map_size = slab->data ? size : 0, .priv = slab, .shared_mem = { .handle = slab->handle, .offset = offset, .size = slab->size, }, }; return true; } libplacebo-v7.349.0/src/vulkan/malloc.h000066400000000000000000000061301463457750100177030ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. 
* * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #pragma once #include "common.h" // The threshold for which allocations to serve from host-mapped VRAM, as // opposed to host memory. Will not allocate more than this fraction of VRAM in // one go. (For a 256 MB non-resizable BAR, this is equivalent to 4 MB) // // Note: Not actually used by malloc.c, but by gpu_buf.c #define MAPPED_VRAM_THRESHOLD 64 // All memory allocated from a vk_malloc MUST be explicitly released by // the caller before vk_malloc_destroy is called. struct vk_malloc *vk_malloc_create(struct vk_ctx *vk); void vk_malloc_destroy(struct vk_malloc **ma); // Get the supported handle types for this malloc instance pl_handle_caps vk_malloc_handle_caps(const struct vk_malloc *ma, bool import); // Represents a single "slice" of generic (non-buffer) memory, plus some // metadata for accounting. This struct is essentially read-only. struct vk_memslice { VkDeviceMemory vkmem; VkDeviceSize offset; VkDeviceSize size; void *priv; // depending on the type/flags: struct pl_shared_mem shared_mem; VkBuffer buf; // associated buffer (when `buf_usage` is nonzero) void *data; // pointer to slice (for persistently mapped slices) bool coherent; // whether `data` is coherent VkDeviceSize map_offset; // can be larger than offset/size VkDeviceSize map_size; }; struct vk_malloc_params { VkMemoryRequirements reqs; VkMemoryPropertyFlags required; VkMemoryPropertyFlags optimal; VkBufferUsageFlags buf_usage; VkImage ded_image; // for dedicated image allocations enum pl_handle_type export_handle; enum pl_handle_type import_handle; struct pl_shared_mem shared_mem; // for `import_handle` pl_debug_tag debug_tag; }; // Returns the amount of available memory matching a given set of property // flags. Always returns the highest single allocation, not the combined total. size_t vk_malloc_avail(struct vk_malloc *ma, VkMemoryPropertyFlags flags); bool vk_malloc_slice(struct vk_malloc *ma, struct vk_memslice *out, const struct vk_malloc_params *params); void vk_malloc_free(struct vk_malloc *ma, struct vk_memslice *slice); // Clean up unused slabs. Call this roughly once per frame to reduce // memory pressure / memory leaks. void vk_malloc_garbage_collect(struct vk_malloc *ma); // For debugging purposes. Doesn't include dedicated slab allocations! void vk_malloc_print_stats(struct vk_malloc *ma, enum pl_log_level); libplacebo-v7.349.0/src/vulkan/meson.build000066400000000000000000000037401463457750100204310ustar00rootroot00000000000000vulkan_build = get_option('vulkan') vulkan_link = get_option('vk-proc-addr') vulkan_loader = dependency('vulkan', required: false) vulkan_headers = vulkan_loader.partial_dependency(includes: true, compile_args: true) registry_xml = get_option('vulkan-registry') # Prefer our Vulkan headers for portability vulkan_headers_dir = thirdparty/'Vulkan-Headers' vulkan_headers_inc = include_directories() if fs.is_dir(vulkan_headers_dir/'include') vulkan_headers = declare_dependency() vulkan_headers_inc = include_directories('../../3rdparty/Vulkan-Headers/include') # Force the use of this vk.xml because it has to be in sync with the headers registry_xml = vulkan_headers_dir/'registry/vk.xml' endif vulkan_build = vulkan_build.require( cc.has_header_symbol('vulkan/vulkan_core.h', 'VK_VERSION_1_3', include_directories: vulkan_headers_inc, dependencies: vulkan_headers), error_message: 'vulkan.h was not found on the system, nor inside ' + '`3rdparty/Vulkan-Headers`. 
Please run `git submodule update --init` ' + 'followed by `meson --wipe`.') components.set('vulkan', vulkan_build.allowed()) vulkan_link = vulkan_link.require(vulkan_loader.found() and vulkan_build.allowed()) components.set('vk-proc-addr', vulkan_link.allowed()) build_deps += vulkan_headers if vulkan_build.allowed() sources += [ 'vulkan/command.c', 'vulkan/context.c', 'vulkan/formats.c', 'vulkan/gpu.c', 'vulkan/gpu_buf.c', 'vulkan/gpu_tex.c', 'vulkan/gpu_pass.c', 'vulkan/malloc.c', 'vulkan/swapchain.c', 'vulkan/utils.c', ] datadir = get_option('prefix') / get_option('datadir') sources += custom_target('utils_gen.c', input: 'utils_gen.py', output: 'utils_gen.c', command: [python, '@INPUT@', datadir, registry_xml, '@OUTPUT@'], env: python_env, ) if vulkan_link.allowed() build_deps += vulkan_loader tests += 'vulkan.c' endif else sources += 'vulkan/stubs.c' endif libplacebo-v7.349.0/src/vulkan/stubs.c000066400000000000000000000054021463457750100175700ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #include "../common.h" #include "log.h" #include const struct pl_vk_inst_params pl_vk_inst_default_params = {0}; const struct pl_vulkan_params pl_vulkan_default_params = { PL_VULKAN_DEFAULTS }; pl_vk_inst pl_vk_inst_create(pl_log log, const struct pl_vk_inst_params *params) { pl_fatal(log, "libplacebo compiled without Vulkan support!"); return NULL; } void pl_vk_inst_destroy(pl_vk_inst *pinst) { pl_vk_inst inst = *pinst; pl_assert(!inst); } pl_vulkan pl_vulkan_create(pl_log log, const struct pl_vulkan_params *params) { pl_fatal(log, "libplacebo compiled without Vulkan support!"); return NULL; } void pl_vulkan_destroy(pl_vulkan *pvk) { pl_vulkan vk = *pvk; pl_assert(!vk); } pl_vulkan pl_vulkan_get(pl_gpu gpu) { return NULL; } VkPhysicalDevice pl_vulkan_choose_device(pl_log log, const struct pl_vulkan_device_params *params) { pl_err(log, "libplacebo compiled without Vulkan support!"); return NULL; } pl_swapchain pl_vulkan_create_swapchain(pl_vulkan vk, const struct pl_vulkan_swapchain_params *params) { pl_unreachable(); } bool pl_vulkan_swapchain_suboptimal(pl_swapchain sw) { pl_unreachable(); } pl_vulkan pl_vulkan_import(pl_log log, const struct pl_vulkan_import_params *params) { pl_fatal(log, "libplacebo compiled without Vulkan support!"); return NULL; } pl_tex pl_vulkan_wrap(pl_gpu gpu, const struct pl_vulkan_wrap_params *params) { pl_unreachable(); } VkImage pl_vulkan_unwrap(pl_gpu gpu, pl_tex tex, VkFormat *out_format, VkImageUsageFlags *out_flags) { pl_unreachable(); } bool pl_vulkan_hold_ex(pl_gpu gpu, const struct pl_vulkan_hold_params *params) { pl_unreachable(); } void pl_vulkan_release_ex(pl_gpu gpu, const struct pl_vulkan_release_params *params) { pl_unreachable(); } VkSemaphore pl_vulkan_sem_create(pl_gpu gpu, const struct pl_vulkan_sem_params *params) { pl_unreachable(); } void pl_vulkan_sem_destroy(pl_gpu gpu, VkSemaphore *semaphore) { pl_unreachable(); } 
libplacebo-v7.349.0/src/vulkan/swapchain.c000066400000000000000000000752341463457750100204170ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #include "common.h" #include "command.h" #include "formats.h" #include "utils.h" #include "gpu.h" #include "swapchain.h" #include "pl_thread.h" struct sem_pair { VkSemaphore in; VkSemaphore out; }; struct priv { struct pl_sw_fns impl; pl_mutex lock; struct vk_ctx *vk; VkSurfaceKHR surf; PL_ARRAY(VkSurfaceFormatKHR) formats; // current swapchain and metadata: struct pl_vulkan_swapchain_params params; VkSwapchainCreateInfoKHR protoInfo; // partially filled-in prototype VkSwapchainKHR swapchain; int cur_width, cur_height; int swapchain_depth; pl_rc_t frames_in_flight; // number of frames currently queued bool suboptimal; // true once VK_SUBOPTIMAL_KHR is returned bool needs_recreate; // swapchain needs to be recreated struct pl_color_repr color_repr; struct pl_color_space color_space; struct pl_hdr_metadata hdr_metadata; // state of the images: PL_ARRAY(pl_tex) images; // pl_tex wrappers for the VkImages PL_ARRAY(struct sem_pair) sems; // pool of semaphores used to synchronize images int idx_sems; // index of next free semaphore pair int last_imgidx; // the image index last acquired (for submit) }; static const struct pl_sw_fns vulkan_swapchain; static bool map_color_space(VkColorSpaceKHR space, struct pl_color_space *out) { switch (space) { // Note: This is technically against the spec, but more often than not // it's the correct result since `SRGB_NONLINEAR` is just a catch-all // for any sort of typical SDR curve, which is better approximated by // `pl_color_space_monitor`. 
case VK_COLOR_SPACE_SRGB_NONLINEAR_KHR: *out = pl_color_space_monitor; return true; case VK_COLOR_SPACE_BT709_NONLINEAR_EXT: *out = pl_color_space_monitor; return true; case VK_COLOR_SPACE_DISPLAY_P3_NONLINEAR_EXT: *out = (struct pl_color_space) { .primaries = PL_COLOR_PRIM_DISPLAY_P3, .transfer = PL_COLOR_TRC_BT_1886, }; return true; case VK_COLOR_SPACE_DCI_P3_LINEAR_EXT: *out = (struct pl_color_space) { .primaries = PL_COLOR_PRIM_DCI_P3, .transfer = PL_COLOR_TRC_LINEAR, }; return true; case VK_COLOR_SPACE_DCI_P3_NONLINEAR_EXT: *out = (struct pl_color_space) { .primaries = PL_COLOR_PRIM_DCI_P3, .transfer = PL_COLOR_TRC_BT_1886, }; return true; case VK_COLOR_SPACE_EXTENDED_SRGB_LINEAR_EXT: case VK_COLOR_SPACE_EXTENDED_SRGB_NONLINEAR_EXT: // TODO return false; case VK_COLOR_SPACE_BT709_LINEAR_EXT: *out = (struct pl_color_space) { .primaries = PL_COLOR_PRIM_BT_709, .transfer = PL_COLOR_TRC_LINEAR, }; return true; case VK_COLOR_SPACE_BT2020_LINEAR_EXT: *out = (struct pl_color_space) { .primaries = PL_COLOR_PRIM_BT_2020, .transfer = PL_COLOR_TRC_LINEAR, }; return true; case VK_COLOR_SPACE_HDR10_ST2084_EXT: *out = (struct pl_color_space) { .primaries = PL_COLOR_PRIM_BT_2020, .transfer = PL_COLOR_TRC_PQ, }; return true; case VK_COLOR_SPACE_DOLBYVISION_EXT: // Unlikely to ever be implemented return false; case VK_COLOR_SPACE_HDR10_HLG_EXT: *out = (struct pl_color_space) { .primaries = PL_COLOR_PRIM_BT_2020, .transfer = PL_COLOR_TRC_HLG, }; return true; case VK_COLOR_SPACE_ADOBERGB_LINEAR_EXT: *out = (struct pl_color_space) { .primaries = PL_COLOR_PRIM_ADOBE, .transfer = PL_COLOR_TRC_LINEAR, }; return true; case VK_COLOR_SPACE_ADOBERGB_NONLINEAR_EXT: *out = (struct pl_color_space) { .primaries = PL_COLOR_PRIM_ADOBE, .transfer = PL_COLOR_TRC_GAMMA22, }; return true; case VK_COLOR_SPACE_PASS_THROUGH_EXT: *out = pl_color_space_unknown; return true; #ifdef VK_AMD_display_native_hdr case VK_COLOR_SPACE_DISPLAY_NATIVE_AMD: // TODO return false; #endif default: return false; } } static bool pick_surf_format(pl_swapchain sw, const struct pl_color_space *hint) { struct priv *p = PL_PRIV(sw); struct vk_ctx *vk = p->vk; pl_gpu gpu = sw->gpu; int best_score = 0, best_id; bool wide_gamut = pl_color_primaries_is_wide_gamut(hint->primaries); bool prefer_hdr = pl_color_transfer_is_hdr(hint->transfer); for (int i = 0; i < p->formats.num; i++) { // Color space / format whitelist struct pl_color_space space; if (!map_color_space(p->formats.elem[i].colorSpace, &space)) continue; bool disable10 = !pl_color_transfer_is_hdr(space.transfer) && p->params.disable_10bit_sdr; switch (p->formats.elem[i].format) { // Only accept floating point formats for linear curves case VK_FORMAT_R16G16B16_SFLOAT: case VK_FORMAT_R16G16B16A16_SFLOAT: case VK_FORMAT_R32G32B32_SFLOAT: case VK_FORMAT_R32G32B32A32_SFLOAT: case VK_FORMAT_R64G64B64_SFLOAT: case VK_FORMAT_R64G64B64A64_SFLOAT: if (space.transfer == PL_COLOR_TRC_LINEAR) break; // accept continue; // Only accept 8 bit for non-HDR curves case VK_FORMAT_R8G8B8_UNORM: case VK_FORMAT_B8G8R8_UNORM: case VK_FORMAT_R8G8B8A8_UNORM: case VK_FORMAT_B8G8R8A8_UNORM: case VK_FORMAT_A8B8G8R8_UNORM_PACK32: if (!pl_color_transfer_is_hdr(space.transfer)) break; // accept continue; // Only accept 10 bit formats for non-linear curves case VK_FORMAT_A2R10G10B10_UNORM_PACK32: case VK_FORMAT_A2B10G10R10_UNORM_PACK32: if (space.transfer != PL_COLOR_TRC_LINEAR && !disable10) break; // accept continue; // Accept 16-bit formats for everything case VK_FORMAT_R16G16B16_UNORM: case VK_FORMAT_R16G16B16A16_UNORM: 
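// `disable10` also covers these wider formats, since `disable_10bit_sdr` is meant to exclude any above-8-bit output for SDR content.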
if (!disable10) break; // accept continue; default: continue; } // Make sure we can wrap this format to a meaningful, valid pl_fmt for (int n = 0; n < gpu->num_formats; n++) { pl_fmt plfmt = gpu->formats[n]; const struct vk_format **pvkfmt = PL_PRIV(plfmt); if ((*pvkfmt)->tfmt != p->formats.elem[i].format) continue; enum pl_fmt_caps render_caps = 0; render_caps |= PL_FMT_CAP_RENDERABLE; render_caps |= PL_FMT_CAP_BLITTABLE; if ((plfmt->caps & render_caps) != render_caps) continue; // format valid, use it if it has a higher score int score = 0; for (int c = 0; c < 3; c++) score += plfmt->component_depth[c]; if (pl_color_primaries_is_wide_gamut(space.primaries) == wide_gamut) score += 1000; if (space.primaries == hint->primaries) score += 2000; if (pl_color_transfer_is_hdr(space.transfer) == prefer_hdr) score += 10000; if (space.transfer == hint->transfer) score += 20000; switch (plfmt->type) { case PL_FMT_UNKNOWN: break; case PL_FMT_UINT: break; case PL_FMT_SINT: break; case PL_FMT_UNORM: score += 500; break; case PL_FMT_SNORM: score += 400; break; case PL_FMT_FLOAT: score += 300; break; case PL_FMT_TYPE_COUNT: pl_unreachable(); }; if (score > best_score) { best_score = score; best_id = i; break; } } } if (!best_score) { PL_ERR(vk, "Failed picking any valid, renderable surface format!"); return false; } VkSurfaceFormatKHR new_sfmt = p->formats.elem[best_id]; if (p->protoInfo.imageFormat != new_sfmt.format || p->protoInfo.imageColorSpace != new_sfmt.colorSpace) { PL_INFO(vk, "Picked surface configuration %d: %s + %s", best_id, vk_fmt_name(new_sfmt.format), vk_csp_name(new_sfmt.colorSpace)); p->protoInfo.imageFormat = new_sfmt.format; p->protoInfo.imageColorSpace = new_sfmt.colorSpace; p->needs_recreate = true; } return true; } static void set_hdr_metadata(struct priv *p, const struct pl_hdr_metadata *metadata) { struct vk_ctx *vk = p->vk; if (!vk->SetHdrMetadataEXT) return; // Whitelist only values that we support signalling metadata for struct pl_hdr_metadata fix = { .prim = metadata->prim, .min_luma = metadata->min_luma, .max_luma = metadata->max_luma, .max_cll = metadata->max_cll, .max_fall = metadata->max_fall, }; // Ignore no-op changes if (pl_hdr_metadata_equal(&fix, &p->hdr_metadata)) return; // Remember the metadata so we can re-apply it after swapchain recreation p->hdr_metadata = fix; // Ignore HDR metadata requests for SDR swapchains if (!pl_color_transfer_is_hdr(p->color_space.transfer)) return; if (!p->swapchain) return; vk->SetHdrMetadataEXT(vk->dev, 1, &p->swapchain, &(VkHdrMetadataEXT) { .sType = VK_STRUCTURE_TYPE_HDR_METADATA_EXT, .displayPrimaryRed = { fix.prim.red.x, fix.prim.red.y }, .displayPrimaryGreen = { fix.prim.green.x, fix.prim.green.y }, .displayPrimaryBlue = { fix.prim.blue.x, fix.prim.blue.y }, .whitePoint = { fix.prim.white.x, fix.prim.white.y }, .maxLuminance = fix.max_luma, .minLuminance = fix.min_luma, .maxContentLightLevel = fix.max_cll, .maxFrameAverageLightLevel = fix.max_fall, }); // Keep track of applied HDR colorimetry metadata p->color_space.hdr = p->hdr_metadata; } pl_swapchain pl_vulkan_create_swapchain(pl_vulkan plvk, const struct pl_vulkan_swapchain_params *params) { struct vk_ctx *vk = PL_PRIV(plvk); pl_gpu gpu = plvk->gpu; if (!vk->CreateSwapchainKHR) { PL_ERR(gpu, VK_KHR_SWAPCHAIN_EXTENSION_NAME " not enabled!"); return NULL; } struct pl_swapchain_t *sw = pl_zalloc_obj(NULL, sw, struct priv); sw->log = vk->log; sw->gpu = gpu; struct priv *p = PL_PRIV(sw); pl_mutex_init(&p->lock); p->impl = vulkan_swapchain; p->params = *params; p->vk = vk; 
p->surf = params->surface; p->swapchain_depth = PL_DEF(params->swapchain_depth, 3); pl_assert(p->swapchain_depth > 0); atomic_init(&p->frames_in_flight, 0); p->last_imgidx = -1; p->protoInfo = (VkSwapchainCreateInfoKHR) { .sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR, .surface = p->surf, .imageArrayLayers = 1, // non-stereoscopic .imageSharingMode = VK_SHARING_MODE_EXCLUSIVE, .minImageCount = p->swapchain_depth + 1, // +1 for the FB .presentMode = params->present_mode, .clipped = true, }; // These fields will be updated by `vk_sw_recreate` p->color_space = pl_color_space_unknown; p->color_repr = (struct pl_color_repr) { .sys = PL_COLOR_SYSTEM_RGB, .levels = PL_COLOR_LEVELS_FULL, .alpha = PL_ALPHA_UNKNOWN, }; // Make sure the swapchain present mode is supported VkPresentModeKHR *modes = NULL; uint32_t num_modes = 0; VK(vk->GetPhysicalDeviceSurfacePresentModesKHR(vk->physd, p->surf, &num_modes, NULL)); modes = pl_calloc_ptr(NULL, num_modes, modes); VK(vk->GetPhysicalDeviceSurfacePresentModesKHR(vk->physd, p->surf, &num_modes, modes)); bool supported = false; for (int i = 0; i < num_modes; i++) supported |= (modes[i] == p->protoInfo.presentMode); pl_free_ptr(&modes); if (!supported) { PL_WARN(vk, "Requested swap mode unsupported by this device, falling " "back to VK_PRESENT_MODE_FIFO_KHR"); p->protoInfo.presentMode = VK_PRESENT_MODE_FIFO_KHR; } // Enumerate the supported surface color spaces uint32_t num_formats = 0; VK(vk->GetPhysicalDeviceSurfaceFormatsKHR(vk->physd, p->surf, &num_formats, NULL)); PL_ARRAY_RESIZE(sw, p->formats, num_formats); VK(vk->GetPhysicalDeviceSurfaceFormatsKHR(vk->physd, p->surf, &num_formats, p->formats.elem)); p->formats.num = num_formats; PL_INFO(gpu, "Available surface configurations:"); for (int i = 0; i < p->formats.num; i++) { PL_INFO(gpu, " %d: %-40s %s", i, vk_fmt_name(p->formats.elem[i].format), vk_csp_name(p->formats.elem[i].colorSpace)); } // Ensure there exists at least some valid renderable surface format struct pl_color_space hint = {0}; if (!pick_surf_format(sw, &hint)) goto error; return sw; error: pl_free(modes); pl_free(sw); return NULL; } static void vk_sw_destroy(pl_swapchain sw) { pl_gpu gpu = sw->gpu; struct priv *p = PL_PRIV(sw); struct vk_ctx *vk = p->vk; pl_gpu_flush(gpu); vk_wait_idle(vk); // Vulkan offers no way to know when a queue presentation command is done, // leading to spec-mandated undefined behavior when destroying resources // tied to the swapchain. Use an extra `vkQueueWaitIdle` on all of the // queues we may have oustanding presentation calls on, to hopefully inform // the driver that we want to wait until the device is truly idle. 
for (int i = 0; i < vk->pool_graphics->num_queues; i++) vk->QueueWaitIdle(vk->pool_graphics->queues[i]); for (int i = 0; i < p->images.num; i++) pl_tex_destroy(gpu, &p->images.elem[i]); for (int i = 0; i < p->sems.num; i++) { vk->DestroySemaphore(vk->dev, p->sems.elem[i].in, PL_VK_ALLOC); vk->DestroySemaphore(vk->dev, p->sems.elem[i].out, PL_VK_ALLOC); } vk->DestroySwapchainKHR(vk->dev, p->swapchain, PL_VK_ALLOC); pl_mutex_destroy(&p->lock); pl_free((void *) sw); } static int vk_sw_latency(pl_swapchain sw) { struct priv *p = PL_PRIV(sw); return p->swapchain_depth; } static bool update_swapchain_info(struct priv *p, VkSwapchainCreateInfoKHR *info, int w, int h) { struct vk_ctx *vk = p->vk; // Query the supported capabilities and update this struct as needed VkSurfaceCapabilitiesKHR caps = {0}; VK(vk->GetPhysicalDeviceSurfaceCapabilitiesKHR(vk->physd, p->surf, &caps)); // Check for hidden/invisible window if (!caps.currentExtent.width || !caps.currentExtent.height) { PL_DEBUG(vk, "maxImageExtent reported as 0x0, hidden window? skipping"); return false; } // Sorted by preference static const struct { VkCompositeAlphaFlagsKHR vk_mode; enum pl_alpha_mode pl_mode; } alphaModes[] = { {VK_COMPOSITE_ALPHA_POST_MULTIPLIED_BIT_KHR, PL_ALPHA_INDEPENDENT}, {VK_COMPOSITE_ALPHA_PRE_MULTIPLIED_BIT_KHR, PL_ALPHA_PREMULTIPLIED}, {VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR, PL_ALPHA_UNKNOWN}, {VK_COMPOSITE_ALPHA_INHERIT_BIT_KHR, PL_ALPHA_UNKNOWN}, }; for (int i = 0; i < PL_ARRAY_SIZE(alphaModes); i++) { if (caps.supportedCompositeAlpha & alphaModes[i].vk_mode) { info->compositeAlpha = alphaModes[i].vk_mode; p->color_repr.alpha = alphaModes[i].pl_mode; PL_DEBUG(vk, "Requested alpha compositing mode: %s", vk_alpha_mode(info->compositeAlpha)); break; } } if (!info->compositeAlpha) { PL_ERR(vk, "Failed picking alpha compositing mode (caps: 0x%x)", caps.supportedCompositeAlpha); goto error; } // Note: We could probably also allow picking a surface transform that // flips the framebuffer and set `pl_swapchain_frame.flipped`, but this // doesn't appear to be necessary for any vulkan implementations. 
static const VkSurfaceTransformFlagsKHR rotModes[] = { VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR, VK_SURFACE_TRANSFORM_INHERIT_BIT_KHR, }; for (int i = 0; i < PL_ARRAY_SIZE(rotModes); i++) { if (caps.supportedTransforms & rotModes[i]) { info->preTransform = rotModes[i]; PL_DEBUG(vk, "Requested surface transform: %s", vk_surface_transform(info->preTransform)); break; } } if (!info->preTransform) { PL_ERR(vk, "Failed picking surface transform mode (caps: 0x%x)", caps.supportedTransforms); goto error; } // Image count as required PL_DEBUG(vk, "Requested image count: %d (min %d max %d)", (int) info->minImageCount, (int) caps.minImageCount, (int) caps.maxImageCount); info->minImageCount = PL_MAX(info->minImageCount, caps.minImageCount); if (caps.maxImageCount) info->minImageCount = PL_MIN(info->minImageCount, caps.maxImageCount); PL_DEBUG(vk, "Requested image size: %dx%d (min %dx%d < cur %dx%d < max %dx%d)", w, h, caps.minImageExtent.width, caps.minImageExtent.height, caps.currentExtent.width, caps.currentExtent.height, caps.maxImageExtent.width, caps.maxImageExtent.height); // Default the requested size based on the reported extent if (caps.currentExtent.width != 0xFFFFFFFF) w = PL_DEF(w, caps.currentExtent.width); if (caps.currentExtent.height != 0xFFFFFFFF) h = PL_DEF(h, caps.currentExtent.height); // Otherwise, re-use the existing size if available w = PL_DEF(w, info->imageExtent.width); h = PL_DEF(h, info->imageExtent.height); if (!w || !h) { PL_ERR(vk, "Failed resizing swapchain: unknown size?"); goto error; } // Clamp the extent based on the supported limits w = PL_CLAMP(w, caps.minImageExtent.width, caps.maxImageExtent.width); h = PL_CLAMP(h, caps.minImageExtent.height, caps.maxImageExtent.height); info->imageExtent = (VkExtent2D) { w, h }; // We just request whatever makes sense, and let the pl_vk decide what // pl_tex_params that translates to. 
That said, we still need to intersect // the swapchain usage flags with the format usage flags VkImageUsageFlags req_flags = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT; VkImageUsageFlags opt_flags = VK_IMAGE_USAGE_STORAGE_BIT; info->imageUsage = caps.supportedUsageFlags & (req_flags | opt_flags); VkFormatProperties fmtprop = {0}; vk->GetPhysicalDeviceFormatProperties(vk->physd, info->imageFormat, &fmtprop); #define CHECK(usage, feature) \ if (!((fmtprop.optimalTilingFeatures & VK_FORMAT_FEATURE_##feature##_BIT))) \ info->imageUsage &= ~VK_IMAGE_USAGE_##usage##_BIT CHECK(COLOR_ATTACHMENT, COLOR_ATTACHMENT); CHECK(TRANSFER_DST, TRANSFER_DST); CHECK(STORAGE, STORAGE_IMAGE); if ((info->imageUsage & req_flags) != req_flags) { PL_ERR(vk, "The swapchain doesn't support rendering and blitting!"); goto error; } return true; error: return false; } static void destroy_swapchain(struct vk_ctx *vk, void *swapchain) { vk->DestroySwapchainKHR(vk->dev, vk_unwrap_handle(swapchain), PL_VK_ALLOC); } VK_CB_FUNC_DEF(destroy_swapchain); static bool vk_sw_recreate(pl_swapchain sw, int w, int h) { pl_gpu gpu = sw->gpu; struct priv *p = PL_PRIV(sw); struct vk_ctx *vk = p->vk; VkImage *vkimages = NULL; uint32_t num_images = 0; if (!update_swapchain_info(p, &p->protoInfo, w, h)) return false; VkSwapchainCreateInfoKHR sinfo = p->protoInfo; #ifdef VK_EXT_full_screen_exclusive // Explicitly disallow full screen exclusive mode if possible static const VkSurfaceFullScreenExclusiveInfoEXT fsinfo = { .sType = VK_STRUCTURE_TYPE_SURFACE_FULL_SCREEN_EXCLUSIVE_INFO_EXT, .fullScreenExclusive = VK_FULL_SCREEN_EXCLUSIVE_DISALLOWED_EXT, }; if (vk->AcquireFullScreenExclusiveModeEXT) vk_link_struct(&sinfo, &fsinfo); #endif p->suboptimal = false; p->needs_recreate = false; p->cur_width = sinfo.imageExtent.width; p->cur_height = sinfo.imageExtent.height; PL_DEBUG(sw, "(Re)creating swapchain of size %dx%d", sinfo.imageExtent.width, sinfo.imageExtent.height); #ifdef PL_HAVE_UNIX if (vk->props.vendorID == VK_VENDOR_ID_NVIDIA) { vk->DeviceWaitIdle(vk->dev); vk_wait_idle(vk); } #endif // Calling `vkCreateSwapchainKHR` puts sinfo.oldSwapchain into a retired // state whether the call succeeds or not, so we always need to garbage // collect it afterwards - asynchronously as it may still be in use sinfo.oldSwapchain = p->swapchain; p->swapchain = VK_NULL_HANDLE; VkResult res = vk->CreateSwapchainKHR(vk->dev, &sinfo, PL_VK_ALLOC, &p->swapchain); vk_dev_callback(vk, VK_CB_FUNC(destroy_swapchain), vk, vk_wrap_handle(sinfo.oldSwapchain)); PL_VK_ASSERT(res, "vk->CreateSwapchainKHR(...)"); // Get the new swapchain images VK(vk->GetSwapchainImagesKHR(vk->dev, p->swapchain, &num_images, NULL)); vkimages = pl_calloc_ptr(NULL, num_images, vkimages); VK(vk->GetSwapchainImagesKHR(vk->dev, p->swapchain, &num_images, vkimages)); for (int i = 0; i < num_images; i++) PL_VK_NAME(IMAGE, vkimages[i], "swapchain"); // If needed, allocate some more semaphores while (num_images > p->sems.num) { VkSemaphore sem_in = VK_NULL_HANDLE, sem_out = VK_NULL_HANDLE; static const VkSemaphoreCreateInfo seminfo = { .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, }; VK(vk->CreateSemaphore(vk->dev, &seminfo, PL_VK_ALLOC, &sem_in)); VK(vk->CreateSemaphore(vk->dev, &seminfo, PL_VK_ALLOC, &sem_out)); PL_VK_NAME(SEMAPHORE, sem_in, "swapchain in"); PL_VK_NAME(SEMAPHORE, sem_out, "swapchain out"); PL_ARRAY_APPEND(sw, p->sems, (struct sem_pair) { .in = sem_in, .out = sem_out, }); } // Recreate the pl_tex wrappers for (int i = 0; i < p->images.num; i++) 
pl_tex_destroy(gpu, &p->images.elem[i]); p->images.num = 0; for (int i = 0; i < num_images; i++) { const VkExtent2D *ext = &sinfo.imageExtent; pl_tex tex = pl_vulkan_wrap(gpu, pl_vulkan_wrap_params( .image = vkimages[i], .width = ext->width, .height = ext->height, .format = sinfo.imageFormat, .usage = sinfo.imageUsage, )); if (!tex) goto error; PL_ARRAY_APPEND(sw, p->images, tex); } pl_assert(num_images > 0); int bits = 0; // The channel with the most bits is probably the most authoritative about // the actual color information (consider e.g. a2bgr10). Slight downside // in that it results in rounding r/b for e.g. rgb565, but we don't pick // surfaces with fewer than 8 bits anyway, so let's not care for now. pl_fmt fmt = p->images.elem[0]->params.format; for (int i = 0; i < fmt->num_components; i++) bits = PL_MAX(bits, fmt->component_depth[i]); p->color_repr.bits.sample_depth = bits; p->color_repr.bits.color_depth = bits; // Note: `p->color_space.hdr` is (re-)applied by `set_hdr_metadata` map_color_space(sinfo.imageColorSpace, &p->color_space); // Forcibly re-apply HDR metadata, bypassing the no-op check struct pl_hdr_metadata metadata = p->hdr_metadata; p->hdr_metadata = pl_hdr_metadata_empty; set_hdr_metadata(p, &metadata); pl_free(vkimages); return true; error: PL_ERR(vk, "Failed (re)creating swapchain!"); pl_free(vkimages); vk->DestroySwapchainKHR(vk->dev, p->swapchain, PL_VK_ALLOC); p->swapchain = VK_NULL_HANDLE; p->cur_width = p->cur_height = 0; return false; } static bool vk_sw_start_frame(pl_swapchain sw, struct pl_swapchain_frame *out_frame) { struct priv *p = PL_PRIV(sw); struct vk_ctx *vk = p->vk; pl_mutex_lock(&p->lock); bool recreate = !p->swapchain || p->needs_recreate; if (p->suboptimal && !p->params.allow_suboptimal) recreate = true; if (recreate && !vk_sw_recreate(sw, 0, 0)) { pl_mutex_unlock(&p->lock); return false; } VkSemaphore sem_in = p->sems.elem[p->idx_sems].in; PL_TRACE(vk, "vkAcquireNextImageKHR signals 0x%"PRIx64, (uint64_t) sem_in); for (int attempts = 0; attempts < 2; attempts++) { uint32_t imgidx = 0; VkResult res = vk->AcquireNextImageKHR(vk->dev, p->swapchain, UINT64_MAX, sem_in, VK_NULL_HANDLE, &imgidx); switch (res) { case VK_SUBOPTIMAL_KHR: p->suboptimal = true; // fall through case VK_SUCCESS: p->last_imgidx = imgidx; pl_vulkan_release_ex(sw->gpu, pl_vulkan_release_params( .tex = p->images.elem[imgidx], .layout = VK_IMAGE_LAYOUT_UNDEFINED, .qf = VK_QUEUE_FAMILY_IGNORED, .semaphore = { sem_in }, )); *out_frame = (struct pl_swapchain_frame) { .fbo = p->images.elem[imgidx], .flipped = false, .color_repr = p->color_repr, .color_space = p->color_space, }; // keep lock held return true; case VK_ERROR_OUT_OF_DATE_KHR: { // In these cases try recreating the swapchain if (!vk_sw_recreate(sw, 0, 0)) { pl_mutex_unlock(&p->lock); return false; } continue; } default: PL_ERR(vk, "Failed acquiring swapchain image: %s", vk_res_str(res)); pl_mutex_unlock(&p->lock); return false; } } // If we've exhausted the number of attempts to recreate the swapchain, // just give up silently and let the user retry some time later. 
pl_mutex_unlock(&p->lock); return false; } static void present_cb(struct priv *p, void *arg) { (void) pl_rc_deref(&p->frames_in_flight); } VK_CB_FUNC_DEF(present_cb); static bool vk_sw_submit_frame(pl_swapchain sw) { pl_gpu gpu = sw->gpu; struct priv *p = PL_PRIV(sw); struct vk_ctx *vk = p->vk; pl_assert(p->last_imgidx >= 0); pl_assert(p->swapchain); uint32_t idx = p->last_imgidx; VkSemaphore sem_out = p->sems.elem[p->idx_sems++].out; p->idx_sems %= p->sems.num; p->last_imgidx = -1; bool held = pl_vulkan_hold_ex(gpu, pl_vulkan_hold_params( .tex = p->images.elem[idx], .layout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, .qf = VK_QUEUE_FAMILY_IGNORED, .semaphore = { sem_out }, )); if (!held) { PL_ERR(gpu, "Failed holding swapchain image for presentation"); pl_mutex_unlock(&p->lock); return false; } struct vk_cmd *cmd = pl_vk_steal_cmd(gpu); if (!cmd) { pl_mutex_unlock(&p->lock); return false; } pl_rc_ref(&p->frames_in_flight); vk_cmd_callback(cmd, VK_CB_FUNC(present_cb), p, NULL); if (!vk_cmd_submit(&cmd)) { pl_mutex_unlock(&p->lock); return false; } struct vk_cmdpool *pool = vk->pool_graphics; int qidx = pool->idx_queues; VkQueue queue = pool->queues[qidx]; vk_rotate_queues(p->vk); vk_malloc_garbage_collect(vk->ma); VkPresentInfoKHR pinfo = { .sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR, .waitSemaphoreCount = 1, .pWaitSemaphores = &sem_out, .swapchainCount = 1, .pSwapchains = &p->swapchain, .pImageIndices = &idx, }; PL_TRACE(vk, "vkQueuePresentKHR waits on 0x%"PRIx64, (uint64_t) sem_out); vk->lock_queue(vk->queue_ctx, pool->qf, qidx); VkResult res = vk->QueuePresentKHR(queue, &pinfo); vk->unlock_queue(vk->queue_ctx, pool->qf, qidx); pl_mutex_unlock(&p->lock); switch (res) { case VK_SUBOPTIMAL_KHR: p->suboptimal = true; // fall through case VK_SUCCESS: return true; case VK_ERROR_OUT_OF_DATE_KHR: // We can silently ignore this error, since the next start_frame will // recreate the swapchain automatically. 
return true; default: PL_ERR(vk, "Failed presenting to queue %p: %s", (void *) queue, vk_res_str(res)); return false; } } static void vk_sw_swap_buffers(pl_swapchain sw) { struct priv *p = PL_PRIV(sw); pl_mutex_lock(&p->lock); while (pl_rc_count(&p->frames_in_flight) >= p->swapchain_depth) { pl_mutex_unlock(&p->lock); // don't hold mutex while blocking vk_poll_commands(p->vk, UINT64_MAX); pl_mutex_lock(&p->lock); } pl_mutex_unlock(&p->lock); } static bool vk_sw_resize(pl_swapchain sw, int *width, int *height) { struct priv *p = PL_PRIV(sw); bool ok = true; pl_mutex_lock(&p->lock); bool width_changed = *width && *width != p->cur_width, height_changed = *height && *height != p->cur_height; if (p->suboptimal || p->needs_recreate || width_changed || height_changed) ok = vk_sw_recreate(sw, *width, *height); *width = p->cur_width; *height = p->cur_height; pl_mutex_unlock(&p->lock); return ok; } static void vk_sw_colorspace_hint(pl_swapchain sw, const struct pl_color_space *csp) { struct priv *p = PL_PRIV(sw); pl_mutex_lock(&p->lock); // This should never fail if the swapchain already exists bool ok = pick_surf_format(sw, csp); set_hdr_metadata(p, &csp->hdr); pl_assert(ok); pl_mutex_unlock(&p->lock); } bool pl_vulkan_swapchain_suboptimal(pl_swapchain sw) { struct priv *p = PL_PRIV(sw); return p->suboptimal; } static const struct pl_sw_fns vulkan_swapchain = { .destroy = vk_sw_destroy, .latency = vk_sw_latency, .resize = vk_sw_resize, .colorspace_hint = vk_sw_colorspace_hint, .start_frame = vk_sw_start_frame, .submit_frame = vk_sw_submit_frame, .swap_buffers = vk_sw_swap_buffers, }; libplacebo-v7.349.0/src/vulkan/utils.c000066400000000000000000000114401463457750100175670ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . 
*/ #include "utils.h" VkExternalMemoryHandleTypeFlagBitsKHR vk_mem_handle_type(enum pl_handle_type handle_type) { if (!handle_type) return 0; switch (handle_type) { case PL_HANDLE_FD: return VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR; case PL_HANDLE_WIN32: return VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT_KHR; case PL_HANDLE_WIN32_KMT: return VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT_KHR; case PL_HANDLE_DMA_BUF: return VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT; case PL_HANDLE_HOST_PTR: return VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT; case PL_HANDLE_MTL_TEX: case PL_HANDLE_IOSURFACE: return 0; } pl_unreachable(); } VkExternalSemaphoreHandleTypeFlagBitsKHR vk_sync_handle_type(enum pl_handle_type handle_type) { if (!handle_type) return 0; switch (handle_type) { case PL_HANDLE_FD: return VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR; case PL_HANDLE_WIN32: return VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT_KHR; case PL_HANDLE_WIN32_KMT: return VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT_KHR; case PL_HANDLE_DMA_BUF: case PL_HANDLE_HOST_PTR: case PL_HANDLE_MTL_TEX: case PL_HANDLE_IOSURFACE: return 0; } pl_unreachable(); } bool vk_external_mem_check(struct vk_ctx *vk, const VkExternalMemoryPropertiesKHR *props, enum pl_handle_type handle_type, bool import) { VkExternalMemoryFeatureFlagsKHR flags = props->externalMemoryFeatures; VkExternalMemoryHandleTypeFlagBitsKHR vk_handle = vk_mem_handle_type(handle_type); if (import) { if (!(flags & VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT_KHR)) { PL_DEBUG(vk, "Handle type %s (0x%x) is not importable", vk_handle_name(vk_handle), (unsigned int) handle_type); return false; } } else { if (!(flags & VK_EXTERNAL_MEMORY_FEATURE_EXPORTABLE_BIT_KHR)) { PL_DEBUG(vk, "Handle type %s (0x%x) is not exportable", vk_handle_name(vk_handle), (unsigned int) handle_type); return false; } } return true; } const enum pl_handle_type vk_mem_handle_list[] = { PL_HANDLE_HOST_PTR, #ifdef PL_HAVE_UNIX PL_HANDLE_FD, PL_HANDLE_DMA_BUF, #endif #ifdef PL_HAVE_WIN32 PL_HANDLE_WIN32, PL_HANDLE_WIN32_KMT, #endif 0 }; const enum pl_handle_type vk_sync_handle_list[] = { #ifdef PL_HAVE_UNIX PL_HANDLE_FD, #endif #ifdef PL_HAVE_WIN32 PL_HANDLE_WIN32, PL_HANDLE_WIN32_KMT, #endif 0 }; const void *vk_find_struct(const void *chain, VkStructureType stype) { const VkBaseInStructure *in = chain; while (in) { if (in->sType == stype) return in; in = in->pNext; } return NULL; } void vk_link_struct(void *chain, const void *in) { if (!in) return; VkBaseOutStructure *out = chain; while (out->pNext) out = out->pNext; out->pNext = (void *) in; } void *vk_struct_memdup(void *alloc, const void *pin) { if (!pin) return NULL; const VkBaseInStructure *in = pin; size_t size = vk_struct_size(in->sType); pl_assert(size); VkBaseOutStructure *out = pl_memdup(alloc, in, size); out->pNext = NULL; return out; } void *vk_chain_memdup(void *alloc, const void *pin) { if (!pin) return NULL; const VkBaseInStructure *in = pin; VkBaseOutStructure *out = vk_struct_memdup(alloc, in); pl_assert(out); out->pNext = vk_chain_memdup(alloc, in->pNext); return out; } void *vk_chain_alloc(void *alloc, void *chain, VkStructureType stype) { for (VkBaseOutStructure *out = chain;; out = out->pNext) { if (out->sType == stype) return out; if (!out->pNext) { VkBaseOutStructure *s = pl_zalloc(alloc, vk_struct_size(stype)); s->sType = stype; out->pNext = s; return s; } } } 
libplacebo-v7.349.0/src/vulkan/utils.h000066400000000000000000000133261463457750100176010ustar00rootroot00000000000000/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #pragma once #include "common.h" // Return a human-readable name for various vulkan enums const char *vk_res_str(VkResult res); const char *vk_fmt_name(VkFormat fmt); const char *vk_csp_name(VkColorSpaceKHR csp); const char *vk_handle_name(VkExternalMemoryHandleTypeFlagBitsKHR handle); const char *vk_obj_type(VkObjectType obj); const char *vk_alpha_mode(VkCompositeAlphaFlagsKHR alpha); const char *vk_surface_transform(VkSurfaceTransformFlagsKHR transform); // Return the size of an arbitrary vulkan struct. Returns 0 for unknown structs size_t vk_struct_size(VkStructureType stype); // Returns the vulkan API version which a given extension was promoted to, or 0 // if the extension is not promoted. uint32_t vk_ext_promoted_ver(const char *extension); // Enum translation boilerplate VkExternalMemoryHandleTypeFlagBitsKHR vk_mem_handle_type(enum pl_handle_type); VkExternalSemaphoreHandleTypeFlagBitsKHR vk_sync_handle_type(enum pl_handle_type); // Bitmask of all access flags that imply a read/write operation, respectively extern const VkAccessFlags2 vk_access_read; extern const VkAccessFlags2 vk_access_write; // Check for compatibility of a VkExternalMemoryProperties bool vk_external_mem_check(struct vk_ctx *vk, const VkExternalMemoryPropertiesKHR *props, enum pl_handle_type handle_type, bool check_import); // Static lists of external handle types we should try probing for extern const enum pl_handle_type vk_mem_handle_list[]; extern const enum pl_handle_type vk_sync_handle_list[]; // Find a structure in a pNext chain, or NULL const void *vk_find_struct(const void *chain, VkStructureType stype); // Link a structure into a pNext chain void vk_link_struct(void *chain, const void *in); // Make a copy of a structure, not including the pNext chain void *vk_struct_memdup(void *alloc, const void *in); // Make a deep copy of an entire pNext chain void *vk_chain_memdup(void *alloc, const void *in); // Find a structure in a pNext chain, or allocate + link it if absent. void *vk_chain_alloc(void *alloc, void *chain, VkStructureType stype); // Renormalize input features into a state consistent for a given API version. // If `api_ver` is specified as 0, *both* meta-structs and extension structs // will be emitted. Note: `out` should be initialized by the user. In // particular, if it already contains a valid features chain, then this // function will effectively act as a union. 
void vk_features_normalize(void *alloc, const VkPhysicalDeviceFeatures2 *in, uint32_t api_ver, VkPhysicalDeviceFeatures2 *out); // Convenience macros to simplify a lot of common boilerplate #define PL_VK_ASSERT(res, str) \ do { \ if (res != VK_SUCCESS) { \ PL_ERR(vk, str ": %s (%s:%d)", \ vk_res_str(res), __FILE__, __LINE__); \ goto error; \ } \ } while (0) #define VK(cmd) \ do { \ PL_TRACE(vk, #cmd); \ VkResult _res = (cmd); \ PL_VK_ASSERT(_res, #cmd); \ } while (0) #define PL_VK_NAME(type, obj, name) \ do { \ if (vk->SetDebugUtilsObjectNameEXT) { \ vk->SetDebugUtilsObjectNameEXT(vk->dev, &(VkDebugUtilsObjectNameInfoEXT) { \ .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_OBJECT_NAME_INFO_EXT, \ .objectType = VK_OBJECT_TYPE_##type, \ .objectHandle = (uint64_t) (obj), \ .pObjectName = (name), \ }); \ } \ } while (0) // Variant of PL_VK_NAME for dispatchable handles #define PL_VK_NAME_HANDLE(type, obj, name) \ PL_VK_NAME(type, (uintptr_t) (obj), name) // Helper functions to wrap and unwrap non-dispatchable handles into pointers. // Note that wrap/unwrap must always be used linearly. #if VK_USE_64_BIT_PTR_DEFINES == 1 #define vk_wrap_handle(h) (h) #define vk_unwrap_handle(h) (h) #elif UINTPTR_MAX >= UINT64_MAX #define vk_wrap_handle(h) ((void *) (uintptr_t) (h)) #define vk_unwrap_handle(h) ((uint64_t) (uintptr_t) (h)) #else static inline void *vk_wrap_handle(uint64_t h) { uint64_t *wrapper = malloc(sizeof(h)); assert(wrapper); *wrapper = h; return wrapper; } static inline uint64_t vk_unwrap_handle(void *h) { uint64_t *wrapper = h; uint64_t ret = *wrapper; free(wrapper); return ret; } #endif libplacebo-v7.349.0/src/vulkan/utils_gen.c.j2000066400000000000000000000057411463457750100207410ustar00rootroot00000000000000#define VK_ENABLE_BETA_EXTENSIONS #include "vulkan/utils.h" const char *vk_res_str(VkResult res) { switch (res) { {% for res in vkresults %} case {{ res }}: return "{{ res }}"; {% endfor %} default: return "unknown error"; } } const char *vk_fmt_name(VkFormat fmt) { switch (fmt) { {% for fmt in vkformats %} case {{ fmt }}: return "{{ fmt }}"; {% endfor %} default: return "unknown format"; } } const char *vk_csp_name(VkColorSpaceKHR csp) { switch (csp) { {% for csp in vkspaces %} case {{ csp }}: return "{{ csp }}"; {% endfor %} default: return "unknown color space"; } } const char *vk_handle_name(VkExternalMemoryHandleTypeFlagBitsKHR handle) { switch (handle) { {% for handle in vkhandles %} case {{ handle }}: return "{{ handle }}"; {% endfor %} default: return "unknown handle type"; } } const char *vk_alpha_mode(VkCompositeAlphaFlagsKHR alpha) { switch (alpha) { {% for mode in vkalphas %} case {{ mode }}: return "{{ mode }}"; {% endfor %} default: return "unknown alpha mode"; } } const char *vk_surface_transform(VkSurfaceTransformFlagsKHR tf) { switch (tf) { {% for tf in vktransforms %} case {{ tf }}: return "{{ tf }}"; {% endfor %} default: return "unknown surface transform"; } } const char *vk_obj_type(VkObjectType obj) { switch (obj) { {% for obj in vkobjects %} case {{ obj.enum }}: return "{{ obj.name }}"; {% endfor %} default: return "unknown object"; } } size_t vk_struct_size(VkStructureType stype) { switch (stype) { {% for struct in vkstructs %} case {{ struct.stype }}: return sizeof({{ struct.name }}); {% endfor %} default: return 0; } } uint32_t vk_ext_promoted_ver(const char *extension) { {% for ext in vkexts %} {% if ext.promoted_ver %} if (!strcmp(extension, "{{ ext.name }}")) return {{ ext.promoted_ver }}; {% endif %} {% endfor %} return 0; } void vk_features_normalize(void 
*alloc, const VkPhysicalDeviceFeatures2 *fin, uint32_t api_ver, VkPhysicalDeviceFeatures2 *out) { for (const VkBaseInStructure *in = (void *) fin; in; in = in->pNext) { switch (in->sType) { default: break; {% for fs in vkfeatures %} case {{ fs.stype }}: { const {{ fs.name }} *i = (const void *) in; {% for f in fs.features %} if (i->{{ f.name }}) { {% for r in f.replacements %} {% if r.core_ver %} if (!api_ver || api_ver >= {{ r.core_ver }}) {% elif r.max_ver %} if (!api_ver || api_ver < {{ r.max_ver }}) {% endif %} {% if fs.is_base %} out->{{ f.name }} = true; {% else %} (({{ r.name }} *) vk_chain_alloc(alloc, out, {{ r.stype }}))->{{ f.name }} = true; {% endif %} {% endfor %} } {% endfor %} break; } {% endfor %} } } } const VkAccessFlags2 vk_access_read = {{ '0x%x' % vkaccess.read }}LLU; const VkAccessFlags2 vk_access_write = {{ '0x%x' % vkaccess.write }}LLU; libplacebo-v7.349.0/src/vulkan/utils_gen.py000066400000000000000000000176671463457750100206470ustar00rootroot00000000000000#!/usr/bin/env python3 # # This file is part of libplacebo. # # libplacebo is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public # License as published by the Free Software Foundation; either # version 2.1 of the License, or (at your option) any later version. # # libplacebo is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU Lesser General Public License for more details. # # You should have received a copy of the GNU Lesser General Public # License along with libplacebo. If not, see . import os.path import re import sys import xml.etree.ElementTree as ET try: import jinja2 except ModuleNotFoundError: print('Module \'jinja2\' not found, please install \'python3-Jinja2\' or ' 'an equivalent package on your system! 
Alternatively, run ' '`git submodule update --init` followed by `meson --wipe`.', file=sys.stderr) sys.exit(1) TEMPLATE = jinja2.Environment( loader = jinja2.FileSystemLoader(searchpath=os.path.dirname(__file__)), trim_blocks=True, ).get_template('utils_gen.c.j2') class Obj(object): def __init__(self, **kwargs): self.__dict__.update(kwargs) class VkXML(ET.ElementTree): def blacklist_block(self, req): for t in req.iterfind('type'): self.blacklist_types.add(t.attrib['name']) for e in req.iterfind('enum'): self.blacklist_enums.add(e.attrib['name']) def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.blacklist_types = set() self.blacklist_enums = set() for f in self.iterfind('feature'): # Feature block for non-Vulkan API if not 'vulkan' in f.attrib['api'].split(','): for r in f.iterfind('require'): self.blacklist_block(r) for e in self.iterfind('extensions/extension'): # Entire extension is unsupported on vulkan or platform-specifid if not 'vulkan' in e.attrib['supported'].split(',') or 'platform' in e.attrib: for r in e.iterfind('require'): self.blacklist_block(r) continue # Only individual blocks are API-specific for r in e.iterfind('require[@api]'): if not 'vulkan' in r.attrib['api'].split(','): self.blacklist_block(r) def findall_enum(self, name): for e in self.iterfind('enums[@name="{0}"]/enum'.format(name)): if not 'alias' in e.attrib: if not e.attrib['name'] in self.blacklist_enums: yield e for e in self.iterfind('.//enum[@extends="{0}"]'.format(name)): if not 'alias' in e.attrib: if not e.attrib['name'] in self.blacklist_enums: yield e def findall_type(self, category): for t in self.iterfind('types/type[@category="{0}"]'.format(category)): name = t.attrib.get('name') or t.find('name').text if name in self.blacklist_types: continue yield t def get_vkenum(registry, enum): for e in registry.findall_enum(enum): yield e.attrib['name'] def get_vkobjects(registry): for t in registry.findall_type('handle'): if 'objtypeenum' in t.attrib: yield Obj(enum = t.attrib['objtypeenum'], name = t.find('name').text) def get_vkstructs(registry): for t in registry.findall_type('struct'): stype = None for m in t.iterfind('member'): if m.find('name').text == 'sType': stype = m break if stype is not None and 'values' in stype.attrib: yield Obj(stype = stype.attrib['values'], name = t.attrib['name']) def get_vkaccess(registry): access = Obj(read = 0, write = 0) for e in registry.findall_enum('VkAccessFlagBits2'): if '_READ_' in e.attrib['name']: access.read |= 1 << int(e.attrib['bitpos']) if '_WRITE_' in e.attrib['name']: access.write |= 1 << int(e.attrib['bitpos']) return access def get_vkexts(registry): for e in registry.iterfind('extensions/extension'): promoted_ver = None if res := re.match(r'VK_VERSION_(\d)_(\d)', e.attrib.get('promotedto', '')): promoted_ver = 'VK_API_VERSION_{0}_{1}'.format(res[1], res[2]) yield Obj(name = e.attrib['name'], promoted_ver = promoted_ver) def get_vkfeatures(registry): structs = []; featuremap = {}; # features -> [struct] for t in registry.findall_type('struct'): sname = t.attrib['name'] is_base = sname == 'VkPhysicalDeviceFeatures' extends = t.attrib.get('structextends', []) if is_base: sname = 'VkPhysicalDeviceFeatures2' stype = 'VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2' elif not 'VkPhysicalDeviceFeatures2' in extends: continue features = [] for f in t.iterfind('member'): if f.find('type').text == 'VkStructureType': stype = f.attrib['values'] elif f.find('type').text == 'VkBool32': fname = f.find('name').text if is_base: fname = 'features.' 
+ fname features.append(Obj(name = fname)) core_ver = None if res := re.match(r'VkPhysicalDeviceVulkan(\d)(\d)Features', sname): core_ver = 'VK_API_VERSION_{0}_{1}'.format(res[1], res[2]) struct = Obj(name = sname, stype = stype, core_ver = core_ver, is_base = is_base, features = features) structs.append(struct) for f in features: featuremap.setdefault(f.name, []).append(struct) for s in structs: for f in s.features: f.replacements = featuremap[f.name] core_ver = next(( r.core_ver for r in f.replacements if r.core_ver ), None) for r in f.replacements: if not r.core_ver: r.max_ver = core_ver yield from structs def find_registry_xml(datadir): registry_paths = [ '{0}/vulkan/registry/vk.xml'.format(datadir), '$MINGW_PREFIX/share/vulkan/registry/vk.xml', '%VULKAN_SDK%/share/vulkan/registry/vk.xml', '$VULKAN_SDK/share/vulkan/registry/vk.xml', '/usr/share/vulkan/registry/vk.xml', ] for p in registry_paths: path = os.path.expandvars(p) if os.path.isfile(path): print('Found vk.xml: {0}'.format(path)) return path print('Could not find the vulkan registry (vk.xml), please specify its ' 'location manually using the -Dvulkan-registry=/path/to/vk.xml ' 'option!', file=sys.stderr) sys.exit(1) if __name__ == '__main__': assert len(sys.argv) == 4 datadir = sys.argv[1] xmlfile = sys.argv[2] outfile = sys.argv[3] if not xmlfile or xmlfile == '': xmlfile = find_registry_xml(datadir) registry = VkXML(ET.parse(xmlfile)) with open(outfile, 'w') as f: f.write(TEMPLATE.render( vkresults = get_vkenum(registry, 'VkResult'), vkformats = get_vkenum(registry, 'VkFormat'), vkspaces = get_vkenum(registry, 'VkColorSpaceKHR'), vkhandles = get_vkenum(registry, 'VkExternalMemoryHandleTypeFlagBits'), vkalphas = get_vkenum(registry, 'VkCompositeAlphaFlagBitsKHR'), vktransforms = get_vkenum(registry, 'VkSurfaceTransformFlagBitsKHR'), vkobjects = get_vkobjects(registry), vkstructs = get_vkstructs(registry), vkaccess = get_vkaccess(registry), vkexts = get_vkexts(registry), vkfeatures = get_vkfeatures(registry), )) libplacebo-v7.349.0/tools/000077500000000000000000000000001463457750100153345ustar00rootroot00000000000000libplacebo-v7.349.0/tools/glsl_preproc/000077500000000000000000000000001463457750100200275ustar00rootroot00000000000000libplacebo-v7.349.0/tools/glsl_preproc/macros.py000066400000000000000000000063731463457750100216760ustar00rootroot00000000000000#!/usr/bin/env python3 import re from variables import Var from templates import * from statement import * PATTERN_PRAGMA = re.compile(flags=re.VERBOSE, pattern=r''' \s*\#\s*pragma\s+ # '#pragma' (?P(?: # pragma name GLSL[PHF]? 
))\s* (?P.*)$ # rest of line (pragma body) ''') # Represents a single #pragma macro class Macro(object): PRAGMAS = { 'GLSL': 'SH_BUF_BODY', 'GLSLP': 'SH_BUF_PRELUDE', 'GLSLH': 'SH_BUF_HEADER', 'GLSLF': 'SH_BUF_FOOTER', } def __init__(self, linenr=0, type='GLSL'): self.linenr = linenr self.buf = Macro.PRAGMAS[type] self.name = '_glsl_' + str(linenr) self.body = [] # list of statements self.last = None # previous GLSLBlock (if unterminated) self.vars = VarSet() def needs_single_line(self): if not self.body: return False prev = self.body[-1] return isinstance(prev, BlockStart) and not prev.multiline def push_line(self, line): self.vars.merge(line.vars) if isinstance(line, GLSLLine): if self.last: self.last.append(line) elif self.needs_single_line(): self.body.append(GLSLBlock(line)) else: # start new GLSL block self.last = GLSLBlock(line) self.body.append(self.last) else: self.body.append(line) self.last = None def render_struct(self): return STRUCT_TEMPLATE.render(macro=self) def render_call(self): return CALL_TEMPLATE.render(macro=self) def render_fun(self): return FUNCTION_TEMPLATE.render(macro=self, Var=Var) # yields output lines @staticmethod def process_file(lines, strip=False): macro = None macros = [] for linenr, line_orig in enumerate(lines, start=1): line = line_orig.rstrip() # Strip leading spaces, due to C indent. Skip first pragma line. if macro and leading_spaces is None: leading_spaces = len(line) - len(line.lstrip()) # check for start of macro if not macro: leading_spaces = None if result := re.match(PATTERN_PRAGMA, line): macro = Macro(linenr, type=result['pragma']) line = result['rest'] # strip pragma prefix if macro: if leading_spaces: line = re.sub(f'^\s{{1,{leading_spaces}}}', '', line) if more_lines := line.endswith('\\'): line = line[:-1] if statement := Statement.parse(line, strip=strip, linenr=linenr): macro.push_line(statement) if more_lines: continue # stay in macro else: yield macro.render_call() yield '#line {}\n'.format(linenr + 1) macros.append(macro) macro = None else: yield line_orig if macros: yield '\n// Auto-generated template functions:' for macro in macros: yield macro.render_fun() libplacebo-v7.349.0/tools/glsl_preproc/main.py000077500000000000000000000007441463457750100213350ustar00rootroot00000000000000#!/usr/bin/env python3 import sys import argparse from macros import Macro parser = argparse.ArgumentParser() parser.add_argument('input') parser.add_argument('output') parser.add_argument('-s', '--strip', default=False, action='store_true') args = parser.parse_args() with open(args.input, encoding='utf-8') as infile: with open(args.output, 'w', encoding='utf-8') as outfile: for line in Macro.process_file(infile, strip=args.strip): outfile.write(line) libplacebo-v7.349.0/tools/glsl_preproc/meson.build000066400000000000000000000006071463457750100221740ustar00rootroot00000000000000strip_arg = get_option('debug') ? [] : [ '--strip' ] glsl_preproc = [ python, join_paths(meson.current_source_dir(), 'main.py') ] + \ strip_arg + [ '@INPUT@', '@OUTPUT@' ] glsl_deps = files( 'macros.py', 'statement.py', 'templates.py', 'templates/call.c.j2', 'templates/function.c.j2', 'templates/glsl_block.c.j2', 'templates/struct.c.j2', 'variables.py', ) libplacebo-v7.349.0/tools/glsl_preproc/statement.py000066400000000000000000000240251463457750100224100ustar00rootroot00000000000000import re from templates import GLSL_BLOCK_TEMPLATE from variables import VarSet, slugify VAR_PATTERN = re.compile(flags=re.VERBOSE, pattern=r''' # long form ${ ... 
} syntax \${ (?:\s*(?P(?: # optional type prefix ident # identifiers (always dynamic) | (?:(?:const|dynamic)\s+)? # optional const/dynamic modifiers (?:float|u?int) # base type | swizzle # swizzle mask | (?:i|u)?vecType # vector type (for mask) )):)? (?P[^{}]+) } | \$(?P\w+) # reference to captured variable | @(?P\w+) # reference to locally defined var ''') class FmtSpec(object): def __init__(self, ctype='ident_t', fmtstr='_%hx', wrap_expr=lambda name, expr: expr, fmt_expr=lambda name: name): self.ctype = ctype self.fmtstr = fmtstr self.wrap_expr = wrap_expr self.fmt_expr = fmt_expr @staticmethod def wrap_var(type, dynamic=False): if dynamic: return lambda name, expr: f'sh_var_{type}(sh, "{name}", {expr}, true)' else: return lambda name, expr: f'sh_const_{type}(sh, "{name}", {expr})' @staticmethod def wrap_fn(fn): return lambda name: f'{fn}({name})' VAR_TYPES = { # identifiers: get mapped as-is 'ident': FmtSpec(), # normal variables: get mapped as shader constants 'int': FmtSpec(wrap_expr=FmtSpec.wrap_var('int')), 'uint': FmtSpec(wrap_expr=FmtSpec.wrap_var('uint')), 'float': FmtSpec(wrap_expr=FmtSpec.wrap_var('float')), # constant variables: get printed directly into the source code 'const int': FmtSpec(ctype='int', fmtstr='%d'), 'const uint': FmtSpec(ctype='unsigned', fmtstr='uint(%u)'), 'const float': FmtSpec(ctype='float', fmtstr='float(%f)'), # dynamic variables: get loaded as shader variables 'dynamic int': FmtSpec(wrap_expr=FmtSpec.wrap_var('int', dynamic=True)), 'dynamic uint': FmtSpec(wrap_expr=FmtSpec.wrap_var('uint', dynamic=True)), 'dynamic float': FmtSpec(wrap_expr=FmtSpec.wrap_var('float', dynamic=True)), # component mask types 'swizzle': FmtSpec(ctype='uint8_t', fmtstr='%s', fmt_expr=FmtSpec.wrap_fn('sh_swizzle')), 'ivecType': FmtSpec(ctype='uint8_t', fmtstr='%s', fmt_expr=FmtSpec.wrap_fn('sh_float_type')), 'uvecType': FmtSpec(ctype='uint8_t', fmtstr='%s', fmt_expr=FmtSpec.wrap_fn('sh_float_type')), 'vecType': FmtSpec(ctype='uint8_t', fmtstr='%s', fmt_expr=FmtSpec.wrap_fn('sh_float_type')), } def stringify(value, strip): if strip: value = re.sub(r'(?:\/\*[^\*]*\*\/|\/\/[^\n]+|^\s*)', '', value) return '"' + value.replace('\\', '\\\\').replace('"', '\\"') + '\\n"' def commentify(value, strip): if strip: return '' return '/*' + value.replace('/*', '[[').replace('*/', ']]') + '*/' # Represents a statement + its enclosed variables class Statement(object): def __init__(self, linenr=0): super().__init__() self.linenr = linenr self.vars = VarSet() def add_var(self, ctype, expr, name=None): return self.vars.add_var(ctype, expr, name, self.linenr) def render(self): raise NotImplementedError @staticmethod def parse(text_orig, **kwargs): raise NotImplementedError # Represents a single line of GLSL class GLSLLine(Statement): class GLSLVar(object): # variable reference def __init__(self, fmt, var): self.fmt = fmt self.var = var def __init__(self, text, strip=False, **kwargs): super().__init__(**kwargs) self.refs = [] self.strip = strip # produce two versions of line, one for printf() and one for append() text = text.rstrip() self.rawstr = stringify(text, strip) self.fmtstr = stringify(re.sub(VAR_PATTERN, self.handle_var, text.replace('%', '%%')), strip) def handle_var(self, match): # local @var if match['var']: self.refs.append(match['var']) return '%d' # captured $var type = match['type'] name = match['name'] expr = match['expr'] or name name = name or slugify(expr) fmt = VAR_TYPES[type or 'ident'] self.refs.append(fmt.fmt_expr(self.add_var( ctype = fmt.ctype, expr = fmt.wrap_expr(name, 
expr), name = name, ))) if fmt.ctype == 'ident_t': return commentify(name, self.strip) + fmt.fmtstr else: return fmt.fmtstr # Represents an entire GLSL block class GLSLBlock(Statement): def __init__(self, line): super().__init__(linenr=line.linenr) self.lines = [] self.refs = [] self.append(line) def append(self, line): assert isinstance(line, GLSLLine) self.lines.append(line) self.refs += line.refs self.vars.merge(line.vars) def render(self): return GLSL_BLOCK_TEMPLATE.render(block=self) # Represents a statement which can either take a single line or a block class BlockStart(Statement): def __init__(self, multiline=False, **kwargs): super().__init__(**kwargs) self.multiline = multiline def add_brace(self, text): if self.multiline: text += ' {' return text # Represents an @if class IfCond(BlockStart): def __init__(self, cond, inner=False, **kwargs): super().__init__(**kwargs) self.cond = cond if inner else self.add_var('bool', expr=cond) def render(self): return self.add_brace(f'if ({self.cond})') # Represents an @else class Else(BlockStart): def __init__(self, closing, **kwargs): super().__init__(**kwargs) self.closing = closing def render(self): text = '} else' if self.closing else 'else' return self.add_brace(text) # Represents a normal (integer) @for loop, or an (unsigned 8-bit) bitmask loop class ForLoop(BlockStart): def __init__(self, var, op, bound, **kwargs): super().__init__(**kwargs) self.comps = op == ':' self.bound = self.add_var('uint8_t' if self.comps else 'int', expr=bound) self.var = var self.op = op def render(self): if self.comps: loopstart = f'uint8_t _mask = {self.bound}, {self.var}' loopcond = f'_mask && ({self.var} = __builtin_ctz(_mask), 1)' loopstep = f'_mask &= ~(1u << {self.var})' else: loopstart = f'int {self.var} = 0' loopcond = f'{self.var} {self.op} {self.bound}' loopstep = f'{self.var}++' return self.add_brace(f'for ({loopstart}; {loopcond}; {loopstep})') # Represents a @switch block class Switch(Statement): def __init__(self, expr, **kwargs): super().__init__(**kwargs) self.expr = self.add_var('unsigned', expr=expr) def render(self): return f'switch ({self.expr}) {{' # Represents a @case label class Case(Statement): def __init__(self, label, **kwargs): super().__init__(**kwargs) self.label = label def render(self): return f'case {self.label}:' # Represents a @default line class Default(Statement): def render(self): return 'default:' # Represents a @break line class Break(Statement): def render(self): return 'break;' # Represents a single closing brace class EndBrace(Statement): def render(self): return '}' # Shitty regex-based statement parser PATTERN_IF = re.compile(flags=re.VERBOSE, pattern=r''' @\s*if\s* # '@if' (?P@)? # optional leading @ \((?P.+)\)\s* # (condition) (?P{)?\s* # optional trailing { $''') PATTERN_ELSE = re.compile(flags=re.VERBOSE, pattern=r''' @\s*(?P})?\s* # optional leading } else\s* # 'else' (?P{)?\s* # optional trailing { $''') PATTERN_FOR = re.compile(flags=re.VERBOSE, pattern=r''' @\s*for\s+\( # '@for' ( (?P\w+)\s* # loop variable name (?P(?:\<=?|:))(?=[\w\s])\s* # '<', '<=' or ':', followed by \s or \w (?P[^\s].*)\s* # loop boundary expression \)\s*(?P{)?\s* # ) and optional trailing { $''') PATTERN_SWITCH = re.compile(flags=re.VERBOSE, pattern=r''' @\s*switch\s* # '@switch' \((?P.+)\)\s*{ # switch expression $''') PATTERN_CASE = re.compile(flags=re.VERBOSE, pattern=r''' @\s*case\s* # '@case' (?P